[llvm] [PowerPC] Change `half` to use soft promotion rather than `PromoteFloat` (PR #152632)

Trevor Gross via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 9 23:49:31 PDT 2025


https://github.com/tgross35 updated https://github.com/llvm/llvm-project/pull/152632

>From 45e07fe946949899a607c49e1c2d2ec956856865 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Fri, 8 Aug 2025 05:04:09 -0500
Subject: [PATCH 1/3] [SelectionDAG] Add `f16` soft promotion for `lrint` and
 `lround`

On platforms that soft promote `half`, using `lrint` intrinsics crashes
with the following:

    SoftPromoteHalfOperand Op #0: t5: i32 = lrint t4

    LLVM ERROR: Do not know how to soft promote this operator's operand!
    PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
    Stack dump:
    0.      Program arguments: /Users/tmgross/Documents/projects/llvm/llvm-build/bin/llc -mtriple=riscv32
    1.      Running pass 'Function Pass Manager' on module '<stdin>'.
    2.      Running pass 'RISC-V DAG->DAG Pattern Instruction Selection' on function '@test_lrint_ixx_f16'

Resolve this by adding a soft promotion.

`SoftPromoteHalfOp_FP_TO_XINT` is reused here since it provides the
correct input and output types. It is renamed `SoftPromoteHalfOp_UnaryOp`
to match `PromoteFloatOp_UnaryOp` and similar functions that are used to
handle the same sets of intrinsics.
---
 .../SelectionDAG/LegalizeFloatTypes.cpp       | 17 ++++--
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |  2 +-
 llvm/test/CodeGen/ARM/lrint-conv.ll           | 15 +++---
 llvm/test/CodeGen/LoongArch/lrint-conv.ll     | 33 ++++++++----
 llvm/test/CodeGen/Mips/llrint-conv.ll         | 23 ++++----
 llvm/test/CodeGen/Mips/lrint-conv.ll          | 27 +++++-----
 llvm/test/CodeGen/RISCV/lrint-conv.ll         | 25 ++++++---
 llvm/test/CodeGen/X86/lrint-conv-i32.ll       | 52 ++++++++++++++++---
 8 files changed, 138 insertions(+), 56 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 83bb1dfe86c6a..1a4ce333d6101 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -20,6 +20,7 @@
 
 #include "LegalizeTypes.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
@@ -3737,10 +3738,20 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
     Res = SoftPromoteHalfOp_FAKE_USE(N, OpNo);
     break;
   case ISD::FCOPYSIGN:  Res = SoftPromoteHalfOp_FCOPYSIGN(N, OpNo); break;
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::LLRINT:
+  case ISD::LLROUND:
+  case ISD::LRINT:
+  case ISD::LROUND:
   case ISD::STRICT_FP_TO_SINT:
   case ISD::STRICT_FP_TO_UINT:
-  case ISD::FP_TO_SINT:
-  case ISD::FP_TO_UINT: Res = SoftPromoteHalfOp_FP_TO_XINT(N); break;
+  case ISD::STRICT_LLRINT:
+  case ISD::STRICT_LLROUND:
+  case ISD::STRICT_LRINT:
+  case ISD::STRICT_LROUND:
+    Res = SoftPromoteHalfOp_UnaryOp(N);
+    break;
   case ISD::FP_TO_SINT_SAT:
   case ISD::FP_TO_UINT_SAT:
                         Res = SoftPromoteHalfOp_FP_TO_XINT_SAT(N); break;
@@ -3819,7 +3830,7 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) {
   return DAG.getNode(GetPromotionOpcode(SVT, RVT), SDLoc(N), RVT, Op);
 }
 
-SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_UnaryOp(SDNode *N) {
   EVT RVT = N->getValueType(0);
   bool IsStrict = N->isStrictFPOpcode();
   SDValue Op = N->getOperand(IsStrict ? 1 : 0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 586c3411791f9..a9f9af8b7d07e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -843,7 +843,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue SoftPromoteHalfOp_FAKE_USE(SDNode *N, unsigned OpNo);
   SDValue SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
   SDValue SoftPromoteHalfOp_FP_EXTEND(SDNode *N);
-  SDValue SoftPromoteHalfOp_FP_TO_XINT(SDNode *N);
+  SDValue SoftPromoteHalfOp_UnaryOp(SDNode *N);
   SDValue SoftPromoteHalfOp_FP_TO_XINT_SAT(SDNode *N);
   SDValue SoftPromoteHalfOp_SETCC(SDNode *N);
   SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo);
diff --git a/llvm/test/CodeGen/ARM/lrint-conv.ll b/llvm/test/CodeGen/ARM/lrint-conv.ll
index 9aa95112af533..848b14e48f2d1 100644
--- a/llvm/test/CodeGen/ARM/lrint-conv.ll
+++ b/llvm/test/CodeGen/ARM/lrint-conv.ll
@@ -1,12 +1,15 @@
 ; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft | FileCheck %s --check-prefix=SOFTFP
 ; RUN: llc < %s -mtriple=arm-eabi -float-abi=hard | FileCheck %s --check-prefix=HARDFP
 
-; FIXME: crash
-; define i32 @testmswh_builtin(half %x) {
-; entry:
-;   %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
-;   ret i32 %0
-; }
+; SOFTFP-LABEL: testmswh_builtin:
+; SOFTFP:       bl      lrintf
+; HARDFP-LABEL: testmswh_builtin:
+; HARDFP:       bl      lrintf
+define i32 @testmswh_builtin(half %x) {
+entry:
+  %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
+  ret i32 %0
+}
 
 ; SOFTFP-LABEL: testmsws_builtin:
 ; SOFTFP:       bl      lrintf
diff --git a/llvm/test/CodeGen/LoongArch/lrint-conv.ll b/llvm/test/CodeGen/LoongArch/lrint-conv.ll
index 85de820025614..262d1c16a6486 100644
--- a/llvm/test/CodeGen/LoongArch/lrint-conv.ll
+++ b/llvm/test/CodeGen/LoongArch/lrint-conv.ll
@@ -5,16 +5,31 @@
 ; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=loongarch64 | FileCheck %s --check-prefixes=LA64-I32
 ; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=loongarch64 | FileCheck %s --check-prefixes=LA64-I64
 
-; FIXME: crash
-; define ITy @test_lrint_ixx_f16(half %x) nounwind {
-;   %res = tail call ITy @llvm.lrint.ITy.f16(half %x)
-;   ret ITy %res
-; }
+define ITy @test_lrint_ixx_f16(half %x) nounwind {
+; LA32-LABEL: test_lrint_ixx_f16:
+; LA32:         bl lrintf
+;
+; LA64-I32-LABEL: test_lrint_ixx_f16:
+; LA64-I32:         pcaddu18i $ra, %call36(lrintf)
+;
+; LA64-I64-LABEL: test_lrint_ixx_f16:
+; LA64-I64:         pcaddu18i $t8, %call36(lrintf)
+  %res = tail call ITy @llvm.lrint.ITy.f16(half %x)
+  ret ITy %res
+}
 
-; define ITy @test_llrint_ixx_f16(half %x) nounwind {
-;   %res = tail call ITy @llvm.llrint.ITy.f16(half %x)
-;   ret ITy %res
-; }
+define ITy @test_llrint_ixx_f16(half %x) nounwind {
+; LA32-LABEL: test_llrint_ixx_f16:
+; LA32:         bl llrintf
+;
+; LA64-I32-LABEL: test_llrint_ixx_f16:
+; LA64-I32:         pcaddu18i $ra, %call36(llrintf)
+;
+; LA64-I64-LABEL: test_llrint_ixx_f16:
+; LA64-I64:         pcaddu18i $t8, %call36(llrintf)
+  %res = tail call ITy @llvm.llrint.ITy.f16(half %x)
+  ret ITy %res
+}
 
 define ITy @test_lrint_ixx_f32(float %x) nounwind {
 ; LA32-LABEL: test_lrint_ixx_f32:
diff --git a/llvm/test/CodeGen/Mips/llrint-conv.ll b/llvm/test/CodeGen/Mips/llrint-conv.ll
index 592d40c0f65aa..8eaef5d4135bb 100644
--- a/llvm/test/CodeGen/Mips/llrint-conv.ll
+++ b/llvm/test/CodeGen/Mips/llrint-conv.ll
@@ -1,19 +1,18 @@
 ; RUN: llc < %s -mtriple=mips64el -mattr=+soft-float | FileCheck %s
 ; RUN: llc < %s -mtriple=mips -mattr=+soft-float     | FileCheck %s
 
-; FIXME: crash
-; define signext i32 @testmswh(half %x) {
-; entry:
-;   %0 = tail call i64 @llvm.llrint.i64.f16(half %x)
-;   %conv = trunc i64 %0 to i32
-;   ret i32 %conv
-; }
+define signext i32 @testmswh(half %x) {
+entry:
+  %0 = tail call i64 @llvm.llrint.i64.f16(half %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
 
-; define i64 @testmsxh(half %x) {
-; entry:
-;   %0 = tail call i64 @llvm.llrint.i64.f16(half %x)
-;   ret i64 %0
-; }
+define i64 @testmsxh(half %x) {
+entry:
+  %0 = tail call i64 @llvm.llrint.i64.f16(half %x)
+  ret i64 %0
+}
 
 define signext i32 @testmsws(float %x) {
 ; CHECK-LABEL: testmsws:
diff --git a/llvm/test/CodeGen/Mips/lrint-conv.ll b/llvm/test/CodeGen/Mips/lrint-conv.ll
index 6d2e392675f1c..64c5cb9ac5b07 100644
--- a/llvm/test/CodeGen/Mips/lrint-conv.ll
+++ b/llvm/test/CodeGen/Mips/lrint-conv.ll
@@ -1,19 +1,22 @@
 ; RUN: llc < %s -mtriple=mips64el -mattr=+soft-float | FileCheck %s
 ; RUN: llc < %s -mtriple=mips -mattr=+soft-float     | FileCheck %s
 
-; FIXME: crash
-; define signext i32 @testmswh(half %x) {
-; entry:
-;   %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
-;   %conv = trunc i64 %0 to i32
-;   ret i32 %conv
-; }
+define signext i32 @testmswh(half %x) {
+; CHECK-LABEL: testmswh:
+; CHECK:       jal     lrintf
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
 
-; define i64 @testmsxh(half %x) {
-; entry:
-;   %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
-;   ret i64 %0
-; }
+define i64 @testmsxh(half %x) {
+; CHECK-LABEL: testmsxh:
+; CHECK:       jal     lrintf
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
+  ret i64 %0
+}
 
 define signext i32 @testmsws(float %x) {
 ; CHECK-LABEL: testmsws:
diff --git a/llvm/test/CodeGen/RISCV/lrint-conv.ll b/llvm/test/CodeGen/RISCV/lrint-conv.ll
index d3af2153588a1..ecb6bd0932ef3 100644
--- a/llvm/test/CodeGen/RISCV/lrint-conv.ll
+++ b/llvm/test/CodeGen/RISCV/lrint-conv.ll
@@ -5,14 +5,25 @@
 ; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64
 ; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64
 
-; FIXME: crash
-; define ITy @test_lrint_ixx_f16(half %x) nounwind {
-;   %res = tail call ITy @llvm.lrint.ITy.f16(half %x)
-; }
+define ITy @test_lrint_ixx_f16(half %x) nounwind {
+; RV32-LABEL: test_lrint_ixx_f16:
+; RV32:         call lrintf
+;
+; RV64-LABEL: test_lrint_ixx_f16:
+; RV64:         call lrintf
+  %res = tail call ITy @llvm.lrint.ITy.f16(half %x)
+  ret ITy %res
+}
 
-; define ITy @test_llrint_ixx_f16(half %x) nounwind {
-;   %res = tail call ITy @llvm.llrint.ITy.f16(half %x)
-; }
+define ITy @test_llrint_ixx_f16(half %x) nounwind {
+; RV32-LABEL: test_llrint_ixx_f16:
+; RV32:         call llrintf
+;
+; RV64-LABEL: test_llrint_ixx_f16:
+; RV64:         call llrintf
+  %res = tail call ITy @llvm.llrint.ITy.f16(half %x)
+  ret ITy %res
+}
 
 define ITy @test_lrint_ixx_f32(float %x) nounwind {
 ; RV32-LABEL: test_lrint_ixx_f32:
diff --git a/llvm/test/CodeGen/X86/lrint-conv-i32.ll b/llvm/test/CodeGen/X86/lrint-conv-i32.ll
index 3c50aea1095f4..5c0a64f1477e6 100644
--- a/llvm/test/CodeGen/X86/lrint-conv-i32.ll
+++ b/llvm/test/CodeGen/X86/lrint-conv-i32.ll
@@ -7,12 +7,52 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX
 
-; FIXME: crash
-; define i32 @testmswh(half %x) nounwind {
-; entry:
-;   %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
-;   ret i32 %0
-; }
+define i32 @testmswh(half %x) nounwind {
+; X86-NOSSE-LABEL: testmswh:
+; X86-NOSSE:       # %bb.0: # %entry
+; X86-NOSSE-NEXT:    pushl %eax
+; X86-NOSSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT:    pushl %eax
+; X86-NOSSE-NEXT:    calll __extendhfsf2
+; X86-NOSSE-NEXT:    addl $4, %esp
+; X86-NOSSE-NEXT:    fistpl (%esp)
+; X86-NOSSE-NEXT:    movl (%esp), %eax
+; X86-NOSSE-NEXT:    popl %ecx
+; X86-NOSSE-NEXT:    retl
+;
+; X86-SSE2-LABEL: testmswh:
+; X86-SSE2:       # %bb.0: # %entry
+; X86-SSE2-NEXT:    subl $8, %esp
+; X86-SSE2-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
+; X86-SSE2-NEXT:    pextrw $0, %xmm0, %eax
+; X86-SSE2-NEXT:    movw %ax, (%esp)
+; X86-SSE2-NEXT:    calll __extendhfsf2
+; X86-SSE2-NEXT:    fstps (%esp)
+; X86-SSE2-NEXT:    calll rintf
+; X86-SSE2-NEXT:    fstps (%esp)
+; X86-SSE2-NEXT:    calll __truncsfhf2
+; X86-SSE2-NEXT:    pextrw $0, %xmm0, %eax
+; X86-SSE2-NEXT:    movw %ax, (%esp)
+; X86-SSE2-NEXT:    calll __extendhfsf2
+; X86-SSE2-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT:    cvttss2si {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    addl $8, %esp
+; X86-SSE2-NEXT:    retl
+;
+; X64-SSE-LABEL: testmswh:
+; X64-SSE:       # %bb.0: # %entry
+; X64-SSE-NEXT:    pushq %rax
+; X64-SSE-NEXT:    callq __extendhfsf2@PLT
+; X64-SSE-NEXT:    callq rintf@PLT
+; X64-SSE-NEXT:    callq __truncsfhf2@PLT
+; X64-SSE-NEXT:    callq __extendhfsf2@PLT
+; X64-SSE-NEXT:    cvttss2si %xmm0, %eax
+; X64-SSE-NEXT:    popq %rcx
+; X64-SSE-NEXT:    retq
+entry:
+  %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
+  ret i32 %0
+}
 
 define i32 @testmsws(float %x) nounwind {
 ; X86-NOSSE-LABEL: testmsws:

>From 4e5ed79a4fb07e1a6f7bed63f7dff17274214416 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Wed, 6 Aug 2025 07:04:23 +0000
Subject: [PATCH 2/3] [PowerPC] Extend and update the test for `half` support
 (NFC)

`f16` is more functional than just a storage type on the platform,
though it does have some codegen issues [1]. To prepare for future
changes, do the following nonfunctional updates to the existing `half`
test:

* Add tests for passing and returning the type directly.
* Add tests showing bitcast behavior, which is currently incorrect but
  serves as a baseline.
* Add tests for `fabs` and `copysign` (trivial operations that shouldn't
  require libcalls).
* Add invocations for big-endian and for PPC32.
* Rename the test to `half.ll` to reflect its status, which also matches
  other backends.

[1]: https://github.com/llvm/llvm-project/issues/97975
---
 llvm/test/CodeGen/PowerPC/half.ll             | 2562 +++++++++++++++++
 .../PowerPC/handle-f16-storage-type.ll        | 1281 ---------
 2 files changed, 2562 insertions(+), 1281 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/half.ll
 delete mode 100644 llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll

diff --git a/llvm/test/CodeGen/PowerPC/half.ll b/llvm/test/CodeGen/PowerPC/half.ll
new file mode 100644
index 0000000000000..fe0dccf63af80
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/half.ll
@@ -0,0 +1,2562 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc-unknown-unknown \
+; RUN:   -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \
+; RUN:   --check-prefix=PPC32
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \
+; RUN:   -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \
+; RUN:   --check-prefix=P8
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
+; RUN:   -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -mattr=-hard-float \
+; RUN:   -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \
+; RUN:   --check-prefix=SOFT
+; RUN: llc -mtriple=powerpc64-unknown-unknown \
+; RUN:   -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \
+; RUN:   --check-prefix=BE
+
+; Tests for various operations on half precision float. Much of the test is
+; copied from test/CodeGen/X86/half.ll.
+
+define void @store(half %x, ptr %p) nounwind {
+; PPC32-LABEL: store:
+; PPC32:       # %bb.0:
+; PPC32-NEXT:    mflr r0
+; PPC32-NEXT:    stwu r1, -16(r1)
+; PPC32-NEXT:    stw r0, 20(r1)
+; PPC32-NEXT:    stw r30, 8(r1) # 4-byte Folded Spill
+; PPC32-NEXT:    mr r30, r3
+; PPC32-NEXT:    bl __truncsfhf2
+; PPC32-NEXT:    sth r3, 0(r30)
+; PPC32-NEXT:    lwz r30, 8(r1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz r0, 20(r1)
+; PPC32-NEXT:    addi r1, r1, 16
+; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: store:
+; P8:       # %bb.0:
+; P8-NEXT:    mflr r0
+; P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; P8-NEXT:    stdu r1, -48(r1)
+; P8-NEXT:    std r0, 64(r1)
+; P8-NEXT:    mr r30, r4
+; P8-NEXT:    bl __truncsfhf2
+; P8-NEXT:    nop
+; P8-NEXT:    sth r3, 0(r30)
+; P8-NEXT:    addi r1, r1, 48
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: store:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xscvdphp f0, f1
+; CHECK-NEXT:    stxsihx f0, 0, r4
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: store:
+; SOFT:       # %bb.0:
+; SOFT-NEXT:    sth r3, 0(r4)
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: store:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -128(r1)
+; BE-NEXT:    std r0, 144(r1)
+; BE-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
+; BE-NEXT:    mr r30, r4
+; BE-NEXT:    bl __truncsfhf2
+; BE-NEXT:    nop
+; BE-NEXT:    sth r3, 0(r30)
+; BE-NEXT:    ld r30, 112(r1) # 8-byte Folded Reload
+; BE-NEXT:    addi r1, r1, 128
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+  store half %x, ptr %p
+  ret void
+}
+
+define half @return(ptr %p) nounwind {
+; PPC32-LABEL: return:
+; PPC32:       # %bb.0:
+; PPC32-NEXT:    mflr r0
+; PPC32-NEXT:    stwu r1, -16(r1)
+; PPC32-NEXT:    stw r0, 20(r1)
+; PPC32-NEXT:    lhz r3, 0(r3)
+; PPC32-NEXT:    bl __extendhfsf2
+; PPC32-NEXT:    lwz r0, 20(r1)
+; PPC32-NEXT:    addi r1, r1, 16
+; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: return:
+; P8:       # %bb.0:
+; P8-NEXT:    mflr r0
+; P8-NEXT:    stdu r1, -32(r1)
+; P8-NEXT:    std r0, 48(r1)
+; P8-NEXT:    lhz r3, 0(r3)
+; P8-NEXT:    bl __extendhfsf2
+; P8-NEXT:    nop
+; P8-NEXT:    addi r1, r1, 32
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: return:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lxsihzx f0, 0, r3
+; CHECK-NEXT:    xscvhpdp f1, f0
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: return:
+; SOFT:       # %bb.0:
+; SOFT-NEXT:    lhz r3, 0(r3)
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: return:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -112(r1)
+; BE-NEXT:    std r0, 128(r1)
+; BE-NEXT:    lhz r3, 0(r3)
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
+; BE-NEXT:    addi r1, r1, 112
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+  %r = load half, ptr %p
+  ret half %r
+}
+
+define dso_local double @loadd(ptr nocapture readonly %a) local_unnamed_addr nounwind {
+; PPC32-LABEL: loadd:
+; PPC32:       # %bb.0: # %entry
+; PPC32-NEXT:    mflr r0
+; PPC32-NEXT:    stwu r1, -16(r1)
+; PPC32-NEXT:    stw r0, 20(r1)
+; PPC32-NEXT:    lhz r3, 2(r3)
+; PPC32-NEXT:    bl __extendhfsf2
+; PPC32-NEXT:    lwz r0, 20(r1)
+; PPC32-NEXT:    addi r1, r1, 16
+; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: loadd:
+; P8:       # %bb.0: # %entry
+; P8-NEXT:    mflr r0
+; P8-NEXT:    stdu r1, -32(r1)
+; P8-NEXT:    std r0, 48(r1)
+; P8-NEXT:    lhz r3, 2(r3)
+; P8-NEXT:    bl __extendhfsf2
+; P8-NEXT:    nop
+; P8-NEXT:    addi r1, r1, 32
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: loadd:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi r3, r3, 2
+; CHECK-NEXT:    lxsihzx f0, 0, r3
+; CHECK-NEXT:    xscvhpdp f1, f0
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: loadd:
+; SOFT:       # %bb.0: # %entry
+; SOFT-NEXT:    mflr r0
+; SOFT-NEXT:    stdu r1, -32(r1)
+; SOFT-NEXT:    std r0, 48(r1)
+; SOFT-NEXT:    lhz r3, 2(r3)
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    bl __extendsfdf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    addi r1, r1, 32
+; SOFT-NEXT:    ld r0, 16(r1)
+; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: loadd:
+; BE:       # %bb.0: # %entry
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -112(r1)
+; BE-NEXT:    std r0, 128(r1)
+; BE-NEXT:    lhz r3, 2(r3)
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
+; BE-NEXT:    addi r1, r1, 112
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+entry:
+  %arrayidx = getelementptr inbounds i16, ptr %a, i64 1
+  %0 = load i16, ptr %arrayidx, align 2
+  %1 = tail call double @llvm.convert.from.fp16.f64(i16 %0)
+  ret double %1
+}
+
+declare double @llvm.convert.from.fp16.f64(i16)
+
+define dso_local float @loadf(ptr nocapture readonly %a) local_unnamed_addr nounwind {
+; PPC32-LABEL: loadf:
+; PPC32:       # %bb.0: # %entry
+; PPC32-NEXT:    mflr r0
+; PPC32-NEXT:    stwu r1, -16(r1)
+; PPC32-NEXT:    stw r0, 20(r1)
+; PPC32-NEXT:    lhz r3, 2(r3)
+; PPC32-NEXT:    bl __extendhfsf2
+; PPC32-NEXT:    lwz r0, 20(r1)
+; PPC32-NEXT:    addi r1, r1, 16
+; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: loadf:
+; P8:       # %bb.0: # %entry
+; P8-NEXT:    mflr r0
+; P8-NEXT:    stdu r1, -32(r1)
+; P8-NEXT:    std r0, 48(r1)
+; P8-NEXT:    lhz r3, 2(r3)
+; P8-NEXT:    bl __extendhfsf2
+; P8-NEXT:    nop
+; P8-NEXT:    addi r1, r1, 32
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: loadf:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi r3, r3, 2
+; CHECK-NEXT:    lxsihzx f0, 0, r3
+; CHECK-NEXT:    xscvhpdp f1, f0
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: loadf:
+; SOFT:       # %bb.0: # %entry
+; SOFT-NEXT:    mflr r0
+; SOFT-NEXT:    stdu r1, -32(r1)
+; SOFT-NEXT:    std r0, 48(r1)
+; SOFT-NEXT:    lhz r3, 2(r3)
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    addi r1, r1, 32
+; SOFT-NEXT:    ld r0, 16(r1)
+; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: loadf:
+; BE:       # %bb.0: # %entry
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -112(r1)
+; BE-NEXT:    std r0, 128(r1)
+; BE-NEXT:    lhz r3, 2(r3)
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
+; BE-NEXT:    addi r1, r1, 112
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+entry:
+  %arrayidx = getelementptr inbounds i16, ptr %a, i64 1
+  %0 = load i16, ptr %arrayidx, align 2
+  %1 = tail call float @llvm.convert.from.fp16.f32(i16 %0)
+  ret float %1
+}
+
+declare float @llvm.convert.from.fp16.f32(i16)
+
+define dso_local void @stored(ptr nocapture %a, double %b) local_unnamed_addr nounwind {
+; PPC32-LABEL: stored:
+; PPC32:       # %bb.0: # %entry
+; PPC32-NEXT:    mflr r0
+; PPC32-NEXT:    stwu r1, -16(r1)
+; PPC32-NEXT:    stw r0, 20(r1)
+; PPC32-NEXT:    stw r30, 8(r1) # 4-byte Folded Spill
+; PPC32-NEXT:    mr r30, r3
+; PPC32-NEXT:    bl __truncdfhf2
+; PPC32-NEXT:    sth r3, 0(r30)
+; PPC32-NEXT:    lwz r30, 8(r1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz r0, 20(r1)
+; PPC32-NEXT:    addi r1, r1, 16
+; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: stored:
+; P8:       # %bb.0: # %entry
+; P8-NEXT:    mflr r0
+; P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; P8-NEXT:    stdu r1, -48(r1)
+; P8-NEXT:    std r0, 64(r1)
+; P8-NEXT:    mr r30, r3
+; P8-NEXT:    bl __truncdfhf2
+; P8-NEXT:    nop
+; P8-NEXT:    sth r3, 0(r30)
+; P8-NEXT:    addi r1, r1, 48
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: stored:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xscvdphp f0, f1
+; CHECK-NEXT:    stxsihx f0, 0, r3
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: stored:
+; SOFT:       # %bb.0: # %entry
+; SOFT-NEXT:    mflr r0
+; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    stdu r1, -48(r1)
+; SOFT-NEXT:    mr r30, r3
+; SOFT-NEXT:    mr r3, r4
+; SOFT-NEXT:    std r0, 64(r1)
+; SOFT-NEXT:    bl __truncdfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    clrldi r3, r3, 48
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    sth r3, 0(r30)
+; SOFT-NEXT:    addi r1, r1, 48
+; SOFT-NEXT:    ld r0, 16(r1)
+; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: stored:
+; BE:       # %bb.0: # %entry
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -128(r1)
+; BE-NEXT:    std r0, 144(r1)
+; BE-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
+; BE-NEXT:    mr r30, r3
+; BE-NEXT:    bl __truncdfhf2
+; BE-NEXT:    nop
+; BE-NEXT:    sth r3, 0(r30)
+; BE-NEXT:    ld r30, 112(r1) # 8-byte Folded Reload
+; BE-NEXT:    addi r1, r1, 128
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+entry:
+  %0 = tail call i16 @llvm.convert.to.fp16.f64(double %b)
+  store i16 %0, ptr %a, align 2
+  ret void
+}
+
+declare i16 @llvm.convert.to.fp16.f64(double)
+
+define dso_local void @storef(ptr nocapture %a, float %b) local_unnamed_addr nounwind {
+; PPC32-LABEL: storef:
+; PPC32:       # %bb.0: # %entry
+; PPC32-NEXT:    mflr r0
+; PPC32-NEXT:    stwu r1, -16(r1)
+; PPC32-NEXT:    stw r0, 20(r1)
+; PPC32-NEXT:    stw r30, 8(r1) # 4-byte Folded Spill
+; PPC32-NEXT:    mr r30, r3
+; PPC32-NEXT:    bl __truncsfhf2
+; PPC32-NEXT:    sth r3, 0(r30)
+; PPC32-NEXT:    lwz r30, 8(r1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz r0, 20(r1)
+; PPC32-NEXT:    addi r1, r1, 16
+; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: storef:
+; P8:       # %bb.0: # %entry
+; P8-NEXT:    mflr r0
+; P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; P8-NEXT:    stdu r1, -48(r1)
+; P8-NEXT:    std r0, 64(r1)
+; P8-NEXT:    mr r30, r3
+; P8-NEXT:    bl __truncsfhf2
+; P8-NEXT:    nop
+; P8-NEXT:    sth r3, 0(r30)
+; P8-NEXT:    addi r1, r1, 48
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: storef:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xscvdphp f0, f1
+; CHECK-NEXT:    stxsihx f0, 0, r3
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: storef:
+; SOFT:       # %bb.0: # %entry
+; SOFT-NEXT:    mflr r0
+; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    stdu r1, -48(r1)
+; SOFT-NEXT:    mr r30, r3
+; SOFT-NEXT:    clrldi r3, r4, 32
+; SOFT-NEXT:    std r0, 64(r1)
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    clrldi r3, r3, 48
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    sth r3, 0(r30)
+; SOFT-NEXT:    addi r1, r1, 48
+; SOFT-NEXT:    ld r0, 16(r1)
+; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: storef:
+; BE:       # %bb.0: # %entry
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -128(r1)
+; BE-NEXT:    std r0, 144(r1)
+; BE-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
+; BE-NEXT:    mr r30, r3
+; BE-NEXT:    bl __truncsfhf2
+; BE-NEXT:    nop
+; BE-NEXT:    sth r3, 0(r30)
+; BE-NEXT:    ld r30, 112(r1) # 8-byte Folded Reload
+; BE-NEXT:    addi r1, r1, 128
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+entry:
+  %0 = tail call i16 @llvm.convert.to.fp16.f32(float %b)
+  store i16 %0, ptr %a, align 2
+  ret void
+}
+
+declare i16 @llvm.convert.to.fp16.f32(float)
+define void @test_load_store(ptr %in, ptr %out) nounwind {
+; PPC32-LABEL: test_load_store:
+; PPC32:       # %bb.0:
+; PPC32-NEXT:    lhz r3, 0(r3)
+; PPC32-NEXT:    sth r3, 0(r4)
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: test_load_store:
+; P8:       # %bb.0:
+; P8-NEXT:    lhz r3, 0(r3)
+; P8-NEXT:    sth r3, 0(r4)
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: test_load_store:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lhz r3, 0(r3)
+; CHECK-NEXT:    sth r3, 0(r4)
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: test_load_store:
+; SOFT:       # %bb.0:
+; SOFT-NEXT:    mflr r0
+; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    stdu r1, -48(r1)
+; SOFT-NEXT:    std r0, 64(r1)
+; SOFT-NEXT:    mr r30, r4
+; SOFT-NEXT:    lhz r3, 0(r3)
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    sth r3, 0(r30)
+; SOFT-NEXT:    addi r1, r1, 48
+; SOFT-NEXT:    ld r0, 16(r1)
+; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: test_load_store:
+; BE:       # %bb.0:
+; BE-NEXT:    lhz r3, 0(r3)
+; BE-NEXT:    sth r3, 0(r4)
+; BE-NEXT:    blr
+  %val = load half, ptr %in
+  store half %val, ptr %out
+  ret void
+}
+define i16 @test_bitcast_from_half(ptr %addr) nounwind {
+; PPC32-LABEL: test_bitcast_from_half:
+; PPC32:       # %bb.0:
+; PPC32-NEXT:    lhz r3, 0(r3)
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: test_bitcast_from_half:
+; P8:       # %bb.0:
+; P8-NEXT:    lhz r3, 0(r3)
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: test_bitcast_from_half:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lhz r3, 0(r3)
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: test_bitcast_from_half:
+; SOFT:       # %bb.0:
+; SOFT-NEXT:    lhz r3, 0(r3)
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: test_bitcast_from_half:
+; BE:       # %bb.0:
+; BE-NEXT:    lhz r3, 0(r3)
+; BE-NEXT:    blr
+  %val = load half, ptr %addr
+  %val_int = bitcast half %val to i16
+  ret i16 %val_int
+}
+define void @test_bitcast_to_half(ptr %addr, i16 %in) nounwind {
+; PPC32-LABEL: test_bitcast_to_half:
+; PPC32:       # %bb.0:
+; PPC32-NEXT:    sth r4, 0(r3)
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: test_bitcast_to_half:
+; P8:       # %bb.0:
+; P8-NEXT:    sth r4, 0(r3)
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: test_bitcast_to_half:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    sth r4, 0(r3)
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: test_bitcast_to_half:
+; SOFT:       # %bb.0:
+; SOFT-NEXT:    sth r4, 0(r3)
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: test_bitcast_to_half:
+; BE:       # %bb.0:
+; BE-NEXT:    sth r4, 0(r3)
+; BE-NEXT:    blr
+  %val_fp = bitcast i16 %in to half
+  store half %val_fp, ptr %addr
+  ret void
+}
+
+
+; Checks for https://github.com/llvm/llvm-project/issues/97981
+define half @from_bits(i16 %x) nounwind {
+; PPC32-LABEL: from_bits:
+; PPC32:       # %bb.0:
+; PPC32-NEXT:    mflr r0
+; PPC32-NEXT:    stwu r1, -16(r1)
+; PPC32-NEXT:    clrlwi r3, r3, 16
+; PPC32-NEXT:    stw r0, 20(r1)
+; PPC32-NEXT:    bl __extendhfsf2
+; PPC32-NEXT:    lwz r0, 20(r1)
+; PPC32-NEXT:    addi r1, r1, 16
+; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: from_bits:
+; P8:       # %bb.0:
+; P8-NEXT:    mflr r0
+; P8-NEXT:    stdu r1, -32(r1)
+; P8-NEXT:    clrldi r3, r3, 48
+; P8-NEXT:    std r0, 48(r1)
+; P8-NEXT:    bl __extendhfsf2
+; P8-NEXT:    nop
+; P8-NEXT:    addi r1, r1, 32
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: from_bits:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    clrlwi r3, r3, 16
+; CHECK-NEXT:    mtfprwz f0, r3
+; CHECK-NEXT:    xscvhpdp f1, f0
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: from_bits:
+; SOFT:       # %bb.0:
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: from_bits:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -112(r1)
+; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    std r0, 128(r1)
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
+; BE-NEXT:    addi r1, r1, 112
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+  %res = bitcast i16 %x to half
+  ret half %res
+}
+
+; Reinterprets the bits of a half argument as an i16. The IR is a plain
+; bitcast, so no numeric conversion is requested at the IR level.
+define i16 @to_bits(half %x) nounwind {
+; PPC32-LABEL: to_bits:
+; PPC32:       # %bb.0:
+; PPC32-NEXT:    mflr r0
+; PPC32-NEXT:    stwu r1, -16(r1)
+; PPC32-NEXT:    stw r0, 20(r1)
+; PPC32-NEXT:    bl __truncsfhf2
+; PPC32-NEXT:    clrlwi r3, r3, 16
+; PPC32-NEXT:    lwz r0, 20(r1)
+; PPC32-NEXT:    addi r1, r1, 16
+; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: to_bits:
+; P8:       # %bb.0:
+; P8-NEXT:    mflr r0
+; P8-NEXT:    stdu r1, -32(r1)
+; P8-NEXT:    std r0, 48(r1)
+; P8-NEXT:    bl __truncsfhf2
+; P8-NEXT:    nop
+; P8-NEXT:    clrldi r3, r3, 48
+; P8-NEXT:    addi r1, r1, 32
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: to_bits:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xscvdphp f0, f1
+; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    clrlwi r3, r3, 16
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: to_bits:
+; SOFT:       # %bb.0:
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: to_bits:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -112(r1)
+; BE-NEXT:    std r0, 128(r1)
+; BE-NEXT:    bl __truncsfhf2
+; BE-NEXT:    nop
+; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    addi r1, r1, 112
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+  %res = bitcast half %x to i16
+  ret i16 %res
+}
+
+; Loads a half from %addr and widens it to float with fpext.
+; Most expectation sets below load the raw 16 bits (lhz) and call
+; __extendhfsf2; one set converts inline with xscvhpdp instead.
+define float @test_extend32(ptr %addr) nounwind {
+; PPC32-LABEL: test_extend32:
+; PPC32:       # %bb.0:
+; PPC32-NEXT:    mflr r0
+; PPC32-NEXT:    stwu r1, -16(r1)
+; PPC32-NEXT:    stw r0, 20(r1)
+; PPC32-NEXT:    lhz r3, 0(r3)
+; PPC32-NEXT:    bl __extendhfsf2
+; PPC32-NEXT:    lwz r0, 20(r1)
+; PPC32-NEXT:    addi r1, r1, 16
+; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: test_extend32:
+; P8:       # %bb.0:
+; P8-NEXT:    mflr r0
+; P8-NEXT:    stdu r1, -32(r1)
+; P8-NEXT:    std r0, 48(r1)
+; P8-NEXT:    lhz r3, 0(r3)
+; P8-NEXT:    bl __extendhfsf2
+; P8-NEXT:    nop
+; P8-NEXT:    addi r1, r1, 32
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: test_extend32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lxsihzx f0, 0, r3
+; CHECK-NEXT:    xscvhpdp f1, f0
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: test_extend32:
+; SOFT:       # %bb.0:
+; SOFT-NEXT:    mflr r0
+; SOFT-NEXT:    stdu r1, -32(r1)
+; SOFT-NEXT:    std r0, 48(r1)
+; SOFT-NEXT:    lhz r3, 0(r3)
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    addi r1, r1, 32
+; SOFT-NEXT:    ld r0, 16(r1)
+; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: test_extend32:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -112(r1)
+; BE-NEXT:    std r0, 128(r1)
+; BE-NEXT:    lhz r3, 0(r3)
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
+; BE-NEXT:    addi r1, r1, 112
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+  %val16 = load half, ptr %addr
+  %val32 = fpext half %val16 to float
+  ret float %val32
+}
+; Loads a half from %addr and widens it to double with fpext.
+; Only one expectation set below adds a second libcall (__extendsfdf2) after
+; __extendhfsf2; the others either stop at __extendhfsf2 or convert inline
+; with xscvhpdp.
+define double @test_extend64(ptr %addr) nounwind {
+; PPC32-LABEL: test_extend64:
+; PPC32:       # %bb.0:
+; PPC32-NEXT:    mflr r0
+; PPC32-NEXT:    stwu r1, -16(r1)
+; PPC32-NEXT:    stw r0, 20(r1)
+; PPC32-NEXT:    lhz r3, 0(r3)
+; PPC32-NEXT:    bl __extendhfsf2
+; PPC32-NEXT:    lwz r0, 20(r1)
+; PPC32-NEXT:    addi r1, r1, 16
+; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: test_extend64:
+; P8:       # %bb.0:
+; P8-NEXT:    mflr r0
+; P8-NEXT:    stdu r1, -32(r1)
+; P8-NEXT:    std r0, 48(r1)
+; P8-NEXT:    lhz r3, 0(r3)
+; P8-NEXT:    bl __extendhfsf2
+; P8-NEXT:    nop
+; P8-NEXT:    addi r1, r1, 32
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: test_extend64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lxsihzx f0, 0, r3
+; CHECK-NEXT:    xscvhpdp f1, f0
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: test_extend64:
+; SOFT:       # %bb.0:
+; SOFT-NEXT:    mflr r0
+; SOFT-NEXT:    stdu r1, -32(r1)
+; SOFT-NEXT:    std r0, 48(r1)
+; SOFT-NEXT:    lhz r3, 0(r3)
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    bl __extendsfdf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    addi r1, r1, 32
+; SOFT-NEXT:    ld r0, 16(r1)
+; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: test_extend64:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -112(r1)
+; BE-NEXT:    std r0, 128(r1)
+; BE-NEXT:    lhz r3, 0(r3)
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
+; BE-NEXT:    addi r1, r1, 112
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+  %val16 = load half, ptr %addr
+  %val64 = fpext half %val16 to double
+  ret double %val64
+}
+; Narrows a float argument to half with fptrunc and stores the result to %addr.
+; The libcall-based expectation sets below call __truncsfhf2 and store the
+; returned halfword with sth; one set converts inline (xscvdphp + stxsihx).
+define void @test_trunc32(float %in, ptr %addr) nounwind {
+; PPC32-LABEL: test_trunc32:
+; PPC32:       # %bb.0:
+; PPC32-NEXT:    mflr r0
+; PPC32-NEXT:    stwu r1, -16(r1)
+; PPC32-NEXT:    stw r0, 20(r1)
+; PPC32-NEXT:    stw r30, 8(r1) # 4-byte Folded Spill
+; PPC32-NEXT:    mr r30, r3
+; PPC32-NEXT:    bl __truncsfhf2
+; PPC32-NEXT:    sth r3, 0(r30)
+; PPC32-NEXT:    lwz r30, 8(r1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz r0, 20(r1)
+; PPC32-NEXT:    addi r1, r1, 16
+; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: test_trunc32:
+; P8:       # %bb.0:
+; P8-NEXT:    mflr r0
+; P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; P8-NEXT:    stdu r1, -48(r1)
+; P8-NEXT:    std r0, 64(r1)
+; P8-NEXT:    mr r30, r4
+; P8-NEXT:    bl __truncsfhf2
+; P8-NEXT:    nop
+; P8-NEXT:    sth r3, 0(r30)
+; P8-NEXT:    addi r1, r1, 48
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: test_trunc32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xscvdphp f0, f1
+; CHECK-NEXT:    stxsihx f0, 0, r4
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: test_trunc32:
+; SOFT:       # %bb.0:
+; SOFT-NEXT:    mflr r0
+; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    stdu r1, -48(r1)
+; SOFT-NEXT:    clrldi r3, r3, 32
+; SOFT-NEXT:    std r0, 64(r1)
+; SOFT-NEXT:    mr r30, r4
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    clrldi r3, r3, 48
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    sth r3, 0(r30)
+; SOFT-NEXT:    addi r1, r1, 48
+; SOFT-NEXT:    ld r0, 16(r1)
+; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: test_trunc32:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -128(r1)
+; BE-NEXT:    std r0, 144(r1)
+; BE-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
+; BE-NEXT:    mr r30, r4
+; BE-NEXT:    bl __truncsfhf2
+; BE-NEXT:    nop
+; BE-NEXT:    sth r3, 0(r30)
+; BE-NEXT:    ld r30, 112(r1) # 8-byte Folded Reload
+; BE-NEXT:    addi r1, r1, 128
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+  %val16 = fptrunc float %in to half
+  store half %val16, ptr %addr
+  ret void
+}
+; Narrows a double argument to half with fptrunc and stores the result to %addr.
+; The libcall-based expectation sets below go through __truncdfhf2 and store
+; a halfword with sth; one set converts inline (xscvdphp + stxsihx).
+define void @test_trunc64(double %in, ptr %addr) nounwind {
+; PPC32-LABEL: test_trunc64:
+; PPC32:       # %bb.0:
+; PPC32-NEXT:    mflr r0
+; PPC32-NEXT:    stwu r1, -16(r1)
+; PPC32-NEXT:    stw r0, 20(r1)
+; PPC32-NEXT:    stw r30, 8(r1) # 4-byte Folded Spill
+; PPC32-NEXT:    mr r30, r3
+; PPC32-NEXT:    bl __truncdfhf2
+; PPC32-NEXT:    sth r3, 0(r30)
+; PPC32-NEXT:    lwz r30, 8(r1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz r0, 20(r1)
+; PPC32-NEXT:    addi r1, r1, 16
+; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: test_trunc64:
+; P8:       # %bb.0:
+; P8-NEXT:    mflr r0
+; P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; P8-NEXT:    stdu r1, -48(r1)
+; P8-NEXT:    std r0, 64(r1)
+; P8-NEXT:    mr r30, r4
+; P8-NEXT:    bl __truncdfhf2
+; P8-NEXT:    nop
+; P8-NEXT:    sth r3, 0(r30)
+; P8-NEXT:    addi r1, r1, 48
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: test_trunc64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xscvdphp f0, f1
+; CHECK-NEXT:    stxsihx f0, 0, r4
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: test_trunc64:
+; SOFT:       # %bb.0:
+; SOFT-NEXT:    mflr r0
+; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    stdu r1, -48(r1)
+; SOFT-NEXT:    std r0, 64(r1)
+; SOFT-NEXT:    mr r30, r4
+; SOFT-NEXT:    bl __truncdfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    clrldi r3, r3, 48
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    sth r3, 0(r30)
+; SOFT-NEXT:    addi r1, r1, 48
+; SOFT-NEXT:    ld r0, 16(r1)
+; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: test_trunc64:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -128(r1)
+; BE-NEXT:    std r0, 144(r1)
+; BE-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
+; BE-NEXT:    mr r30, r4
+; BE-NEXT:    bl __truncdfhf2
+; BE-NEXT:    nop
+; BE-NEXT:    sth r3, 0(r30)
+; BE-NEXT:    ld r30, 112(r1) # 8-byte Folded Reload
+; BE-NEXT:    addi r1, r1, 128
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+  %val16 = fptrunc double %in to half
+  store half %val16, ptr %addr
+  ret void
+}
+; Loads a half from %p and converts it to a signed i64 with fptosi.
+; The libcall-based expectation sets below first widen the value through
+; __extendhfsf2; the integer conversion that follows is either an inline
+; instruction or a call to __fixsfdi.
+define i64 @test_fptosi_i64(ptr %p) nounwind {
+; PPC32-LABEL: test_fptosi_i64:
+; PPC32:       # %bb.0:
+; PPC32-NEXT:    mflr r0
+; PPC32-NEXT:    stwu r1, -16(r1)
+; PPC32-NEXT:    stw r0, 20(r1)
+; PPC32-NEXT:    lhz r3, 0(r3)
+; PPC32-NEXT:    bl __extendhfsf2
+; PPC32-NEXT:    bl __fixsfdi
+; PPC32-NEXT:    lwz r0, 20(r1)
+; PPC32-NEXT:    addi r1, r1, 16
+; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: test_fptosi_i64:
+; P8:       # %bb.0:
+; P8-NEXT:    mflr r0
+; P8-NEXT:    stdu r1, -32(r1)
+; P8-NEXT:    std r0, 48(r1)
+; P8-NEXT:    lhz r3, 0(r3)
+; P8-NEXT:    bl __extendhfsf2
+; P8-NEXT:    nop
+; P8-NEXT:    xscvdpsxds f0, f1
+; P8-NEXT:    mffprd r3, f0
+; P8-NEXT:    addi r1, r1, 32
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: test_fptosi_i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lhz r3, 0(r3)
+; CHECK-NEXT:    mtfprwz f0, r3
+; CHECK-NEXT:    xscvhpdp f0, f0
+; CHECK-NEXT:    xscvdpsxds f0, f0
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: test_fptosi_i64:
+; SOFT:       # %bb.0:
+; SOFT-NEXT:    mflr r0
+; SOFT-NEXT:    stdu r1, -32(r1)
+; SOFT-NEXT:    std r0, 48(r1)
+; SOFT-NEXT:    lhz r3, 0(r3)
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    bl __fixsfdi
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    addi r1, r1, 32
+; SOFT-NEXT:    ld r0, 16(r1)
+; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: test_fptosi_i64:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -128(r1)
+; BE-NEXT:    std r0, 144(r1)
+; BE-NEXT:    lhz r3, 0(r3)
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
+; BE-NEXT:    fctidz f0, f1
+; BE-NEXT:    stfd f0, 120(r1)
+; BE-NEXT:    ld r3, 120(r1)
+; BE-NEXT:    addi r1, r1, 128
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+  %a = load half, ptr %p, align 2
+  %r = fptosi half %a to i64
+  ret i64 %r
+}
+; Converts a signed i64 argument to half with sitofp and stores it to %p.
+; In the libcall-based expectation sets below the value is first produced as
+; a float (inline or via __floatdisf) and then narrowed by __truncsfhf2
+; before the sth store.
+define void @test_sitofp_i64(i64 %a, ptr %p) nounwind {
+; PPC32-LABEL: test_sitofp_i64:
+; PPC32:       # %bb.0:
+; PPC32-NEXT:    mflr r0
+; PPC32-NEXT:    stwu r1, -16(r1)
+; PPC32-NEXT:    stw r0, 20(r1)
+; PPC32-NEXT:    stw r30, 8(r1) # 4-byte Folded Spill
+; PPC32-NEXT:    mr r30, r5
+; PPC32-NEXT:    bl __floatdisf
+; PPC32-NEXT:    bl __truncsfhf2
+; PPC32-NEXT:    sth r3, 0(r30)
+; PPC32-NEXT:    lwz r30, 8(r1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz r0, 20(r1)
+; PPC32-NEXT:    addi r1, r1, 16
+; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: test_sitofp_i64:
+; P8:       # %bb.0:
+; P8-NEXT:    mflr r0
+; P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; P8-NEXT:    stdu r1, -48(r1)
+; P8-NEXT:    mtfprd f0, r3
+; P8-NEXT:    std r0, 64(r1)
+; P8-NEXT:    mr r30, r4
+; P8-NEXT:    xscvsxdsp f1, f0
+; P8-NEXT:    bl __truncsfhf2
+; P8-NEXT:    nop
+; P8-NEXT:    sth r3, 0(r30)
+; P8-NEXT:    addi r1, r1, 48
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: test_sitofp_i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    xscvsxdsp f0, f0
+; CHECK-NEXT:    xscvdphp f0, f0
+; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    sth r3, 0(r4)
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: test_sitofp_i64:
+; SOFT:       # %bb.0:
+; SOFT-NEXT:    mflr r0
+; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    stdu r1, -48(r1)
+; SOFT-NEXT:    std r0, 64(r1)
+; SOFT-NEXT:    mr r30, r4
+; SOFT-NEXT:    bl __floatdisf
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    clrldi r3, r3, 32
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    clrldi r3, r3, 48
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    sth r3, 0(r30)
+; SOFT-NEXT:    addi r1, r1, 48
+; SOFT-NEXT:    ld r0, 16(r1)
+; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: test_sitofp_i64:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -144(r1)
+; BE-NEXT:    sradi r5, r3, 53
+; BE-NEXT:    std r0, 160(r1)
+; BE-NEXT:    addi r5, r5, 1
+; BE-NEXT:    cmpldi r5, 1
+; BE-NEXT:    std r30, 128(r1) # 8-byte Folded Spill
+; BE-NEXT:    mr r30, r4
+; BE-NEXT:    ble cr0, .LBB16_2
+; BE-NEXT:  # %bb.1:
+; BE-NEXT:    clrldi r4, r3, 53
+; BE-NEXT:    addi r4, r4, 2047
+; BE-NEXT:    or r3, r4, r3
+; BE-NEXT:    rldicr r3, r3, 0, 52
+; BE-NEXT:  .LBB16_2:
+; BE-NEXT:    std r3, 120(r1)
+; BE-NEXT:    lfd f0, 120(r1)
+; BE-NEXT:    fcfid f0, f0
+; BE-NEXT:    frsp f1, f0
+; BE-NEXT:    bl __truncsfhf2
+; BE-NEXT:    nop
+; BE-NEXT:    sth r3, 0(r30)
+; BE-NEXT:    ld r30, 128(r1) # 8-byte Folded Reload
+; BE-NEXT:    addi r1, r1, 144
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+  %r = sitofp i64 %a to half
+  store half %r, ptr %p
+  ret void
+}
+; Loads a half from %p and converts it to an unsigned i64 with fptoui.
+; The libcall-based expectation sets below first widen the value through
+; __extendhfsf2; the integer conversion that follows is either inline code
+; or a call to __fixunssfdi.
+define i64 @test_fptoui_i64(ptr %p) nounwind {
+; PPC32-LABEL: test_fptoui_i64:
+; PPC32:       # %bb.0:
+; PPC32-NEXT:    mflr r0
+; PPC32-NEXT:    stwu r1, -16(r1)
+; PPC32-NEXT:    stw r0, 20(r1)
+; PPC32-NEXT:    lhz r3, 0(r3)
+; PPC32-NEXT:    bl __extendhfsf2
+; PPC32-NEXT:    bl __fixunssfdi
+; PPC32-NEXT:    lwz r0, 20(r1)
+; PPC32-NEXT:    addi r1, r1, 16
+; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: test_fptoui_i64:
+; P8:       # %bb.0:
+; P8-NEXT:    mflr r0
+; P8-NEXT:    stdu r1, -32(r1)
+; P8-NEXT:    std r0, 48(r1)
+; P8-NEXT:    lhz r3, 0(r3)
+; P8-NEXT:    bl __extendhfsf2
+; P8-NEXT:    nop
+; P8-NEXT:    xscvdpuxds f0, f1
+; P8-NEXT:    mffprd r3, f0
+; P8-NEXT:    addi r1, r1, 32
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: test_fptoui_i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lhz r3, 0(r3)
+; CHECK-NEXT:    mtfprwz f0, r3
+; CHECK-NEXT:    xscvhpdp f0, f0
+; CHECK-NEXT:    xscvdpuxds f0, f0
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: test_fptoui_i64:
+; SOFT:       # %bb.0:
+; SOFT-NEXT:    mflr r0
+; SOFT-NEXT:    stdu r1, -32(r1)
+; SOFT-NEXT:    std r0, 48(r1)
+; SOFT-NEXT:    lhz r3, 0(r3)
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    bl __fixunssfdi
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    addi r1, r1, 32
+; SOFT-NEXT:    ld r0, 16(r1)
+; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: test_fptoui_i64:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -128(r1)
+; BE-NEXT:    std r0, 144(r1)
+; BE-NEXT:    lhz r3, 0(r3)
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
+; BE-NEXT:    addis r3, r2, .LCPI17_0@toc@ha
+; BE-NEXT:    lfs f0, .LCPI17_0@toc@l(r3)
+; BE-NEXT:    fsubs f2, f1, f0
+; BE-NEXT:    fcmpu cr0, f1, f0
+; BE-NEXT:    fctidz f2, f2
+; BE-NEXT:    stfd f2, 120(r1)
+; BE-NEXT:    fctidz f2, f1
+; BE-NEXT:    stfd f2, 112(r1)
+; BE-NEXT:    blt cr0, .LBB17_2
+; BE-NEXT:  # %bb.1:
+; BE-NEXT:    ld r3, 120(r1)
+; BE-NEXT:    li r4, 1
+; BE-NEXT:    rldic r4, r4, 63, 0
+; BE-NEXT:    xor r3, r3, r4
+; BE-NEXT:    b .LBB17_3
+; BE-NEXT:  .LBB17_2:
+; BE-NEXT:    ld r3, 112(r1)
+; BE-NEXT:  .LBB17_3:
+; BE-NEXT:    addi r1, r1, 128
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+  %a = load half, ptr %p, align 2
+  %r = fptoui half %a to i64
+  ret i64 %r
+}
+; Converts an unsigned i64 argument to half with uitofp and stores it to %p.
+; In the libcall-based expectation sets below the value is first produced as
+; a float (inline or via __floatundisf) and then narrowed by __truncsfhf2
+; before the sth store.
+define void @test_uitofp_i64(i64 %a, ptr %p) nounwind {
+; PPC32-LABEL: test_uitofp_i64:
+; PPC32:       # %bb.0:
+; PPC32-NEXT:    mflr r0
+; PPC32-NEXT:    stwu r1, -16(r1)
+; PPC32-NEXT:    stw r0, 20(r1)
+; PPC32-NEXT:    stw r30, 8(r1) # 4-byte Folded Spill
+; PPC32-NEXT:    mr r30, r5
+; PPC32-NEXT:    bl __floatundisf
+; PPC32-NEXT:    bl __truncsfhf2
+; PPC32-NEXT:    sth r3, 0(r30)
+; PPC32-NEXT:    lwz r30, 8(r1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz r0, 20(r1)
+; PPC32-NEXT:    addi r1, r1, 16
+; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: test_uitofp_i64:
+; P8:       # %bb.0:
+; P8-NEXT:    mflr r0
+; P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; P8-NEXT:    stdu r1, -48(r1)
+; P8-NEXT:    mtfprd f0, r3
+; P8-NEXT:    std r0, 64(r1)
+; P8-NEXT:    mr r30, r4
+; P8-NEXT:    xscvuxdsp f1, f0
+; P8-NEXT:    bl __truncsfhf2
+; P8-NEXT:    nop
+; P8-NEXT:    sth r3, 0(r30)
+; P8-NEXT:    addi r1, r1, 48
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: test_uitofp_i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    xscvuxdsp f0, f0
+; CHECK-NEXT:    xscvdphp f0, f0
+; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    sth r3, 0(r4)
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: test_uitofp_i64:
+; SOFT:       # %bb.0:
+; SOFT-NEXT:    mflr r0
+; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    stdu r1, -48(r1)
+; SOFT-NEXT:    std r0, 64(r1)
+; SOFT-NEXT:    mr r30, r4
+; SOFT-NEXT:    bl __floatundisf
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    clrldi r3, r3, 48
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    sth r3, 0(r30)
+; SOFT-NEXT:    addi r1, r1, 48
+; SOFT-NEXT:    ld r0, 16(r1)
+; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: test_uitofp_i64:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -144(r1)
+; BE-NEXT:    sradi r5, r3, 53
+; BE-NEXT:    std r0, 160(r1)
+; BE-NEXT:    addi r5, r5, 1
+; BE-NEXT:    cmpldi r5, 1
+; BE-NEXT:    std r30, 128(r1) # 8-byte Folded Spill
+; BE-NEXT:    mr r30, r4
+; BE-NEXT:    bgt cr0, .LBB18_2
+; BE-NEXT:  # %bb.1:
+; BE-NEXT:    mr r4, r3
+; BE-NEXT:    b .LBB18_3
+; BE-NEXT:  .LBB18_2:
+; BE-NEXT:    clrldi r4, r3, 53
+; BE-NEXT:    addi r4, r4, 2047
+; BE-NEXT:    or r4, r4, r3
+; BE-NEXT:    rldicr r4, r4, 0, 52
+; BE-NEXT:  .LBB18_3:
+; BE-NEXT:    rldicl r5, r3, 10, 54
+; BE-NEXT:    clrldi r6, r3, 63
+; BE-NEXT:    std r4, 112(r1)
+; BE-NEXT:    addi r5, r5, 1
+; BE-NEXT:    cmpldi r5, 1
+; BE-NEXT:    rldicl r5, r3, 63, 1
+; BE-NEXT:    or r4, r6, r5
+; BE-NEXT:    ble cr0, .LBB18_5
+; BE-NEXT:  # %bb.4:
+; BE-NEXT:    clrldi r4, r4, 53
+; BE-NEXT:    addi r4, r4, 2047
+; BE-NEXT:    or r4, r4, r5
+; BE-NEXT:    rldicl r4, r4, 53, 11
+; BE-NEXT:    rldicl r4, r4, 11, 1
+; BE-NEXT:  .LBB18_5:
+; BE-NEXT:    cmpdi r3, 0
+; BE-NEXT:    std r4, 120(r1)
+; BE-NEXT:    bc 12, lt, .LBB18_7
+; BE-NEXT:  # %bb.6:
+; BE-NEXT:    lfd f0, 112(r1)
+; BE-NEXT:    fcfid f0, f0
+; BE-NEXT:    frsp f1, f0
+; BE-NEXT:    b .LBB18_8
+; BE-NEXT:  .LBB18_7:
+; BE-NEXT:    lfd f0, 120(r1)
+; BE-NEXT:    fcfid f0, f0
+; BE-NEXT:    frsp f0, f0
+; BE-NEXT:    fadds f1, f0, f0
+; BE-NEXT:  .LBB18_8:
+; BE-NEXT:    bl __truncsfhf2
+; BE-NEXT:    nop
+; BE-NEXT:    sth r3, 0(r30)
+; BE-NEXT:    ld r30, 128(r1) # 8-byte Folded Reload
+; BE-NEXT:    addi r1, r1, 144
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+  %r = uitofp i64 %a to half
+  store half %r, ptr %p
+  ret void
+}
+; Loads <4 x half> from %p and widens every lane to float with fpext.
+; The expectation sets below handle the lanes one at a time: four halfword
+; loads at offsets 0, 2, 4 and 6, each followed by its own conversion
+; (a __extendhfsf2 call, or xscvhpdp in the inline variant).
+define <4 x float> @test_extend32_vec4(ptr %p) nounwind {
+; PPC32-LABEL: test_extend32_vec4:
+; PPC32:       # %bb.0:
+; PPC32-NEXT:    mflr r0
+; PPC32-NEXT:    stwu r1, -48(r1)
+; PPC32-NEXT:    stw r0, 52(r1)
+; PPC32-NEXT:    stw r30, 16(r1) # 4-byte Folded Spill
+; PPC32-NEXT:    mr r30, r3
+; PPC32-NEXT:    lhz r3, 0(r3)
+; PPC32-NEXT:    stfd f29, 24(r1) # 8-byte Folded Spill
+; PPC32-NEXT:    stfd f30, 32(r1) # 8-byte Folded Spill
+; PPC32-NEXT:    stfd f31, 40(r1) # 8-byte Folded Spill
+; PPC32-NEXT:    bl __extendhfsf2
+; PPC32-NEXT:    lhz r3, 2(r30)
+; PPC32-NEXT:    fmr f31, f1
+; PPC32-NEXT:    bl __extendhfsf2
+; PPC32-NEXT:    lhz r3, 4(r30)
+; PPC32-NEXT:    fmr f30, f1
+; PPC32-NEXT:    bl __extendhfsf2
+; PPC32-NEXT:    lhz r3, 6(r30)
+; PPC32-NEXT:    fmr f29, f1
+; PPC32-NEXT:    bl __extendhfsf2
+; PPC32-NEXT:    fmr f4, f1
+; PPC32-NEXT:    fmr f1, f31
+; PPC32-NEXT:    fmr f2, f30
+; PPC32-NEXT:    fmr f3, f29
+; PPC32-NEXT:    lfd f31, 40(r1) # 8-byte Folded Reload
+; PPC32-NEXT:    lfd f30, 32(r1) # 8-byte Folded Reload
+; PPC32-NEXT:    lfd f29, 24(r1) # 8-byte Folded Reload
+; PPC32-NEXT:    lwz r30, 16(r1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz r0, 52(r1)
+; PPC32-NEXT:    addi r1, r1, 48
+; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: test_extend32_vec4:
+; P8:       # %bb.0:
+; P8-NEXT:    mflr r0
+; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    li r4, 48
+; P8-NEXT:    std r0, 128(r1)
+; P8-NEXT:    std r30, 96(r1) # 8-byte Folded Spill
+; P8-NEXT:    mr r30, r3
+; P8-NEXT:    lhz r3, 6(r3)
+; P8-NEXT:    stxvd2x vs61, r1, r4 # 16-byte Folded Spill
+; P8-NEXT:    li r4, 64
+; P8-NEXT:    stxvd2x vs62, r1, r4 # 16-byte Folded Spill
+; P8-NEXT:    li r4, 80
+; P8-NEXT:    stxvd2x vs63, r1, r4 # 16-byte Folded Spill
+; P8-NEXT:    bl __extendhfsf2
+; P8-NEXT:    nop
+; P8-NEXT:    lhz r3, 2(r30)
+; P8-NEXT:    xxlor vs63, f1, f1
+; P8-NEXT:    bl __extendhfsf2
+; P8-NEXT:    nop
+; P8-NEXT:    lhz r3, 4(r30)
+; P8-NEXT:    xxlor vs62, f1, f1
+; P8-NEXT:    bl __extendhfsf2
+; P8-NEXT:    nop
+; P8-NEXT:    lhz r3, 0(r30)
+; P8-NEXT:    xxlor vs61, f1, f1
+; P8-NEXT:    bl __extendhfsf2
+; P8-NEXT:    nop
+; P8-NEXT:    li r3, 80
+; P8-NEXT:    xxmrghd vs0, vs61, vs1
+; P8-NEXT:    xxmrghd vs1, vs63, vs62
+; P8-NEXT:    ld r30, 96(r1) # 8-byte Folded Reload
+; P8-NEXT:    lxvd2x vs63, r1, r3 # 16-byte Folded Reload
+; P8-NEXT:    li r3, 64
+; P8-NEXT:    xvcvdpsp vs34, vs0
+; P8-NEXT:    xvcvdpsp vs35, vs1
+; P8-NEXT:    lxvd2x vs62, r1, r3 # 16-byte Folded Reload
+; P8-NEXT:    li r3, 48
+; P8-NEXT:    lxvd2x vs61, r1, r3 # 16-byte Folded Reload
+; P8-NEXT:    vmrgew v2, v3, v2
+; P8-NEXT:    addi r1, r1, 112
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: test_extend32_vec4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lhz r4, 6(r3)
+; CHECK-NEXT:    mtfprwz f0, r4
+; CHECK-NEXT:    xscvhpdp f0, f0
+; CHECK-NEXT:    lhz r4, 2(r3)
+; CHECK-NEXT:    mtfprwz f1, r4
+; CHECK-NEXT:    xscvhpdp f1, f1
+; CHECK-NEXT:    lhz r4, 4(r3)
+; CHECK-NEXT:    mtfprwz f2, r4
+; CHECK-NEXT:    xscvhpdp f2, f2
+; CHECK-NEXT:    lhz r3, 0(r3)
+; CHECK-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-NEXT:    mtfprwz f3, r3
+; CHECK-NEXT:    xvcvdpsp vs35, vs0
+; CHECK-NEXT:    xscvhpdp f3, f3
+; CHECK-NEXT:    xxmrghd vs2, vs2, vs3
+; CHECK-NEXT:    xvcvdpsp vs34, vs2
+; CHECK-NEXT:    vmrgew v2, v3, v2
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: test_extend32_vec4:
+; SOFT:       # %bb.0:
+; SOFT-NEXT:    mflr r0
+; SOFT-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    stdu r1, -80(r1)
+; SOFT-NEXT:    std r0, 96(r1)
+; SOFT-NEXT:    mr r30, r3
+; SOFT-NEXT:    lhz r3, 0(r3)
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    mr r29, r3
+; SOFT-NEXT:    lhz r3, 2(r30)
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    mr r28, r3
+; SOFT-NEXT:    lhz r3, 4(r30)
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    mr r27, r3
+; SOFT-NEXT:    lhz r3, 6(r30)
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    mr r6, r3
+; SOFT-NEXT:    mr r3, r29
+; SOFT-NEXT:    mr r4, r28
+; SOFT-NEXT:    mr r5, r27
+; SOFT-NEXT:    addi r1, r1, 80
+; SOFT-NEXT:    ld r0, 16(r1)
+; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: test_extend32_vec4:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -176(r1)
+; BE-NEXT:    std r0, 192(r1)
+; BE-NEXT:    std r30, 136(r1) # 8-byte Folded Spill
+; BE-NEXT:    mr r30, r3
+; BE-NEXT:    lhz r3, 0(r3)
+; BE-NEXT:    stfd f29, 152(r1) # 8-byte Folded Spill
+; BE-NEXT:    stfd f30, 160(r1) # 8-byte Folded Spill
+; BE-NEXT:    stfd f31, 168(r1) # 8-byte Folded Spill
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
+; BE-NEXT:    lhz r3, 2(r30)
+; BE-NEXT:    fmr f31, f1
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
+; BE-NEXT:    lhz r3, 4(r30)
+; BE-NEXT:    fmr f30, f1
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
+; BE-NEXT:    lhz r3, 6(r30)
+; BE-NEXT:    fmr f29, f1
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
+; BE-NEXT:    stfs f29, 120(r1)
+; BE-NEXT:    addi r3, r1, 112
+; BE-NEXT:    stfs f30, 116(r1)
+; BE-NEXT:    stfs f31, 112(r1)
+; BE-NEXT:    stfs f1, 124(r1)
+; BE-NEXT:    lvx v2, 0, r3
+; BE-NEXT:    lfd f31, 168(r1) # 8-byte Folded Reload
+; BE-NEXT:    lfd f30, 160(r1) # 8-byte Folded Reload
+; BE-NEXT:    lfd f29, 152(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r30, 136(r1) # 8-byte Folded Reload
+; BE-NEXT:    addi r1, r1, 176
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+  %a = load <4 x half>, ptr %p, align 8
+  %b = fpext <4 x half> %a to <4 x float>
+  ret <4 x float> %b
+}
+; Loads <4 x half> from %p and widens every lane to double with fpext.
+; The expectation sets below handle the lanes one at a time (halfword loads
+; at offsets 0, 2, 4 and 6). Only one set adds a per-lane __extendsfdf2 call
+; after __extendhfsf2.
+define <4 x double> @test_extend64_vec4(ptr %p) nounwind {
+; PPC32-LABEL: test_extend64_vec4:
+; PPC32:       # %bb.0:
+; PPC32-NEXT:    mflr r0
+; PPC32-NEXT:    stwu r1, -48(r1)
+; PPC32-NEXT:    stw r0, 52(r1)
+; PPC32-NEXT:    stw r30, 16(r1) # 4-byte Folded Spill
+; PPC32-NEXT:    mr r30, r3
+; PPC32-NEXT:    lhz r3, 0(r3)
+; PPC32-NEXT:    stfd f29, 24(r1) # 8-byte Folded Spill
+; PPC32-NEXT:    stfd f30, 32(r1) # 8-byte Folded Spill
+; PPC32-NEXT:    stfd f31, 40(r1) # 8-byte Folded Spill
+; PPC32-NEXT:    bl __extendhfsf2
+; PPC32-NEXT:    lhz r3, 2(r30)
+; PPC32-NEXT:    fmr f31, f1
+; PPC32-NEXT:    bl __extendhfsf2
+; PPC32-NEXT:    lhz r3, 4(r30)
+; PPC32-NEXT:    fmr f30, f1
+; PPC32-NEXT:    bl __extendhfsf2
+; PPC32-NEXT:    lhz r3, 6(r30)
+; PPC32-NEXT:    fmr f29, f1
+; PPC32-NEXT:    bl __extendhfsf2
+; PPC32-NEXT:    fmr f4, f1
+; PPC32-NEXT:    fmr f1, f31
+; PPC32-NEXT:    fmr f2, f30
+; PPC32-NEXT:    fmr f3, f29
+; PPC32-NEXT:    lfd f31, 40(r1) # 8-byte Folded Reload
+; PPC32-NEXT:    lfd f30, 32(r1) # 8-byte Folded Reload
+; PPC32-NEXT:    lfd f29, 24(r1) # 8-byte Folded Reload
+; PPC32-NEXT:    lwz r30, 16(r1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz r0, 52(r1)
+; PPC32-NEXT:    addi r1, r1, 48
+; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: test_extend64_vec4:
+; P8:       # %bb.0:
+; P8-NEXT:    mflr r0
+; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    li r4, 48
+; P8-NEXT:    std r0, 128(r1)
+; P8-NEXT:    std r30, 96(r1) # 8-byte Folded Spill
+; P8-NEXT:    mr r30, r3
+; P8-NEXT:    lhz r3, 6(r3)
+; P8-NEXT:    stxvd2x vs61, r1, r4 # 16-byte Folded Spill
+; P8-NEXT:    li r4, 64
+; P8-NEXT:    stxvd2x vs62, r1, r4 # 16-byte Folded Spill
+; P8-NEXT:    li r4, 80
+; P8-NEXT:    stxvd2x vs63, r1, r4 # 16-byte Folded Spill
+; P8-NEXT:    bl __extendhfsf2
+; P8-NEXT:    nop
+; P8-NEXT:    lhz r3, 4(r30)
+; P8-NEXT:    xxlor vs63, f1, f1
+; P8-NEXT:    bl __extendhfsf2
+; P8-NEXT:    nop
+; P8-NEXT:    lhz r3, 2(r30)
+; P8-NEXT:    xxlor vs62, f1, f1
+; P8-NEXT:    bl __extendhfsf2
+; P8-NEXT:    nop
+; P8-NEXT:    lhz r3, 0(r30)
+; P8-NEXT:    xxlor vs61, f1, f1
+; P8-NEXT:    bl __extendhfsf2
+; P8-NEXT:    nop
+; P8-NEXT:    li r3, 80
+; P8-NEXT:    xxmrghd vs35, vs63, vs62
+; P8-NEXT:    xxmrghd vs34, vs61, vs1
+; P8-NEXT:    ld r30, 96(r1) # 8-byte Folded Reload
+; P8-NEXT:    lxvd2x vs63, r1, r3 # 16-byte Folded Reload
+; P8-NEXT:    li r3, 64
+; P8-NEXT:    lxvd2x vs62, r1, r3 # 16-byte Folded Reload
+; P8-NEXT:    li r3, 48
+; P8-NEXT:    lxvd2x vs61, r1, r3 # 16-byte Folded Reload
+; P8-NEXT:    addi r1, r1, 112
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: test_extend64_vec4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lhz r4, 6(r3)
+; CHECK-NEXT:    lhz r5, 4(r3)
+; CHECK-NEXT:    lhz r6, 2(r3)
+; CHECK-NEXT:    lhz r3, 0(r3)
+; CHECK-NEXT:    mtfprwz f0, r3
+; CHECK-NEXT:    mtfprwz f1, r6
+; CHECK-NEXT:    xscvhpdp f0, f0
+; CHECK-NEXT:    xscvhpdp f1, f1
+; CHECK-NEXT:    xxmrghd vs34, vs1, vs0
+; CHECK-NEXT:    mtfprwz f0, r5
+; CHECK-NEXT:    mtfprwz f1, r4
+; CHECK-NEXT:    xscvhpdp f0, f0
+; CHECK-NEXT:    xscvhpdp f1, f1
+; CHECK-NEXT:    xxmrghd vs35, vs1, vs0
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: test_extend64_vec4:
+; SOFT:       # %bb.0:
+; SOFT-NEXT:    mflr r0
+; SOFT-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    stdu r1, -80(r1)
+; SOFT-NEXT:    std r0, 96(r1)
+; SOFT-NEXT:    mr r30, r3
+; SOFT-NEXT:    lhz r3, 0(r3)
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    bl __extendsfdf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    mr r29, r3
+; SOFT-NEXT:    lhz r3, 2(r30)
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    bl __extendsfdf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    mr r28, r3
+; SOFT-NEXT:    lhz r3, 4(r30)
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    bl __extendsfdf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    mr r27, r3
+; SOFT-NEXT:    lhz r3, 6(r30)
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    bl __extendsfdf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    mr r6, r3
+; SOFT-NEXT:    mr r3, r29
+; SOFT-NEXT:    mr r4, r28
+; SOFT-NEXT:    mr r5, r27
+; SOFT-NEXT:    addi r1, r1, 80
+; SOFT-NEXT:    ld r0, 16(r1)
+; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: test_extend64_vec4:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -160(r1)
+; BE-NEXT:    std r0, 176(r1)
+; BE-NEXT:    std r30, 120(r1) # 8-byte Folded Spill
+; BE-NEXT:    mr r30, r3
+; BE-NEXT:    lhz r3, 6(r3)
+; BE-NEXT:    stfd f29, 136(r1) # 8-byte Folded Spill
+; BE-NEXT:    stfd f30, 144(r1) # 8-byte Folded Spill
+; BE-NEXT:    stfd f31, 152(r1) # 8-byte Folded Spill
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
+; BE-NEXT:    lhz r3, 4(r30)
+; BE-NEXT:    fmr f31, f1
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
+; BE-NEXT:    lhz r3, 2(r30)
+; BE-NEXT:    fmr f30, f1
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
+; BE-NEXT:    lhz r3, 0(r30)
+; BE-NEXT:    fmr f29, f1
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
+; BE-NEXT:    fmr f2, f29
+; BE-NEXT:    fmr f3, f30
+; BE-NEXT:    lfd f30, 144(r1) # 8-byte Folded Reload
+; BE-NEXT:    lfd f29, 136(r1) # 8-byte Folded Reload
+; BE-NEXT:    fmr f4, f31
+; BE-NEXT:    lfd f31, 152(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r30, 120(r1) # 8-byte Folded Reload
+; BE-NEXT:    addi r1, r1, 160
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+  %a = load <4 x half>, ptr %p, align 8
+  %b = fpext <4 x half> %a to <4 x double>
+  ret <4 x double> %b
+}
+define void @test_trunc32_vec4(<4 x float> %a, ptr %p) nounwind {
+; PPC32-LABEL: test_trunc32_vec4:
+; PPC32:       # %bb.0:
+; PPC32-NEXT:    mflr r0
+; PPC32-NEXT:    stwu r1, -64(r1)
+; PPC32-NEXT:    stw r0, 68(r1)
+; PPC32-NEXT:    stw r27, 20(r1) # 4-byte Folded Spill
+; PPC32-NEXT:    stw r28, 24(r1) # 4-byte Folded Spill
+; PPC32-NEXT:    stw r29, 28(r1) # 4-byte Folded Spill
+; PPC32-NEXT:    stw r30, 32(r1) # 4-byte Folded Spill
+; PPC32-NEXT:    mr r30, r3
+; PPC32-NEXT:    stfd f29, 40(r1) # 8-byte Folded Spill
+; PPC32-NEXT:    fmr f29, f2
+; PPC32-NEXT:    stfd f30, 48(r1) # 8-byte Folded Spill
+; PPC32-NEXT:    fmr f30, f3
+; PPC32-NEXT:    stfd f31, 56(r1) # 8-byte Folded Spill
+; PPC32-NEXT:    fmr f31, f4
+; PPC32-NEXT:    bl __truncsfhf2
+; PPC32-NEXT:    fmr f1, f29
+; PPC32-NEXT:    mr r29, r3
+; PPC32-NEXT:    bl __truncsfhf2
+; PPC32-NEXT:    fmr f1, f30
+; PPC32-NEXT:    mr r28, r3
+; PPC32-NEXT:    bl __truncsfhf2
+; PPC32-NEXT:    fmr f1, f31
+; PPC32-NEXT:    mr r27, r3
+; PPC32-NEXT:    bl __truncsfhf2
+; PPC32-NEXT:    sth r27, 4(r30)
+; PPC32-NEXT:    sth r28, 2(r30)
+; PPC32-NEXT:    sth r3, 6(r30)
+; PPC32-NEXT:    sth r29, 0(r30)
+; PPC32-NEXT:    lfd f31, 56(r1) # 8-byte Folded Reload
+; PPC32-NEXT:    lfd f30, 48(r1) # 8-byte Folded Reload
+; PPC32-NEXT:    lfd f29, 40(r1) # 8-byte Folded Reload
+; PPC32-NEXT:    lwz r30, 32(r1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz r29, 28(r1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz r28, 24(r1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz r27, 20(r1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz r0, 68(r1)
+; PPC32-NEXT:    addi r1, r1, 64
+; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: test_trunc32_vec4:
+; P8:       # %bb.0:
+; P8-NEXT:    mflr r0
+; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    xxsldwi vs0, vs34, vs34, 3
+; P8-NEXT:    li r3, 48
+; P8-NEXT:    std r0, 128(r1)
+; P8-NEXT:    std r27, 72(r1) # 8-byte Folded Spill
+; P8-NEXT:    std r28, 80(r1) # 8-byte Folded Spill
+; P8-NEXT:    std r29, 88(r1) # 8-byte Folded Spill
+; P8-NEXT:    xscvspdpn f1, vs0
+; P8-NEXT:    std r30, 96(r1) # 8-byte Folded Spill
+; P8-NEXT:    stxvd2x vs63, r1, r3 # 16-byte Folded Spill
+; P8-NEXT:    mr r30, r5
+; P8-NEXT:    vmr v31, v2
+; P8-NEXT:    bl __truncsfhf2
+; P8-NEXT:    nop
+; P8-NEXT:    xxswapd vs0, vs63
+; P8-NEXT:    mr r29, r3
+; P8-NEXT:    xscvspdpn f1, vs0
+; P8-NEXT:    bl __truncsfhf2
+; P8-NEXT:    nop
+; P8-NEXT:    xxsldwi vs0, vs63, vs63, 1
+; P8-NEXT:    mr r28, r3
+; P8-NEXT:    xscvspdpn f1, vs0
+; P8-NEXT:    bl __truncsfhf2
+; P8-NEXT:    nop
+; P8-NEXT:    xscvspdpn f1, vs63
+; P8-NEXT:    mr r27, r3
+; P8-NEXT:    bl __truncsfhf2
+; P8-NEXT:    nop
+; P8-NEXT:    sth r3, 6(r30)
+; P8-NEXT:    li r3, 48
+; P8-NEXT:    sth r27, 4(r30)
+; P8-NEXT:    ld r27, 72(r1) # 8-byte Folded Reload
+; P8-NEXT:    sth r28, 2(r30)
+; P8-NEXT:    sth r29, 0(r30)
+; P8-NEXT:    ld r30, 96(r1) # 8-byte Folded Reload
+; P8-NEXT:    ld r29, 88(r1) # 8-byte Folded Reload
+; P8-NEXT:    lxvd2x vs63, r1, r3 # 16-byte Folded Reload
+; P8-NEXT:    ld r28, 80(r1) # 8-byte Folded Reload
+; P8-NEXT:    addi r1, r1, 112
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: test_trunc32_vec4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xxsldwi vs0, vs34, vs34, 3
+; CHECK-NEXT:    xxsldwi vs1, vs34, vs34, 1
+; CHECK-NEXT:    xscvspdpn f0, vs0
+; CHECK-NEXT:    xscvspdpn f1, vs1
+; CHECK-NEXT:    xscvdphp f0, f0
+; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    xxswapd vs0, vs34
+; CHECK-NEXT:    xscvspdpn f0, vs0
+; CHECK-NEXT:    xscvdphp f0, f0
+; CHECK-NEXT:    xscvdphp f1, f1
+; CHECK-NEXT:    mffprwz r4, f1
+; CHECK-NEXT:    xscvspdpn f1, vs34
+; CHECK-NEXT:    xscvdphp f1, f1
+; CHECK-NEXT:    sth r4, 4(r5)
+; CHECK-NEXT:    mffprwz r4, f0
+; CHECK-NEXT:    sth r3, 0(r5)
+; CHECK-NEXT:    sth r4, 2(r5)
+; CHECK-NEXT:    mffprwz r6, f1
+; CHECK-NEXT:    sth r6, 6(r5)
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: test_trunc32_vec4:
+; SOFT:       # %bb.0:
+; SOFT-NEXT:    mflr r0
+; SOFT-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    stdu r1, -80(r1)
+; SOFT-NEXT:    mr r27, r3
+; SOFT-NEXT:    clrldi r3, r6, 32
+; SOFT-NEXT:    std r0, 96(r1)
+; SOFT-NEXT:    mr r30, r7
+; SOFT-NEXT:    mr r29, r5
+; SOFT-NEXT:    mr r28, r4
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    mr r26, r3
+; SOFT-NEXT:    clrldi r3, r29, 32
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    mr r29, r3
+; SOFT-NEXT:    clrldi r3, r28, 32
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    mr r28, r3
+; SOFT-NEXT:    clrldi r3, r27, 32
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    clrldi r3, r3, 48
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    mr r27, r3
+; SOFT-NEXT:    clrldi r3, r28, 48
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    mr r28, r3
+; SOFT-NEXT:    clrldi r3, r29, 48
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    mr r29, r3
+; SOFT-NEXT:    clrldi r3, r26, 48
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    sth r3, 6(r30)
+; SOFT-NEXT:    mr r3, r29
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    sth r3, 4(r30)
+; SOFT-NEXT:    mr r3, r28
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    sth r3, 2(r30)
+; SOFT-NEXT:    mr r3, r27
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    sth r3, 0(r30)
+; SOFT-NEXT:    addi r1, r1, 80
+; SOFT-NEXT:    ld r0, 16(r1)
+; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: test_trunc32_vec4:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -176(r1)
+; BE-NEXT:    addi r3, r1, 112
+; BE-NEXT:    std r0, 192(r1)
+; BE-NEXT:    std r27, 136(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r28, 144(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r29, 152(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r30, 160(r1) # 8-byte Folded Spill
+; BE-NEXT:    mr r30, r5
+; BE-NEXT:    stvx v2, 0, r3
+; BE-NEXT:    lfs f1, 112(r1)
+; BE-NEXT:    bl __truncsfhf2
+; BE-NEXT:    nop
+; BE-NEXT:    lfs f1, 116(r1)
+; BE-NEXT:    mr r29, r3
+; BE-NEXT:    bl __truncsfhf2
+; BE-NEXT:    nop
+; BE-NEXT:    lfs f1, 120(r1)
+; BE-NEXT:    mr r28, r3
+; BE-NEXT:    bl __truncsfhf2
+; BE-NEXT:    nop
+; BE-NEXT:    lfs f1, 124(r1)
+; BE-NEXT:    mr r27, r3
+; BE-NEXT:    bl __truncsfhf2
+; BE-NEXT:    nop
+; BE-NEXT:    sth r27, 4(r30)
+; BE-NEXT:    sth r28, 2(r30)
+; BE-NEXT:    sth r3, 6(r30)
+; BE-NEXT:    sth r29, 0(r30)
+; BE-NEXT:    ld r30, 160(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r29, 152(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r28, 144(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r27, 136(r1) # 8-byte Folded Reload
+; BE-NEXT:    addi r1, r1, 176
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+  %v = fptrunc <4 x float> %a to <4 x half>
+  store <4 x half> %v, ptr %p
+  ret void
+}
+define void @test_trunc64_vec4(<4 x double> %a, ptr %p) nounwind {
+; PPC32-LABEL: test_trunc64_vec4:
+; PPC32:       # %bb.0:
+; PPC32-NEXT:    mflr r0
+; PPC32-NEXT:    stwu r1, -64(r1)
+; PPC32-NEXT:    stw r0, 68(r1)
+; PPC32-NEXT:    stw r27, 20(r1) # 4-byte Folded Spill
+; PPC32-NEXT:    stw r28, 24(r1) # 4-byte Folded Spill
+; PPC32-NEXT:    stw r29, 28(r1) # 4-byte Folded Spill
+; PPC32-NEXT:    stw r30, 32(r1) # 4-byte Folded Spill
+; PPC32-NEXT:    mr r30, r3
+; PPC32-NEXT:    stfd f29, 40(r1) # 8-byte Folded Spill
+; PPC32-NEXT:    fmr f29, f2
+; PPC32-NEXT:    stfd f30, 48(r1) # 8-byte Folded Spill
+; PPC32-NEXT:    fmr f30, f3
+; PPC32-NEXT:    stfd f31, 56(r1) # 8-byte Folded Spill
+; PPC32-NEXT:    fmr f31, f4
+; PPC32-NEXT:    bl __truncdfhf2
+; PPC32-NEXT:    fmr f1, f29
+; PPC32-NEXT:    mr r29, r3
+; PPC32-NEXT:    bl __truncdfhf2
+; PPC32-NEXT:    fmr f1, f30
+; PPC32-NEXT:    mr r28, r3
+; PPC32-NEXT:    bl __truncdfhf2
+; PPC32-NEXT:    fmr f1, f31
+; PPC32-NEXT:    mr r27, r3
+; PPC32-NEXT:    bl __truncdfhf2
+; PPC32-NEXT:    sth r27, 4(r30)
+; PPC32-NEXT:    sth r28, 2(r30)
+; PPC32-NEXT:    sth r3, 6(r30)
+; PPC32-NEXT:    sth r29, 0(r30)
+; PPC32-NEXT:    lfd f31, 56(r1) # 8-byte Folded Reload
+; PPC32-NEXT:    lfd f30, 48(r1) # 8-byte Folded Reload
+; PPC32-NEXT:    lfd f29, 40(r1) # 8-byte Folded Reload
+; PPC32-NEXT:    lwz r30, 32(r1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz r29, 28(r1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz r28, 24(r1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz r27, 20(r1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz r0, 68(r1)
+; PPC32-NEXT:    addi r1, r1, 64
+; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: test_trunc64_vec4:
+; P8:       # %bb.0:
+; P8-NEXT:    mflr r0
+; P8-NEXT:    stdu r1, -128(r1)
+; P8-NEXT:    li r3, 48
+; P8-NEXT:    std r0, 144(r1)
+; P8-NEXT:    xxswapd vs1, vs34
+; P8-NEXT:    std r27, 88(r1) # 8-byte Folded Spill
+; P8-NEXT:    std r28, 96(r1) # 8-byte Folded Spill
+; P8-NEXT:    std r29, 104(r1) # 8-byte Folded Spill
+; P8-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
+; P8-NEXT:    mr r30, r7
+; P8-NEXT:    stxvd2x vs62, r1, r3 # 16-byte Folded Spill
+; P8-NEXT:    li r3, 64
+; P8-NEXT:    vmr v30, v2
+; P8-NEXT:    stxvd2x vs63, r1, r3 # 16-byte Folded Spill
+; P8-NEXT:    vmr v31, v3
+; P8-NEXT:    bl __truncdfhf2
+; P8-NEXT:    nop
+; P8-NEXT:    xxswapd vs1, vs63
+; P8-NEXT:    mr r29, r3
+; P8-NEXT:    bl __truncdfhf2
+; P8-NEXT:    nop
+; P8-NEXT:    xxlor f1, vs62, vs62
+; P8-NEXT:    mr r28, r3
+; P8-NEXT:    bl __truncdfhf2
+; P8-NEXT:    nop
+; P8-NEXT:    xxlor f1, vs63, vs63
+; P8-NEXT:    mr r27, r3
+; P8-NEXT:    bl __truncdfhf2
+; P8-NEXT:    nop
+; P8-NEXT:    sth r3, 6(r30)
+; P8-NEXT:    li r3, 64
+; P8-NEXT:    sth r27, 2(r30)
+; P8-NEXT:    ld r27, 88(r1) # 8-byte Folded Reload
+; P8-NEXT:    sth r28, 4(r30)
+; P8-NEXT:    sth r29, 0(r30)
+; P8-NEXT:    ld r30, 112(r1) # 8-byte Folded Reload
+; P8-NEXT:    ld r29, 104(r1) # 8-byte Folded Reload
+; P8-NEXT:    lxvd2x vs63, r1, r3 # 16-byte Folded Reload
+; P8-NEXT:    li r3, 48
+; P8-NEXT:    ld r28, 96(r1) # 8-byte Folded Reload
+; P8-NEXT:    lxvd2x vs62, r1, r3 # 16-byte Folded Reload
+; P8-NEXT:    addi r1, r1, 128
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: test_trunc64_vec4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xxswapd vs0, vs34
+; CHECK-NEXT:    xscvdphp f0, f0
+; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    xxswapd vs0, vs35
+; CHECK-NEXT:    xscvdphp f0, f0
+; CHECK-NEXT:    xscvdphp f1, vs34
+; CHECK-NEXT:    mffprwz r4, f1
+; CHECK-NEXT:    xscvdphp f1, vs35
+; CHECK-NEXT:    sth r3, 0(r7)
+; CHECK-NEXT:    sth r4, 2(r7)
+; CHECK-NEXT:    mffprwz r4, f0
+; CHECK-NEXT:    sth r4, 4(r7)
+; CHECK-NEXT:    mffprwz r5, f1
+; CHECK-NEXT:    sth r5, 6(r7)
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: test_trunc64_vec4:
+; SOFT:       # %bb.0:
+; SOFT-NEXT:    mflr r0
+; SOFT-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    stdu r1, -80(r1)
+; SOFT-NEXT:    mr r27, r3
+; SOFT-NEXT:    mr r3, r6
+; SOFT-NEXT:    std r0, 96(r1)
+; SOFT-NEXT:    mr r30, r7
+; SOFT-NEXT:    mr r29, r5
+; SOFT-NEXT:    mr r28, r4
+; SOFT-NEXT:    bl __truncdfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    mr r26, r3
+; SOFT-NEXT:    mr r3, r29
+; SOFT-NEXT:    bl __truncdfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    mr r29, r3
+; SOFT-NEXT:    mr r3, r28
+; SOFT-NEXT:    bl __truncdfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    mr r28, r3
+; SOFT-NEXT:    mr r3, r27
+; SOFT-NEXT:    bl __truncdfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    clrldi r3, r3, 48
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    mr r27, r3
+; SOFT-NEXT:    clrldi r3, r28, 48
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    mr r28, r3
+; SOFT-NEXT:    clrldi r3, r29, 48
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    mr r29, r3
+; SOFT-NEXT:    clrldi r3, r26, 48
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    sth r3, 6(r30)
+; SOFT-NEXT:    mr r3, r29
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    sth r3, 4(r30)
+; SOFT-NEXT:    mr r3, r28
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    sth r3, 2(r30)
+; SOFT-NEXT:    mr r3, r27
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    sth r3, 0(r30)
+; SOFT-NEXT:    addi r1, r1, 80
+; SOFT-NEXT:    ld r0, 16(r1)
+; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: test_trunc64_vec4:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -176(r1)
+; BE-NEXT:    std r0, 192(r1)
+; BE-NEXT:    std r27, 112(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r28, 120(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r29, 128(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r30, 136(r1) # 8-byte Folded Spill
+; BE-NEXT:    mr r30, r7
+; BE-NEXT:    stfd f29, 152(r1) # 8-byte Folded Spill
+; BE-NEXT:    fmr f29, f2
+; BE-NEXT:    stfd f30, 160(r1) # 8-byte Folded Spill
+; BE-NEXT:    fmr f30, f3
+; BE-NEXT:    stfd f31, 168(r1) # 8-byte Folded Spill
+; BE-NEXT:    fmr f31, f4
+; BE-NEXT:    bl __truncdfhf2
+; BE-NEXT:    nop
+; BE-NEXT:    fmr f1, f29
+; BE-NEXT:    mr r29, r3
+; BE-NEXT:    bl __truncdfhf2
+; BE-NEXT:    nop
+; BE-NEXT:    fmr f1, f30
+; BE-NEXT:    mr r28, r3
+; BE-NEXT:    bl __truncdfhf2
+; BE-NEXT:    nop
+; BE-NEXT:    fmr f1, f31
+; BE-NEXT:    mr r27, r3
+; BE-NEXT:    bl __truncdfhf2
+; BE-NEXT:    nop
+; BE-NEXT:    sth r27, 4(r30)
+; BE-NEXT:    sth r28, 2(r30)
+; BE-NEXT:    sth r3, 6(r30)
+; BE-NEXT:    sth r29, 0(r30)
+; BE-NEXT:    lfd f31, 168(r1) # 8-byte Folded Reload
+; BE-NEXT:    lfd f30, 160(r1) # 8-byte Folded Reload
+; BE-NEXT:    lfd f29, 152(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r30, 136(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r29, 128(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r28, 120(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r27, 112(r1) # 8-byte Folded Reload
+; BE-NEXT:    addi r1, r1, 176
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+  %v = fptrunc <4 x double> %a to <4 x half>
+  store <4 x half> %v, ptr %p
+  ret void
+}
+define float @test_sitofp_fadd_i32(i32 %a, ptr %b) nounwind {
+; PPC32-LABEL: test_sitofp_fadd_i32:
+; PPC32:       # %bb.0:
+; PPC32-NEXT:    mflr r0
+; PPC32-NEXT:    stwu r1, -32(r1)
+; PPC32-NEXT:    stw r0, 36(r1)
+; PPC32-NEXT:    stw r30, 16(r1) # 4-byte Folded Spill
+; PPC32-NEXT:    mr r30, r3
+; PPC32-NEXT:    lhz r3, 0(r4)
+; PPC32-NEXT:    stfd f31, 24(r1) # 8-byte Folded Spill
+; PPC32-NEXT:    bl __extendhfsf2
+; PPC32-NEXT:    lis r3, 17200
+; PPC32-NEXT:    stw r3, 8(r1)
+; PPC32-NEXT:    xoris r3, r30, 32768
+; PPC32-NEXT:    stw r3, 12(r1)
+; PPC32-NEXT:    lis r3, .LCPI23_0@ha
+; PPC32-NEXT:    fmr f31, f1
+; PPC32-NEXT:    lfd f0, 8(r1)
+; PPC32-NEXT:    lfs f1, .LCPI23_0@l(r3)
+; PPC32-NEXT:    fsub f0, f0, f1
+; PPC32-NEXT:    frsp f1, f0
+; PPC32-NEXT:    bl __truncsfhf2
+; PPC32-NEXT:    clrlwi r3, r3, 16
+; PPC32-NEXT:    bl __extendhfsf2
+; PPC32-NEXT:    fadds f1, f31, f1
+; PPC32-NEXT:    lfd f31, 24(r1) # 8-byte Folded Reload
+; PPC32-NEXT:    lwz r30, 16(r1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz r0, 36(r1)
+; PPC32-NEXT:    addi r1, r1, 32
+; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: test_sitofp_fadd_i32:
+; P8:       # %bb.0:
+; P8-NEXT:    mflr r0
+; P8-NEXT:    std r30, -24(r1) # 8-byte Folded Spill
+; P8-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; P8-NEXT:    stdu r1, -64(r1)
+; P8-NEXT:    std r0, 80(r1)
+; P8-NEXT:    mr r30, r3
+; P8-NEXT:    lhz r3, 0(r4)
+; P8-NEXT:    bl __extendhfsf2
+; P8-NEXT:    nop
+; P8-NEXT:    mtfprwa f0, r30
+; P8-NEXT:    fmr f31, f1
+; P8-NEXT:    xscvsxdsp f1, f0
+; P8-NEXT:    bl __truncsfhf2
+; P8-NEXT:    nop
+; P8-NEXT:    clrldi r3, r3, 48
+; P8-NEXT:    bl __extendhfsf2
+; P8-NEXT:    nop
+; P8-NEXT:    xsaddsp f1, f31, f1
+; P8-NEXT:    addi r1, r1, 64
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; P8-NEXT:    ld r30, -24(r1) # 8-byte Folded Reload
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: test_sitofp_fadd_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mtfprwa f1, r3
+; CHECK-NEXT:    lhz r4, 0(r4)
+; CHECK-NEXT:    xscvsxdsp f1, f1
+; CHECK-NEXT:    mtfprwz f0, r4
+; CHECK-NEXT:    xscvhpdp f0, f0
+; CHECK-NEXT:    xscvdphp f1, f1
+; CHECK-NEXT:    mffprwz r3, f1
+; CHECK-NEXT:    clrlwi r3, r3, 16
+; CHECK-NEXT:    mtfprwz f1, r3
+; CHECK-NEXT:    xscvhpdp f1, f1
+; CHECK-NEXT:    xsaddsp f1, f0, f1
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: test_sitofp_fadd_i32:
+; SOFT:       # %bb.0:
+; SOFT-NEXT:    mflr r0
+; SOFT-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    stdu r1, -64(r1)
+; SOFT-NEXT:    std r0, 80(r1)
+; SOFT-NEXT:    mr r30, r3
+; SOFT-NEXT:    lhz r3, 0(r4)
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    mr r29, r3
+; SOFT-NEXT:    extsw r3, r30
+; SOFT-NEXT:    bl __floatsisf
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    clrldi r3, r3, 32
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    clrldi r3, r3, 48
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    mr r4, r3
+; SOFT-NEXT:    mr r3, r29
+; SOFT-NEXT:    bl __addsf3
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    addi r1, r1, 64
+; SOFT-NEXT:    ld r0, 16(r1)
+; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: test_sitofp_fadd_i32:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -144(r1)
+; BE-NEXT:    std r0, 160(r1)
+; BE-NEXT:    std r30, 120(r1) # 8-byte Folded Spill
+; BE-NEXT:    mr r30, r3
+; BE-NEXT:    lhz r3, 0(r4)
+; BE-NEXT:    stfd f31, 136(r1) # 8-byte Folded Spill
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
+; BE-NEXT:    extsw r3, r30
+; BE-NEXT:    fmr f31, f1
+; BE-NEXT:    std r3, 112(r1)
+; BE-NEXT:    lfd f0, 112(r1)
+; BE-NEXT:    fcfid f0, f0
+; BE-NEXT:    frsp f1, f0
+; BE-NEXT:    bl __truncsfhf2
+; BE-NEXT:    nop
+; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
+; BE-NEXT:    fadds f1, f31, f1
+; BE-NEXT:    lfd f31, 136(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r30, 120(r1) # 8-byte Folded Reload
+; BE-NEXT:    addi r1, r1, 144
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+  %tmp0 = load half, ptr %b
+  %tmp1 = sitofp i32 %a to half
+  %tmp2 = fadd half %tmp0, %tmp1
+  %tmp3 = fpext half %tmp2 to float
+  ret float %tmp3
+}
+define half @PR40273(half) nounwind {
+; PPC32-LABEL: PR40273:
+; PPC32:       # %bb.0:
+; PPC32-NEXT:    mflr r0
+; PPC32-NEXT:    stwu r1, -16(r1)
+; PPC32-NEXT:    stw r0, 20(r1)
+; PPC32-NEXT:    bl __truncsfhf2
+; PPC32-NEXT:    clrlwi r3, r3, 16
+; PPC32-NEXT:    bl __extendhfsf2
+; PPC32-NEXT:    lis r3, .LCPI24_0@ha
+; PPC32-NEXT:    lfs f0, .LCPI24_0@l(r3)
+; PPC32-NEXT:    li r3, 0
+; PPC32-NEXT:    fcmpu cr0, f1, f0
+; PPC32-NEXT:    bc 12, eq, .LBB24_2
+; PPC32-NEXT:  # %bb.1:
+; PPC32-NEXT:    li r3, 4
+; PPC32-NEXT:  .LBB24_2:
+; PPC32-NEXT:    li r4, .LCPI24_1@l
+; PPC32-NEXT:    addis r4, r4, .LCPI24_1@ha
+; PPC32-NEXT:    lfsx f1, r4, r3
+; PPC32-NEXT:    lwz r0, 20(r1)
+; PPC32-NEXT:    addi r1, r1, 16
+; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: PR40273:
+; P8:       # %bb.0:
+; P8-NEXT:    mflr r0
+; P8-NEXT:    stdu r1, -32(r1)
+; P8-NEXT:    std r0, 48(r1)
+; P8-NEXT:    bl __truncsfhf2
+; P8-NEXT:    nop
+; P8-NEXT:    clrldi r3, r3, 48
+; P8-NEXT:    bl __extendhfsf2
+; P8-NEXT:    nop
+; P8-NEXT:    fmr f0, f1
+; P8-NEXT:    xxlxor f1, f1, f1
+; P8-NEXT:    fcmpu cr0, f0, f1
+; P8-NEXT:    beq cr0, .LBB24_2
+; P8-NEXT:  # %bb.1:
+; P8-NEXT:    vspltisw v2, 1
+; P8-NEXT:    xvcvsxwdp vs1, vs34
+; P8-NEXT:  .LBB24_2:
+; P8-NEXT:    addi r1, r1, 32
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: PR40273:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xscvdphp f0, f1
+; CHECK-NEXT:    xxlxor f1, f1, f1
+; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    clrlwi r3, r3, 16
+; CHECK-NEXT:    mtfprwz f0, r3
+; CHECK-NEXT:    xscvhpdp f0, f0
+; CHECK-NEXT:    fcmpu cr0, f0, f1
+; CHECK-NEXT:    beqlr cr0
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    vspltisw v2, 1
+; CHECK-NEXT:    xvcvsxwdp vs1, vs34
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: PR40273:
+; SOFT:       # %bb.0:
+; SOFT-NEXT:    mflr r0
+; SOFT-NEXT:    stdu r1, -32(r1)
+; SOFT-NEXT:    clrldi r3, r3, 48
+; SOFT-NEXT:    std r0, 48(r1)
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    li r4, 0
+; SOFT-NEXT:    bl __nesf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    cmplwi r3, 0
+; SOFT-NEXT:    lis r3, 16256
+; SOFT-NEXT:    iseleq r3, 0, r3
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    addi r1, r1, 32
+; SOFT-NEXT:    ld r0, 16(r1)
+; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: PR40273:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -112(r1)
+; BE-NEXT:    std r0, 128(r1)
+; BE-NEXT:    bl __truncsfhf2
+; BE-NEXT:    nop
+; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
+; BE-NEXT:    addis r3, r2, .LCPI24_0@toc@ha
+; BE-NEXT:    lfs f0, .LCPI24_0@toc@l(r3)
+; BE-NEXT:    li r3, 0
+; BE-NEXT:    fcmpu cr0, f1, f0
+; BE-NEXT:    bc 12, eq, .LBB24_2
+; BE-NEXT:  # %bb.1:
+; BE-NEXT:    li r3, 4
+; BE-NEXT:  .LBB24_2:
+; BE-NEXT:    addis r4, r2, .LCPI24_1@toc@ha
+; BE-NEXT:    addi r4, r4, .LCPI24_1@toc@l
+; BE-NEXT:    lfsx f1, r4, r3
+; BE-NEXT:    addi r1, r1, 112
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+  %2 = fcmp une half %0, 0xH0000
+  %3 = uitofp i1 %2 to half
+  ret half %3
+}
+
+; Trivial operations shouldn't need a libcall
+
+define half @fabs(half %x) nounwind {
+; PPC32-LABEL: fabs:
+; PPC32:       # %bb.0:
+; PPC32-NEXT:    mflr r0
+; PPC32-NEXT:    stwu r1, -16(r1)
+; PPC32-NEXT:    stw r0, 20(r1)
+; PPC32-NEXT:    bl __truncsfhf2
+; PPC32-NEXT:    clrlwi r3, r3, 16
+; PPC32-NEXT:    bl __extendhfsf2
+; PPC32-NEXT:    fabs f1, f1
+; PPC32-NEXT:    lwz r0, 20(r1)
+; PPC32-NEXT:    addi r1, r1, 16
+; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: fabs:
+; P8:       # %bb.0:
+; P8-NEXT:    mflr r0
+; P8-NEXT:    stdu r1, -32(r1)
+; P8-NEXT:    std r0, 48(r1)
+; P8-NEXT:    bl __truncsfhf2
+; P8-NEXT:    nop
+; P8-NEXT:    clrldi r3, r3, 48
+; P8-NEXT:    bl __extendhfsf2
+; P8-NEXT:    nop
+; P8-NEXT:    xsabsdp f1, f1
+; P8-NEXT:    addi r1, r1, 32
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: fabs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xscvdphp f0, f1
+; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    clrlwi r3, r3, 16
+; CHECK-NEXT:    mtfprwz f0, r3
+; CHECK-NEXT:    xscvhpdp f0, f0
+; CHECK-NEXT:    xsabsdp f1, f0
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: fabs:
+; SOFT:       # %bb.0:
+; SOFT-NEXT:    clrldi r3, r3, 49
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: fabs:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -112(r1)
+; BE-NEXT:    std r0, 128(r1)
+; BE-NEXT:    bl __truncsfhf2
+; BE-NEXT:    nop
+; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
+; BE-NEXT:    fabs f1, f1
+; BE-NEXT:    addi r1, r1, 112
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+  %a = call half @llvm.fabs.f16(half %x)
+  ret half %a
+}
+
+define half @fcopysign(half %x, half %y) nounwind {
+; PPC32-LABEL: fcopysign:
+; PPC32:       # %bb.0:
+; PPC32-NEXT:    mflr r0
+; PPC32-NEXT:    stwu r1, -32(r1)
+; PPC32-NEXT:    stw r0, 36(r1)
+; PPC32-NEXT:    stfd f31, 24(r1) # 8-byte Folded Spill
+; PPC32-NEXT:    fmr f31, f2
+; PPC32-NEXT:    bl __truncsfhf2
+; PPC32-NEXT:    clrlwi r3, r3, 16
+; PPC32-NEXT:    bl __extendhfsf2
+; PPC32-NEXT:    stfs f31, 20(r1)
+; PPC32-NEXT:    lwz r3, 20(r1)
+; PPC32-NEXT:    srwi r3, r3, 31
+; PPC32-NEXT:    andi. r3, r3, 1
+; PPC32-NEXT:    bc 12, gt, .LBB26_2
+; PPC32-NEXT:  # %bb.1:
+; PPC32-NEXT:    fabs f1, f1
+; PPC32-NEXT:    b .LBB26_3
+; PPC32-NEXT:  .LBB26_2:
+; PPC32-NEXT:    fnabs f1, f1
+; PPC32-NEXT:  .LBB26_3:
+; PPC32-NEXT:    lfd f31, 24(r1) # 8-byte Folded Reload
+; PPC32-NEXT:    lwz r0, 36(r1)
+; PPC32-NEXT:    addi r1, r1, 32
+; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    blr
+;
+; P8-LABEL: fcopysign:
+; P8:       # %bb.0:
+; P8-NEXT:    mflr r0
+; P8-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; P8-NEXT:    stdu r1, -48(r1)
+; P8-NEXT:    std r0, 64(r1)
+; P8-NEXT:    fmr f31, f2
+; P8-NEXT:    bl __truncsfhf2
+; P8-NEXT:    nop
+; P8-NEXT:    clrldi r3, r3, 48
+; P8-NEXT:    bl __extendhfsf2
+; P8-NEXT:    nop
+; P8-NEXT:    fcpsgn f1, f31, f1
+; P8-NEXT:    addi r1, r1, 48
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: fcopysign:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xscvdphp f0, f1
+; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    clrlwi r3, r3, 16
+; CHECK-NEXT:    mtfprwz f0, r3
+; CHECK-NEXT:    xscvhpdp f0, f0
+; CHECK-NEXT:    fcpsgn f1, f2, f0
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: fcopysign:
+; SOFT:       # %bb.0:
+; SOFT-NEXT:    mflr r0
+; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; SOFT-NEXT:    stdu r1, -48(r1)
+; SOFT-NEXT:    clrldi r3, r3, 48
+; SOFT-NEXT:    std r0, 64(r1)
+; SOFT-NEXT:    mr r30, r4
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    rlwimi r3, r30, 16, 0, 0
+; SOFT-NEXT:    clrldi r3, r3, 32
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    addi r1, r1, 48
+; SOFT-NEXT:    ld r0, 16(r1)
+; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    blr
+;
+; BE-LABEL: fcopysign:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -128(r1)
+; BE-NEXT:    std r0, 144(r1)
+; BE-NEXT:    stfd f31, 120(r1) # 8-byte Folded Spill
+; BE-NEXT:    fmr f31, f2
+; BE-NEXT:    bl __truncsfhf2
+; BE-NEXT:    nop
+; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
+; BE-NEXT:    stfs f31, 116(r1)
+; BE-NEXT:    lwz r3, 116(r1)
+; BE-NEXT:    srwi r3, r3, 31
+; BE-NEXT:    andi. r3, r3, 1
+; BE-NEXT:    bc 12, gt, .LBB26_2
+; BE-NEXT:  # %bb.1:
+; BE-NEXT:    fabs f1, f1
+; BE-NEXT:    b .LBB26_3
+; BE-NEXT:  .LBB26_2:
+; BE-NEXT:    fnabs f1, f1
+; BE-NEXT:  .LBB26_3:
+; BE-NEXT:    lfd f31, 120(r1) # 8-byte Folded Reload
+; BE-NEXT:    addi r1, r1, 128
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+  %a = call half @llvm.copysign.f16(half %x, half %y)
+  ret half %a
+}
diff --git a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
deleted file mode 100644
index 50f05cca80458..0000000000000
--- a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
+++ /dev/null
@@ -1,1281 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \
-; RUN:   -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \
-; RUN:   --check-prefix=P8
-; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
-; RUN:   -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s
-; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -mattr=-hard-float \
-; RUN:   -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \
-; RUN:   --check-prefix=SOFT
-
-; Tests for various operations on half precison float. Much of the test is
-; copied from test/CodeGen/X86/half.ll.
-define dso_local double @loadd(ptr nocapture readonly %a) local_unnamed_addr #0 {
-; P8-LABEL: loadd:
-; P8:       # %bb.0: # %entry
-; P8-NEXT:    mflr r0
-; P8-NEXT:    stdu r1, -32(r1)
-; P8-NEXT:    std r0, 48(r1)
-; P8-NEXT:    lhz r3, 2(r3)
-; P8-NEXT:    bl __extendhfsf2
-; P8-NEXT:    nop
-; P8-NEXT:    addi r1, r1, 32
-; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    mtlr r0
-; P8-NEXT:    blr
-;
-; CHECK-LABEL: loadd:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi r3, r3, 2
-; CHECK-NEXT:    lxsihzx f0, 0, r3
-; CHECK-NEXT:    xscvhpdp f1, f0
-; CHECK-NEXT:    blr
-;
-; SOFT-LABEL: loadd:
-; SOFT:       # %bb.0: # %entry
-; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    stdu r1, -32(r1)
-; SOFT-NEXT:    std r0, 48(r1)
-; SOFT-NEXT:    lhz r3, 2(r3)
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    bl __extendsfdf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    addi r1, r1, 32
-; SOFT-NEXT:    ld r0, 16(r1)
-; SOFT-NEXT:    mtlr r0
-; SOFT-NEXT:    blr
-entry:
-  %arrayidx = getelementptr inbounds i16, ptr %a, i64 1
-  %0 = load i16, ptr %arrayidx, align 2
-  %1 = tail call double @llvm.convert.from.fp16.f64(i16 %0)
-  ret double %1
-}
-
-declare double @llvm.convert.from.fp16.f64(i16)
-
-define dso_local float @loadf(ptr nocapture readonly %a) local_unnamed_addr #0 {
-; P8-LABEL: loadf:
-; P8:       # %bb.0: # %entry
-; P8-NEXT:    mflr r0
-; P8-NEXT:    stdu r1, -32(r1)
-; P8-NEXT:    std r0, 48(r1)
-; P8-NEXT:    lhz r3, 2(r3)
-; P8-NEXT:    bl __extendhfsf2
-; P8-NEXT:    nop
-; P8-NEXT:    addi r1, r1, 32
-; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    mtlr r0
-; P8-NEXT:    blr
-;
-; CHECK-LABEL: loadf:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi r3, r3, 2
-; CHECK-NEXT:    lxsihzx f0, 0, r3
-; CHECK-NEXT:    xscvhpdp f1, f0
-; CHECK-NEXT:    blr
-;
-; SOFT-LABEL: loadf:
-; SOFT:       # %bb.0: # %entry
-; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    stdu r1, -32(r1)
-; SOFT-NEXT:    std r0, 48(r1)
-; SOFT-NEXT:    lhz r3, 2(r3)
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    addi r1, r1, 32
-; SOFT-NEXT:    ld r0, 16(r1)
-; SOFT-NEXT:    mtlr r0
-; SOFT-NEXT:    blr
-entry:
-  %arrayidx = getelementptr inbounds i16, ptr %a, i64 1
-  %0 = load i16, ptr %arrayidx, align 2
-  %1 = tail call float @llvm.convert.from.fp16.f32(i16 %0)
-  ret float %1
-}
-
-declare float @llvm.convert.from.fp16.f32(i16)
-
-define dso_local void @stored(ptr nocapture %a, double %b) local_unnamed_addr #0 {
-; P8-LABEL: stored:
-; P8:       # %bb.0: # %entry
-; P8-NEXT:    mflr r0
-; P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; P8-NEXT:    stdu r1, -48(r1)
-; P8-NEXT:    std r0, 64(r1)
-; P8-NEXT:    mr r30, r3
-; P8-NEXT:    bl __truncdfhf2
-; P8-NEXT:    nop
-; P8-NEXT:    sth r3, 0(r30)
-; P8-NEXT:    addi r1, r1, 48
-; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; P8-NEXT:    mtlr r0
-; P8-NEXT:    blr
-;
-; CHECK-LABEL: stored:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xscvdphp f0, f1
-; CHECK-NEXT:    stxsihx f0, 0, r3
-; CHECK-NEXT:    blr
-;
-; SOFT-LABEL: stored:
-; SOFT:       # %bb.0: # %entry
-; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    stdu r1, -48(r1)
-; SOFT-NEXT:    mr r30, r3
-; SOFT-NEXT:    mr r3, r4
-; SOFT-NEXT:    std r0, 64(r1)
-; SOFT-NEXT:    bl __truncdfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    clrldi r3, r3, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    sth r3, 0(r30)
-; SOFT-NEXT:    addi r1, r1, 48
-; SOFT-NEXT:    ld r0, 16(r1)
-; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    mtlr r0
-; SOFT-NEXT:    blr
-entry:
-  %0 = tail call i16 @llvm.convert.to.fp16.f64(double %b)
-  store i16 %0, ptr %a, align 2
-  ret void
-}
-
-declare i16 @llvm.convert.to.fp16.f64(double)
-
-define dso_local void @storef(ptr nocapture %a, float %b) local_unnamed_addr #0 {
-; P8-LABEL: storef:
-; P8:       # %bb.0: # %entry
-; P8-NEXT:    mflr r0
-; P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; P8-NEXT:    stdu r1, -48(r1)
-; P8-NEXT:    std r0, 64(r1)
-; P8-NEXT:    mr r30, r3
-; P8-NEXT:    bl __truncsfhf2
-; P8-NEXT:    nop
-; P8-NEXT:    sth r3, 0(r30)
-; P8-NEXT:    addi r1, r1, 48
-; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; P8-NEXT:    mtlr r0
-; P8-NEXT:    blr
-;
-; CHECK-LABEL: storef:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xscvdphp f0, f1
-; CHECK-NEXT:    stxsihx f0, 0, r3
-; CHECK-NEXT:    blr
-;
-; SOFT-LABEL: storef:
-; SOFT:       # %bb.0: # %entry
-; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    stdu r1, -48(r1)
-; SOFT-NEXT:    mr r30, r3
-; SOFT-NEXT:    clrldi r3, r4, 32
-; SOFT-NEXT:    std r0, 64(r1)
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    clrldi r3, r3, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    sth r3, 0(r30)
-; SOFT-NEXT:    addi r1, r1, 48
-; SOFT-NEXT:    ld r0, 16(r1)
-; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    mtlr r0
-; SOFT-NEXT:    blr
-entry:
-  %0 = tail call i16 @llvm.convert.to.fp16.f32(float %b)
-  store i16 %0, ptr %a, align 2
-  ret void
-}
-
-declare i16 @llvm.convert.to.fp16.f32(float)
-define void @test_load_store(ptr %in, ptr %out) #0 {
-; P8-LABEL: test_load_store:
-; P8:       # %bb.0:
-; P8-NEXT:    lhz r3, 0(r3)
-; P8-NEXT:    sth r3, 0(r4)
-; P8-NEXT:    blr
-;
-; CHECK-LABEL: test_load_store:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lhz r3, 0(r3)
-; CHECK-NEXT:    sth r3, 0(r4)
-; CHECK-NEXT:    blr
-;
-; SOFT-LABEL: test_load_store:
-; SOFT:       # %bb.0:
-; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    stdu r1, -48(r1)
-; SOFT-NEXT:    std r0, 64(r1)
-; SOFT-NEXT:    mr r30, r4
-; SOFT-NEXT:    lhz r3, 0(r3)
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    sth r3, 0(r30)
-; SOFT-NEXT:    addi r1, r1, 48
-; SOFT-NEXT:    ld r0, 16(r1)
-; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    mtlr r0
-; SOFT-NEXT:    blr
-  %val = load half, ptr %in
-  store half %val, ptr %out
-  ret void
-}
-define i16 @test_bitcast_from_half(ptr %addr) #0 {
-; P8-LABEL: test_bitcast_from_half:
-; P8:       # %bb.0:
-; P8-NEXT:    lhz r3, 0(r3)
-; P8-NEXT:    blr
-;
-; CHECK-LABEL: test_bitcast_from_half:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lhz r3, 0(r3)
-; CHECK-NEXT:    blr
-;
-; SOFT-LABEL: test_bitcast_from_half:
-; SOFT:       # %bb.0:
-; SOFT-NEXT:    lhz r3, 0(r3)
-; SOFT-NEXT:    blr
-  %val = load half, ptr %addr
-  %val_int = bitcast half %val to i16
-  ret i16 %val_int
-}
-define void @test_bitcast_to_half(ptr %addr, i16 %in) #0 {
-; P8-LABEL: test_bitcast_to_half:
-; P8:       # %bb.0:
-; P8-NEXT:    sth r4, 0(r3)
-; P8-NEXT:    blr
-;
-; CHECK-LABEL: test_bitcast_to_half:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    sth r4, 0(r3)
-; CHECK-NEXT:    blr
-;
-; SOFT-LABEL: test_bitcast_to_half:
-; SOFT:       # %bb.0:
-; SOFT-NEXT:    sth r4, 0(r3)
-; SOFT-NEXT:    blr
-  %val_fp = bitcast i16 %in to half
-  store half %val_fp, ptr %addr
-  ret void
-}
-define float @test_extend32(ptr %addr) #0 {
-; P8-LABEL: test_extend32:
-; P8:       # %bb.0:
-; P8-NEXT:    mflr r0
-; P8-NEXT:    stdu r1, -32(r1)
-; P8-NEXT:    std r0, 48(r1)
-; P8-NEXT:    lhz r3, 0(r3)
-; P8-NEXT:    bl __extendhfsf2
-; P8-NEXT:    nop
-; P8-NEXT:    addi r1, r1, 32
-; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    mtlr r0
-; P8-NEXT:    blr
-;
-; CHECK-LABEL: test_extend32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lxsihzx f0, 0, r3
-; CHECK-NEXT:    xscvhpdp f1, f0
-; CHECK-NEXT:    blr
-;
-; SOFT-LABEL: test_extend32:
-; SOFT:       # %bb.0:
-; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    stdu r1, -32(r1)
-; SOFT-NEXT:    std r0, 48(r1)
-; SOFT-NEXT:    lhz r3, 0(r3)
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    addi r1, r1, 32
-; SOFT-NEXT:    ld r0, 16(r1)
-; SOFT-NEXT:    mtlr r0
-; SOFT-NEXT:    blr
-  %val16 = load half, ptr %addr
-  %val32 = fpext half %val16 to float
-  ret float %val32
-}
-define double @test_extend64(ptr %addr) #0 {
-; P8-LABEL: test_extend64:
-; P8:       # %bb.0:
-; P8-NEXT:    mflr r0
-; P8-NEXT:    stdu r1, -32(r1)
-; P8-NEXT:    std r0, 48(r1)
-; P8-NEXT:    lhz r3, 0(r3)
-; P8-NEXT:    bl __extendhfsf2
-; P8-NEXT:    nop
-; P8-NEXT:    addi r1, r1, 32
-; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    mtlr r0
-; P8-NEXT:    blr
-;
-; CHECK-LABEL: test_extend64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lxsihzx f0, 0, r3
-; CHECK-NEXT:    xscvhpdp f1, f0
-; CHECK-NEXT:    blr
-;
-; SOFT-LABEL: test_extend64:
-; SOFT:       # %bb.0:
-; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    stdu r1, -32(r1)
-; SOFT-NEXT:    std r0, 48(r1)
-; SOFT-NEXT:    lhz r3, 0(r3)
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    bl __extendsfdf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    addi r1, r1, 32
-; SOFT-NEXT:    ld r0, 16(r1)
-; SOFT-NEXT:    mtlr r0
-; SOFT-NEXT:    blr
-  %val16 = load half, ptr %addr
-  %val32 = fpext half %val16 to double
-  ret double %val32
-}
-define void @test_trunc32(float %in, ptr %addr) #0 {
-; P8-LABEL: test_trunc32:
-; P8:       # %bb.0:
-; P8-NEXT:    mflr r0
-; P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; P8-NEXT:    stdu r1, -48(r1)
-; P8-NEXT:    std r0, 64(r1)
-; P8-NEXT:    mr r30, r4
-; P8-NEXT:    bl __truncsfhf2
-; P8-NEXT:    nop
-; P8-NEXT:    sth r3, 0(r30)
-; P8-NEXT:    addi r1, r1, 48
-; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; P8-NEXT:    mtlr r0
-; P8-NEXT:    blr
-;
-; CHECK-LABEL: test_trunc32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    xscvdphp f0, f1
-; CHECK-NEXT:    stxsihx f0, 0, r4
-; CHECK-NEXT:    blr
-;
-; SOFT-LABEL: test_trunc32:
-; SOFT:       # %bb.0:
-; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    stdu r1, -48(r1)
-; SOFT-NEXT:    clrldi r3, r3, 32
-; SOFT-NEXT:    std r0, 64(r1)
-; SOFT-NEXT:    mr r30, r4
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    clrldi r3, r3, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    sth r3, 0(r30)
-; SOFT-NEXT:    addi r1, r1, 48
-; SOFT-NEXT:    ld r0, 16(r1)
-; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    mtlr r0
-; SOFT-NEXT:    blr
-  %val16 = fptrunc float %in to half
-  store half %val16, ptr %addr
-  ret void
-}
-define void @test_trunc64(double %in, ptr %addr) #0 {
-; P8-LABEL: test_trunc64:
-; P8:       # %bb.0:
-; P8-NEXT:    mflr r0
-; P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; P8-NEXT:    stdu r1, -48(r1)
-; P8-NEXT:    std r0, 64(r1)
-; P8-NEXT:    mr r30, r4
-; P8-NEXT:    bl __truncdfhf2
-; P8-NEXT:    nop
-; P8-NEXT:    sth r3, 0(r30)
-; P8-NEXT:    addi r1, r1, 48
-; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; P8-NEXT:    mtlr r0
-; P8-NEXT:    blr
-;
-; CHECK-LABEL: test_trunc64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    xscvdphp f0, f1
-; CHECK-NEXT:    stxsihx f0, 0, r4
-; CHECK-NEXT:    blr
-;
-; SOFT-LABEL: test_trunc64:
-; SOFT:       # %bb.0:
-; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    stdu r1, -48(r1)
-; SOFT-NEXT:    std r0, 64(r1)
-; SOFT-NEXT:    mr r30, r4
-; SOFT-NEXT:    bl __truncdfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    clrldi r3, r3, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    sth r3, 0(r30)
-; SOFT-NEXT:    addi r1, r1, 48
-; SOFT-NEXT:    ld r0, 16(r1)
-; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    mtlr r0
-; SOFT-NEXT:    blr
-  %val16 = fptrunc double %in to half
-  store half %val16, ptr %addr
-  ret void
-}
-define i64 @test_fptosi_i64(ptr %p) #0 {
-; P8-LABEL: test_fptosi_i64:
-; P8:       # %bb.0:
-; P8-NEXT:    mflr r0
-; P8-NEXT:    stdu r1, -32(r1)
-; P8-NEXT:    std r0, 48(r1)
-; P8-NEXT:    lhz r3, 0(r3)
-; P8-NEXT:    bl __extendhfsf2
-; P8-NEXT:    nop
-; P8-NEXT:    xscvdpsxds f0, f1
-; P8-NEXT:    mffprd r3, f0
-; P8-NEXT:    addi r1, r1, 32
-; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    mtlr r0
-; P8-NEXT:    blr
-;
-; CHECK-LABEL: test_fptosi_i64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lhz r3, 0(r3)
-; CHECK-NEXT:    mtfprwz f0, r3
-; CHECK-NEXT:    xscvhpdp f0, f0
-; CHECK-NEXT:    xscvdpsxds f0, f0
-; CHECK-NEXT:    mffprd r3, f0
-; CHECK-NEXT:    blr
-;
-; SOFT-LABEL: test_fptosi_i64:
-; SOFT:       # %bb.0:
-; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    stdu r1, -32(r1)
-; SOFT-NEXT:    std r0, 48(r1)
-; SOFT-NEXT:    lhz r3, 0(r3)
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    bl __fixsfdi
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    addi r1, r1, 32
-; SOFT-NEXT:    ld r0, 16(r1)
-; SOFT-NEXT:    mtlr r0
-; SOFT-NEXT:    blr
-  %a = load half, ptr %p, align 2
-  %r = fptosi half %a to i64
-  ret i64 %r
-}
-define void @test_sitofp_i64(i64 %a, ptr %p) #0 {
-; P8-LABEL: test_sitofp_i64:
-; P8:       # %bb.0:
-; P8-NEXT:    mflr r0
-; P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; P8-NEXT:    stdu r1, -48(r1)
-; P8-NEXT:    mtfprd f0, r3
-; P8-NEXT:    std r0, 64(r1)
-; P8-NEXT:    mr r30, r4
-; P8-NEXT:    xscvsxdsp f1, f0
-; P8-NEXT:    bl __truncsfhf2
-; P8-NEXT:    nop
-; P8-NEXT:    sth r3, 0(r30)
-; P8-NEXT:    addi r1, r1, 48
-; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; P8-NEXT:    mtlr r0
-; P8-NEXT:    blr
-;
-; CHECK-LABEL: test_sitofp_i64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xscvsxdsp f0, f0
-; CHECK-NEXT:    xscvdphp f0, f0
-; CHECK-NEXT:    mffprwz r3, f0
-; CHECK-NEXT:    sth r3, 0(r4)
-; CHECK-NEXT:    blr
-;
-; SOFT-LABEL: test_sitofp_i64:
-; SOFT:       # %bb.0:
-; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    stdu r1, -48(r1)
-; SOFT-NEXT:    std r0, 64(r1)
-; SOFT-NEXT:    mr r30, r4
-; SOFT-NEXT:    bl __floatdisf
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    clrldi r3, r3, 32
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    clrldi r3, r3, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    sth r3, 0(r30)
-; SOFT-NEXT:    addi r1, r1, 48
-; SOFT-NEXT:    ld r0, 16(r1)
-; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    mtlr r0
-; SOFT-NEXT:    blr
-  %r = sitofp i64 %a to half
-  store half %r, ptr %p
-  ret void
-}
-define i64 @test_fptoui_i64(ptr %p) #0 {
-; P8-LABEL: test_fptoui_i64:
-; P8:       # %bb.0:
-; P8-NEXT:    mflr r0
-; P8-NEXT:    stdu r1, -32(r1)
-; P8-NEXT:    std r0, 48(r1)
-; P8-NEXT:    lhz r3, 0(r3)
-; P8-NEXT:    bl __extendhfsf2
-; P8-NEXT:    nop
-; P8-NEXT:    xscvdpuxds f0, f1
-; P8-NEXT:    mffprd r3, f0
-; P8-NEXT:    addi r1, r1, 32
-; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    mtlr r0
-; P8-NEXT:    blr
-;
-; CHECK-LABEL: test_fptoui_i64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lhz r3, 0(r3)
-; CHECK-NEXT:    mtfprwz f0, r3
-; CHECK-NEXT:    xscvhpdp f0, f0
-; CHECK-NEXT:    xscvdpuxds f0, f0
-; CHECK-NEXT:    mffprd r3, f0
-; CHECK-NEXT:    blr
-;
-; SOFT-LABEL: test_fptoui_i64:
-; SOFT:       # %bb.0:
-; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    stdu r1, -32(r1)
-; SOFT-NEXT:    std r0, 48(r1)
-; SOFT-NEXT:    lhz r3, 0(r3)
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    bl __fixunssfdi
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    addi r1, r1, 32
-; SOFT-NEXT:    ld r0, 16(r1)
-; SOFT-NEXT:    mtlr r0
-; SOFT-NEXT:    blr
-  %a = load half, ptr %p, align 2
-  %r = fptoui half %a to i64
-  ret i64 %r
-}
-define void @test_uitofp_i64(i64 %a, ptr %p) #0 {
-; P8-LABEL: test_uitofp_i64:
-; P8:       # %bb.0:
-; P8-NEXT:    mflr r0
-; P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; P8-NEXT:    stdu r1, -48(r1)
-; P8-NEXT:    mtfprd f0, r3
-; P8-NEXT:    std r0, 64(r1)
-; P8-NEXT:    mr r30, r4
-; P8-NEXT:    xscvuxdsp f1, f0
-; P8-NEXT:    bl __truncsfhf2
-; P8-NEXT:    nop
-; P8-NEXT:    sth r3, 0(r30)
-; P8-NEXT:    addi r1, r1, 48
-; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; P8-NEXT:    mtlr r0
-; P8-NEXT:    blr
-;
-; CHECK-LABEL: test_uitofp_i64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xscvuxdsp f0, f0
-; CHECK-NEXT:    xscvdphp f0, f0
-; CHECK-NEXT:    mffprwz r3, f0
-; CHECK-NEXT:    sth r3, 0(r4)
-; CHECK-NEXT:    blr
-;
-; SOFT-LABEL: test_uitofp_i64:
-; SOFT:       # %bb.0:
-; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    stdu r1, -48(r1)
-; SOFT-NEXT:    std r0, 64(r1)
-; SOFT-NEXT:    mr r30, r4
-; SOFT-NEXT:    bl __floatundisf
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    clrldi r3, r3, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    sth r3, 0(r30)
-; SOFT-NEXT:    addi r1, r1, 48
-; SOFT-NEXT:    ld r0, 16(r1)
-; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    mtlr r0
-; SOFT-NEXT:    blr
-  %r = uitofp i64 %a to half
-  store half %r, ptr %p
-  ret void
-}
-define <4 x float> @test_extend32_vec4(ptr %p) #0 {
-; P8-LABEL: test_extend32_vec4:
-; P8:       # %bb.0:
-; P8-NEXT:    mflr r0
-; P8-NEXT:    stdu r1, -112(r1)
-; P8-NEXT:    li r4, 48
-; P8-NEXT:    std r0, 128(r1)
-; P8-NEXT:    std r30, 96(r1) # 8-byte Folded Spill
-; P8-NEXT:    mr r30, r3
-; P8-NEXT:    lhz r3, 6(r3)
-; P8-NEXT:    stxvd2x vs61, r1, r4 # 16-byte Folded Spill
-; P8-NEXT:    li r4, 64
-; P8-NEXT:    stxvd2x vs62, r1, r4 # 16-byte Folded Spill
-; P8-NEXT:    li r4, 80
-; P8-NEXT:    stxvd2x vs63, r1, r4 # 16-byte Folded Spill
-; P8-NEXT:    bl __extendhfsf2
-; P8-NEXT:    nop
-; P8-NEXT:    lhz r3, 2(r30)
-; P8-NEXT:    xxlor vs63, f1, f1
-; P8-NEXT:    bl __extendhfsf2
-; P8-NEXT:    nop
-; P8-NEXT:    lhz r3, 4(r30)
-; P8-NEXT:    xxlor vs62, f1, f1
-; P8-NEXT:    bl __extendhfsf2
-; P8-NEXT:    nop
-; P8-NEXT:    lhz r3, 0(r30)
-; P8-NEXT:    xxlor vs61, f1, f1
-; P8-NEXT:    bl __extendhfsf2
-; P8-NEXT:    nop
-; P8-NEXT:    li r3, 80
-; P8-NEXT:    xxmrghd vs0, vs61, vs1
-; P8-NEXT:    xxmrghd vs1, vs63, vs62
-; P8-NEXT:    ld r30, 96(r1) # 8-byte Folded Reload
-; P8-NEXT:    lxvd2x vs63, r1, r3 # 16-byte Folded Reload
-; P8-NEXT:    li r3, 64
-; P8-NEXT:    xvcvdpsp vs34, vs0
-; P8-NEXT:    xvcvdpsp vs35, vs1
-; P8-NEXT:    lxvd2x vs62, r1, r3 # 16-byte Folded Reload
-; P8-NEXT:    li r3, 48
-; P8-NEXT:    lxvd2x vs61, r1, r3 # 16-byte Folded Reload
-; P8-NEXT:    vmrgew v2, v3, v2
-; P8-NEXT:    addi r1, r1, 112
-; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    mtlr r0
-; P8-NEXT:    blr
-;
-; CHECK-LABEL: test_extend32_vec4:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lhz r4, 6(r3)
-; CHECK-NEXT:    mtfprwz f0, r4
-; CHECK-NEXT:    xscvhpdp f0, f0
-; CHECK-NEXT:    lhz r4, 2(r3)
-; CHECK-NEXT:    mtfprwz f1, r4
-; CHECK-NEXT:    xscvhpdp f1, f1
-; CHECK-NEXT:    lhz r4, 4(r3)
-; CHECK-NEXT:    mtfprwz f2, r4
-; CHECK-NEXT:    xscvhpdp f2, f2
-; CHECK-NEXT:    lhz r3, 0(r3)
-; CHECK-NEXT:    xxmrghd vs0, vs0, vs1
-; CHECK-NEXT:    mtfprwz f3, r3
-; CHECK-NEXT:    xvcvdpsp vs35, vs0
-; CHECK-NEXT:    xscvhpdp f3, f3
-; CHECK-NEXT:    xxmrghd vs2, vs2, vs3
-; CHECK-NEXT:    xvcvdpsp vs34, vs2
-; CHECK-NEXT:    vmrgew v2, v3, v2
-; CHECK-NEXT:    blr
-;
-; SOFT-LABEL: test_extend32_vec4:
-; SOFT:       # %bb.0:
-; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    stdu r1, -80(r1)
-; SOFT-NEXT:    std r0, 96(r1)
-; SOFT-NEXT:    mr r30, r3
-; SOFT-NEXT:    lhz r3, 0(r3)
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r29, r3
-; SOFT-NEXT:    lhz r3, 2(r30)
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r28, r3
-; SOFT-NEXT:    lhz r3, 4(r30)
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r27, r3
-; SOFT-NEXT:    lhz r3, 6(r30)
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r6, r3
-; SOFT-NEXT:    mr r3, r29
-; SOFT-NEXT:    mr r4, r28
-; SOFT-NEXT:    mr r5, r27
-; SOFT-NEXT:    addi r1, r1, 80
-; SOFT-NEXT:    ld r0, 16(r1)
-; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    mtlr r0
-; SOFT-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    blr
-  %a = load <4 x half>, ptr %p, align 8
-  %b = fpext <4 x half> %a to <4 x float>
-  ret <4 x float> %b
-}
-define <4 x double> @test_extend64_vec4(ptr %p) #0 {
-; P8-LABEL: test_extend64_vec4:
-; P8:       # %bb.0:
-; P8-NEXT:    mflr r0
-; P8-NEXT:    stdu r1, -112(r1)
-; P8-NEXT:    li r4, 48
-; P8-NEXT:    std r0, 128(r1)
-; P8-NEXT:    std r30, 96(r1) # 8-byte Folded Spill
-; P8-NEXT:    mr r30, r3
-; P8-NEXT:    lhz r3, 6(r3)
-; P8-NEXT:    stxvd2x vs61, r1, r4 # 16-byte Folded Spill
-; P8-NEXT:    li r4, 64
-; P8-NEXT:    stxvd2x vs62, r1, r4 # 16-byte Folded Spill
-; P8-NEXT:    li r4, 80
-; P8-NEXT:    stxvd2x vs63, r1, r4 # 16-byte Folded Spill
-; P8-NEXT:    bl __extendhfsf2
-; P8-NEXT:    nop
-; P8-NEXT:    lhz r3, 4(r30)
-; P8-NEXT:    xxlor vs63, f1, f1
-; P8-NEXT:    bl __extendhfsf2
-; P8-NEXT:    nop
-; P8-NEXT:    lhz r3, 2(r30)
-; P8-NEXT:    xxlor vs62, f1, f1
-; P8-NEXT:    bl __extendhfsf2
-; P8-NEXT:    nop
-; P8-NEXT:    lhz r3, 0(r30)
-; P8-NEXT:    xxlor vs61, f1, f1
-; P8-NEXT:    bl __extendhfsf2
-; P8-NEXT:    nop
-; P8-NEXT:    li r3, 80
-; P8-NEXT:    xxmrghd vs35, vs63, vs62
-; P8-NEXT:    xxmrghd vs34, vs61, vs1
-; P8-NEXT:    ld r30, 96(r1) # 8-byte Folded Reload
-; P8-NEXT:    lxvd2x vs63, r1, r3 # 16-byte Folded Reload
-; P8-NEXT:    li r3, 64
-; P8-NEXT:    lxvd2x vs62, r1, r3 # 16-byte Folded Reload
-; P8-NEXT:    li r3, 48
-; P8-NEXT:    lxvd2x vs61, r1, r3 # 16-byte Folded Reload
-; P8-NEXT:    addi r1, r1, 112
-; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    mtlr r0
-; P8-NEXT:    blr
-;
-; CHECK-LABEL: test_extend64_vec4:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lhz r4, 6(r3)
-; CHECK-NEXT:    lhz r5, 4(r3)
-; CHECK-NEXT:    lhz r6, 2(r3)
-; CHECK-NEXT:    lhz r3, 0(r3)
-; CHECK-NEXT:    mtfprwz f0, r3
-; CHECK-NEXT:    mtfprwz f1, r6
-; CHECK-NEXT:    xscvhpdp f0, f0
-; CHECK-NEXT:    xscvhpdp f1, f1
-; CHECK-NEXT:    xxmrghd vs34, vs1, vs0
-; CHECK-NEXT:    mtfprwz f0, r5
-; CHECK-NEXT:    mtfprwz f1, r4
-; CHECK-NEXT:    xscvhpdp f0, f0
-; CHECK-NEXT:    xscvhpdp f1, f1
-; CHECK-NEXT:    xxmrghd vs35, vs1, vs0
-; CHECK-NEXT:    blr
-;
-; SOFT-LABEL: test_extend64_vec4:
-; SOFT:       # %bb.0:
-; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    stdu r1, -80(r1)
-; SOFT-NEXT:    std r0, 96(r1)
-; SOFT-NEXT:    mr r30, r3
-; SOFT-NEXT:    lhz r3, 0(r3)
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    bl __extendsfdf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r29, r3
-; SOFT-NEXT:    lhz r3, 2(r30)
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    bl __extendsfdf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r28, r3
-; SOFT-NEXT:    lhz r3, 4(r30)
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    bl __extendsfdf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r27, r3
-; SOFT-NEXT:    lhz r3, 6(r30)
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    bl __extendsfdf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r6, r3
-; SOFT-NEXT:    mr r3, r29
-; SOFT-NEXT:    mr r4, r28
-; SOFT-NEXT:    mr r5, r27
-; SOFT-NEXT:    addi r1, r1, 80
-; SOFT-NEXT:    ld r0, 16(r1)
-; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    mtlr r0
-; SOFT-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    blr
-  %a = load <4 x half>, ptr %p, align 8
-  %b = fpext <4 x half> %a to <4 x double>
-  ret <4 x double> %b
-}
-define void @test_trunc32_vec4(<4 x float> %a, ptr %p) #0 {
-; P8-LABEL: test_trunc32_vec4:
-; P8:       # %bb.0:
-; P8-NEXT:    mflr r0
-; P8-NEXT:    stdu r1, -112(r1)
-; P8-NEXT:    xxsldwi vs0, vs34, vs34, 3
-; P8-NEXT:    li r3, 48
-; P8-NEXT:    std r0, 128(r1)
-; P8-NEXT:    std r27, 72(r1) # 8-byte Folded Spill
-; P8-NEXT:    std r28, 80(r1) # 8-byte Folded Spill
-; P8-NEXT:    std r29, 88(r1) # 8-byte Folded Spill
-; P8-NEXT:    xscvspdpn f1, vs0
-; P8-NEXT:    std r30, 96(r1) # 8-byte Folded Spill
-; P8-NEXT:    stxvd2x vs63, r1, r3 # 16-byte Folded Spill
-; P8-NEXT:    mr r30, r5
-; P8-NEXT:    vmr v31, v2
-; P8-NEXT:    bl __truncsfhf2
-; P8-NEXT:    nop
-; P8-NEXT:    xxswapd vs0, vs63
-; P8-NEXT:    mr r29, r3
-; P8-NEXT:    xscvspdpn f1, vs0
-; P8-NEXT:    bl __truncsfhf2
-; P8-NEXT:    nop
-; P8-NEXT:    xxsldwi vs0, vs63, vs63, 1
-; P8-NEXT:    mr r28, r3
-; P8-NEXT:    xscvspdpn f1, vs0
-; P8-NEXT:    bl __truncsfhf2
-; P8-NEXT:    nop
-; P8-NEXT:    xscvspdpn f1, vs63
-; P8-NEXT:    mr r27, r3
-; P8-NEXT:    bl __truncsfhf2
-; P8-NEXT:    nop
-; P8-NEXT:    sth r3, 6(r30)
-; P8-NEXT:    li r3, 48
-; P8-NEXT:    sth r27, 4(r30)
-; P8-NEXT:    ld r27, 72(r1) # 8-byte Folded Reload
-; P8-NEXT:    sth r28, 2(r30)
-; P8-NEXT:    sth r29, 0(r30)
-; P8-NEXT:    ld r30, 96(r1) # 8-byte Folded Reload
-; P8-NEXT:    ld r29, 88(r1) # 8-byte Folded Reload
-; P8-NEXT:    lxvd2x vs63, r1, r3 # 16-byte Folded Reload
-; P8-NEXT:    ld r28, 80(r1) # 8-byte Folded Reload
-; P8-NEXT:    addi r1, r1, 112
-; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    mtlr r0
-; P8-NEXT:    blr
-;
-; CHECK-LABEL: test_trunc32_vec4:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    xxsldwi vs0, vs34, vs34, 3
-; CHECK-NEXT:    xxsldwi vs1, vs34, vs34, 1
-; CHECK-NEXT:    xscvspdpn f0, vs0
-; CHECK-NEXT:    xscvspdpn f1, vs1
-; CHECK-NEXT:    xscvdphp f0, f0
-; CHECK-NEXT:    mffprwz r3, f0
-; CHECK-NEXT:    xxswapd vs0, vs34
-; CHECK-NEXT:    xscvspdpn f0, vs0
-; CHECK-NEXT:    xscvdphp f0, f0
-; CHECK-NEXT:    xscvdphp f1, f1
-; CHECK-NEXT:    mffprwz r4, f1
-; CHECK-NEXT:    xscvspdpn f1, vs34
-; CHECK-NEXT:    xscvdphp f1, f1
-; CHECK-NEXT:    sth r4, 4(r5)
-; CHECK-NEXT:    mffprwz r4, f0
-; CHECK-NEXT:    sth r3, 0(r5)
-; CHECK-NEXT:    sth r4, 2(r5)
-; CHECK-NEXT:    mffprwz r6, f1
-; CHECK-NEXT:    sth r6, 6(r5)
-; CHECK-NEXT:    blr
-;
-; SOFT-LABEL: test_trunc32_vec4:
-; SOFT:       # %bb.0:
-; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    stdu r1, -80(r1)
-; SOFT-NEXT:    mr r27, r3
-; SOFT-NEXT:    clrldi r3, r6, 32
-; SOFT-NEXT:    std r0, 96(r1)
-; SOFT-NEXT:    mr r30, r7
-; SOFT-NEXT:    mr r29, r5
-; SOFT-NEXT:    mr r28, r4
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r26, r3
-; SOFT-NEXT:    clrldi r3, r29, 32
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r29, r3
-; SOFT-NEXT:    clrldi r3, r28, 32
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r28, r3
-; SOFT-NEXT:    clrldi r3, r27, 32
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    clrldi r3, r3, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r27, r3
-; SOFT-NEXT:    clrldi r3, r28, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r28, r3
-; SOFT-NEXT:    clrldi r3, r29, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r29, r3
-; SOFT-NEXT:    clrldi r3, r26, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    sth r3, 6(r30)
-; SOFT-NEXT:    mr r3, r29
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    sth r3, 4(r30)
-; SOFT-NEXT:    mr r3, r28
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    sth r3, 2(r30)
-; SOFT-NEXT:    mr r3, r27
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    sth r3, 0(r30)
-; SOFT-NEXT:    addi r1, r1, 80
-; SOFT-NEXT:    ld r0, 16(r1)
-; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    mtlr r0
-; SOFT-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    blr
-  %v = fptrunc <4 x float> %a to <4 x half>
-  store <4 x half> %v, ptr %p
-  ret void
-}
-define void @test_trunc64_vec4(<4 x double> %a, ptr %p) #0 {
-; P8-LABEL: test_trunc64_vec4:
-; P8:       # %bb.0:
-; P8-NEXT:    mflr r0
-; P8-NEXT:    stdu r1, -128(r1)
-; P8-NEXT:    li r3, 48
-; P8-NEXT:    std r0, 144(r1)
-; P8-NEXT:    xxswapd vs1, vs34
-; P8-NEXT:    std r27, 88(r1) # 8-byte Folded Spill
-; P8-NEXT:    std r28, 96(r1) # 8-byte Folded Spill
-; P8-NEXT:    std r29, 104(r1) # 8-byte Folded Spill
-; P8-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
-; P8-NEXT:    mr r30, r7
-; P8-NEXT:    stxvd2x vs62, r1, r3 # 16-byte Folded Spill
-; P8-NEXT:    li r3, 64
-; P8-NEXT:    vmr v30, v2
-; P8-NEXT:    stxvd2x vs63, r1, r3 # 16-byte Folded Spill
-; P8-NEXT:    vmr v31, v3
-; P8-NEXT:    bl __truncdfhf2
-; P8-NEXT:    nop
-; P8-NEXT:    xxswapd vs1, vs63
-; P8-NEXT:    mr r29, r3
-; P8-NEXT:    bl __truncdfhf2
-; P8-NEXT:    nop
-; P8-NEXT:    xxlor f1, vs62, vs62
-; P8-NEXT:    mr r28, r3
-; P8-NEXT:    bl __truncdfhf2
-; P8-NEXT:    nop
-; P8-NEXT:    xxlor f1, vs63, vs63
-; P8-NEXT:    mr r27, r3
-; P8-NEXT:    bl __truncdfhf2
-; P8-NEXT:    nop
-; P8-NEXT:    sth r3, 6(r30)
-; P8-NEXT:    li r3, 64
-; P8-NEXT:    sth r27, 2(r30)
-; P8-NEXT:    ld r27, 88(r1) # 8-byte Folded Reload
-; P8-NEXT:    sth r28, 4(r30)
-; P8-NEXT:    sth r29, 0(r30)
-; P8-NEXT:    ld r30, 112(r1) # 8-byte Folded Reload
-; P8-NEXT:    ld r29, 104(r1) # 8-byte Folded Reload
-; P8-NEXT:    lxvd2x vs63, r1, r3 # 16-byte Folded Reload
-; P8-NEXT:    li r3, 48
-; P8-NEXT:    ld r28, 96(r1) # 8-byte Folded Reload
-; P8-NEXT:    lxvd2x vs62, r1, r3 # 16-byte Folded Reload
-; P8-NEXT:    addi r1, r1, 128
-; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    mtlr r0
-; P8-NEXT:    blr
-;
-; CHECK-LABEL: test_trunc64_vec4:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    xxswapd vs0, vs34
-; CHECK-NEXT:    xscvdphp f0, f0
-; CHECK-NEXT:    mffprwz r3, f0
-; CHECK-NEXT:    xxswapd vs0, vs35
-; CHECK-NEXT:    xscvdphp f0, f0
-; CHECK-NEXT:    xscvdphp f1, vs34
-; CHECK-NEXT:    mffprwz r4, f1
-; CHECK-NEXT:    xscvdphp f1, vs35
-; CHECK-NEXT:    sth r3, 0(r7)
-; CHECK-NEXT:    sth r4, 2(r7)
-; CHECK-NEXT:    mffprwz r4, f0
-; CHECK-NEXT:    sth r4, 4(r7)
-; CHECK-NEXT:    mffprwz r5, f1
-; CHECK-NEXT:    sth r5, 6(r7)
-; CHECK-NEXT:    blr
-;
-; SOFT-LABEL: test_trunc64_vec4:
-; SOFT:       # %bb.0:
-; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    stdu r1, -80(r1)
-; SOFT-NEXT:    mr r27, r3
-; SOFT-NEXT:    mr r3, r6
-; SOFT-NEXT:    std r0, 96(r1)
-; SOFT-NEXT:    mr r30, r7
-; SOFT-NEXT:    mr r29, r5
-; SOFT-NEXT:    mr r28, r4
-; SOFT-NEXT:    bl __truncdfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r26, r3
-; SOFT-NEXT:    mr r3, r29
-; SOFT-NEXT:    bl __truncdfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r29, r3
-; SOFT-NEXT:    mr r3, r28
-; SOFT-NEXT:    bl __truncdfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r28, r3
-; SOFT-NEXT:    mr r3, r27
-; SOFT-NEXT:    bl __truncdfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    clrldi r3, r3, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r27, r3
-; SOFT-NEXT:    clrldi r3, r28, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r28, r3
-; SOFT-NEXT:    clrldi r3, r29, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r29, r3
-; SOFT-NEXT:    clrldi r3, r26, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    sth r3, 6(r30)
-; SOFT-NEXT:    mr r3, r29
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    sth r3, 4(r30)
-; SOFT-NEXT:    mr r3, r28
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    sth r3, 2(r30)
-; SOFT-NEXT:    mr r3, r27
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    sth r3, 0(r30)
-; SOFT-NEXT:    addi r1, r1, 80
-; SOFT-NEXT:    ld r0, 16(r1)
-; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    mtlr r0
-; SOFT-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    blr
-  %v = fptrunc <4 x double> %a to <4 x half>
-  store <4 x half> %v, ptr %p
-  ret void
-}
-define float @test_sitofp_fadd_i32(i32 %a, ptr %b) #0 {
-; P8-LABEL: test_sitofp_fadd_i32:
-; P8:       # %bb.0:
-; P8-NEXT:    mflr r0
-; P8-NEXT:    std r30, -24(r1) # 8-byte Folded Spill
-; P8-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; P8-NEXT:    stdu r1, -64(r1)
-; P8-NEXT:    std r0, 80(r1)
-; P8-NEXT:    mr r30, r3
-; P8-NEXT:    lhz r3, 0(r4)
-; P8-NEXT:    bl __extendhfsf2
-; P8-NEXT:    nop
-; P8-NEXT:    mtfprwa f0, r30
-; P8-NEXT:    fmr f31, f1
-; P8-NEXT:    xscvsxdsp f1, f0
-; P8-NEXT:    bl __truncsfhf2
-; P8-NEXT:    nop
-; P8-NEXT:    clrldi r3, r3, 48
-; P8-NEXT:    bl __extendhfsf2
-; P8-NEXT:    nop
-; P8-NEXT:    xsaddsp f1, f31, f1
-; P8-NEXT:    addi r1, r1, 64
-; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; P8-NEXT:    ld r30, -24(r1) # 8-byte Folded Reload
-; P8-NEXT:    mtlr r0
-; P8-NEXT:    blr
-;
-; CHECK-LABEL: test_sitofp_fadd_i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    mtfprwa f1, r3
-; CHECK-NEXT:    lhz r4, 0(r4)
-; CHECK-NEXT:    xscvsxdsp f1, f1
-; CHECK-NEXT:    mtfprwz f0, r4
-; CHECK-NEXT:    xscvhpdp f0, f0
-; CHECK-NEXT:    xscvdphp f1, f1
-; CHECK-NEXT:    mffprwz r3, f1
-; CHECK-NEXT:    clrlwi r3, r3, 16
-; CHECK-NEXT:    mtfprwz f1, r3
-; CHECK-NEXT:    xscvhpdp f1, f1
-; CHECK-NEXT:    xsaddsp f1, f0, f1
-; CHECK-NEXT:    blr
-;
-; SOFT-LABEL: test_sitofp_fadd_i32:
-; SOFT:       # %bb.0:
-; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    stdu r1, -64(r1)
-; SOFT-NEXT:    std r0, 80(r1)
-; SOFT-NEXT:    mr r30, r3
-; SOFT-NEXT:    lhz r3, 0(r4)
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r29, r3
-; SOFT-NEXT:    extsw r3, r30
-; SOFT-NEXT:    bl __floatsisf
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    clrldi r3, r3, 32
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    clrldi r3, r3, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r4, r3
-; SOFT-NEXT:    mr r3, r29
-; SOFT-NEXT:    bl __addsf3
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    addi r1, r1, 64
-; SOFT-NEXT:    ld r0, 16(r1)
-; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    mtlr r0
-; SOFT-NEXT:    blr
-  %tmp0 = load half, ptr %b
-  %tmp1 = sitofp i32 %a to half
-  %tmp2 = fadd half %tmp0, %tmp1
-  %tmp3 = fpext half %tmp2 to float
-  ret float %tmp3
-}
-define half @PR40273(half) #0 {
-; P8-LABEL: PR40273:
-; P8:       # %bb.0:
-; P8-NEXT:    mflr r0
-; P8-NEXT:    stdu r1, -32(r1)
-; P8-NEXT:    std r0, 48(r1)
-; P8-NEXT:    bl __truncsfhf2
-; P8-NEXT:    nop
-; P8-NEXT:    clrldi r3, r3, 48
-; P8-NEXT:    bl __extendhfsf2
-; P8-NEXT:    nop
-; P8-NEXT:    fmr f0, f1
-; P8-NEXT:    xxlxor f1, f1, f1
-; P8-NEXT:    fcmpu cr0, f0, f1
-; P8-NEXT:    beq cr0, .LBB20_2
-; P8-NEXT:  # %bb.1:
-; P8-NEXT:    vspltisw v2, 1
-; P8-NEXT:    xvcvsxwdp vs1, vs34
-; P8-NEXT:  .LBB20_2:
-; P8-NEXT:    addi r1, r1, 32
-; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    mtlr r0
-; P8-NEXT:    blr
-;
-; CHECK-LABEL: PR40273:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    xscvdphp f0, f1
-; CHECK-NEXT:    xxlxor f1, f1, f1
-; CHECK-NEXT:    mffprwz r3, f0
-; CHECK-NEXT:    clrlwi r3, r3, 16
-; CHECK-NEXT:    mtfprwz f0, r3
-; CHECK-NEXT:    xscvhpdp f0, f0
-; CHECK-NEXT:    fcmpu cr0, f0, f1
-; CHECK-NEXT:    beqlr cr0
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    vspltisw v2, 1
-; CHECK-NEXT:    xvcvsxwdp vs1, vs34
-; CHECK-NEXT:    blr
-;
-; SOFT-LABEL: PR40273:
-; SOFT:       # %bb.0:
-; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    stdu r1, -32(r1)
-; SOFT-NEXT:    clrldi r3, r3, 48
-; SOFT-NEXT:    std r0, 48(r1)
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    li r4, 0
-; SOFT-NEXT:    bl __nesf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    cmplwi r3, 0
-; SOFT-NEXT:    lis r3, 16256
-; SOFT-NEXT:    iseleq r3, 0, r3
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    addi r1, r1, 32
-; SOFT-NEXT:    ld r0, 16(r1)
-; SOFT-NEXT:    mtlr r0
-; SOFT-NEXT:    blr
-  %2 = fcmp une half %0, 0xH0000
-  %3 = uitofp i1 %2 to half
-  ret half %3
-}
-attributes #0 = { nounwind }

>From 77d56282a573bcd028c693b809c4e65840c65640 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Wed, 6 Aug 2025 07:21:41 +0000
Subject: [PATCH 3/3] [PowerPC] Change `half` to use soft promotion rather than
 `PromoteFloat`

On PowerPC targets, `half` uses the default legalization of promoting to
`f32`. However, this has some fundamental issues related to the
inability to round-trip values. Resolve this by switching to the soft
legalization, which passes `f16` as an `i16`.

The PowerPC ABI Specification does not define a `_Float16` type, so the
calling convention changes are acceptable.

Fixes the PowerPC portion of [1]. A similar change was done for MIPS in
f0231b6164fd ("[MIPS] Use softPromoteHalf legalization for fp16 rather
than PromoteFloat (#110199)") and for LoongArch in 13280d99aec5
("[loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on
loongarch (#107791)").

[1]: https://github.com/llvm/llvm-project/issues/97975
---
 llvm/docs/ReleaseNotes.md                  |    2 +
 llvm/lib/Target/PowerPC/PPCISelLowering.h  |    2 +
 llvm/test/CodeGen/Generic/half.ll          |    6 +-
 llvm/test/CodeGen/PowerPC/atomics.ll       |   62 +-
 llvm/test/CodeGen/PowerPC/f128-conv.ll     |   13 +-
 llvm/test/CodeGen/PowerPC/half.ll          |  783 ++--
 llvm/test/CodeGen/PowerPC/ldexp.ll         |    6 +-
 llvm/test/CodeGen/PowerPC/llvm.frexp.ll    |  143 +-
 llvm/test/CodeGen/PowerPC/llvm.modf.ll     |   83 +-
 llvm/test/CodeGen/PowerPC/pr48519.ll       |  105 +-
 llvm/test/CodeGen/PowerPC/pr49092.ll       |   12 -
 llvm/test/CodeGen/PowerPC/vector-llrint.ll | 3854 ++++++++------------
 llvm/test/CodeGen/PowerPC/vector-lrint.ll  | 3852 ++++++++-----------
 13 files changed, 3301 insertions(+), 5622 deletions(-)

diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index 16174553ba7f2..95b20dd822abf 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -113,6 +113,8 @@ Changes to the MIPS Backend
 Changes to the PowerPC Backend
 ------------------------------
 
+* `half` now uses a soft float ABI, which works correctly in more cases.
+
 Changes to the RISC-V Backend
 -----------------------------
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 669430550f4e6..f50c6adc2c883 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -790,6 +790,8 @@ namespace llvm {
 
     bool useSoftFloat() const override;
 
+    bool softPromoteHalfType() const override { return true; }
+
     bool hasSPE() const;
 
     MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
diff --git a/llvm/test/CodeGen/Generic/half.ll b/llvm/test/CodeGen/Generic/half.ll
index ef7bfe2f2d9ce..f19d6da4eeb7b 100644
--- a/llvm/test/CodeGen/Generic/half.ll
+++ b/llvm/test/CodeGen/Generic/half.ll
@@ -29,9 +29,9 @@
 ; RUN: %if mips-registered-target        %{ llc %s -o - -mtriple=mipsel-unknown-linux-gnu        | FileCheck %s --check-prefixes=ALL,CHECK %}
 ; RUN: %if msp430-registered-target      %{ llc %s -o - -mtriple=msp430-none-elf                 | FileCheck %s --check-prefixes=ALL,CHECK %}
 ; RUN: %if nvptx-registered-target       %{ llc %s -o - -mtriple=nvptx64-nvidia-cuda             | FileCheck %s --check-prefixes=NOCRASH   %}
-; RUN: %if powerpc-registered-target     %{ llc %s -o - -mtriple=powerpc-unknown-linux-gnu       | FileCheck %s --check-prefixes=ALL,BAD   %}
-; RUN: %if powerpc-registered-target     %{ llc %s -o - -mtriple=powerpc64-unknown-linux-gnu     | FileCheck %s --check-prefixes=ALL,BAD   %}
-; RUN: %if powerpc-registered-target     %{ llc %s -o - -mtriple=powerpc64le-unknown-linux-gnu   | FileCheck %s --check-prefixes=ALL,BAD   %}
+; RUN: %if powerpc-registered-target     %{ llc %s -o - -mtriple=powerpc-unknown-linux-gnu       | FileCheck %s --check-prefixes=ALL,CHECK %}
+; RUN: %if powerpc-registered-target     %{ llc %s -o - -mtriple=powerpc64-unknown-linux-gnu     | FileCheck %s --check-prefixes=ALL,CHECK %}
+; RUN: %if powerpc-registered-target     %{ llc %s -o - -mtriple=powerpc64le-unknown-linux-gnu   | FileCheck %s --check-prefixes=ALL,CHECK %}
 ; RUN: %if riscv-registered-target       %{ llc %s -o - -mtriple=riscv32-unknown-linux-gnu       | FileCheck %s --check-prefixes=ALL,CHECK %}
 ; RUN: %if riscv-registered-target       %{ llc %s -o - -mtriple=riscv64-unknown-linux-gnu       | FileCheck %s --check-prefixes=ALL,CHECK %}
 ; RUN: %if sparc-registered-target       %{ llc %s -o - -mtriple=sparc-unknown-linux-gnu         | FileCheck %s --check-prefixes=ALL,CHECK %}
diff --git a/llvm/test/CodeGen/PowerPC/atomics.ll b/llvm/test/CodeGen/PowerPC/atomics.ll
index 183c8e1323f2e..c2c4a6ded6c86 100644
--- a/llvm/test/CodeGen/PowerPC/atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics.ll
@@ -469,39 +469,20 @@ define i64 @and_i64_release(ptr %mem, i64 %operand) {
 define half @load_atomic_f16__seq_cst(ptr %ptr) {
 ; PPC32-LABEL: load_atomic_f16__seq_cst:
 ; PPC32:       # %bb.0:
-; PPC32-NEXT:    mflr r0
-; PPC32-NEXT:    stwu r1, -16(r1)
-; PPC32-NEXT:    stw r0, 20(r1)
-; PPC32-NEXT:    .cfi_def_cfa_offset 16
-; PPC32-NEXT:    .cfi_offset lr, 4
 ; PPC32-NEXT:    sync
 ; PPC32-NEXT:    lhz r3, 0(r3)
 ; PPC32-NEXT:    cmpw cr7, r3, r3
 ; PPC32-NEXT:    bne- cr7, .+4
 ; PPC32-NEXT:    isync
-; PPC32-NEXT:    bl __extendhfsf2
-; PPC32-NEXT:    lwz r0, 20(r1)
-; PPC32-NEXT:    addi r1, r1, 16
-; PPC32-NEXT:    mtlr r0
 ; PPC32-NEXT:    blr
 ;
 ; PPC64-LABEL: load_atomic_f16__seq_cst:
 ; PPC64:       # %bb.0:
-; PPC64-NEXT:    mflr r0
-; PPC64-NEXT:    stdu r1, -112(r1)
-; PPC64-NEXT:    std r0, 128(r1)
-; PPC64-NEXT:    .cfi_def_cfa_offset 112
-; PPC64-NEXT:    .cfi_offset lr, 16
 ; PPC64-NEXT:    sync
 ; PPC64-NEXT:    lhz r3, 0(r3)
 ; PPC64-NEXT:    cmpd cr7, r3, r3
 ; PPC64-NEXT:    bne- cr7, .+4
 ; PPC64-NEXT:    isync
-; PPC64-NEXT:    bl __extendhfsf2
-; PPC64-NEXT:    nop
-; PPC64-NEXT:    addi r1, r1, 112
-; PPC64-NEXT:    ld r0, 16(r1)
-; PPC64-NEXT:    mtlr r0
 ; PPC64-NEXT:    blr
   %val = load atomic half, ptr %ptr seq_cst, align 2
   ret half %val
@@ -575,44 +556,11 @@ define double @load_atomic_f64__seq_cst(ptr %ptr) {
 }
 
 define void @store_atomic_f16__seq_cst(ptr %ptr, half %val1) {
-; PPC32-LABEL: store_atomic_f16__seq_cst:
-; PPC32:       # %bb.0:
-; PPC32-NEXT:    mflr r0
-; PPC32-NEXT:    stwu r1, -16(r1)
-; PPC32-NEXT:    stw r0, 20(r1)
-; PPC32-NEXT:    .cfi_def_cfa_offset 16
-; PPC32-NEXT:    .cfi_offset lr, 4
-; PPC32-NEXT:    .cfi_offset r30, -8
-; PPC32-NEXT:    stw r30, 8(r1) # 4-byte Folded Spill
-; PPC32-NEXT:    mr r30, r3
-; PPC32-NEXT:    bl __truncsfhf2
-; PPC32-NEXT:    sync
-; PPC32-NEXT:    sth r3, 0(r30)
-; PPC32-NEXT:    lwz r30, 8(r1) # 4-byte Folded Reload
-; PPC32-NEXT:    lwz r0, 20(r1)
-; PPC32-NEXT:    addi r1, r1, 16
-; PPC32-NEXT:    mtlr r0
-; PPC32-NEXT:    blr
-;
-; PPC64-LABEL: store_atomic_f16__seq_cst:
-; PPC64:       # %bb.0:
-; PPC64-NEXT:    mflr r0
-; PPC64-NEXT:    stdu r1, -128(r1)
-; PPC64-NEXT:    std r0, 144(r1)
-; PPC64-NEXT:    .cfi_def_cfa_offset 128
-; PPC64-NEXT:    .cfi_offset lr, 16
-; PPC64-NEXT:    .cfi_offset r30, -16
-; PPC64-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
-; PPC64-NEXT:    mr r30, r3
-; PPC64-NEXT:    bl __truncsfhf2
-; PPC64-NEXT:    nop
-; PPC64-NEXT:    sync
-; PPC64-NEXT:    sth r3, 0(r30)
-; PPC64-NEXT:    ld r30, 112(r1) # 8-byte Folded Reload
-; PPC64-NEXT:    addi r1, r1, 128
-; PPC64-NEXT:    ld r0, 16(r1)
-; PPC64-NEXT:    mtlr r0
-; PPC64-NEXT:    blr
+; CHECK-LABEL: store_atomic_f16__seq_cst:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    sync
+; CHECK-NEXT:    sth r4, 0(r3)
+; CHECK-NEXT:    blr
   store atomic half %val1, ptr %ptr seq_cst, align 2
   ret void
 }
diff --git a/llvm/test/CodeGen/PowerPC/f128-conv.ll b/llvm/test/CodeGen/PowerPC/f128-conv.ll
index f8b2861156db4..080843217e8c9 100644
--- a/llvm/test/CodeGen/PowerPC/f128-conv.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-conv.ll
@@ -1349,9 +1349,6 @@ define half @trunc(fp128 %a) nounwind {
 ; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    bl __trunckfhf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrlwi r3, r3, 16
-; CHECK-NEXT:    mtfprwz f0, r3
-; CHECK-NEXT:    xscvhpdp f1, f0
 ; CHECK-NEXT:    addi r1, r1, 32
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -1364,9 +1361,6 @@ define half @trunc(fp128 %a) nounwind {
 ; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    bl __trunckfhf2
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    clrldi r3, r3, 48
-; CHECK-P8-NEXT:    bl __extendhfsf2
-; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    mtlr r0
@@ -1379,7 +1373,9 @@ entry:
 define fp128 @ext(half %a) nounwind {
 ; CHECK-LABEL: ext:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xscpsgndp v2, f1, f1
+; CHECK-NEXT:    clrlwi r3, r3, 16
+; CHECK-NEXT:    mtfprwz f0, r3
+; CHECK-NEXT:    xscvhpdp v2, f0
 ; CHECK-NEXT:    xscvdpqp v2, v2
 ; CHECK-NEXT:    blr
 ;
@@ -1387,7 +1383,10 @@ define fp128 @ext(half %a) nounwind {
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    clrldi r3, r3, 48
 ; CHECK-P8-NEXT:    std r0, 48(r1)
+; CHECK-P8-NEXT:    bl __extendhfsf2
+; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __extendsfkf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
diff --git a/llvm/test/CodeGen/PowerPC/half.ll b/llvm/test/CodeGen/PowerPC/half.ll
index fe0dccf63af80..3cd4f8b5ff9b8 100644
--- a/llvm/test/CodeGen/PowerPC/half.ll
+++ b/llvm/test/CodeGen/PowerPC/half.ll
@@ -20,39 +20,17 @@
 define void @store(half %x, ptr %p) nounwind {
 ; PPC32-LABEL: store:
 ; PPC32:       # %bb.0:
-; PPC32-NEXT:    mflr r0
-; PPC32-NEXT:    stwu r1, -16(r1)
-; PPC32-NEXT:    stw r0, 20(r1)
-; PPC32-NEXT:    stw r30, 8(r1) # 4-byte Folded Spill
-; PPC32-NEXT:    mr r30, r3
-; PPC32-NEXT:    bl __truncsfhf2
-; PPC32-NEXT:    sth r3, 0(r30)
-; PPC32-NEXT:    lwz r30, 8(r1) # 4-byte Folded Reload
-; PPC32-NEXT:    lwz r0, 20(r1)
-; PPC32-NEXT:    addi r1, r1, 16
-; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    sth r3, 0(r4)
 ; PPC32-NEXT:    blr
 ;
 ; P8-LABEL: store:
 ; P8:       # %bb.0:
-; P8-NEXT:    mflr r0
-; P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; P8-NEXT:    stdu r1, -48(r1)
-; P8-NEXT:    std r0, 64(r1)
-; P8-NEXT:    mr r30, r4
-; P8-NEXT:    bl __truncsfhf2
-; P8-NEXT:    nop
-; P8-NEXT:    sth r3, 0(r30)
-; P8-NEXT:    addi r1, r1, 48
-; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; P8-NEXT:    mtlr r0
+; P8-NEXT:    sth r3, 0(r4)
 ; P8-NEXT:    blr
 ;
 ; CHECK-LABEL: store:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xscvdphp f0, f1
-; CHECK-NEXT:    stxsihx f0, 0, r4
+; CHECK-NEXT:    sth r3, 0(r4)
 ; CHECK-NEXT:    blr
 ;
 ; SOFT-LABEL: store:
@@ -62,18 +40,7 @@ define void @store(half %x, ptr %p) nounwind {
 ;
 ; BE-LABEL: store:
 ; BE:       # %bb.0:
-; BE-NEXT:    mflr r0
-; BE-NEXT:    stdu r1, -128(r1)
-; BE-NEXT:    std r0, 144(r1)
-; BE-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
-; BE-NEXT:    mr r30, r4
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    sth r3, 0(r30)
-; BE-NEXT:    ld r30, 112(r1) # 8-byte Folded Reload
-; BE-NEXT:    addi r1, r1, 128
-; BE-NEXT:    ld r0, 16(r1)
-; BE-NEXT:    mtlr r0
+; BE-NEXT:    sth r3, 0(r4)
 ; BE-NEXT:    blr
   store half %x, ptr %p
   ret void
@@ -82,33 +49,17 @@ define void @store(half %x, ptr %p) nounwind {
 define half @return(ptr %p) nounwind {
 ; PPC32-LABEL: return:
 ; PPC32:       # %bb.0:
-; PPC32-NEXT:    mflr r0
-; PPC32-NEXT:    stwu r1, -16(r1)
-; PPC32-NEXT:    stw r0, 20(r1)
 ; PPC32-NEXT:    lhz r3, 0(r3)
-; PPC32-NEXT:    bl __extendhfsf2
-; PPC32-NEXT:    lwz r0, 20(r1)
-; PPC32-NEXT:    addi r1, r1, 16
-; PPC32-NEXT:    mtlr r0
 ; PPC32-NEXT:    blr
 ;
 ; P8-LABEL: return:
 ; P8:       # %bb.0:
-; P8-NEXT:    mflr r0
-; P8-NEXT:    stdu r1, -32(r1)
-; P8-NEXT:    std r0, 48(r1)
 ; P8-NEXT:    lhz r3, 0(r3)
-; P8-NEXT:    bl __extendhfsf2
-; P8-NEXT:    nop
-; P8-NEXT:    addi r1, r1, 32
-; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    mtlr r0
 ; P8-NEXT:    blr
 ;
 ; CHECK-LABEL: return:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lxsihzx f0, 0, r3
-; CHECK-NEXT:    xscvhpdp f1, f0
+; CHECK-NEXT:    lhz r3, 0(r3)
 ; CHECK-NEXT:    blr
 ;
 ; SOFT-LABEL: return:
@@ -118,15 +69,7 @@ define half @return(ptr %p) nounwind {
 ;
 ; BE-LABEL: return:
 ; BE:       # %bb.0:
-; BE-NEXT:    mflr r0
-; BE-NEXT:    stdu r1, -112(r1)
-; BE-NEXT:    std r0, 128(r1)
 ; BE-NEXT:    lhz r3, 0(r3)
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    addi r1, r1, 112
-; BE-NEXT:    ld r0, 16(r1)
-; BE-NEXT:    mtlr r0
 ; BE-NEXT:    blr
   %r = load half, ptr %p
   ret half %r
@@ -316,11 +259,6 @@ define dso_local void @stored(ptr nocapture %a, double %b) local_unnamed_addr no
 ; SOFT-NEXT:    std r0, 64(r1)
 ; SOFT-NEXT:    bl __truncdfhf2
 ; SOFT-NEXT:    nop
-; SOFT-NEXT:    clrldi r3, r3, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
 ; SOFT-NEXT:    sth r3, 0(r30)
 ; SOFT-NEXT:    addi r1, r1, 48
 ; SOFT-NEXT:    ld r0, 16(r1)
@@ -399,11 +337,6 @@ define dso_local void @storef(ptr nocapture %a, float %b) local_unnamed_addr nou
 ; SOFT-NEXT:    std r0, 64(r1)
 ; SOFT-NEXT:    bl __truncsfhf2
 ; SOFT-NEXT:    nop
-; SOFT-NEXT:    clrldi r3, r3, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
 ; SOFT-NEXT:    sth r3, 0(r30)
 ; SOFT-NEXT:    addi r1, r1, 48
 ; SOFT-NEXT:    ld r0, 16(r1)
@@ -454,21 +387,8 @@ define void @test_load_store(ptr %in, ptr %out) nounwind {
 ;
 ; SOFT-LABEL: test_load_store:
 ; SOFT:       # %bb.0:
-; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    stdu r1, -48(r1)
-; SOFT-NEXT:    std r0, 64(r1)
-; SOFT-NEXT:    mr r30, r4
 ; SOFT-NEXT:    lhz r3, 0(r3)
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    sth r3, 0(r30)
-; SOFT-NEXT:    addi r1, r1, 48
-; SOFT-NEXT:    ld r0, 16(r1)
-; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    sth r3, 0(r4)
 ; SOFT-NEXT:    blr
 ;
 ; BE-LABEL: test_load_store:
@@ -544,34 +464,14 @@ define void @test_bitcast_to_half(ptr %addr, i16 %in) nounwind {
 define half @from_bits(i16 %x) nounwind {
 ; PPC32-LABEL: from_bits:
 ; PPC32:       # %bb.0:
-; PPC32-NEXT:    mflr r0
-; PPC32-NEXT:    stwu r1, -16(r1)
-; PPC32-NEXT:    clrlwi r3, r3, 16
-; PPC32-NEXT:    stw r0, 20(r1)
-; PPC32-NEXT:    bl __extendhfsf2
-; PPC32-NEXT:    lwz r0, 20(r1)
-; PPC32-NEXT:    addi r1, r1, 16
-; PPC32-NEXT:    mtlr r0
 ; PPC32-NEXT:    blr
 ;
 ; P8-LABEL: from_bits:
 ; P8:       # %bb.0:
-; P8-NEXT:    mflr r0
-; P8-NEXT:    stdu r1, -32(r1)
-; P8-NEXT:    clrldi r3, r3, 48
-; P8-NEXT:    std r0, 48(r1)
-; P8-NEXT:    bl __extendhfsf2
-; P8-NEXT:    nop
-; P8-NEXT:    addi r1, r1, 32
-; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    mtlr r0
 ; P8-NEXT:    blr
 ;
 ; CHECK-LABEL: from_bits:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    clrlwi r3, r3, 16
-; CHECK-NEXT:    mtfprwz f0, r3
-; CHECK-NEXT:    xscvhpdp f1, f0
 ; CHECK-NEXT:    blr
 ;
 ; SOFT-LABEL: from_bits:
@@ -580,15 +480,6 @@ define half @from_bits(i16 %x) nounwind {
 ;
 ; BE-LABEL: from_bits:
 ; BE:       # %bb.0:
-; BE-NEXT:    mflr r0
-; BE-NEXT:    stdu r1, -112(r1)
-; BE-NEXT:    clrldi r3, r3, 48
-; BE-NEXT:    std r0, 128(r1)
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    addi r1, r1, 112
-; BE-NEXT:    ld r0, 16(r1)
-; BE-NEXT:    mtlr r0
 ; BE-NEXT:    blr
   %res = bitcast i16 %x to half
   ret half %res
@@ -597,34 +488,14 @@ define half @from_bits(i16 %x) nounwind {
 define i16 @to_bits(half %x) nounwind {
 ; PPC32-LABEL: to_bits:
 ; PPC32:       # %bb.0:
-; PPC32-NEXT:    mflr r0
-; PPC32-NEXT:    stwu r1, -16(r1)
-; PPC32-NEXT:    stw r0, 20(r1)
-; PPC32-NEXT:    bl __truncsfhf2
-; PPC32-NEXT:    clrlwi r3, r3, 16
-; PPC32-NEXT:    lwz r0, 20(r1)
-; PPC32-NEXT:    addi r1, r1, 16
-; PPC32-NEXT:    mtlr r0
 ; PPC32-NEXT:    blr
 ;
 ; P8-LABEL: to_bits:
 ; P8:       # %bb.0:
-; P8-NEXT:    mflr r0
-; P8-NEXT:    stdu r1, -32(r1)
-; P8-NEXT:    std r0, 48(r1)
-; P8-NEXT:    bl __truncsfhf2
-; P8-NEXT:    nop
-; P8-NEXT:    clrldi r3, r3, 48
-; P8-NEXT:    addi r1, r1, 32
-; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    mtlr r0
 ; P8-NEXT:    blr
 ;
 ; CHECK-LABEL: to_bits:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xscvdphp f0, f1
-; CHECK-NEXT:    mffprwz r3, f0
-; CHECK-NEXT:    clrlwi r3, r3, 16
 ; CHECK-NEXT:    blr
 ;
 ; SOFT-LABEL: to_bits:
@@ -633,15 +504,6 @@ define i16 @to_bits(half %x) nounwind {
 ;
 ; BE-LABEL: to_bits:
 ; BE:       # %bb.0:
-; BE-NEXT:    mflr r0
-; BE-NEXT:    stdu r1, -112(r1)
-; BE-NEXT:    std r0, 128(r1)
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r3, 48
-; BE-NEXT:    addi r1, r1, 112
-; BE-NEXT:    ld r0, 16(r1)
-; BE-NEXT:    mtlr r0
 ; BE-NEXT:    blr
     %res = bitcast half %x to i16
     ret i16 %res
@@ -819,11 +681,6 @@ define void @test_trunc32(float %in, ptr %addr) nounwind {
 ; SOFT-NEXT:    mr r30, r4
 ; SOFT-NEXT:    bl __truncsfhf2
 ; SOFT-NEXT:    nop
-; SOFT-NEXT:    clrldi r3, r3, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
 ; SOFT-NEXT:    sth r3, 0(r30)
 ; SOFT-NEXT:    addi r1, r1, 48
 ; SOFT-NEXT:    ld r0, 16(r1)
@@ -897,11 +754,6 @@ define void @test_trunc64(double %in, ptr %addr) nounwind {
 ; SOFT-NEXT:    mr r30, r4
 ; SOFT-NEXT:    bl __truncdfhf2
 ; SOFT-NEXT:    nop
-; SOFT-NEXT:    clrldi r3, r3, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
 ; SOFT-NEXT:    sth r3, 0(r30)
 ; SOFT-NEXT:    addi r1, r1, 48
 ; SOFT-NEXT:    ld r0, 16(r1)
@@ -1056,11 +908,6 @@ define void @test_sitofp_i64(i64 %a, ptr %p) nounwind {
 ; SOFT-NEXT:    clrldi r3, r3, 32
 ; SOFT-NEXT:    bl __truncsfhf2
 ; SOFT-NEXT:    nop
-; SOFT-NEXT:    clrldi r3, r3, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
 ; SOFT-NEXT:    sth r3, 0(r30)
 ; SOFT-NEXT:    addi r1, r1, 48
 ; SOFT-NEXT:    ld r0, 16(r1)
@@ -1243,11 +1090,6 @@ define void @test_uitofp_i64(i64 %a, ptr %p) nounwind {
 ; SOFT-NEXT:    nop
 ; SOFT-NEXT:    bl __truncsfhf2
 ; SOFT-NEXT:    nop
-; SOFT-NEXT:    clrldi r3, r3, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
 ; SOFT-NEXT:    sth r3, 0(r30)
 ; SOFT-NEXT:    addi r1, r1, 48
 ; SOFT-NEXT:    ld r0, 16(r1)
@@ -1354,67 +1196,89 @@ define <4 x float> @test_extend32_vec4(ptr %p) nounwind {
 ; P8-LABEL: test_extend32_vec4:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    stdu r1, -112(r1)
-; P8-NEXT:    li r4, 48
-; P8-NEXT:    std r0, 128(r1)
-; P8-NEXT:    std r30, 96(r1) # 8-byte Folded Spill
-; P8-NEXT:    mr r30, r3
-; P8-NEXT:    lhz r3, 6(r3)
-; P8-NEXT:    stxvd2x vs61, r1, r4 # 16-byte Folded Spill
-; P8-NEXT:    li r4, 64
-; P8-NEXT:    stxvd2x vs62, r1, r4 # 16-byte Folded Spill
+; P8-NEXT:    stdu r1, -144(r1)
 ; P8-NEXT:    li r4, 80
+; P8-NEXT:    std r0, 160(r1)
+; P8-NEXT:    std r29, 120(r1) # 8-byte Folded Spill
+; P8-NEXT:    std r30, 128(r1) # 8-byte Folded Spill
+; P8-NEXT:    stxvd2x vs62, r1, r4 # 16-byte Folded Spill
+; P8-NEXT:    li r4, 96
 ; P8-NEXT:    stxvd2x vs63, r1, r4 # 16-byte Folded Spill
+; P8-NEXT:    lwz r4, 4(r3)
+; P8-NEXT:    stw r4, 64(r1)
+; P8-NEXT:    lwz r3, 0(r3)
+; P8-NEXT:    stw r3, 48(r1)
+; P8-NEXT:    addi r3, r1, 64
+; P8-NEXT:    lxvd2x vs62, 0, r3
+; P8-NEXT:    addi r3, r1, 48
+; P8-NEXT:    lxvd2x vs0, 0, r3
+; P8-NEXT:    mffprd r30, f0
+; P8-NEXT:    clrldi r3, r30, 48
+; P8-NEXT:    clrlwi r3, r3, 16
 ; P8-NEXT:    bl __extendhfsf2
 ; P8-NEXT:    nop
-; P8-NEXT:    lhz r3, 2(r30)
+; P8-NEXT:    mfvsrd r29, vs62
 ; P8-NEXT:    xxlor vs63, f1, f1
+; P8-NEXT:    clrldi r3, r29, 48
+; P8-NEXT:    clrlwi r3, r3, 16
 ; P8-NEXT:    bl __extendhfsf2
 ; P8-NEXT:    nop
-; P8-NEXT:    lhz r3, 4(r30)
-; P8-NEXT:    xxlor vs62, f1, f1
+; P8-NEXT:    rldicl r3, r30, 48, 48
+; P8-NEXT:    xxmrghd vs0, vs1, vs63
+; P8-NEXT:    clrlwi r3, r3, 16
+; P8-NEXT:    xvcvdpsp vs62, vs0
 ; P8-NEXT:    bl __extendhfsf2
 ; P8-NEXT:    nop
-; P8-NEXT:    lhz r3, 0(r30)
-; P8-NEXT:    xxlor vs61, f1, f1
+; P8-NEXT:    rldicl r3, r29, 48, 48
+; P8-NEXT:    xxlor vs63, f1, f1
+; P8-NEXT:    clrlwi r3, r3, 16
 ; P8-NEXT:    bl __extendhfsf2
 ; P8-NEXT:    nop
-; P8-NEXT:    li r3, 80
-; P8-NEXT:    xxmrghd vs0, vs61, vs1
-; P8-NEXT:    xxmrghd vs1, vs63, vs62
-; P8-NEXT:    ld r30, 96(r1) # 8-byte Folded Reload
-; P8-NEXT:    lxvd2x vs63, r1, r3 # 16-byte Folded Reload
-; P8-NEXT:    li r3, 64
+; P8-NEXT:    xxmrghd vs0, vs1, vs63
+; P8-NEXT:    li r3, 96
+; P8-NEXT:    ld r30, 128(r1) # 8-byte Folded Reload
+; P8-NEXT:    ld r29, 120(r1) # 8-byte Folded Reload
 ; P8-NEXT:    xvcvdpsp vs34, vs0
-; P8-NEXT:    xvcvdpsp vs35, vs1
+; P8-NEXT:    lxvd2x vs63, r1, r3 # 16-byte Folded Reload
+; P8-NEXT:    li r3, 80
+; P8-NEXT:    vmrgew v2, v2, v30
 ; P8-NEXT:    lxvd2x vs62, r1, r3 # 16-byte Folded Reload
-; P8-NEXT:    li r3, 48
-; P8-NEXT:    lxvd2x vs61, r1, r3 # 16-byte Folded Reload
-; P8-NEXT:    vmrgew v2, v3, v2
-; P8-NEXT:    addi r1, r1, 112
+; P8-NEXT:    addi r1, r1, 144
 ; P8-NEXT:    ld r0, 16(r1)
 ; P8-NEXT:    mtlr r0
 ; P8-NEXT:    blr
 ;
 ; CHECK-LABEL: test_extend32_vec4:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lhz r4, 6(r3)
+; CHECK-NEXT:    lwz r4, 4(r3)
+; CHECK-NEXT:    stw r4, -16(r1)
+; CHECK-NEXT:    lwz r3, 0(r3)
+; CHECK-NEXT:    lxv vs34, -16(r1)
+; CHECK-NEXT:    stw r3, -32(r1)
+; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:    lxv vs35, -32(r1)
+; CHECK-NEXT:    vextuhrx r4, r3, v3
+; CHECK-NEXT:    vextuhrx r3, r3, v2
+; CHECK-NEXT:    clrlwi r4, r4, 16
+; CHECK-NEXT:    clrlwi r3, r3, 16
 ; CHECK-NEXT:    mtfprwz f0, r4
+; CHECK-NEXT:    mtfprwz f1, r3
+; CHECK-NEXT:    li r3, 2
 ; CHECK-NEXT:    xscvhpdp f0, f0
-; CHECK-NEXT:    lhz r4, 2(r3)
-; CHECK-NEXT:    mtfprwz f1, r4
 ; CHECK-NEXT:    xscvhpdp f1, f1
-; CHECK-NEXT:    lhz r4, 4(r3)
-; CHECK-NEXT:    mtfprwz f2, r4
-; CHECK-NEXT:    xscvhpdp f2, f2
-; CHECK-NEXT:    lhz r3, 0(r3)
-; CHECK-NEXT:    xxmrghd vs0, vs0, vs1
-; CHECK-NEXT:    mtfprwz f3, r3
-; CHECK-NEXT:    xvcvdpsp vs35, vs0
-; CHECK-NEXT:    xscvhpdp f3, f3
-; CHECK-NEXT:    xxmrghd vs2, vs2, vs3
-; CHECK-NEXT:    xvcvdpsp vs34, vs2
-; CHECK-NEXT:    vmrgew v2, v3, v2
+; CHECK-NEXT:    vextuhrx r4, r3, v3
+; CHECK-NEXT:    vextuhrx r3, r3, v2
+; CHECK-NEXT:    clrlwi r4, r4, 16
+; CHECK-NEXT:    clrlwi r3, r3, 16
+; CHECK-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-NEXT:    mtfprwz f1, r3
+; CHECK-NEXT:    xvcvdpsp vs36, vs0
+; CHECK-NEXT:    mtfprwz f0, r4
+; CHECK-NEXT:    xscvhpdp f0, f0
+; CHECK-NEXT:    xscvhpdp f1, f1
+; CHECK-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-NEXT:    xvcvdpsp vs34, vs0
+; CHECK-NEXT:    vmrgew v2, v2, v4
 ; CHECK-NEXT:    blr
 ;
 ; SOFT-LABEL: test_extend32_vec4:
@@ -1458,39 +1322,39 @@ define <4 x float> @test_extend32_vec4(ptr %p) nounwind {
 ; BE-LABEL: test_extend32_vec4:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
-; BE-NEXT:    stdu r1, -176(r1)
-; BE-NEXT:    std r0, 192(r1)
-; BE-NEXT:    std r30, 136(r1) # 8-byte Folded Spill
-; BE-NEXT:    mr r30, r3
-; BE-NEXT:    lhz r3, 0(r3)
-; BE-NEXT:    stfd f29, 152(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f30, 160(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f31, 168(r1) # 8-byte Folded Spill
+; BE-NEXT:    stdu r1, -192(r1)
+; BE-NEXT:    std r0, 208(r1)
+; BE-NEXT:    lwz r4, 0(r3)
+; BE-NEXT:    stw r4, 160(r1)
+; BE-NEXT:    lwz r3, 4(r3)
+; BE-NEXT:    stw r3, 176(r1)
+; BE-NEXT:    addi r3, r1, 160
+; BE-NEXT:    lvx v2, 0, r3
+; BE-NEXT:    addi r3, r1, 176
+; BE-NEXT:    lvx v3, 0, r3
+; BE-NEXT:    addi r3, r1, 128
+; BE-NEXT:    stvx v3, 0, r3
+; BE-NEXT:    addi r3, r1, 112
+; BE-NEXT:    stvx v2, 0, r3
+; BE-NEXT:    lhz r3, 130(r1)
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    lhz r3, 2(r30)
-; BE-NEXT:    fmr f31, f1
+; BE-NEXT:    lhz r3, 128(r1)
+; BE-NEXT:    stfs f1, 156(r1)
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    lhz r3, 4(r30)
-; BE-NEXT:    fmr f30, f1
+; BE-NEXT:    lhz r3, 114(r1)
+; BE-NEXT:    stfs f1, 152(r1)
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    lhz r3, 6(r30)
-; BE-NEXT:    fmr f29, f1
+; BE-NEXT:    lhz r3, 112(r1)
+; BE-NEXT:    stfs f1, 148(r1)
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    stfs f29, 120(r1)
-; BE-NEXT:    addi r3, r1, 112
-; BE-NEXT:    stfs f30, 116(r1)
-; BE-NEXT:    stfs f31, 112(r1)
-; BE-NEXT:    stfs f1, 124(r1)
+; BE-NEXT:    stfs f1, 144(r1)
+; BE-NEXT:    addi r3, r1, 144
 ; BE-NEXT:    lvx v2, 0, r3
-; BE-NEXT:    lfd f31, 168(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f30, 160(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f29, 152(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r30, 136(r1) # 8-byte Folded Reload
-; BE-NEXT:    addi r1, r1, 176
+; BE-NEXT:    addi r1, r1, 192
 ; BE-NEXT:    ld r0, 16(r1)
 ; BE-NEXT:    mtlr r0
 ; BE-NEXT:    blr
@@ -1537,39 +1401,41 @@ define <4 x double> @test_extend64_vec4(ptr %p) nounwind {
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
 ; P8-NEXT:    stdu r1, -112(r1)
-; P8-NEXT:    li r4, 48
 ; P8-NEXT:    std r0, 128(r1)
+; P8-NEXT:    li r4, 48
+; P8-NEXT:    std r28, 80(r1) # 8-byte Folded Spill
+; P8-NEXT:    lhz r28, 2(r3)
+; P8-NEXT:    std r29, 88(r1) # 8-byte Folded Spill
 ; P8-NEXT:    std r30, 96(r1) # 8-byte Folded Spill
-; P8-NEXT:    mr r30, r3
-; P8-NEXT:    lhz r3, 6(r3)
-; P8-NEXT:    stxvd2x vs61, r1, r4 # 16-byte Folded Spill
-; P8-NEXT:    li r4, 64
+; P8-NEXT:    lhz r30, 6(r3)
+; P8-NEXT:    lhz r29, 4(r3)
+; P8-NEXT:    lhz r3, 0(r3)
 ; P8-NEXT:    stxvd2x vs62, r1, r4 # 16-byte Folded Spill
-; P8-NEXT:    li r4, 80
+; P8-NEXT:    li r4, 64
 ; P8-NEXT:    stxvd2x vs63, r1, r4 # 16-byte Folded Spill
 ; P8-NEXT:    bl __extendhfsf2
 ; P8-NEXT:    nop
-; P8-NEXT:    lhz r3, 4(r30)
+; P8-NEXT:    mr r3, r28
 ; P8-NEXT:    xxlor vs63, f1, f1
 ; P8-NEXT:    bl __extendhfsf2
 ; P8-NEXT:    nop
-; P8-NEXT:    lhz r3, 2(r30)
-; P8-NEXT:    xxlor vs62, f1, f1
+; P8-NEXT:    mr r3, r29
+; P8-NEXT:    xxmrghd vs63, vs1, vs63
 ; P8-NEXT:    bl __extendhfsf2
 ; P8-NEXT:    nop
-; P8-NEXT:    lhz r3, 0(r30)
-; P8-NEXT:    xxlor vs61, f1, f1
+; P8-NEXT:    mr r3, r30
+; P8-NEXT:    xxlor vs62, f1, f1
 ; P8-NEXT:    bl __extendhfsf2
 ; P8-NEXT:    nop
-; P8-NEXT:    li r3, 80
-; P8-NEXT:    xxmrghd vs35, vs63, vs62
-; P8-NEXT:    xxmrghd vs34, vs61, vs1
+; P8-NEXT:    li r3, 64
+; P8-NEXT:    vmr v2, v31
+; P8-NEXT:    xxmrghd vs35, vs1, vs62
 ; P8-NEXT:    ld r30, 96(r1) # 8-byte Folded Reload
+; P8-NEXT:    ld r29, 88(r1) # 8-byte Folded Reload
+; P8-NEXT:    ld r28, 80(r1) # 8-byte Folded Reload
 ; P8-NEXT:    lxvd2x vs63, r1, r3 # 16-byte Folded Reload
-; P8-NEXT:    li r3, 64
-; P8-NEXT:    lxvd2x vs62, r1, r3 # 16-byte Folded Reload
 ; P8-NEXT:    li r3, 48
-; P8-NEXT:    lxvd2x vs61, r1, r3 # 16-byte Folded Reload
+; P8-NEXT:    lxvd2x vs62, r1, r3 # 16-byte Folded Reload
 ; P8-NEXT:    addi r1, r1, 112
 ; P8-NEXT:    ld r0, 16(r1)
 ; P8-NEXT:    mtlr r0
@@ -1642,36 +1508,43 @@ define <4 x double> @test_extend64_vec4(ptr %p) nounwind {
 ; BE-LABEL: test_extend64_vec4:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
-; BE-NEXT:    stdu r1, -160(r1)
-; BE-NEXT:    std r0, 176(r1)
-; BE-NEXT:    std r30, 120(r1) # 8-byte Folded Spill
-; BE-NEXT:    mr r30, r3
-; BE-NEXT:    lhz r3, 6(r3)
-; BE-NEXT:    stfd f29, 136(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f30, 144(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f31, 152(r1) # 8-byte Folded Spill
+; BE-NEXT:    stdu r1, -176(r1)
+; BE-NEXT:    std r0, 192(r1)
+; BE-NEXT:    std r28, 120(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r29, 128(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r30, 136(r1) # 8-byte Folded Spill
+; BE-NEXT:    lhz r30, 6(r3)
+; BE-NEXT:    lhz r29, 4(r3)
+; BE-NEXT:    lhz r28, 2(r3)
+; BE-NEXT:    lhz r3, 0(r3)
+; BE-NEXT:    stfd f29, 152(r1) # 8-byte Folded Spill
+; BE-NEXT:    stfd f30, 160(r1) # 8-byte Folded Spill
+; BE-NEXT:    stfd f31, 168(r1) # 8-byte Folded Spill
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    lhz r3, 4(r30)
+; BE-NEXT:    mr r3, r28
 ; BE-NEXT:    fmr f31, f1
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    lhz r3, 2(r30)
+; BE-NEXT:    mr r3, r29
 ; BE-NEXT:    fmr f30, f1
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    lhz r3, 0(r30)
+; BE-NEXT:    mr r3, r30
 ; BE-NEXT:    fmr f29, f1
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f2, f29
-; BE-NEXT:    fmr f3, f30
-; BE-NEXT:    lfd f30, 144(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f29, 136(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f4, f31
-; BE-NEXT:    lfd f31, 152(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r30, 120(r1) # 8-byte Folded Reload
-; BE-NEXT:    addi r1, r1, 160
+; BE-NEXT:    fmr f4, f1
+; BE-NEXT:    fmr f1, f31
+; BE-NEXT:    lfd f31, 168(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r30, 136(r1) # 8-byte Folded Reload
+; BE-NEXT:    fmr f2, f30
+; BE-NEXT:    fmr f3, f29
+; BE-NEXT:    lfd f30, 160(r1) # 8-byte Folded Reload
+; BE-NEXT:    lfd f29, 152(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r29, 128(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r28, 120(r1) # 8-byte Folded Reload
+; BE-NEXT:    addi r1, r1, 176
 ; BE-NEXT:    ld r0, 16(r1)
 ; BE-NEXT:    mtlr r0
 ; BE-NEXT:    blr
@@ -1800,56 +1673,30 @@ define void @test_trunc32_vec4(<4 x float> %a, ptr %p) nounwind {
 ; SOFT-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; SOFT-NEXT:    stdu r1, -80(r1)
-; SOFT-NEXT:    mr r27, r3
-; SOFT-NEXT:    clrldi r3, r6, 32
+; SOFT-NEXT:    clrldi r3, r3, 32
 ; SOFT-NEXT:    std r0, 96(r1)
 ; SOFT-NEXT:    mr r30, r7
-; SOFT-NEXT:    mr r29, r5
-; SOFT-NEXT:    mr r28, r4
+; SOFT-NEXT:    mr r29, r6
+; SOFT-NEXT:    mr r28, r5
+; SOFT-NEXT:    mr r27, r4
 ; SOFT-NEXT:    bl __truncsfhf2
 ; SOFT-NEXT:    nop
 ; SOFT-NEXT:    mr r26, r3
-; SOFT-NEXT:    clrldi r3, r29, 32
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r29, r3
-; SOFT-NEXT:    clrldi r3, r28, 32
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r28, r3
 ; SOFT-NEXT:    clrldi r3, r27, 32
 ; SOFT-NEXT:    bl __truncsfhf2
 ; SOFT-NEXT:    nop
-; SOFT-NEXT:    clrldi r3, r3, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
 ; SOFT-NEXT:    mr r27, r3
-; SOFT-NEXT:    clrldi r3, r28, 48
-; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    clrldi r3, r28, 32
+; SOFT-NEXT:    bl __truncsfhf2
 ; SOFT-NEXT:    nop
 ; SOFT-NEXT:    mr r28, r3
-; SOFT-NEXT:    clrldi r3, r29, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r29, r3
-; SOFT-NEXT:    clrldi r3, r26, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
+; SOFT-NEXT:    clrldi r3, r29, 32
 ; SOFT-NEXT:    bl __truncsfhf2
 ; SOFT-NEXT:    nop
 ; SOFT-NEXT:    sth r3, 6(r30)
-; SOFT-NEXT:    mr r3, r29
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    sth r3, 4(r30)
-; SOFT-NEXT:    mr r3, r28
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    sth r3, 2(r30)
-; SOFT-NEXT:    mr r3, r27
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    sth r3, 0(r30)
+; SOFT-NEXT:    sth r28, 4(r30)
+; SOFT-NEXT:    sth r27, 2(r30)
+; SOFT-NEXT:    sth r26, 0(r30)
 ; SOFT-NEXT:    addi r1, r1, 80
 ; SOFT-NEXT:    ld r0, 16(r1)
 ; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -2021,56 +1868,29 @@ define void @test_trunc64_vec4(<4 x double> %a, ptr %p) nounwind {
 ; SOFT-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; SOFT-NEXT:    stdu r1, -80(r1)
-; SOFT-NEXT:    mr r27, r3
-; SOFT-NEXT:    mr r3, r6
 ; SOFT-NEXT:    std r0, 96(r1)
 ; SOFT-NEXT:    mr r30, r7
-; SOFT-NEXT:    mr r29, r5
-; SOFT-NEXT:    mr r28, r4
+; SOFT-NEXT:    mr r29, r6
+; SOFT-NEXT:    mr r28, r5
+; SOFT-NEXT:    mr r27, r4
 ; SOFT-NEXT:    bl __truncdfhf2
 ; SOFT-NEXT:    nop
 ; SOFT-NEXT:    mr r26, r3
-; SOFT-NEXT:    mr r3, r29
+; SOFT-NEXT:    mr r3, r27
 ; SOFT-NEXT:    bl __truncdfhf2
 ; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r29, r3
+; SOFT-NEXT:    mr r27, r3
 ; SOFT-NEXT:    mr r3, r28
 ; SOFT-NEXT:    bl __truncdfhf2
 ; SOFT-NEXT:    nop
 ; SOFT-NEXT:    mr r28, r3
-; SOFT-NEXT:    mr r3, r27
+; SOFT-NEXT:    mr r3, r29
 ; SOFT-NEXT:    bl __truncdfhf2
 ; SOFT-NEXT:    nop
-; SOFT-NEXT:    clrldi r3, r3, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r27, r3
-; SOFT-NEXT:    clrldi r3, r28, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r28, r3
-; SOFT-NEXT:    clrldi r3, r29, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r29, r3
-; SOFT-NEXT:    clrldi r3, r26, 48
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
 ; SOFT-NEXT:    sth r3, 6(r30)
-; SOFT-NEXT:    mr r3, r29
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    sth r3, 4(r30)
-; SOFT-NEXT:    mr r3, r28
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    sth r3, 2(r30)
-; SOFT-NEXT:    mr r3, r27
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    sth r3, 0(r30)
+; SOFT-NEXT:    sth r28, 4(r30)
+; SOFT-NEXT:    sth r27, 2(r30)
+; SOFT-NEXT:    sth r26, 0(r30)
 ; SOFT-NEXT:    addi r1, r1, 80
 ; SOFT-NEXT:    ld r0, 16(r1)
 ; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -2136,25 +1956,28 @@ define float @test_sitofp_fadd_i32(i32 %a, ptr %b) nounwind {
 ; PPC32-NEXT:    mflr r0
 ; PPC32-NEXT:    stwu r1, -32(r1)
 ; PPC32-NEXT:    stw r0, 36(r1)
+; PPC32-NEXT:    lis r5, 17200
+; PPC32-NEXT:    xoris r3, r3, 32768
 ; PPC32-NEXT:    stw r30, 16(r1) # 4-byte Folded Spill
-; PPC32-NEXT:    mr r30, r3
-; PPC32-NEXT:    lhz r3, 0(r4)
-; PPC32-NEXT:    stfd f31, 24(r1) # 8-byte Folded Spill
-; PPC32-NEXT:    bl __extendhfsf2
-; PPC32-NEXT:    lis r3, 17200
-; PPC32-NEXT:    stw r3, 8(r1)
-; PPC32-NEXT:    xoris r3, r30, 32768
+; PPC32-NEXT:    lhz r30, 0(r4)
+; PPC32-NEXT:    stw r5, 8(r1)
 ; PPC32-NEXT:    stw r3, 12(r1)
 ; PPC32-NEXT:    lis r3, .LCPI23_0 at ha
-; PPC32-NEXT:    fmr f31, f1
 ; PPC32-NEXT:    lfd f0, 8(r1)
 ; PPC32-NEXT:    lfs f1, .LCPI23_0 at l(r3)
+; PPC32-NEXT:    stfd f31, 24(r1) # 8-byte Folded Spill
 ; PPC32-NEXT:    fsub f0, f0, f1
 ; PPC32-NEXT:    frsp f1, f0
 ; PPC32-NEXT:    bl __truncsfhf2
 ; PPC32-NEXT:    clrlwi r3, r3, 16
 ; PPC32-NEXT:    bl __extendhfsf2
-; PPC32-NEXT:    fadds f1, f31, f1
+; PPC32-NEXT:    mr r3, r30
+; PPC32-NEXT:    fmr f31, f1
+; PPC32-NEXT:    bl __extendhfsf2
+; PPC32-NEXT:    fadds f1, f1, f31
+; PPC32-NEXT:    bl __truncsfhf2
+; PPC32-NEXT:    clrlwi r3, r3, 16
+; PPC32-NEXT:    bl __extendhfsf2
 ; PPC32-NEXT:    lfd f31, 24(r1) # 8-byte Folded Reload
 ; PPC32-NEXT:    lwz r30, 16(r1) # 4-byte Folded Reload
 ; PPC32-NEXT:    lwz r0, 36(r1)
@@ -2168,20 +1991,25 @@ define float @test_sitofp_fadd_i32(i32 %a, ptr %b) nounwind {
 ; P8-NEXT:    std r30, -24(r1) # 8-byte Folded Spill
 ; P8-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
 ; P8-NEXT:    stdu r1, -64(r1)
+; P8-NEXT:    mtfprwa f0, r3
 ; P8-NEXT:    std r0, 80(r1)
-; P8-NEXT:    mr r30, r3
-; P8-NEXT:    lhz r3, 0(r4)
+; P8-NEXT:    lhz r30, 0(r4)
+; P8-NEXT:    xscvsxdsp f1, f0
+; P8-NEXT:    bl __truncsfhf2
+; P8-NEXT:    nop
+; P8-NEXT:    clrldi r3, r3, 48
 ; P8-NEXT:    bl __extendhfsf2
 ; P8-NEXT:    nop
-; P8-NEXT:    mtfprwa f0, r30
+; P8-NEXT:    mr r3, r30
 ; P8-NEXT:    fmr f31, f1
-; P8-NEXT:    xscvsxdsp f1, f0
+; P8-NEXT:    bl __extendhfsf2
+; P8-NEXT:    nop
+; P8-NEXT:    xsaddsp f1, f1, f31
 ; P8-NEXT:    bl __truncsfhf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    clrldi r3, r3, 48
 ; P8-NEXT:    bl __extendhfsf2
 ; P8-NEXT:    nop
-; P8-NEXT:    xsaddsp f1, f31, f1
 ; P8-NEXT:    addi r1, r1, 64
 ; P8-NEXT:    ld r0, 16(r1)
 ; P8-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
@@ -2191,17 +2019,22 @@ define float @test_sitofp_fadd_i32(i32 %a, ptr %b) nounwind {
 ;
 ; CHECK-LABEL: test_sitofp_fadd_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    mtfprwa f1, r3
+; CHECK-NEXT:    mtfprwa f0, r3
 ; CHECK-NEXT:    lhz r4, 0(r4)
-; CHECK-NEXT:    xscvsxdsp f1, f1
+; CHECK-NEXT:    xscvsxdsp f0, f0
+; CHECK-NEXT:    xscvdphp f0, f0
+; CHECK-NEXT:    mffprwz r3, f0
 ; CHECK-NEXT:    mtfprwz f0, r4
-; CHECK-NEXT:    xscvhpdp f0, f0
-; CHECK-NEXT:    xscvdphp f1, f1
-; CHECK-NEXT:    mffprwz r3, f1
 ; CHECK-NEXT:    clrlwi r3, r3, 16
+; CHECK-NEXT:    xscvhpdp f0, f0
 ; CHECK-NEXT:    mtfprwz f1, r3
 ; CHECK-NEXT:    xscvhpdp f1, f1
-; CHECK-NEXT:    xsaddsp f1, f0, f1
+; CHECK-NEXT:    xsaddsp f0, f0, f1
+; CHECK-NEXT:    xscvdphp f0, f0
+; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    clrlwi r3, r3, 16
+; CHECK-NEXT:    mtfprwz f0, r3
+; CHECK-NEXT:    xscvhpdp f1, f0
 ; CHECK-NEXT:    blr
 ;
 ; SOFT-LABEL: test_sitofp_fadd_i32:
@@ -2210,25 +2043,31 @@ define float @test_sitofp_fadd_i32(i32 %a, ptr %b) nounwind {
 ; SOFT-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; SOFT-NEXT:    stdu r1, -64(r1)
+; SOFT-NEXT:    extsw r3, r3
 ; SOFT-NEXT:    std r0, 80(r1)
-; SOFT-NEXT:    mr r30, r3
-; SOFT-NEXT:    lhz r3, 0(r4)
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    mr r29, r3
-; SOFT-NEXT:    extsw r3, r30
+; SOFT-NEXT:    mr r30, r4
 ; SOFT-NEXT:    bl __floatsisf
 ; SOFT-NEXT:    nop
 ; SOFT-NEXT:    clrldi r3, r3, 32
 ; SOFT-NEXT:    bl __truncsfhf2
 ; SOFT-NEXT:    nop
-; SOFT-NEXT:    clrldi r3, r3, 48
+; SOFT-NEXT:    mr r29, r3
+; SOFT-NEXT:    lhz r3, 0(r30)
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    mr r30, r3
+; SOFT-NEXT:    clrldi r3, r29, 48
 ; SOFT-NEXT:    bl __extendhfsf2
 ; SOFT-NEXT:    nop
 ; SOFT-NEXT:    mr r4, r3
-; SOFT-NEXT:    mr r3, r29
+; SOFT-NEXT:    mr r3, r30
 ; SOFT-NEXT:    bl __addsf3
 ; SOFT-NEXT:    nop
+; SOFT-NEXT:    bl __truncsfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    clrldi r3, r3, 48
+; SOFT-NEXT:    bl __extendhfsf2
+; SOFT-NEXT:    nop
 ; SOFT-NEXT:    addi r1, r1, 64
 ; SOFT-NEXT:    ld r0, 16(r1)
 ; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -2241,24 +2080,29 @@ define float @test_sitofp_fadd_i32(i32 %a, ptr %b) nounwind {
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -144(r1)
 ; BE-NEXT:    std r0, 160(r1)
+; BE-NEXT:    extsw r3, r3
 ; BE-NEXT:    std r30, 120(r1) # 8-byte Folded Spill
-; BE-NEXT:    mr r30, r3
-; BE-NEXT:    lhz r3, 0(r4)
-; BE-NEXT:    stfd f31, 136(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    extsw r3, r30
-; BE-NEXT:    fmr f31, f1
+; BE-NEXT:    lhz r30, 0(r4)
 ; BE-NEXT:    std r3, 112(r1)
 ; BE-NEXT:    lfd f0, 112(r1)
 ; BE-NEXT:    fcfid f0, f0
+; BE-NEXT:    stfd f31, 136(r1) # 8-byte Folded Spill
 ; BE-NEXT:    frsp f1, f0
 ; BE-NEXT:    bl __truncsfhf2
 ; BE-NEXT:    nop
 ; BE-NEXT:    clrldi r3, r3, 48
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    fadds f1, f31, f1
+; BE-NEXT:    mr r3, r30
+; BE-NEXT:    fmr f31, f1
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
+; BE-NEXT:    fadds f1, f1, f31
+; BE-NEXT:    bl __truncsfhf2
+; BE-NEXT:    nop
+; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    lfd f31, 136(r1) # 8-byte Folded Reload
 ; BE-NEXT:    ld r30, 120(r1) # 8-byte Folded Reload
 ; BE-NEXT:    addi r1, r1, 144
@@ -2276,21 +2120,17 @@ define half @PR40273(half) nounwind {
 ; PPC32:       # %bb.0:
 ; PPC32-NEXT:    mflr r0
 ; PPC32-NEXT:    stwu r1, -16(r1)
-; PPC32-NEXT:    stw r0, 20(r1)
-; PPC32-NEXT:    bl __truncsfhf2
 ; PPC32-NEXT:    clrlwi r3, r3, 16
+; PPC32-NEXT:    stw r0, 20(r1)
 ; PPC32-NEXT:    bl __extendhfsf2
 ; PPC32-NEXT:    lis r3, .LCPI24_0 at ha
 ; PPC32-NEXT:    lfs f0, .LCPI24_0 at l(r3)
-; PPC32-NEXT:    li r3, 0
+; PPC32-NEXT:    li r3, 15360
 ; PPC32-NEXT:    fcmpu cr0, f1, f0
-; PPC32-NEXT:    bc 12, eq, .LBB24_2
+; PPC32-NEXT:    bne cr0, .LBB24_2
 ; PPC32-NEXT:  # %bb.1:
-; PPC32-NEXT:    li r3, 4
+; PPC32-NEXT:    li r3, 0
 ; PPC32-NEXT:  .LBB24_2:
-; PPC32-NEXT:    li r4, .LCPI24_1 at l
-; PPC32-NEXT:    addis r4, r4, .LCPI24_1 at ha
-; PPC32-NEXT:    lfsx f1, r4, r3
 ; PPC32-NEXT:    lwz r0, 20(r1)
 ; PPC32-NEXT:    addi r1, r1, 16
 ; PPC32-NEXT:    mtlr r0
@@ -2300,20 +2140,14 @@ define half @PR40273(half) nounwind {
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
 ; P8-NEXT:    stdu r1, -32(r1)
-; P8-NEXT:    std r0, 48(r1)
-; P8-NEXT:    bl __truncsfhf2
-; P8-NEXT:    nop
 ; P8-NEXT:    clrldi r3, r3, 48
+; P8-NEXT:    std r0, 48(r1)
 ; P8-NEXT:    bl __extendhfsf2
 ; P8-NEXT:    nop
-; P8-NEXT:    fmr f0, f1
-; P8-NEXT:    xxlxor f1, f1, f1
-; P8-NEXT:    fcmpu cr0, f0, f1
-; P8-NEXT:    beq cr0, .LBB24_2
-; P8-NEXT:  # %bb.1:
-; P8-NEXT:    vspltisw v2, 1
-; P8-NEXT:    xvcvsxwdp vs1, vs34
-; P8-NEXT:  .LBB24_2:
+; P8-NEXT:    xxlxor f0, f0, f0
+; P8-NEXT:    li r3, 15360
+; P8-NEXT:    fcmpu cr0, f1, f0
+; P8-NEXT:    iseleq r3, 0, r3
 ; P8-NEXT:    addi r1, r1, 32
 ; P8-NEXT:    ld r0, 16(r1)
 ; P8-NEXT:    mtlr r0
@@ -2321,17 +2155,13 @@ define half @PR40273(half) nounwind {
 ;
 ; CHECK-LABEL: PR40273:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xscvdphp f0, f1
-; CHECK-NEXT:    xxlxor f1, f1, f1
-; CHECK-NEXT:    mffprwz r3, f0
 ; CHECK-NEXT:    clrlwi r3, r3, 16
+; CHECK-NEXT:    xxlxor f1, f1, f1
 ; CHECK-NEXT:    mtfprwz f0, r3
+; CHECK-NEXT:    li r3, 15360
 ; CHECK-NEXT:    xscvhpdp f0, f0
 ; CHECK-NEXT:    fcmpu cr0, f0, f1
-; CHECK-NEXT:    beqlr cr0
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    vspltisw v2, 1
-; CHECK-NEXT:    xvcvsxwdp vs1, vs34
+; CHECK-NEXT:    iseleq r3, 0, r3
 ; CHECK-NEXT:    blr
 ;
 ; SOFT-LABEL: PR40273:
@@ -2346,10 +2176,8 @@ define half @PR40273(half) nounwind {
 ; SOFT-NEXT:    bl __nesf2
 ; SOFT-NEXT:    nop
 ; SOFT-NEXT:    cmplwi r3, 0
-; SOFT-NEXT:    lis r3, 16256
+; SOFT-NEXT:    li r3, 15360
 ; SOFT-NEXT:    iseleq r3, 0, r3
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
 ; SOFT-NEXT:    addi r1, r1, 32
 ; SOFT-NEXT:    ld r0, 16(r1)
 ; SOFT-NEXT:    mtlr r0
@@ -2359,23 +2187,18 @@ define half @PR40273(half) nounwind {
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -112(r1)
-; BE-NEXT:    std r0, 128(r1)
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
 ; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    std r0, 128(r1)
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
 ; BE-NEXT:    addis r3, r2, .LCPI24_0 at toc@ha
 ; BE-NEXT:    lfs f0, .LCPI24_0 at toc@l(r3)
-; BE-NEXT:    li r3, 0
+; BE-NEXT:    li r3, 15360
 ; BE-NEXT:    fcmpu cr0, f1, f0
-; BE-NEXT:    bc 12, eq, .LBB24_2
+; BE-NEXT:    bne cr0, .LBB24_2
 ; BE-NEXT:  # %bb.1:
-; BE-NEXT:    li r3, 4
+; BE-NEXT:    li r3, 0
 ; BE-NEXT:  .LBB24_2:
-; BE-NEXT:    addis r4, r2, .LCPI24_1 at toc@ha
-; BE-NEXT:    addi r4, r4, .LCPI24_1 at toc@l
-; BE-NEXT:    lfsx f1, r4, r3
 ; BE-NEXT:    addi r1, r1, 112
 ; BE-NEXT:    ld r0, 16(r1)
 ; BE-NEXT:    mtlr r0
@@ -2390,42 +2213,17 @@ define half @PR40273(half) nounwind {
 define half @fabs(half %x) nounwind {
 ; PPC32-LABEL: fabs:
 ; PPC32:       # %bb.0:
-; PPC32-NEXT:    mflr r0
-; PPC32-NEXT:    stwu r1, -16(r1)
-; PPC32-NEXT:    stw r0, 20(r1)
-; PPC32-NEXT:    bl __truncsfhf2
-; PPC32-NEXT:    clrlwi r3, r3, 16
-; PPC32-NEXT:    bl __extendhfsf2
-; PPC32-NEXT:    fabs f1, f1
-; PPC32-NEXT:    lwz r0, 20(r1)
-; PPC32-NEXT:    addi r1, r1, 16
-; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    clrlwi r3, r3, 17
 ; PPC32-NEXT:    blr
 ;
 ; P8-LABEL: fabs:
 ; P8:       # %bb.0:
-; P8-NEXT:    mflr r0
-; P8-NEXT:    stdu r1, -32(r1)
-; P8-NEXT:    std r0, 48(r1)
-; P8-NEXT:    bl __truncsfhf2
-; P8-NEXT:    nop
-; P8-NEXT:    clrldi r3, r3, 48
-; P8-NEXT:    bl __extendhfsf2
-; P8-NEXT:    nop
-; P8-NEXT:    xsabsdp f1, f1
-; P8-NEXT:    addi r1, r1, 32
-; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    mtlr r0
+; P8-NEXT:    clrldi r3, r3, 49
 ; P8-NEXT:    blr
 ;
 ; CHECK-LABEL: fabs:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xscvdphp f0, f1
-; CHECK-NEXT:    mffprwz r3, f0
-; CHECK-NEXT:    clrlwi r3, r3, 16
-; CHECK-NEXT:    mtfprwz f0, r3
-; CHECK-NEXT:    xscvhpdp f0, f0
-; CHECK-NEXT:    xsabsdp f1, f0
+; CHECK-NEXT:    clrldi r3, r3, 49
 ; CHECK-NEXT:    blr
 ;
 ; SOFT-LABEL: fabs:
@@ -2435,18 +2233,7 @@ define half @fabs(half %x) nounwind {
 ;
 ; BE-LABEL: fabs:
 ; BE:       # %bb.0:
-; BE-NEXT:    mflr r0
-; BE-NEXT:    stdu r1, -112(r1)
-; BE-NEXT:    std r0, 128(r1)
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r3, 48
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    fabs f1, f1
-; BE-NEXT:    addi r1, r1, 112
-; BE-NEXT:    ld r0, 16(r1)
-; BE-NEXT:    mtlr r0
+; BE-NEXT:    clrldi r3, r3, 49
 ; BE-NEXT:    blr
   %a = call half @llvm.fabs.f16(half %x)
   ret half %a
@@ -2455,107 +2242,35 @@ define half @fabs(half %x) nounwind {
 define half @fcopysign(half %x, half %y) nounwind {
 ; PPC32-LABEL: fcopysign:
 ; PPC32:       # %bb.0:
-; PPC32-NEXT:    mflr r0
-; PPC32-NEXT:    stwu r1, -32(r1)
-; PPC32-NEXT:    stw r0, 36(r1)
-; PPC32-NEXT:    stfd f31, 24(r1) # 8-byte Folded Spill
-; PPC32-NEXT:    fmr f31, f2
-; PPC32-NEXT:    bl __truncsfhf2
-; PPC32-NEXT:    clrlwi r3, r3, 16
-; PPC32-NEXT:    bl __extendhfsf2
-; PPC32-NEXT:    stfs f31, 20(r1)
-; PPC32-NEXT:    lwz r3, 20(r1)
-; PPC32-NEXT:    srwi r3, r3, 31
-; PPC32-NEXT:    andi. r3, r3, 1
-; PPC32-NEXT:    bc 12, gt, .LBB26_2
-; PPC32-NEXT:  # %bb.1:
-; PPC32-NEXT:    fabs f1, f1
-; PPC32-NEXT:    b .LBB26_3
-; PPC32-NEXT:  .LBB26_2:
-; PPC32-NEXT:    fnabs f1, f1
-; PPC32-NEXT:  .LBB26_3:
-; PPC32-NEXT:    lfd f31, 24(r1) # 8-byte Folded Reload
-; PPC32-NEXT:    lwz r0, 36(r1)
-; PPC32-NEXT:    addi r1, r1, 32
-; PPC32-NEXT:    mtlr r0
+; PPC32-NEXT:    rlwimi r3, r4, 0, 0, 16
 ; PPC32-NEXT:    blr
 ;
 ; P8-LABEL: fcopysign:
 ; P8:       # %bb.0:
-; P8-NEXT:    mflr r0
-; P8-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; P8-NEXT:    stdu r1, -48(r1)
-; P8-NEXT:    std r0, 64(r1)
-; P8-NEXT:    fmr f31, f2
-; P8-NEXT:    bl __truncsfhf2
-; P8-NEXT:    nop
-; P8-NEXT:    clrldi r3, r3, 48
-; P8-NEXT:    bl __extendhfsf2
-; P8-NEXT:    nop
-; P8-NEXT:    fcpsgn f1, f31, f1
-; P8-NEXT:    addi r1, r1, 48
-; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; P8-NEXT:    mtlr r0
+; P8-NEXT:    rotldi r4, r4, 49
+; P8-NEXT:    clrldi r3, r3, 49
+; P8-NEXT:    rldimi r3, r4, 15, 32
 ; P8-NEXT:    blr
 ;
 ; CHECK-LABEL: fcopysign:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xscvdphp f0, f1
-; CHECK-NEXT:    mffprwz r3, f0
-; CHECK-NEXT:    clrlwi r3, r3, 16
-; CHECK-NEXT:    mtfprwz f0, r3
-; CHECK-NEXT:    xscvhpdp f0, f0
-; CHECK-NEXT:    fcpsgn f1, f2, f0
+; CHECK-NEXT:    rotldi r4, r4, 49
+; CHECK-NEXT:    clrldi r3, r3, 49
+; CHECK-NEXT:    rldimi r3, r4, 15, 32
 ; CHECK-NEXT:    blr
 ;
 ; SOFT-LABEL: fcopysign:
 ; SOFT:       # %bb.0:
-; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    stdu r1, -48(r1)
-; SOFT-NEXT:    clrldi r3, r3, 48
-; SOFT-NEXT:    std r0, 64(r1)
-; SOFT-NEXT:    mr r30, r4
-; SOFT-NEXT:    bl __extendhfsf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    rlwimi r3, r30, 16, 0, 0
-; SOFT-NEXT:    clrldi r3, r3, 32
-; SOFT-NEXT:    bl __truncsfhf2
-; SOFT-NEXT:    nop
-; SOFT-NEXT:    addi r1, r1, 48
-; SOFT-NEXT:    ld r0, 16(r1)
-; SOFT-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    rotldi r4, r4, 49
+; SOFT-NEXT:    clrldi r3, r3, 49
+; SOFT-NEXT:    rldimi r3, r4, 15, 32
 ; SOFT-NEXT:    blr
 ;
 ; BE-LABEL: fcopysign:
 ; BE:       # %bb.0:
-; BE-NEXT:    mflr r0
-; BE-NEXT:    stdu r1, -128(r1)
-; BE-NEXT:    std r0, 144(r1)
-; BE-NEXT:    stfd f31, 120(r1) # 8-byte Folded Spill
-; BE-NEXT:    fmr f31, f2
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r3, 48
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    stfs f31, 116(r1)
-; BE-NEXT:    lwz r3, 116(r1)
-; BE-NEXT:    srwi r3, r3, 31
-; BE-NEXT:    andi. r3, r3, 1
-; BE-NEXT:    bc 12, gt, .LBB26_2
-; BE-NEXT:  # %bb.1:
-; BE-NEXT:    fabs f1, f1
-; BE-NEXT:    b .LBB26_3
-; BE-NEXT:  .LBB26_2:
-; BE-NEXT:    fnabs f1, f1
-; BE-NEXT:  .LBB26_3:
-; BE-NEXT:    lfd f31, 120(r1) # 8-byte Folded Reload
-; BE-NEXT:    addi r1, r1, 128
-; BE-NEXT:    ld r0, 16(r1)
-; BE-NEXT:    mtlr r0
+; BE-NEXT:    rotldi r4, r4, 49
+; BE-NEXT:    clrldi r3, r3, 49
+; BE-NEXT:    rldimi r3, r4, 15, 32
 ; BE-NEXT:    blr
   %a = call half @llvm.copysign.f16(half %x, half %y)
   ret half %a
diff --git a/llvm/test/CodeGen/PowerPC/ldexp.ll b/llvm/test/CodeGen/PowerPC/ldexp.ll
index 8d7253b5ce8e3..3c8439683cba5 100644
--- a/llvm/test/CodeGen/PowerPC/ldexp.ll
+++ b/llvm/test/CodeGen/PowerPC/ldexp.ll
@@ -143,15 +143,15 @@ define half @ldexp_f16(half %arg0, i32 %arg1) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    clrlwi r3, r3, 16
 ; CHECK-NEXT:    std r0, 48(r1)
-; CHECK-NEXT:    xscvdphp f0, f1
 ; CHECK-NEXT:    extsw r4, r4
-; CHECK-NEXT:    mffprwz r3, f0
-; CHECK-NEXT:    clrlwi r3, r3, 16
 ; CHECK-NEXT:    mtfprwz f0, r3
 ; CHECK-NEXT:    xscvhpdp f1, f0
 ; CHECK-NEXT:    bl ldexpf
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    xscvdphp f0, f1
+; CHECK-NEXT:    mffprwz r3, f0
 ; CHECK-NEXT:    addi r1, r1, 32
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
diff --git a/llvm/test/CodeGen/PowerPC/llvm.frexp.ll b/llvm/test/CodeGen/PowerPC/llvm.frexp.ll
index 95d763d7179ed..b0f9fd47a1e54 100644
--- a/llvm/test/CodeGen/PowerPC/llvm.frexp.ll
+++ b/llvm/test/CodeGen/PowerPC/llvm.frexp.ll
@@ -7,16 +7,16 @@ define { half, i32 } @test_frexp_f16_i32(half %a) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    clrlwi r3, r3, 16
 ; CHECK-NEXT:    std r0, 64(r1)
-; CHECK-NEXT:    xscvdphp f0, f1
 ; CHECK-NEXT:    addi r4, r1, 44
-; CHECK-NEXT:    mffprwz r3, f0
-; CHECK-NEXT:    clrlwi r3, r3, 16
 ; CHECK-NEXT:    mtfprwz f0, r3
 ; CHECK-NEXT:    xscvhpdp f1, f0
 ; CHECK-NEXT:    bl frexpf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    lwz r3, 44(r1)
+; CHECK-NEXT:    xscvdphp f0, f1
+; CHECK-NEXT:    lwz r4, 44(r1)
+; CHECK-NEXT:    mffprwz r3, f0
 ; CHECK-NEXT:    addi r1, r1, 48
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -30,15 +30,15 @@ define half @test_frexp_f16_i32_only_use_fract(half %a) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    clrlwi r3, r3, 16
 ; CHECK-NEXT:    std r0, 64(r1)
-; CHECK-NEXT:    xscvdphp f0, f1
 ; CHECK-NEXT:    addi r4, r1, 44
-; CHECK-NEXT:    mffprwz r3, f0
-; CHECK-NEXT:    clrlwi r3, r3, 16
 ; CHECK-NEXT:    mtfprwz f0, r3
 ; CHECK-NEXT:    xscvhpdp f1, f0
 ; CHECK-NEXT:    bl frexpf
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    xscvdphp f0, f1
+; CHECK-NEXT:    mffprwz r3, f0
 ; CHECK-NEXT:    addi r1, r1, 48
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -53,11 +53,9 @@ define i32 @test_frexp_f16_i32_only_use_exp(half %a) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    clrlwi r3, r3, 16
 ; CHECK-NEXT:    std r0, 64(r1)
-; CHECK-NEXT:    xscvdphp f0, f1
 ; CHECK-NEXT:    addi r4, r1, 44
-; CHECK-NEXT:    mffprwz r3, f0
-; CHECK-NEXT:    clrlwi r3, r3, 16
 ; CHECK-NEXT:    mtfprwz f0, r3
 ; CHECK-NEXT:    xscvhpdp f1, f0
 ; CHECK-NEXT:    bl frexpf
@@ -76,43 +74,42 @@ define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) nounwind
 ; CHECK-LABEL: test_frexp_v2f16_v2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r29, -40(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r30, -32(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stdu r1, -80(r1)
+; CHECK-NEXT:    clrlwi r29, r3, 16
+; CHECK-NEXT:    clrlwi r3, r4, 16
+; CHECK-NEXT:    addi r30, r1, 44
+; CHECK-NEXT:    mtfprwz f0, r3
 ; CHECK-NEXT:    std r0, 96(r1)
-; CHECK-NEXT:    xscvdphp f0, f2
-; CHECK-NEXT:    addi r30, r1, 32
 ; CHECK-NEXT:    mr r4, r30
-; CHECK-NEXT:    mffprwz r3, f0
-; CHECK-NEXT:    clrlwi r3, r3, 16
-; CHECK-NEXT:    mtfprwz f0, r3
-; CHECK-NEXT:    xscvhpdp f31, f0
-; CHECK-NEXT:    xscvdphp f0, f1
-; CHECK-NEXT:    mffprwz r3, f0
-; CHECK-NEXT:    clrlwi r3, r3, 16
-; CHECK-NEXT:    mtfprwz f0, r3
 ; CHECK-NEXT:    xscvhpdp f1, f0
 ; CHECK-NEXT:    bl frexpf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    addi r29, r1, 36
-; CHECK-NEXT:    fmr f30, f1
-; CHECK-NEXT:    fmr f1, f31
+; CHECK-NEXT:    xscvdphp f0, f1
+; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    mtfprwz f0, r29
+; CHECK-NEXT:    addi r29, r1, 40
+; CHECK-NEXT:    sth r3, 50(r1)
+; CHECK-NEXT:    xscvhpdp f1, f0
 ; CHECK-NEXT:    mr r4, r29
 ; CHECK-NEXT:    bl frexpf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f2, f1
-; CHECK-NEXT:    lfiwzx f0, 0, r30
-; CHECK-NEXT:    lfiwzx f1, 0, r29
+; CHECK-NEXT:    xscvdphp f0, f1
+; CHECK-NEXT:    li r4, 2
+; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    sth r3, 48(r1)
+; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:    lxv v3, 48(r1)
+; CHECK-NEXT:    lfiwzx f0, 0, r29
+; CHECK-NEXT:    lfiwzx f1, 0, r30
 ; CHECK-NEXT:    xxmrghw v2, vs1, vs0
-; CHECK-NEXT:    fmr f1, f30
+; CHECK-NEXT:    vextuhrx r3, r3, v3
+; CHECK-NEXT:    vextuhrx r4, r4, v3
 ; CHECK-NEXT:    addi r1, r1, 80
 ; CHECK-NEXT:    ld r0, 16(r1)
-; CHECK-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r30, -32(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r29, -40(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    mtlr r0
 ; CHECK-NEXT:    blr
   %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a)
@@ -123,34 +120,35 @@ define <2 x half> @test_frexp_v2f16_v2i32_only_use_fract(<2 x half> %a) nounwind
 ; CHECK-LABEL: test_frexp_v2f16_v2i32_only_use_fract:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stdu r1, -64(r1)
-; CHECK-NEXT:    std r0, 80(r1)
-; CHECK-NEXT:    xscvdphp f0, f2
-; CHECK-NEXT:    addi r4, r1, 40
-; CHECK-NEXT:    mffprwz r3, f0
-; CHECK-NEXT:    clrlwi r3, r3, 16
-; CHECK-NEXT:    mtfprwz f0, r3
-; CHECK-NEXT:    xscvhpdp f31, f0
-; CHECK-NEXT:    xscvdphp f0, f1
-; CHECK-NEXT:    mffprwz r3, f0
-; CHECK-NEXT:    clrlwi r3, r3, 16
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    stdu r1, -80(r1)
+; CHECK-NEXT:    clrlwi r30, r3, 16
+; CHECK-NEXT:    clrlwi r3, r4, 16
+; CHECK-NEXT:    addi r4, r1, 44
 ; CHECK-NEXT:    mtfprwz f0, r3
+; CHECK-NEXT:    std r0, 96(r1)
 ; CHECK-NEXT:    xscvhpdp f1, f0
 ; CHECK-NEXT:    bl frexpf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    addi r4, r1, 44
-; CHECK-NEXT:    fmr f30, f1
-; CHECK-NEXT:    fmr f1, f31
+; CHECK-NEXT:    xscvdphp f0, f1
+; CHECK-NEXT:    addi r4, r1, 40
+; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    mtfprwz f0, r30
+; CHECK-NEXT:    sth r3, 50(r1)
+; CHECK-NEXT:    xscvhpdp f1, f0
 ; CHECK-NEXT:    bl frexpf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f2, f1
-; CHECK-NEXT:    fmr f1, f30
-; CHECK-NEXT:    addi r1, r1, 64
+; CHECK-NEXT:    xscvdphp f0, f1
+; CHECK-NEXT:    li r4, 2
+; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    sth r3, 48(r1)
+; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:    lxv v2, 48(r1)
+; CHECK-NEXT:    vextuhrx r3, r3, v2
+; CHECK-NEXT:    vextuhrx r4, r4, v2
+; CHECK-NEXT:    addi r1, r1, 80
 ; CHECK-NEXT:    ld r0, 16(r1)
-; CHECK-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    mtlr r0
 ; CHECK-NEXT:    blr
   %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a)
@@ -162,38 +160,31 @@ define <2 x i32> @test_frexp_v2f16_v2i32_only_use_exp(<2 x half> %a) nounwind {
 ; CHECK-LABEL: test_frexp_v2f16_v2i32_only_use_exp:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r29, -32(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r30, -24(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stdu r1, -80(r1)
-; CHECK-NEXT:    std r0, 96(r1)
-; CHECK-NEXT:    xscvdphp f0, f2
-; CHECK-NEXT:    addi r30, r1, 40
-; CHECK-NEXT:    mr r4, r30
-; CHECK-NEXT:    mffprwz r3, f0
-; CHECK-NEXT:    clrlwi r3, r3, 16
-; CHECK-NEXT:    mtfprwz f0, r3
-; CHECK-NEXT:    xscvhpdp f31, f0
-; CHECK-NEXT:    xscvdphp f0, f1
-; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    stdu r1, -64(r1)
 ; CHECK-NEXT:    clrlwi r3, r3, 16
+; CHECK-NEXT:    std r0, 80(r1)
+; CHECK-NEXT:    addi r30, r1, 32
 ; CHECK-NEXT:    mtfprwz f0, r3
+; CHECK-NEXT:    clrlwi r29, r4, 16
+; CHECK-NEXT:    mr r4, r30
 ; CHECK-NEXT:    xscvhpdp f1, f0
 ; CHECK-NEXT:    bl frexpf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    addi r29, r1, 44
-; CHECK-NEXT:    fmr f1, f31
+; CHECK-NEXT:    mtfprwz f0, r29
+; CHECK-NEXT:    addi r29, r1, 36
+; CHECK-NEXT:    xscvhpdp f1, f0
 ; CHECK-NEXT:    mr r4, r29
 ; CHECK-NEXT:    bl frexpf
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    lfiwzx f0, 0, r30
 ; CHECK-NEXT:    lfiwzx f1, 0, r29
 ; CHECK-NEXT:    xxmrghw v2, vs1, vs0
-; CHECK-NEXT:    addi r1, r1, 80
+; CHECK-NEXT:    addi r1, r1, 64
 ; CHECK-NEXT:    ld r0, 16(r1)
-; CHECK-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r30, -24(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r29, -32(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    mtlr r0
 ; CHECK-NEXT:    blr
   %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a)
diff --git a/llvm/test/CodeGen/PowerPC/llvm.modf.ll b/llvm/test/CodeGen/PowerPC/llvm.modf.ll
index 1b137c786cc91..fa9082278826c 100644
--- a/llvm/test/CodeGen/PowerPC/llvm.modf.ll
+++ b/llvm/test/CodeGen/PowerPC/llvm.modf.ll
@@ -10,15 +10,17 @@ define { half, half } @test_modf_f16(half %a) {
 ; CHECK-NEXT:    std r0, 64(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    xscvdphp f0, f1
-; CHECK-NEXT:    addi r4, r1, 44
-; CHECK-NEXT:    mffprwz r3, f0
 ; CHECK-NEXT:    clrlwi r3, r3, 16
+; CHECK-NEXT:    addi r4, r1, 44
 ; CHECK-NEXT:    mtfprwz f0, r3
 ; CHECK-NEXT:    xscvhpdp f1, f0
 ; CHECK-NEXT:    bl modff
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f2, 44(r1)
+; CHECK-NEXT:    xscvdphp f0, f1
+; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    lfs f0, 44(r1)
+; CHECK-NEXT:    xscvdphp f0, f0
+; CHECK-NEXT:    mffprwz r4, f0
 ; CHECK-NEXT:    addi r1, r1, 48
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -35,14 +37,14 @@ define half @test_modf_f16_only_use_fractional_part(half %a) {
 ; CHECK-NEXT:    std r0, 64(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    xscvdphp f0, f1
-; CHECK-NEXT:    addi r4, r1, 44
-; CHECK-NEXT:    mffprwz r3, f0
 ; CHECK-NEXT:    clrlwi r3, r3, 16
+; CHECK-NEXT:    addi r4, r1, 44
 ; CHECK-NEXT:    mtfprwz f0, r3
 ; CHECK-NEXT:    xscvhpdp f1, f0
 ; CHECK-NEXT:    bl modff
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    xscvdphp f0, f1
+; CHECK-NEXT:    mffprwz r3, f0
 ; CHECK-NEXT:    addi r1, r1, 48
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -60,15 +62,15 @@ define half @test_modf_f16_only_use_integral_part(half %a) {
 ; CHECK-NEXT:    std r0, 64(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    xscvdphp f0, f1
-; CHECK-NEXT:    addi r4, r1, 44
-; CHECK-NEXT:    mffprwz r3, f0
 ; CHECK-NEXT:    clrlwi r3, r3, 16
+; CHECK-NEXT:    addi r4, r1, 44
 ; CHECK-NEXT:    mtfprwz f0, r3
 ; CHECK-NEXT:    xscvhpdp f1, f0
 ; CHECK-NEXT:    bl modff
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 44(r1)
+; CHECK-NEXT:    lfs f0, 44(r1)
+; CHECK-NEXT:    xscvdphp f0, f0
+; CHECK-NEXT:    mffprwz r3, f0
 ; CHECK-NEXT:    addi r1, r1, 48
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -82,40 +84,53 @@ define { <2 x half>, <2 x half> } @test_modf_v2f16(<2 x half> %a) {
 ; CHECK-LABEL: test_modf_v2f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset f30, -16
+; CHECK-NEXT:    .cfi_offset r30, -24
 ; CHECK-NEXT:    .cfi_offset f31, -8
-; CHECK-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r30, -24(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stdu r1, -64(r1)
-; CHECK-NEXT:    std r0, 80(r1)
-; CHECK-NEXT:    xscvdphp f0, f2
-; CHECK-NEXT:    addi r4, r1, 40
-; CHECK-NEXT:    mffprwz r3, f0
-; CHECK-NEXT:    clrlwi r3, r3, 16
-; CHECK-NEXT:    mtfprwz f0, r3
-; CHECK-NEXT:    xscvhpdp f31, f0
-; CHECK-NEXT:    xscvdphp f0, f1
-; CHECK-NEXT:    mffprwz r3, f0
-; CHECK-NEXT:    clrlwi r3, r3, 16
+; CHECK-NEXT:    stdu r1, -96(r1)
+; CHECK-NEXT:    clrlwi r30, r3, 16
+; CHECK-NEXT:    clrlwi r3, r4, 16
+; CHECK-NEXT:    addi r4, r1, 44
 ; CHECK-NEXT:    mtfprwz f0, r3
+; CHECK-NEXT:    std r0, 112(r1)
 ; CHECK-NEXT:    xscvhpdp f1, f0
 ; CHECK-NEXT:    bl modff
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    addi r4, r1, 44
-; CHECK-NEXT:    fmr f30, f1
-; CHECK-NEXT:    fmr f1, f31
+; CHECK-NEXT:    lfs f0, 44(r1)
+; CHECK-NEXT:    addi r4, r1, 40
+; CHECK-NEXT:    xscvdphp f0, f0
+; CHECK-NEXT:    fmr f31, f1
+; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    mtfprwz f0, r30
+; CHECK-NEXT:    sth r3, 50(r1)
+; CHECK-NEXT:    xscvhpdp f1, f0
 ; CHECK-NEXT:    bl modff
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f3, 40(r1)
-; CHECK-NEXT:    fmr f2, f1
-; CHECK-NEXT:    fmr f1, f30
-; CHECK-NEXT:    lfs f4, 44(r1)
-; CHECK-NEXT:    addi r1, r1, 64
+; CHECK-NEXT:    lfs f0, 40(r1)
+; CHECK-NEXT:    li r5, 0
+; CHECK-NEXT:    li r6, 2
+; CHECK-NEXT:    xscvdphp f0, f0
+; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    sth r3, 48(r1)
+; CHECK-NEXT:    xscvdphp f0, f31
+; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    sth r3, 66(r1)
+; CHECK-NEXT:    xscvdphp f0, f1
+; CHECK-NEXT:    lxv v2, 48(r1)
+; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    sth r3, 64(r1)
+; CHECK-NEXT:    lxv v3, 64(r1)
+; CHECK-NEXT:    vextuhrx r3, r5, v3
+; CHECK-NEXT:    vextuhrx r4, r6, v3
+; CHECK-NEXT:    vextuhrx r5, r5, v2
+; CHECK-NEXT:    vextuhrx r6, r6, v2
+; CHECK-NEXT:    addi r1, r1, 96
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r30, -24(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    mtlr r0
 ; CHECK-NEXT:    blr
   %result = call { <2 x half>, <2 x half> } @llvm.modf.v2f16(<2 x half> %a)
diff --git a/llvm/test/CodeGen/PowerPC/pr48519.ll b/llvm/test/CodeGen/PowerPC/pr48519.ll
index b610f12159ee2..61a8ebe49e6dd 100644
--- a/llvm/test/CodeGen/PowerPC/pr48519.ll
+++ b/llvm/test/CodeGen/PowerPC/pr48519.ll
@@ -12,26 +12,21 @@ define void @julia__typed_vcat_20() #0 {
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stdu r1, -48(r1)
 ; CHECK-NEXT:    li r30, 0
-; CHECK-NEXT:    li r3, 1
+; CHECK-NEXT:    li r4, 1
 ; CHECK-NEXT:    std r0, 64(r1)
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB0_1: # %bb3
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    addi r3, r3, -1
+; CHECK-NEXT:    addi r3, r4, -1
 ; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    xscvsxdsp f1, f0
 ; CHECK-NEXT:    bl __truncsfhf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r3, 48
-; CHECK-NEXT:    bl __extendhfsf2
-; CHECK-NEXT:    nop
 ; CHECK-NEXT:    addi r30, r30, -1
-; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:    li r4, 0
 ; CHECK-NEXT:    cmpldi r30, 0
 ; CHECK-NEXT:    bc 12, gt, .LBB0_1
 ; CHECK-NEXT:  # %bb.2: # %bb11
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
 ; CHECK-NEXT:    sth r3, 128(0)
 ;
 ; CHECK-P9-LABEL: julia__typed_vcat_20:
@@ -39,23 +34,18 @@ define void @julia__typed_vcat_20() #0 {
 ; CHECK-P9-NEXT:    li r3, 0
 ; CHECK-P9-NEXT:    mtctr r3
 ; CHECK-P9-NEXT:    li r3, 1
-; CHECK-P9-NEXT:    .p2align 4
+; CHECK-P9-NEXT:    .p2align 5
 ; CHECK-P9-NEXT:  .LBB0_1: # %bb3
 ; CHECK-P9-NEXT:    #
 ; CHECK-P9-NEXT:    addi r3, r3, -1
 ; CHECK-P9-NEXT:    mtfprd f0, r3
+; CHECK-P9-NEXT:    li r3, 0
 ; CHECK-P9-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P9-NEXT:    xscvdphp f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    clrlwi r3, r3, 16
-; CHECK-P9-NEXT:    mtfprwz f0, r3
-; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    xscvhpdp f0, f0
 ; CHECK-P9-NEXT:    bdnz .LBB0_1
 ; CHECK-P9-NEXT:  # %bb.2: # %bb11
-; CHECK-P9-NEXT:    xscvdphp f0, f0
-; CHECK-P9-NEXT:    li r3, 128
-; CHECK-P9-NEXT:    stxsihx f0, 0, r3
+; CHECK-P9-NEXT:    mffprwz r3, f0
+; CHECK-P9-NEXT:    sth r3, 128(0)
 bb:
   %i = load i64, ptr addrspace(11) null, align 8
   %i1 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %i, i64 0)
@@ -147,54 +137,33 @@ bb9:                                              ; preds = %bb3, %bb1
 define void @func_48786() #0 {
 ; CHECK-LABEL: func_48786:
 ; CHECK:       # %bb.0: # %bb
-; CHECK-NEXT:    mfocrf r12, 32
-; CHECK-NEXT:    stw r12, 8(r1)
-; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    stdu r1, -48(r1)
-; CHECK-NEXT:    std r0, 64(r1)
-; CHECK-NEXT:    std r30, 32(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    # implicit-def: $x30
 ; CHECK-NEXT:    ld r3, 0(r3)
 ; CHECK-NEXT:    cmpdi r3, 0
-; CHECK-NEXT:    crnot 4*cr2+lt, eq
+; CHECK-NEXT:    mtctr r3
+; CHECK-NEXT:    crnot 4*cr5+lt, eq
 ; CHECK-NEXT:    b .LBB2_2
-; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB2_1: # %bb10
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    addi r30, r30, -1
-; CHECK-NEXT:    cmpldi r30, 0
-; CHECK-NEXT:    bc 4, gt, .LBB2_5
+; CHECK-NEXT:    bdzlr
 ; CHECK-NEXT:  .LBB2_2: # %bb2
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    bc 12, 4*cr5+lt, .LBB2_1
 ; CHECK-NEXT:  # %bb.3: # %bb4
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    lhz r3, 0(r3)
-; CHECK-NEXT:    bl __extendhfsf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    bc 4, 4*cr2+lt, .LBB2_6
+; CHECK-NEXT:    bc 4, 4*cr5+lt, .LBB2_5
 ; CHECK-NEXT:  # %bb.4: # %bb8
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
+; CHECK-NEXT:    lhz r3, 0(r3)
 ; CHECK-NEXT:    sth r3, 0(0)
 ; CHECK-NEXT:    b .LBB2_1
-; CHECK-NEXT:  .LBB2_5: # %bb14
-; CHECK-NEXT:    ld r30, 32(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    addi r1, r1, 48
-; CHECK-NEXT:    ld r0, 16(r1)
-; CHECK-NEXT:    lwz r12, 8(r1)
-; CHECK-NEXT:    mtlr r0
-; CHECK-NEXT:    mtocrf 32, r12
-; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB2_6: # %bb15
+; CHECK-NEXT:  .LBB2_5: # %bb15
 ;
 ; CHECK-P9-LABEL: func_48786:
 ; CHECK-P9:       # %bb.0: # %bb
 ; CHECK-P9-NEXT:    ld r3, 0(r3)
 ; CHECK-P9-NEXT:    cmpdi r3, 0
 ; CHECK-P9-NEXT:    mtctr r3
-; CHECK-P9-NEXT:    li r3, 0
 ; CHECK-P9-NEXT:    crnot 4*cr5+lt, eq
 ; CHECK-P9-NEXT:    b .LBB2_2
 ; CHECK-P9-NEXT:    .p2align 5
@@ -206,13 +175,11 @@ define void @func_48786() #0 {
 ; CHECK-P9-NEXT:    bc 12, 4*cr5+lt, .LBB2_1
 ; CHECK-P9-NEXT:  # %bb.3: # %bb4
 ; CHECK-P9-NEXT:    #
-; CHECK-P9-NEXT:    lxsihzx f0, 0, r3
-; CHECK-P9-NEXT:    xscvhpdp f0, f0
 ; CHECK-P9-NEXT:    bc 4, 4*cr5+lt, .LBB2_5
 ; CHECK-P9-NEXT:  # %bb.4: # %bb8
 ; CHECK-P9-NEXT:    #
-; CHECK-P9-NEXT:    xscvdphp f0, f0
-; CHECK-P9-NEXT:    stxsihx f0, 0, r3
+; CHECK-P9-NEXT:    lhz r3, 0(r3)
+; CHECK-P9-NEXT:    sth r3, 0(0)
 ; CHECK-P9-NEXT:    b .LBB2_1
 ; CHECK-P9-NEXT:  .LBB2_5: # %bb15
 bb:
@@ -260,41 +227,29 @@ bb15:                                             ; preds = %bb5
 define void @func_48785(half %arg) #0 {
 ; CHECK-LABEL: func_48785:
 ; CHECK:       # %bb.0: # %bb
-; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r29, -32(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r30, -24(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stdu r1, -64(r1)
-; CHECK-NEXT:    fmr f31, f1
-; CHECK-NEXT:    li r3, 1
-; CHECK-NEXT:    li r29, 0
-; CHECK-NEXT:    std r0, 80(r1)
-; CHECK-NEXT:    rldic r30, r3, 62, 1
-; CHECK-NEXT:    .p2align 5
+; CHECK-NEXT:    li r4, 1
+; CHECK-NEXT:    rldic r4, r4, 62, 1
+; CHECK-NEXT:    mtctr r4
+; CHECK-NEXT:    li r4, 0
+; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB3_1: # %bb1
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    fmr f1, f31
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    addi r30, r30, -1
-; CHECK-NEXT:    sth r3, 0(r29)
-; CHECK-NEXT:    addi r29, r29, 24
-; CHECK-NEXT:    cmpldi r30, 0
-; CHECK-NEXT:    bc 12, gt, .LBB3_1
+; CHECK-NEXT:    sth r3, 0(r4)
+; CHECK-NEXT:    addi r4, r4, 24
+; CHECK-NEXT:    bdnz .LBB3_1
 ; CHECK-NEXT:  # %bb.2: # %bb5
 ;
 ; CHECK-P9-LABEL: func_48785:
 ; CHECK-P9:       # %bb.0: # %bb
-; CHECK-P9-NEXT:    li r3, 1
-; CHECK-P9-NEXT:    rldic r3, r3, 62, 1
-; CHECK-P9-NEXT:    mtctr r3
-; CHECK-P9-NEXT:    li r3, 0
+; CHECK-P9-NEXT:    li r4, 1
+; CHECK-P9-NEXT:    rldic r4, r4, 62, 1
+; CHECK-P9-NEXT:    mtctr r4
+; CHECK-P9-NEXT:    li r4, 0
 ; CHECK-P9-NEXT:    .p2align 4
 ; CHECK-P9-NEXT:  .LBB3_1: # %bb1
 ; CHECK-P9-NEXT:    #
-; CHECK-P9-NEXT:    xscvdphp f0, f1
-; CHECK-P9-NEXT:    stxsihx f0, 0, r3
-; CHECK-P9-NEXT:    addi r3, r3, 24
+; CHECK-P9-NEXT:    sth r3, 0(r4)
+; CHECK-P9-NEXT:    addi r4, r4, 24
 ; CHECK-P9-NEXT:    bdnz .LBB3_1
 ; CHECK-P9-NEXT:  # %bb.2: # %bb5
 bb:
diff --git a/llvm/test/CodeGen/PowerPC/pr49092.ll b/llvm/test/CodeGen/PowerPC/pr49092.ll
index 7b524a6d2f69b..3c028e9005ee6 100644
--- a/llvm/test/CodeGen/PowerPC/pr49092.ll
+++ b/llvm/test/CodeGen/PowerPC/pr49092.ll
@@ -8,26 +8,14 @@
 define dso_local half @test2(i64 %a, i64 %b) local_unnamed_addr #0 {
 ; CHECK-LABEL: test2:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    stdu r1, -32(r1)
 ; CHECK-NEXT:    add r3, r4, r3
-; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    addi r3, r3, 11
-; CHECK-NEXT:    clrlwi r3, r3, 16
-; CHECK-NEXT:    bl __extendhfsf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    addi r1, r1, 32
-; CHECK-NEXT:    ld r0, 16(r1)
-; CHECK-NEXT:    mtlr r0
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test2:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    add r3, r4, r3
 ; CHECK-P9-NEXT:    addi r3, r3, 11
-; CHECK-P9-NEXT:    clrlwi r3, r3, 16
-; CHECK-P9-NEXT:    mtfprwz f0, r3
-; CHECK-P9-NEXT:    xscvhpdp f1, f0
 ; CHECK-P9-NEXT:    blr
 entry:
   %add = add i64 %b, %a
diff --git a/llvm/test/CodeGen/PowerPC/vector-llrint.ll b/llvm/test/CodeGen/PowerPC/vector-llrint.ll
index 8a9e48e002381..fef9c039c043e 100644
--- a/llvm/test/CodeGen/PowerPC/vector-llrint.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-llrint.ll
@@ -18,10 +18,8 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) nounwind {
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -112(r1)
-; BE-NEXT:    std r0, 128(r1)
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
 ; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    std r0, 128(r1)
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
@@ -35,10 +33,8 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -32(r1)
-; CHECK-NEXT:    std r0, 48(r1)
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
 ; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    bl llrintf
@@ -52,10 +48,8 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) nounwind {
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
 ; FAST-NEXT:    stdu r1, -32(r1)
-; FAST-NEXT:    std r0, 48(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
 ; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    std r0, 48(r1)
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
 ; FAST-NEXT:    fctid f0, f1
@@ -73,37 +67,26 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) nounwind {
 ; BE-LABEL: llrint_v1i64_v2f16:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
-; BE-NEXT:    stdu r1, -160(r1)
-; BE-NEXT:    std r0, 176(r1)
-; BE-NEXT:    stfd f31, 152(r1) # 8-byte Folded Spill
-; BE-NEXT:    fmr f31, f1
-; BE-NEXT:    fmr f1, f2
-; BE-NEXT:    std r30, 136(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f31
+; BE-NEXT:    stdu r1, -144(r1)
+; BE-NEXT:    std r0, 160(r1)
+; BE-NEXT:    std r30, 128(r1) # 8-byte Folded Spill
 ; BE-NEXT:    mr r30, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r3, 48
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r30, 48
-; BE-NEXT:    fmr f31, f1
+; BE-NEXT:    clrldi r3, r4, 48
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f31
 ; BE-NEXT:    std r3, 120(r1)
+; BE-NEXT:    clrldi r3, r30, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
 ; BE-NEXT:    std r3, 112(r1)
 ; BE-NEXT:    addi r3, r1, 112
-; BE-NEXT:    ld r30, 136(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f31, 152(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r30, 128(r1) # 8-byte Folded Reload
 ; BE-NEXT:    lxvd2x v2, 0, r3
-; BE-NEXT:    addi r1, r1, 160
+; BE-NEXT:    addi r1, r1, 144
 ; BE-NEXT:    ld r0, 16(r1)
 ; BE-NEXT:    mtlr r0
 ; BE-NEXT:    blr
@@ -112,35 +95,28 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -96(r1)
-; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    std r0, 112(r1)
-; CHECK-NEXT:    std r30, 72(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f31, 88(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f31, f2
-; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f31
-; CHECK-NEXT:    mr r30, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
+; CHECK-NEXT:    li r5, 48
 ; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    std r0, 112(r1)
+; CHECK-NEXT:    std r29, 72(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r30, 80(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r30, r4
+; CHECK-NEXT:    stxvd2x v31, r1, r5 # 16-byte Folded Spill
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mr r29, r3
 ; CHECK-NEXT:    clrldi r3, r30, 48
-; CHECK-NEXT:    fmr f31, f1
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    bl llrintf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f31
-; CHECK-NEXT:    mtvsrd v31, r3
+; CHECK-NEXT:    mtvsrd v31, r29
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    lfd f31, 88(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r30, 72(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r30, 80(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r29, 72(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    xxmrghd v2, vs0, v31
 ; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 96
@@ -151,35 +127,30 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) nounwind {
 ; FAST-LABEL: llrint_v1i64_v2f16:
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
-; FAST-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stdu r1, -48(r1)
-; FAST-NEXT:    fmr f31, f1
-; FAST-NEXT:    fmr f1, f2
-; FAST-NEXT:    std r0, 64(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
+; FAST-NEXT:    stdu r1, -80(r1)
+; FAST-NEXT:    li r5, 48
 ; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    std r0, 96(r1)
+; FAST-NEXT:    std r30, 64(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r30, r4
+; FAST-NEXT:    stxvd2x v31, r1, r5 # 16-byte Folded Spill
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f30, f1
-; FAST-NEXT:    fmr f1, f31
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    clrldi r3, r30, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
 ; FAST-NEXT:    fctid f0, f1
-; FAST-NEXT:    fctid f1, f30
+; FAST-NEXT:    ld r30, 64(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    mffprd r3, f0
 ; FAST-NEXT:    mtfprd f0, r3
-; FAST-NEXT:    mffprd r3, f1
-; FAST-NEXT:    mtfprd f1, r3
-; FAST-NEXT:    xxmrghd v2, vs1, vs0
-; FAST-NEXT:    addi r1, r1, 48
+; FAST-NEXT:    li r3, 48
+; FAST-NEXT:    xxmrghd v2, vs0, v31
+; FAST-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    addi r1, r1, 80
 ; FAST-NEXT:    ld r0, 16(r1)
-; FAST-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    mtlr r0
 ; FAST-NEXT:    blr
   %a = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> %x)
@@ -191,73 +162,46 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) nounwind {
 ; BE-LABEL: llrint_v4i64_v4f16:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
-; BE-NEXT:    stdu r1, -208(r1)
-; BE-NEXT:    std r0, 224(r1)
-; BE-NEXT:    stfd f29, 184(r1) # 8-byte Folded Spill
-; BE-NEXT:    fmr f29, f1
-; BE-NEXT:    fmr f1, f2
-; BE-NEXT:    std r28, 152(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r29, 160(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r30, 168(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f30, 192(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f31, 200(r1) # 8-byte Folded Spill
-; BE-NEXT:    fmr f31, f4
-; BE-NEXT:    fmr f30, f3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f29
-; BE-NEXT:    mr r30, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f31
-; BE-NEXT:    mr r29, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f30
+; BE-NEXT:    stdu r1, -176(r1)
+; BE-NEXT:    std r0, 192(r1)
+; BE-NEXT:    std r28, 144(r1) # 8-byte Folded Spill
 ; BE-NEXT:    mr r28, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    clrldi r3, r4, 48
+; BE-NEXT:    std r29, 152(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r30, 160(r1) # 8-byte Folded Spill
+; BE-NEXT:    mr r30, r6
+; BE-NEXT:    mr r29, r5
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 120(r1)
 ; BE-NEXT:    clrldi r3, r28, 48
-; BE-NEXT:    fmr f31, f1
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r29, 48
-; BE-NEXT:    fmr f30, f1
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
+; BE-NEXT:    std r3, 112(r1)
 ; BE-NEXT:    clrldi r3, r30, 48
-; BE-NEXT:    fmr f29, f1
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f29
-; BE-NEXT:    std r3, 120(r1)
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f30
-; BE-NEXT:    std r3, 112(r1)
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f31
 ; BE-NEXT:    std r3, 136(r1)
+; BE-NEXT:    clrldi r3, r29, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
 ; BE-NEXT:    std r3, 128(r1)
 ; BE-NEXT:    addi r3, r1, 112
-; BE-NEXT:    ld r30, 168(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f31, 200(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f30, 192(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f29, 184(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r30, 160(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r29, 152(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r28, 144(r1) # 8-byte Folded Reload
 ; BE-NEXT:    lxvd2x v2, 0, r3
 ; BE-NEXT:    addi r3, r1, 128
-; BE-NEXT:    ld r29, 160(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r28, 152(r1) # 8-byte Folded Reload
 ; BE-NEXT:    lxvd2x v3, 0, r3
-; BE-NEXT:    addi r1, r1, 208
+; BE-NEXT:    addi r1, r1, 176
 ; BE-NEXT:    ld r0, 16(r1)
 ; BE-NEXT:    mtlr r0
 ; BE-NEXT:    blr
@@ -265,79 +209,57 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) nounwind {
 ; CHECK-LABEL: llrint_v4i64_v4f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    stdu r1, -144(r1)
-; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    std r0, 160(r1)
-; CHECK-NEXT:    std r28, 88(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r29, 96(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r30, 104(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f29, 120(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f29, f2
-; CHECK-NEXT:    stfd f30, 128(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f30, f3
-; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    stfd f31, 136(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f31, f4
-; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f29
-; CHECK-NEXT:    mr r30, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f30
-; CHECK-NEXT:    mr r29, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f31
-; CHECK-NEXT:    mr r28, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
+; CHECK-NEXT:    stdu r1, -128(r1)
+; CHECK-NEXT:    li r7, 48
+; CHECK-NEXT:    std r0, 144(r1)
 ; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    std r27, 88(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r28, 96(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r29, 104(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r29, r5
+; CHECK-NEXT:    mr r28, r4
+; CHECK-NEXT:    stxvd2x v30, r1, r7 # 16-byte Folded Spill
+; CHECK-NEXT:    li r7, 64
+; CHECK-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r30, r6
+; CHECK-NEXT:    stxvd2x v31, r1, r7 # 16-byte Folded Spill
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r28, 48
-; CHECK-NEXT:    fmr f31, f1
-; CHECK-NEXT:    bl __extendhfsf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r29, 48
-; CHECK-NEXT:    fmr f30, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r30, 48
-; CHECK-NEXT:    fmr f29, f1
+; CHECK-NEXT:    mr r27, r3
+; CHECK-NEXT:    clrldi r3, r28, 48
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtvsrd v31, r27
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f29
-; CHECK-NEXT:    mtvsrd v31, r3
-; CHECK-NEXT:    bl llrintf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f30
 ; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    clrldi r3, r29, 48
 ; CHECK-NEXT:    xxmrghd v31, vs0, v31
+; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    nop
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f31
-; CHECK-NEXT:    mtvsrd v30, r3
+; CHECK-NEXT:    mr r29, r3
+; CHECK-NEXT:    clrldi r3, r30, 48
+; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtvsrd v30, r29
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    li r3, 64
 ; CHECK-NEXT:    vmr v2, v31
-; CHECK-NEXT:    lfd f31, 136(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f30, 128(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f29, 120(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r30, 104(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r29, 96(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r30, 112(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r29, 104(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r28, 96(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r27, 88(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    ld r28, 88(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    xxmrghd v3, vs0, v30
 ; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    addi r1, r1, 144
+; CHECK-NEXT:    addi r1, r1, 128
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
 ; CHECK-NEXT:    blr
@@ -345,63 +267,55 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) nounwind {
 ; FAST-LABEL: llrint_v4i64_v4f16:
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
-; FAST-NEXT:    stfd f28, -32(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f29, -24(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stdu r1, -64(r1)
-; FAST-NEXT:    fmr f29, f1
-; FAST-NEXT:    fmr f1, f4
-; FAST-NEXT:    std r0, 80(r1)
-; FAST-NEXT:    fmr f31, f3
-; FAST-NEXT:    fmr f30, f2
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
+; FAST-NEXT:    stdu r1, -112(r1)
+; FAST-NEXT:    li r7, 48
+; FAST-NEXT:    std r0, 128(r1)
 ; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    std r28, 80(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r29, 88(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r30, 96(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r30, r6
+; FAST-NEXT:    mr r29, r5
+; FAST-NEXT:    stxvd2x v30, r1, r7 # 16-byte Folded Spill
+; FAST-NEXT:    li r7, 64
+; FAST-NEXT:    mr r28, r4
+; FAST-NEXT:    stxvd2x v31, r1, r7 # 16-byte Folded Spill
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f28, f1
-; FAST-NEXT:    fmr f1, f31
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    clrldi r3, r28, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f31, f1
-; FAST-NEXT:    fmr f1, f30
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    clrldi r3, r29, 48
+; FAST-NEXT:    xxmrghd v31, vs0, v31
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f30, f1
-; FAST-NEXT:    fmr f1, f29
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v30, r3
+; FAST-NEXT:    clrldi r3, r30, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fctid f0, f30
-; FAST-NEXT:    fctid f2, f31
-; FAST-NEXT:    mffprd r3, f0
-; FAST-NEXT:    fctid f1, f1
-; FAST-NEXT:    mtfprd f0, r3
-; FAST-NEXT:    mffprd r3, f2
-; FAST-NEXT:    mtfprd f2, r3
-; FAST-NEXT:    mffprd r3, f1
-; FAST-NEXT:    mtfprd f1, r3
-; FAST-NEXT:    xxmrghd v2, vs0, vs1
-; FAST-NEXT:    fctid f0, f28
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    vmr v2, v31
+; FAST-NEXT:    ld r30, 96(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r29, 88(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r28, 80(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    mffprd r3, f0
 ; FAST-NEXT:    mtfprd f0, r3
-; FAST-NEXT:    xxmrghd v3, vs0, vs2
-; FAST-NEXT:    addi r1, r1, 64
+; FAST-NEXT:    li r3, 64
+; FAST-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 48
+; FAST-NEXT:    xxmrghd v3, vs0, v30
+; FAST-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    addi r1, r1, 112
 ; FAST-NEXT:    ld r0, 16(r1)
-; FAST-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    mtlr r0
-; FAST-NEXT:    lfd f29, -24(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f28, -32(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    blr
   %a = call <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half> %x)
   ret <4 x i64> %a
@@ -412,145 +326,86 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) nounwind {
 ; BE-LABEL: llrint_v8i64_v8f16:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
-; BE-NEXT:    stdu r1, -304(r1)
-; BE-NEXT:    std r0, 320(r1)
-; BE-NEXT:    stfd f25, 248(r1) # 8-byte Folded Spill
-; BE-NEXT:    fmr f25, f1
-; BE-NEXT:    fmr f1, f2
-; BE-NEXT:    std r24, 184(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r25, 192(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r26, 200(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r27, 208(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r28, 216(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r29, 224(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r30, 232(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f26, 256(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f27, 264(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f28, 272(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f29, 280(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f30, 288(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f31, 296(r1) # 8-byte Folded Spill
-; BE-NEXT:    fmr f31, f8
-; BE-NEXT:    fmr f30, f7
-; BE-NEXT:    fmr f29, f6
-; BE-NEXT:    fmr f28, f5
-; BE-NEXT:    fmr f27, f4
-; BE-NEXT:    fmr f26, f3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f25
-; BE-NEXT:    mr r30, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f27
-; BE-NEXT:    mr r29, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f26
-; BE-NEXT:    mr r28, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f29
-; BE-NEXT:    mr r27, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f28
-; BE-NEXT:    mr r26, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f31
-; BE-NEXT:    mr r25, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f30
+; BE-NEXT:    stdu r1, -240(r1)
+; BE-NEXT:    std r0, 256(r1)
+; BE-NEXT:    std r24, 176(r1) # 8-byte Folded Spill
 ; BE-NEXT:    mr r24, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    clrldi r3, r4, 48
+; BE-NEXT:    std r25, 184(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r26, 192(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r27, 200(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r28, 208(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r29, 216(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r30, 224(r1) # 8-byte Folded Spill
+; BE-NEXT:    mr r29, r10
+; BE-NEXT:    mr r30, r9
+; BE-NEXT:    mr r27, r8
+; BE-NEXT:    mr r28, r7
+; BE-NEXT:    mr r26, r6
+; BE-NEXT:    mr r25, r5
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 120(r1)
 ; BE-NEXT:    clrldi r3, r24, 48
-; BE-NEXT:    fmr f31, f1
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r25, 48
-; BE-NEXT:    fmr f30, f1
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
+; BE-NEXT:    std r3, 112(r1)
 ; BE-NEXT:    clrldi r3, r26, 48
-; BE-NEXT:    fmr f29, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r27, 48
-; BE-NEXT:    fmr f28, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r28, 48
-; BE-NEXT:    fmr f27, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r29, 48
-; BE-NEXT:    fmr f26, f1
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r30, 48
-; BE-NEXT:    fmr f25, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f25
-; BE-NEXT:    std r3, 120(r1)
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f26
-; BE-NEXT:    std r3, 112(r1)
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f27
 ; BE-NEXT:    std r3, 136(r1)
+; BE-NEXT:    clrldi r3, r25, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f28
 ; BE-NEXT:    std r3, 128(r1)
+; BE-NEXT:    clrldi r3, r27, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f29
 ; BE-NEXT:    std r3, 152(r1)
+; BE-NEXT:    clrldi r3, r28, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f30
 ; BE-NEXT:    std r3, 144(r1)
+; BE-NEXT:    clrldi r3, r29, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f31
 ; BE-NEXT:    std r3, 168(r1)
+; BE-NEXT:    clrldi r3, r30, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
 ; BE-NEXT:    std r3, 160(r1)
 ; BE-NEXT:    addi r3, r1, 112
-; BE-NEXT:    ld r30, 232(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f31, 296(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f30, 288(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f29, 280(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r30, 224(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r29, 216(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r28, 208(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r27, 200(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r26, 192(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r25, 184(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r24, 176(r1) # 8-byte Folded Reload
 ; BE-NEXT:    lxvd2x v2, 0, r3
 ; BE-NEXT:    addi r3, r1, 128
-; BE-NEXT:    lfd f28, 272(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f27, 264(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f26, 256(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r29, 224(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r28, 216(r1) # 8-byte Folded Reload
 ; BE-NEXT:    lxvd2x v3, 0, r3
 ; BE-NEXT:    addi r3, r1, 144
-; BE-NEXT:    lfd f25, 248(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r27, 208(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r26, 200(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r25, 192(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r24, 184(r1) # 8-byte Folded Reload
 ; BE-NEXT:    lxvd2x v4, 0, r3
 ; BE-NEXT:    addi r3, r1, 160
 ; BE-NEXT:    lxvd2x v5, 0, r3
-; BE-NEXT:    addi r1, r1, 304
+; BE-NEXT:    addi r1, r1, 240
 ; BE-NEXT:    ld r0, 16(r1)
 ; BE-NEXT:    mtlr r0
 ; BE-NEXT:    blr
@@ -558,159 +413,107 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) nounwind {
 ; CHECK-LABEL: llrint_v8i64_v8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    stdu r1, -240(r1)
-; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    std r0, 256(r1)
-; CHECK-NEXT:    std r24, 120(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r25, 128(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r26, 136(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r27, 144(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r28, 152(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r29, 160(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    std r30, 168(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f25, 184(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f25, f2
-; CHECK-NEXT:    stfd f26, 192(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f26, f3
-; CHECK-NEXT:    stfd f27, 200(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f27, f4
-; CHECK-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 80
-; CHECK-NEXT:    stfd f28, 208(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f28, f5
-; CHECK-NEXT:    stfd f29, 216(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f29, f6
-; CHECK-NEXT:    stfd f30, 224(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f30, f7
-; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 96
-; CHECK-NEXT:    stfd f31, 232(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f31, f8
-; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f25
-; CHECK-NEXT:    mr r30, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f26
-; CHECK-NEXT:    mr r29, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f27
-; CHECK-NEXT:    mr r28, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f28
-; CHECK-NEXT:    mr r27, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f29
-; CHECK-NEXT:    mr r26, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f30
-; CHECK-NEXT:    mr r25, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f31
-; CHECK-NEXT:    mr r24, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
+; CHECK-NEXT:    stdu r1, -192(r1)
+; CHECK-NEXT:    li r11, 48
+; CHECK-NEXT:    std r0, 208(r1)
 ; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    std r23, 120(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r24, 128(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r25, 136(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r25, r5
+; CHECK-NEXT:    mr r24, r4
+; CHECK-NEXT:    stxvd2x v28, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 64
+; CHECK-NEXT:    std r26, 144(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r26, r6
+; CHECK-NEXT:    std r27, 152(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r28, 160(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r28, r8
+; CHECK-NEXT:    mr r27, r7
+; CHECK-NEXT:    stxvd2x v29, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 80
+; CHECK-NEXT:    std r29, 168(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r29, r9
+; CHECK-NEXT:    std r30, 176(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r30, r10
+; CHECK-NEXT:    stxvd2x v30, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 96
+; CHECK-NEXT:    stxvd2x v31, r1, r11 # 16-byte Folded Spill
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r24, 48
-; CHECK-NEXT:    fmr f31, f1
-; CHECK-NEXT:    bl __extendhfsf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r25, 48
-; CHECK-NEXT:    fmr f30, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r26, 48
-; CHECK-NEXT:    fmr f29, f1
+; CHECK-NEXT:    mr r23, r3
+; CHECK-NEXT:    clrldi r3, r24, 48
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r27, 48
-; CHECK-NEXT:    fmr f28, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    mtvsrd v31, r23
+; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r28, 48
-; CHECK-NEXT:    fmr f27, f1
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    clrldi r3, r25, 48
+; CHECK-NEXT:    xxmrghd v31, vs0, v31
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r29, 48
-; CHECK-NEXT:    fmr f26, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r30, 48
-; CHECK-NEXT:    fmr f25, f1
+; CHECK-NEXT:    mr r25, r3
+; CHECK-NEXT:    clrldi r3, r26, 48
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtvsrd v30, r25
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f25
-; CHECK-NEXT:    mtvsrd v31, r3
-; CHECK-NEXT:    bl llrintf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f26
 ; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v31, vs0, v31
-; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    clrldi r3, r27, 48
+; CHECK-NEXT:    xxmrghd v30, vs0, v30
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f27
-; CHECK-NEXT:    mtvsrd v30, r3
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f28
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v30, vs0, v30
-; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    mr r27, r3
+; CHECK-NEXT:    clrldi r3, r28, 48
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f29
-; CHECK-NEXT:    mtvsrd v29, r3
+; CHECK-NEXT:    mtvsrd v29, r27
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f30
 ; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    clrldi r3, r29, 48
 ; CHECK-NEXT:    xxmrghd v29, vs0, v29
+; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    nop
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f31
-; CHECK-NEXT:    mtvsrd v28, r3
+; CHECK-NEXT:    mr r29, r3
+; CHECK-NEXT:    clrldi r3, r30, 48
+; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtvsrd v28, r29
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    li r3, 96
 ; CHECK-NEXT:    vmr v2, v31
-; CHECK-NEXT:    lfd f31, 232(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r30, 176(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    vmr v3, v30
 ; CHECK-NEXT:    vmr v4, v29
-; CHECK-NEXT:    lfd f30, 224(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f29, 216(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r29, 168(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r28, 160(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 80
-; CHECK-NEXT:    lfd f28, 208(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f27, 200(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f26, 192(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f25, 184(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r30, 168(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r29, 160(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r27, 152(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r26, 144(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r25, 136(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r24, 128(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r23, 120(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    ld r28, 152(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r27, 144(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    xxmrghd v5, vs0, v28
-; CHECK-NEXT:    ld r26, 136(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r25, 128(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r24, 120(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    xxmrghd v5, vs0, v28
 ; CHECK-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    addi r1, r1, 240
+; CHECK-NEXT:    addi r1, r1, 192
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
 ; CHECK-NEXT:    blr
@@ -718,117 +521,103 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) nounwind {
 ; FAST-LABEL: llrint_v8i64_v8f16:
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
-; FAST-NEXT:    stfd f24, -64(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f25, -56(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f26, -48(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f27, -40(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f28, -32(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f29, -24(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stdu r1, -96(r1)
-; FAST-NEXT:    fmr f24, f1
-; FAST-NEXT:    fmr f1, f8
-; FAST-NEXT:    std r0, 112(r1)
-; FAST-NEXT:    fmr f30, f7
-; FAST-NEXT:    fmr f29, f6
-; FAST-NEXT:    fmr f28, f5
-; FAST-NEXT:    fmr f27, f4
-; FAST-NEXT:    fmr f26, f3
-; FAST-NEXT:    fmr f25, f2
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
+; FAST-NEXT:    stdu r1, -176(r1)
+; FAST-NEXT:    li r11, 48
+; FAST-NEXT:    std r0, 192(r1)
 ; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    std r24, 112(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r25, 120(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r26, 128(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r26, r6
+; FAST-NEXT:    mr r25, r5
+; FAST-NEXT:    stxvd2x v28, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 64
+; FAST-NEXT:    std r27, 136(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r27, r7
+; FAST-NEXT:    std r28, 144(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r29, 152(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r29, r9
+; FAST-NEXT:    mr r28, r8
+; FAST-NEXT:    stxvd2x v29, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 80
+; FAST-NEXT:    std r30, 160(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r30, r10
+; FAST-NEXT:    mr r24, r4
+; FAST-NEXT:    stxvd2x v30, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 96
+; FAST-NEXT:    stxvd2x v31, r1, r11 # 16-byte Folded Spill
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f31, f1
-; FAST-NEXT:    fmr f1, f30
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    clrldi r3, r24, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f30, f1
-; FAST-NEXT:    fmr f1, f29
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    clrldi r3, r25, 48
+; FAST-NEXT:    xxmrghd v31, vs0, v31
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f29, f1
-; FAST-NEXT:    fmr f1, f28
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v30, r3
+; FAST-NEXT:    clrldi r3, r26, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f28, f1
-; FAST-NEXT:    fmr f1, f27
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    clrldi r3, r27, 48
+; FAST-NEXT:    xxmrghd v30, vs0, v30
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f27, f1
-; FAST-NEXT:    fmr f1, f26
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v29, r3
+; FAST-NEXT:    clrldi r3, r28, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f26, f1
-; FAST-NEXT:    fmr f1, f25
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    clrldi r3, r29, 48
+; FAST-NEXT:    xxmrghd v29, vs0, v29
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f25, f1
-; FAST-NEXT:    fmr f1, f24
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v28, r3
+; FAST-NEXT:    clrldi r3, r30, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fctid f0, f25
-; FAST-NEXT:    fctid f2, f26
-; FAST-NEXT:    mffprd r3, f0
-; FAST-NEXT:    fctid f3, f27
-; FAST-NEXT:    fctid f4, f28
-; FAST-NEXT:    fctid f5, f29
-; FAST-NEXT:    fctid f6, f30
-; FAST-NEXT:    fctid f1, f1
-; FAST-NEXT:    mtfprd f0, r3
-; FAST-NEXT:    mffprd r3, f2
-; FAST-NEXT:    mtfprd f2, r3
-; FAST-NEXT:    mffprd r3, f3
-; FAST-NEXT:    mtfprd f3, r3
-; FAST-NEXT:    mffprd r3, f4
-; FAST-NEXT:    mtfprd f4, r3
-; FAST-NEXT:    mffprd r3, f5
-; FAST-NEXT:    mtfprd f5, r3
-; FAST-NEXT:    mffprd r3, f6
-; FAST-NEXT:    mtfprd f6, r3
-; FAST-NEXT:    mffprd r3, f1
-; FAST-NEXT:    mtfprd f1, r3
-; FAST-NEXT:    xxmrghd v3, vs3, vs2
-; FAST-NEXT:    xxmrghd v4, vs5, vs4
-; FAST-NEXT:    xxmrghd v2, vs0, vs1
-; FAST-NEXT:    fctid f0, f31
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    vmr v2, v31
+; FAST-NEXT:    ld r30, 160(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r29, 152(r1) # 8-byte Folded Reload
+; FAST-NEXT:    vmr v3, v30
+; FAST-NEXT:    vmr v4, v29
+; FAST-NEXT:    ld r28, 144(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r27, 136(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r26, 128(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r25, 120(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    ld r24, 112(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    mtfprd f0, r3
-; FAST-NEXT:    xxmrghd v5, vs0, vs6
-; FAST-NEXT:    addi r1, r1, 96
+; FAST-NEXT:    li r3, 96
+; FAST-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 80
+; FAST-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 64
+; FAST-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 48
+; FAST-NEXT:    xxmrghd v5, vs0, v28
+; FAST-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    addi r1, r1, 176
 ; FAST-NEXT:    ld r0, 16(r1)
-; FAST-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    mtlr r0
-; FAST-NEXT:    lfd f29, -24(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f28, -32(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f27, -40(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f26, -48(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f25, -56(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f24, -64(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    blr
   %a = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> %x)
   ret <8 x i64> %a
@@ -839,286 +628,166 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) nounwind {
 ; BE-LABEL: llrint_v16i64_v16f16:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
-; BE-NEXT:    stdu r1, -496(r1)
-; BE-NEXT:    std r0, 512(r1)
-; BE-NEXT:    stfd f20, 400(r1) # 8-byte Folded Spill
-; BE-NEXT:    fmr f20, f1
-; BE-NEXT:    fmr f1, f2
-; BE-NEXT:    std r16, 248(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r17, 256(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r18, 264(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r19, 272(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r20, 280(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r21, 288(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r22, 296(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r23, 304(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r24, 312(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r25, 320(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r26, 328(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r27, 336(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r28, 344(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r29, 352(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r30, 360(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f17, 376(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f18, 384(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f19, 392(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f21, 408(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f22, 416(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f23, 424(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f24, 432(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f25, 440(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f26, 448(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f27, 456(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f28, 464(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f29, 472(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f30, 480(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f31, 488(r1) # 8-byte Folded Spill
-; BE-NEXT:    fmr f31, f13
-; BE-NEXT:    fmr f29, f12
-; BE-NEXT:    fmr f30, f11
-; BE-NEXT:    fmr f28, f10
-; BE-NEXT:    fmr f27, f9
-; BE-NEXT:    fmr f26, f8
-; BE-NEXT:    fmr f25, f7
-; BE-NEXT:    fmr f24, f6
-; BE-NEXT:    fmr f23, f5
-; BE-NEXT:    fmr f22, f4
-; BE-NEXT:    fmr f21, f3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f20
-; BE-NEXT:    mr r30, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f22
-; BE-NEXT:    mr r29, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f21
-; BE-NEXT:    mr r28, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f24
-; BE-NEXT:    mr r27, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f23
-; BE-NEXT:    mr r26, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f26
-; BE-NEXT:    mr r25, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f25
+; BE-NEXT:    stdu r1, -368(r1)
+; BE-NEXT:    std r0, 384(r1)
+; BE-NEXT:    std r24, 304(r1) # 8-byte Folded Spill
 ; BE-NEXT:    mr r24, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f28
-; BE-NEXT:    mr r23, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f27
-; BE-NEXT:    mr r22, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f29
-; BE-NEXT:    mr r21, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f30
-; BE-NEXT:    mr r20, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 652(r1)
-; BE-NEXT:    mr r19, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f31
-; BE-NEXT:    mr r18, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 668(r1)
-; BE-NEXT:    mr r17, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 660(r1)
-; BE-NEXT:    mr r16, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r3, 48
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r16, 48
-; BE-NEXT:    fmr f31, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r17, 48
-; BE-NEXT:    fmr f30, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r18, 48
-; BE-NEXT:    fmr f29, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r19, 48
-; BE-NEXT:    fmr f28, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r20, 48
-; BE-NEXT:    fmr f27, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r21, 48
-; BE-NEXT:    fmr f26, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r22, 48
-; BE-NEXT:    fmr f25, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r23, 48
-; BE-NEXT:    fmr f24, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r24, 48
-; BE-NEXT:    fmr f23, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r25, 48
-; BE-NEXT:    fmr f22, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r26, 48
-; BE-NEXT:    fmr f21, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r27, 48
-; BE-NEXT:    fmr f20, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r28, 48
-; BE-NEXT:    fmr f19, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r29, 48
-; BE-NEXT:    fmr f18, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r30, 48
-; BE-NEXT:    fmr f17, f1
+; BE-NEXT:    lhz r3, 494(r1)
+; BE-NEXT:    std r16, 240(r1) # 8-byte Folded Spill
+; BE-NEXT:    lhz r16, 486(r1)
+; BE-NEXT:    std r17, 248(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r18, 256(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r19, 264(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r20, 272(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r21, 280(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r22, 288(r1) # 8-byte Folded Spill
+; BE-NEXT:    lhz r22, 534(r1)
+; BE-NEXT:    lhz r21, 542(r1)
+; BE-NEXT:    lhz r20, 518(r1)
+; BE-NEXT:    lhz r19, 526(r1)
+; BE-NEXT:    lhz r18, 502(r1)
+; BE-NEXT:    lhz r17, 510(r1)
+; BE-NEXT:    std r23, 296(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r25, 312(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r26, 320(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r27, 328(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r28, 336(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r29, 344(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r30, 352(r1) # 8-byte Folded Spill
+; BE-NEXT:    mr r29, r10
+; BE-NEXT:    mr r30, r9
+; BE-NEXT:    mr r27, r8
+; BE-NEXT:    mr r28, r7
+; BE-NEXT:    mr r25, r6
+; BE-NEXT:    mr r26, r5
+; BE-NEXT:    mr r23, r4
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f17
 ; BE-NEXT:    std r3, 120(r1)
+; BE-NEXT:    mr r3, r16
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f18
 ; BE-NEXT:    std r3, 112(r1)
+; BE-NEXT:    mr r3, r17
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f19
 ; BE-NEXT:    std r3, 136(r1)
+; BE-NEXT:    mr r3, r18
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f20
 ; BE-NEXT:    std r3, 128(r1)
+; BE-NEXT:    mr r3, r19
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f21
 ; BE-NEXT:    std r3, 152(r1)
+; BE-NEXT:    mr r3, r20
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f22
 ; BE-NEXT:    std r3, 144(r1)
+; BE-NEXT:    mr r3, r21
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f23
 ; BE-NEXT:    std r3, 168(r1)
+; BE-NEXT:    mr r3, r22
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f24
 ; BE-NEXT:    std r3, 160(r1)
+; BE-NEXT:    clrldi r3, r23, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f25
 ; BE-NEXT:    std r3, 184(r1)
+; BE-NEXT:    clrldi r3, r24, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f26
 ; BE-NEXT:    std r3, 176(r1)
+; BE-NEXT:    clrldi r3, r25, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f27
 ; BE-NEXT:    std r3, 200(r1)
+; BE-NEXT:    clrldi r3, r26, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f28
 ; BE-NEXT:    std r3, 192(r1)
+; BE-NEXT:    clrldi r3, r27, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f29
 ; BE-NEXT:    std r3, 216(r1)
+; BE-NEXT:    clrldi r3, r28, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f30
 ; BE-NEXT:    std r3, 208(r1)
+; BE-NEXT:    clrldi r3, r29, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f31
 ; BE-NEXT:    std r3, 232(r1)
+; BE-NEXT:    clrldi r3, r30, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
 ; BE-NEXT:    std r3, 224(r1)
 ; BE-NEXT:    addi r3, r1, 112
-; BE-NEXT:    ld r30, 360(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f31, 488(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f30, 480(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f29, 472(r1) # 8-byte Folded Reload
-; BE-NEXT:    lxvd2x v2, 0, r3
+; BE-NEXT:    ld r30, 352(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r29, 344(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r28, 336(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r27, 328(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r26, 320(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r25, 312(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r24, 304(r1) # 8-byte Folded Reload
+; BE-NEXT:    lxvd2x v6, 0, r3
 ; BE-NEXT:    addi r3, r1, 128
-; BE-NEXT:    lfd f28, 464(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f27, 456(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f26, 448(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r29, 352(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r28, 344(r1) # 8-byte Folded Reload
-; BE-NEXT:    lxvd2x v3, 0, r3
+; BE-NEXT:    ld r23, 296(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r22, 288(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r21, 280(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r20, 272(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r19, 264(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r18, 256(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r17, 248(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r16, 240(r1) # 8-byte Folded Reload
+; BE-NEXT:    lxvd2x v7, 0, r3
 ; BE-NEXT:    addi r3, r1, 144
-; BE-NEXT:    lfd f25, 440(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f24, 432(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f23, 424(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r27, 336(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r26, 328(r1) # 8-byte Folded Reload
-; BE-NEXT:    lxvd2x v4, 0, r3
+; BE-NEXT:    lxvd2x v8, 0, r3
 ; BE-NEXT:    addi r3, r1, 160
-; BE-NEXT:    lfd f22, 416(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f21, 408(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f20, 400(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r25, 320(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r24, 312(r1) # 8-byte Folded Reload
-; BE-NEXT:    lxvd2x v5, 0, r3
+; BE-NEXT:    lxvd2x v9, 0, r3
 ; BE-NEXT:    addi r3, r1, 176
-; BE-NEXT:    lfd f19, 392(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f18, 384(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f17, 376(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r23, 304(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r22, 296(r1) # 8-byte Folded Reload
-; BE-NEXT:    lxvd2x v6, 0, r3
+; BE-NEXT:    lxvd2x v2, 0, r3
 ; BE-NEXT:    addi r3, r1, 192
-; BE-NEXT:    ld r21, 288(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r20, 280(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r19, 272(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r18, 264(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r17, 256(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r16, 248(r1) # 8-byte Folded Reload
-; BE-NEXT:    lxvd2x v7, 0, r3
+; BE-NEXT:    lxvd2x v3, 0, r3
 ; BE-NEXT:    addi r3, r1, 208
-; BE-NEXT:    lxvd2x v8, 0, r3
+; BE-NEXT:    lxvd2x v4, 0, r3
 ; BE-NEXT:    addi r3, r1, 224
-; BE-NEXT:    lxvd2x v9, 0, r3
-; BE-NEXT:    addi r1, r1, 496
+; BE-NEXT:    lxvd2x v5, 0, r3
+; BE-NEXT:    addi r1, r1, 368
 ; BE-NEXT:    ld r0, 16(r1)
 ; BE-NEXT:    mtlr r0
 ; BE-NEXT:    blr
@@ -1126,316 +795,207 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) nounwind {
 ; CHECK-LABEL: llrint_v16i64_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    stdu r1, -432(r1)
-; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    std r0, 448(r1)
-; CHECK-NEXT:    std r16, 184(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r17, 192(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r18, 200(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r19, 208(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r20, 216(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r21, 224(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxvd2x v24, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    std r22, 232(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r23, 240(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r24, 248(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r25, 256(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r26, 264(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r27, 272(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxvd2x v25, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 80
-; CHECK-NEXT:    std r28, 280(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r29, 288(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r30, 296(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f17, 312(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f18, 320(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f19, 328(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxvd2x v26, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 96
-; CHECK-NEXT:    stfd f20, 336(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f20, f2
-; CHECK-NEXT:    stfd f21, 344(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f21, f3
-; CHECK-NEXT:    stfd f22, 352(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f22, f4
-; CHECK-NEXT:    stxvd2x v27, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 112
-; CHECK-NEXT:    stfd f23, 360(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f23, f5
-; CHECK-NEXT:    stfd f24, 368(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f24, f6
-; CHECK-NEXT:    stfd f25, 376(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f25, f7
-; CHECK-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 128
-; CHECK-NEXT:    stfd f26, 384(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f26, f8
-; CHECK-NEXT:    stfd f27, 392(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f27, f9
-; CHECK-NEXT:    stfd f28, 400(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f28, f10
-; CHECK-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 144
-; CHECK-NEXT:    stfd f29, 408(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f29, f11
-; CHECK-NEXT:    stfd f30, 416(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f30, f12
-; CHECK-NEXT:    stfd f31, 424(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f31, f13
-; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 160
-; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f20
-; CHECK-NEXT:    mr r30, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f21
-; CHECK-NEXT:    mr r29, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f22
-; CHECK-NEXT:    mr r28, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f23
-; CHECK-NEXT:    mr r27, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f24
-; CHECK-NEXT:    mr r26, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f25
-; CHECK-NEXT:    mr r25, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f26
-; CHECK-NEXT:    mr r24, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f27
+; CHECK-NEXT:    stdu r1, -320(r1)
+; CHECK-NEXT:    li r11, 48
+; CHECK-NEXT:    std r0, 336(r1)
+; CHECK-NEXT:    std r23, 248(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    mr r23, r3
-; CHECK-NEXT:    bl __truncsfhf2
+; CHECK-NEXT:    lhz r3, 416(r1)
+; CHECK-NEXT:    std r16, 192(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r17, 200(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r17, 432(r1)
+; CHECK-NEXT:    stxvd2x v24, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 64
+; CHECK-NEXT:    std r18, 208(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r18, 440(r1)
+; CHECK-NEXT:    std r19, 216(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r19, 448(r1)
+; CHECK-NEXT:    lhz r16, 424(r1)
+; CHECK-NEXT:    stxvd2x v25, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 80
+; CHECK-NEXT:    std r20, 224(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r20, 456(r1)
+; CHECK-NEXT:    stxvd2x v26, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 96
+; CHECK-NEXT:    std r21, 232(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r22, 240(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r22, 472(r1)
+; CHECK-NEXT:    lhz r21, 464(r1)
+; CHECK-NEXT:    stxvd2x v27, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 112
+; CHECK-NEXT:    std r15, 184(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r24, 256(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r25, 264(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r25, r5
+; CHECK-NEXT:    mr r24, r4
+; CHECK-NEXT:    stxvd2x v28, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 128
+; CHECK-NEXT:    std r26, 272(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r27, 280(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r28, 288(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r29, 296(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r29, r9
+; CHECK-NEXT:    mr r28, r8
+; CHECK-NEXT:    mr r27, r7
+; CHECK-NEXT:    mr r26, r6
+; CHECK-NEXT:    stxvd2x v29, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 144
+; CHECK-NEXT:    std r30, 304(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r30, r10
+; CHECK-NEXT:    stxvd2x v30, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 160
+; CHECK-NEXT:    stxvd2x v31, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f28
-; CHECK-NEXT:    mr r22, r3
-; CHECK-NEXT:    bl __truncsfhf2
+; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f29
-; CHECK-NEXT:    mr r21, r3
-; CHECK-NEXT:    bl __truncsfhf2
+; CHECK-NEXT:    mr r15, r3
+; CHECK-NEXT:    mr r3, r16
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f30
-; CHECK-NEXT:    mr r20, r3
-; CHECK-NEXT:    bl __truncsfhf2
+; CHECK-NEXT:    mtvsrd v31, r15
+; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f31
-; CHECK-NEXT:    mr r19, r3
-; CHECK-NEXT:    bl __truncsfhf2
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    mr r3, r17
+; CHECK-NEXT:    xxmrghd v31, vs0, v31
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 568(r1)
-; CHECK-NEXT:    mr r18, r3
-; CHECK-NEXT:    bl __truncsfhf2
+; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 576(r1)
 ; CHECK-NEXT:    mr r17, r3
-; CHECK-NEXT:    bl __truncsfhf2
+; CHECK-NEXT:    mr r3, r18
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 584(r1)
-; CHECK-NEXT:    mr r16, r3
-; CHECK-NEXT:    bl __truncsfhf2
+; CHECK-NEXT:    mtvsrd v30, r17
+; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    mr r3, r19
+; CHECK-NEXT:    xxmrghd v30, vs0, v30
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r16, 48
-; CHECK-NEXT:    fmr f31, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r17, 48
-; CHECK-NEXT:    fmr f30, f1
+; CHECK-NEXT:    mr r19, r3
+; CHECK-NEXT:    mr r3, r20
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r18, 48
-; CHECK-NEXT:    fmr f29, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    mtvsrd v29, r19
+; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r19, 48
-; CHECK-NEXT:    fmr f28, f1
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    mr r3, r21
+; CHECK-NEXT:    xxmrghd v29, vs0, v29
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r20, 48
-; CHECK-NEXT:    fmr f27, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r21, 48
-; CHECK-NEXT:    fmr f26, f1
+; CHECK-NEXT:    mr r21, r3
+; CHECK-NEXT:    mr r3, r22
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r22, 48
-; CHECK-NEXT:    fmr f25, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    mtvsrd v28, r21
+; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    clrldi r3, r23, 48
-; CHECK-NEXT:    fmr f24, f1
+; CHECK-NEXT:    xxmrghd v28, vs0, v28
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mr r23, r3
 ; CHECK-NEXT:    clrldi r3, r24, 48
-; CHECK-NEXT:    fmr f23, f1
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtvsrd v27, r23
+; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    clrldi r3, r25, 48
-; CHECK-NEXT:    fmr f22, f1
+; CHECK-NEXT:    xxmrghd v27, vs0, v27
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mr r25, r3
 ; CHECK-NEXT:    clrldi r3, r26, 48
-; CHECK-NEXT:    fmr f21, f1
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtvsrd v26, r25
+; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    clrldi r3, r27, 48
-; CHECK-NEXT:    fmr f20, f1
+; CHECK-NEXT:    xxmrghd v26, vs0, v26
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mr r27, r3
 ; CHECK-NEXT:    clrldi r3, r28, 48
-; CHECK-NEXT:    fmr f19, f1
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r29, 48
-; CHECK-NEXT:    fmr f18, f1
-; CHECK-NEXT:    bl __extendhfsf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r30, 48
-; CHECK-NEXT:    fmr f17, f1
-; CHECK-NEXT:    bl __extendhfsf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    bl llrintf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f17
-; CHECK-NEXT:    mtvsrd v31, r3
-; CHECK-NEXT:    bl llrintf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f18
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v31, vs0, v31
-; CHECK-NEXT:    bl llrintf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f19
-; CHECK-NEXT:    mtvsrd v30, r3
-; CHECK-NEXT:    bl llrintf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f20
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v30, vs0, v30
-; CHECK-NEXT:    bl llrintf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f21
-; CHECK-NEXT:    mtvsrd v29, r3
-; CHECK-NEXT:    bl llrintf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f22
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v29, vs0, v29
-; CHECK-NEXT:    bl llrintf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f23
-; CHECK-NEXT:    mtvsrd v28, r3
-; CHECK-NEXT:    bl llrintf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f24
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v28, vs0, v28
-; CHECK-NEXT:    bl llrintf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f25
-; CHECK-NEXT:    mtvsrd v27, r3
-; CHECK-NEXT:    bl llrintf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f26
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v27, vs0, v27
-; CHECK-NEXT:    bl llrintf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f27
-; CHECK-NEXT:    mtvsrd v26, r3
+; CHECK-NEXT:    mtvsrd v25, r27
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f28
 ; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v26, vs0, v26
-; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    clrldi r3, r29, 48
+; CHECK-NEXT:    xxmrghd v25, vs0, v25
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f29
-; CHECK-NEXT:    mtvsrd v25, r3
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f30
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v25, vs0, v25
-; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    mr r29, r3
+; CHECK-NEXT:    clrldi r3, r30, 48
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f31
-; CHECK-NEXT:    mtvsrd v24, r3
+; CHECK-NEXT:    mtvsrd v24, r29
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    li r3, 160
-; CHECK-NEXT:    vmr v2, v31
-; CHECK-NEXT:    lfd f31, 424(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    vmr v3, v30
-; CHECK-NEXT:    vmr v4, v29
-; CHECK-NEXT:    lfd f30, 416(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f29, 408(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    vmr v6, v31
+; CHECK-NEXT:    ld r30, 304(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    vmr v7, v30
+; CHECK-NEXT:    vmr v8, v29
+; CHECK-NEXT:    ld r29, 296(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r28, 288(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 144
-; CHECK-NEXT:    vmr v5, v28
-; CHECK-NEXT:    vmr v6, v27
-; CHECK-NEXT:    vmr v7, v26
-; CHECK-NEXT:    vmr v8, v25
-; CHECK-NEXT:    lfd f28, 400(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f27, 392(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    vmr v9, v28
+; CHECK-NEXT:    vmr v2, v27
+; CHECK-NEXT:    vmr v3, v26
+; CHECK-NEXT:    vmr v4, v25
+; CHECK-NEXT:    ld r27, 280(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r26, 272(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 128
-; CHECK-NEXT:    lfd f26, 384(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f25, 376(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    xxmrghd v9, vs0, v24
-; CHECK-NEXT:    lfd f24, 368(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f23, 360(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f22, 352(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r25, 264(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r24, 256(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v5, vs0, v24
+; CHECK-NEXT:    ld r23, 248(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r22, 240(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r21, 232(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 112
-; CHECK-NEXT:    lfd f21, 344(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r30, 296(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f20, 336(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f19, 328(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r29, 288(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r28, 280(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r20, 224(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r19, 216(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r18, 208(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r17, 200(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r16, 192(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r15, 184(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 96
-; CHECK-NEXT:    lfd f18, 320(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r27, 272(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f17, 312(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r26, 264(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r25, 256(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r24, 248(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 80
-; CHECK-NEXT:    ld r23, 240(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r22, 232(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r21, 224(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r20, 216(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r19, 208(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r18, 200(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    ld r17, 192(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r16, 184(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
 ; CHECK-NEXT:    lxvd2x v24, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    addi r1, r1, 432
+; CHECK-NEXT:    addi r1, r1, 320
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
 ; CHECK-NEXT:    blr
@@ -1443,223 +1003,199 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) nounwind {
 ; FAST-LABEL: llrint_v16i64_v16f16:
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
-; FAST-NEXT:    stfd f16, -128(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f17, -120(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f18, -112(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f19, -104(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f20, -96(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f21, -88(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f22, -80(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f23, -72(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f24, -64(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f25, -56(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f26, -48(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f27, -40(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f28, -32(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f29, -24(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stdu r1, -160(r1)
-; FAST-NEXT:    fmr f26, f1
-; FAST-NEXT:    lfs f1, 312(r1)
-; FAST-NEXT:    std r0, 176(r1)
-; FAST-NEXT:    fmr f28, f13
-; FAST-NEXT:    fmr f27, f12
-; FAST-NEXT:    fmr f24, f11
-; FAST-NEXT:    fmr f21, f10
-; FAST-NEXT:    fmr f19, f9
-; FAST-NEXT:    fmr f18, f8
-; FAST-NEXT:    fmr f17, f7
-; FAST-NEXT:    fmr f16, f6
-; FAST-NEXT:    fmr f20, f5
-; FAST-NEXT:    fmr f22, f4
-; FAST-NEXT:    fmr f23, f3
-; FAST-NEXT:    fmr f25, f2
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    stdu r1, -304(r1)
+; FAST-NEXT:    li r11, 48
+; FAST-NEXT:    std r0, 320(r1)
+; FAST-NEXT:    std r23, 232(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r23, r3
+; FAST-NEXT:    lhz r3, 400(r1)
+; FAST-NEXT:    std r16, 176(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r17, 184(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r17, 416(r1)
+; FAST-NEXT:    stxvd2x v24, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 64
+; FAST-NEXT:    std r18, 192(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r18, 424(r1)
+; FAST-NEXT:    std r19, 200(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r19, 432(r1)
+; FAST-NEXT:    lhz r16, 408(r1)
+; FAST-NEXT:    stxvd2x v25, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 80
+; FAST-NEXT:    std r20, 208(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r20, 440(r1)
+; FAST-NEXT:    stxvd2x v26, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 96
+; FAST-NEXT:    std r21, 216(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r22, 224(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r22, 456(r1)
+; FAST-NEXT:    lhz r21, 448(r1)
+; FAST-NEXT:    stxvd2x v27, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 112
+; FAST-NEXT:    std r24, 240(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r25, 248(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r26, 256(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r26, r6
+; FAST-NEXT:    mr r25, r5
+; FAST-NEXT:    mr r24, r4
+; FAST-NEXT:    stxvd2x v28, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 128
+; FAST-NEXT:    std r27, 264(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r28, 272(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r29, 280(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r30, 288(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r30, r10
+; FAST-NEXT:    mr r29, r9
+; FAST-NEXT:    mr r28, r8
+; FAST-NEXT:    mr r27, r7
+; FAST-NEXT:    stxvd2x v29, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 144
+; FAST-NEXT:    stxvd2x v30, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 160
+; FAST-NEXT:    stxvd2x v31, r1, r11 # 16-byte Folded Spill
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f31, f1
-; FAST-NEXT:    lfs f1, 304(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    mr r3, r16
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f30, f1
-; FAST-NEXT:    lfs f1, 296(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    mr r3, r17
+; FAST-NEXT:    xxmrghd v31, vs0, v31
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f29, f1
-; FAST-NEXT:    fmr f1, f28
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v30, r3
+; FAST-NEXT:    mr r3, r18
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f28, f1
-; FAST-NEXT:    fmr f1, f27
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    mr r3, r19
+; FAST-NEXT:    xxmrghd v30, vs0, v30
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f27, f1
-; FAST-NEXT:    fmr f1, f24
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v29, r3
+; FAST-NEXT:    mr r3, r20
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f24, f1
-; FAST-NEXT:    fmr f1, f21
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    mr r3, r21
+; FAST-NEXT:    xxmrghd v29, vs0, v29
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f21, f1
-; FAST-NEXT:    fmr f1, f19
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v28, r3
+; FAST-NEXT:    mr r3, r22
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f19, f1
-; FAST-NEXT:    fmr f1, f18
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    clrldi r3, r23, 48
+; FAST-NEXT:    xxmrghd v28, vs0, v28
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f18, f1
-; FAST-NEXT:    fmr f1, f17
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v27, r3
+; FAST-NEXT:    clrldi r3, r24, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f17, f1
-; FAST-NEXT:    fmr f1, f16
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    clrldi r3, r25, 48
+; FAST-NEXT:    xxmrghd v27, vs0, v27
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f16, f1
-; FAST-NEXT:    fmr f1, f20
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v26, r3
+; FAST-NEXT:    clrldi r3, r26, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f20, f1
-; FAST-NEXT:    fmr f1, f22
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    clrldi r3, r27, 48
+; FAST-NEXT:    xxmrghd v26, vs0, v26
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f22, f1
-; FAST-NEXT:    fmr f1, f23
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v25, r3
+; FAST-NEXT:    clrldi r3, r28, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f23, f1
-; FAST-NEXT:    fmr f1, f25
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    clrldi r3, r29, 48
+; FAST-NEXT:    xxmrghd v25, vs0, v25
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f25, f1
-; FAST-NEXT:    fmr f1, f26
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v24, r3
+; FAST-NEXT:    clrldi r3, r30, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fctid f0, f25
-; FAST-NEXT:    fctid f2, f23
-; FAST-NEXT:    mffprd r3, f0
-; FAST-NEXT:    fctid f3, f22
-; FAST-NEXT:    fctid f4, f20
-; FAST-NEXT:    fctid f5, f16
-; FAST-NEXT:    fctid f6, f17
-; FAST-NEXT:    fctid f7, f18
-; FAST-NEXT:    fctid f8, f19
-; FAST-NEXT:    fctid f9, f21
-; FAST-NEXT:    fctid f10, f24
-; FAST-NEXT:    fctid f1, f1
-; FAST-NEXT:    mtfprd f0, r3
-; FAST-NEXT:    mffprd r3, f2
-; FAST-NEXT:    mtfprd f2, r3
-; FAST-NEXT:    mffprd r3, f3
-; FAST-NEXT:    mtfprd f3, r3
-; FAST-NEXT:    mffprd r3, f4
-; FAST-NEXT:    mtfprd f4, r3
-; FAST-NEXT:    mffprd r3, f5
-; FAST-NEXT:    mtfprd f5, r3
-; FAST-NEXT:    mffprd r3, f6
-; FAST-NEXT:    mtfprd f6, r3
-; FAST-NEXT:    mffprd r3, f7
-; FAST-NEXT:    mtfprd f7, r3
-; FAST-NEXT:    mffprd r3, f8
-; FAST-NEXT:    mtfprd f8, r3
-; FAST-NEXT:    mffprd r3, f9
-; FAST-NEXT:    mtfprd f9, r3
-; FAST-NEXT:    mffprd r3, f10
-; FAST-NEXT:    mtfprd f10, r3
-; FAST-NEXT:    mffprd r3, f1
-; FAST-NEXT:    mtfprd f1, r3
-; FAST-NEXT:    xxmrghd v3, vs3, vs2
-; FAST-NEXT:    xxmrghd v4, vs5, vs4
-; FAST-NEXT:    xxmrghd v5, vs7, vs6
-; FAST-NEXT:    xxmrghd v6, vs9, vs8
-; FAST-NEXT:    xxmrghd v2, vs0, vs1
-; FAST-NEXT:    fctid f0, f27
-; FAST-NEXT:    fctid f1, f29
-; FAST-NEXT:    mffprd r3, f0
-; FAST-NEXT:    mtfprd f0, r3
-; FAST-NEXT:    xxmrghd v7, vs0, vs10
-; FAST-NEXT:    fctid f0, f28
-; FAST-NEXT:    mffprd r3, f0
-; FAST-NEXT:    mtfprd f0, r3
-; FAST-NEXT:    mffprd r3, f1
-; FAST-NEXT:    mtfprd f1, r3
-; FAST-NEXT:    xxmrghd v8, vs1, vs0
-; FAST-NEXT:    fctid f0, f30
-; FAST-NEXT:    fctid f1, f31
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    vmr v6, v31
+; FAST-NEXT:    ld r30, 288(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r29, 280(r1) # 8-byte Folded Reload
+; FAST-NEXT:    vmr v7, v30
+; FAST-NEXT:    vmr v8, v29
+; FAST-NEXT:    ld r28, 272(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r27, 264(r1) # 8-byte Folded Reload
+; FAST-NEXT:    vmr v9, v28
+; FAST-NEXT:    vmr v2, v27
+; FAST-NEXT:    ld r26, 256(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r25, 248(r1) # 8-byte Folded Reload
+; FAST-NEXT:    vmr v3, v26
+; FAST-NEXT:    vmr v4, v25
+; FAST-NEXT:    ld r24, 240(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r23, 232(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r22, 224(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r21, 216(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    ld r20, 208(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r19, 200(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r18, 192(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r17, 184(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    mtfprd f0, r3
-; FAST-NEXT:    mffprd r3, f1
-; FAST-NEXT:    mtfprd f1, r3
-; FAST-NEXT:    xxmrghd v9, vs1, vs0
-; FAST-NEXT:    addi r1, r1, 160
+; FAST-NEXT:    li r3, 160
+; FAST-NEXT:    ld r16, 176(r1) # 8-byte Folded Reload
+; FAST-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 144
+; FAST-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 128
+; FAST-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 112
+; FAST-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 96
+; FAST-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 80
+; FAST-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 64
+; FAST-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 48
+; FAST-NEXT:    xxmrghd v5, vs0, v24
+; FAST-NEXT:    lxvd2x v24, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    addi r1, r1, 304
 ; FAST-NEXT:    ld r0, 16(r1)
-; FAST-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    mtlr r0
-; FAST-NEXT:    lfd f29, -24(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f28, -32(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f27, -40(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f26, -48(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f25, -56(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f24, -64(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f23, -72(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f22, -80(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f21, -88(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f20, -96(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f19, -104(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f18, -112(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f17, -120(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f16, -128(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    blr
   %a = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> %x)
   ret <16 x i64> %a
@@ -1670,483 +1206,295 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind {
 ; BE-LABEL: llrint_v32i64_v32f16:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
-; BE-NEXT:    stdu r1, -864(r1)
-; BE-NEXT:    std r0, 880(r1)
-; BE-NEXT:    stfd f20, 768(r1) # 8-byte Folded Spill
-; BE-NEXT:    fmr f20, f1
-; BE-NEXT:    fmr f1, f2
-; BE-NEXT:    std r14, 576(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r15, 584(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r16, 592(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r17, 600(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r18, 608(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r19, 616(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r20, 624(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r21, 632(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r22, 640(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r23, 648(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r24, 656(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r25, 664(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r26, 672(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r27, 680(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r28, 688(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r29, 696(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r30, 704(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r31, 712(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f14, 720(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f15, 728(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f16, 736(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f17, 744(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f18, 752(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f19, 760(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f21, 776(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f22, 784(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f23, 792(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f24, 800(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f25, 808(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f26, 816(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f27, 824(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f28, 832(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f29, 840(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f30, 848(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f31, 856(r1) # 8-byte Folded Spill
-; BE-NEXT:    fmr f31, f13
+; BE-NEXT:    stdu r1, -624(r1)
+; BE-NEXT:    std r0, 640(r1)
+; BE-NEXT:    std r30, 608(r1) # 8-byte Folded Spill
 ; BE-NEXT:    mr r30, r3
-; BE-NEXT:    fmr f29, f12
-; BE-NEXT:    fmr f30, f11
-; BE-NEXT:    fmr f28, f10
-; BE-NEXT:    fmr f27, f9
-; BE-NEXT:    fmr f26, f8
-; BE-NEXT:    fmr f25, f7
-; BE-NEXT:    fmr f24, f6
-; BE-NEXT:    fmr f23, f5
-; BE-NEXT:    fmr f22, f4
-; BE-NEXT:    fmr f21, f3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f20
-; BE-NEXT:    std r3, 304(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f22
-; BE-NEXT:    std r3, 296(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f21
-; BE-NEXT:    std r3, 280(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f24
-; BE-NEXT:    std r3, 264(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f23
-; BE-NEXT:    std r3, 248(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f26
-; BE-NEXT:    std r3, 232(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f25
+; BE-NEXT:    lhz r3, 926(r1)
+; BE-NEXT:    std r14, 480(r1) # 8-byte Folded Spill
+; BE-NEXT:    lhz r14, 822(r1)
+; BE-NEXT:    std r15, 488(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r19, 520(r1) # 8-byte Folded Spill
 ; BE-NEXT:    std r3, 216(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f28
+; BE-NEXT:    lhz r3, 934(r1)
+; BE-NEXT:    lhz r15, 814(r1)
+; BE-NEXT:    lhz r19, 742(r1)
+; BE-NEXT:    std r22, 544(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r23, 552(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r25, 568(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r26, 576(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r3, 208(r1) # 8-byte Folded Spill
+; BE-NEXT:    lhz r3, 910(r1)
+; BE-NEXT:    lhz r26, 766(r1)
+; BE-NEXT:    lhz r25, 774(r1)
+; BE-NEXT:    std r27, 584(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r28, 592(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r29, 600(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r31, 616(r1) # 8-byte Folded Spill
 ; BE-NEXT:    std r3, 200(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f27
+; BE-NEXT:    lhz r3, 918(r1)
+; BE-NEXT:    lhz r31, 798(r1)
+; BE-NEXT:    lhz r29, 806(r1)
+; BE-NEXT:    lhz r28, 782(r1)
+; BE-NEXT:    lhz r27, 790(r1)
+; BE-NEXT:    lhz r23, 750(r1)
+; BE-NEXT:    lhz r22, 758(r1)
+; BE-NEXT:    std r16, 496(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r17, 504(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r3, 192(r1) # 8-byte Folded Spill
+; BE-NEXT:    lhz r3, 894(r1)
+; BE-NEXT:    mr r17, r7
+; BE-NEXT:    mr r16, r4
+; BE-NEXT:    std r18, 512(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r20, 528(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r21, 536(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r24, 560(r1) # 8-byte Folded Spill
 ; BE-NEXT:    std r3, 184(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f29
+; BE-NEXT:    lhz r3, 902(r1)
+; BE-NEXT:    mr r24, r10
+; BE-NEXT:    mr r20, r9
+; BE-NEXT:    mr r21, r8
+; BE-NEXT:    mr r18, r6
+; BE-NEXT:    std r3, 176(r1) # 8-byte Folded Spill
+; BE-NEXT:    lhz r3, 878(r1)
 ; BE-NEXT:    std r3, 168(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f30
+; BE-NEXT:    lhz r3, 886(r1)
+; BE-NEXT:    std r3, 160(r1) # 8-byte Folded Spill
+; BE-NEXT:    lhz r3, 862(r1)
 ; BE-NEXT:    std r3, 152(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1028(r1)
+; BE-NEXT:    lhz r3, 870(r1)
+; BE-NEXT:    std r3, 144(r1) # 8-byte Folded Spill
+; BE-NEXT:    lhz r3, 846(r1)
 ; BE-NEXT:    std r3, 136(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f31
+; BE-NEXT:    lhz r3, 854(r1)
+; BE-NEXT:    std r3, 128(r1) # 8-byte Folded Spill
+; BE-NEXT:    lhz r3, 830(r1)
 ; BE-NEXT:    std r3, 120(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1044(r1)
+; BE-NEXT:    lhz r3, 838(r1)
 ; BE-NEXT:    std r3, 112(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1036(r1)
-; BE-NEXT:    mr r15, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1060(r1)
-; BE-NEXT:    mr r14, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1052(r1)
-; BE-NEXT:    mr r31, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1076(r1)
-; BE-NEXT:    mr r29, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1068(r1)
-; BE-NEXT:    mr r28, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1092(r1)
-; BE-NEXT:    mr r27, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1084(r1)
-; BE-NEXT:    mr r26, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1108(r1)
-; BE-NEXT:    mr r25, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1100(r1)
-; BE-NEXT:    mr r24, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1124(r1)
-; BE-NEXT:    mr r23, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1116(r1)
-; BE-NEXT:    mr r22, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1140(r1)
-; BE-NEXT:    mr r21, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1132(r1)
-; BE-NEXT:    mr r20, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1156(r1)
-; BE-NEXT:    mr r19, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1148(r1)
-; BE-NEXT:    mr r18, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1172(r1)
-; BE-NEXT:    mr r17, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1164(r1)
-; BE-NEXT:    mr r16, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    clrldi r3, r5, 48
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 424(r1)
 ; BE-NEXT:    clrldi r3, r16, 48
-; BE-NEXT:    stfs f1, 316(r1) # 4-byte Folded Spill
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 416(r1)
 ; BE-NEXT:    clrldi r3, r17, 48
-; BE-NEXT:    stfs f1, 312(r1) # 4-byte Folded Spill
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 440(r1)
 ; BE-NEXT:    clrldi r3, r18, 48
-; BE-NEXT:    stfs f1, 292(r1) # 4-byte Folded Spill
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r19, 48
-; BE-NEXT:    stfs f1, 276(r1) # 4-byte Folded Spill
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
+; BE-NEXT:    std r3, 432(r1)
 ; BE-NEXT:    clrldi r3, r20, 48
-; BE-NEXT:    stfs f1, 260(r1) # 4-byte Folded Spill
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r21, 48
-; BE-NEXT:    stfs f1, 244(r1) # 4-byte Folded Spill
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r22, 48
-; BE-NEXT:    stfs f1, 228(r1) # 4-byte Folded Spill
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r23, 48
-; BE-NEXT:    stfs f1, 212(r1) # 4-byte Folded Spill
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r24, 48
-; BE-NEXT:    stfs f1, 196(r1) # 4-byte Folded Spill
+; BE-NEXT:    std r3, 456(r1)
+; BE-NEXT:    clrldi r3, r21, 48
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r25, 48
-; BE-NEXT:    stfs f1, 180(r1) # 4-byte Folded Spill
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r26, 48
-; BE-NEXT:    stfs f1, 164(r1) # 4-byte Folded Spill
+; BE-NEXT:    std r3, 448(r1)
+; BE-NEXT:    mr r3, r19
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r27, 48
-; BE-NEXT:    stfs f1, 148(r1) # 4-byte Folded Spill
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r28, 48
-; BE-NEXT:    stfs f1, 132(r1) # 4-byte Folded Spill
+; BE-NEXT:    std r3, 472(r1)
+; BE-NEXT:    clrldi r3, r24, 48
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r29, 48
-; BE-NEXT:    fmr f18, f1
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r31, 48
-; BE-NEXT:    fmr f17, f1
+; BE-NEXT:    std r3, 464(r1)
+; BE-NEXT:    mr r3, r22
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r14, 48
-; BE-NEXT:    fmr f16, f1
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r15, 48
-; BE-NEXT:    fmr f15, f1
+; BE-NEXT:    std r3, 232(r1)
+; BE-NEXT:    mr r3, r23
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 112(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f14, f1
-; BE-NEXT:    clrldi r3, r3, 48
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 120(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f31, f1
-; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    std r3, 224(r1)
+; BE-NEXT:    mr r3, r25
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 136(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f30, f1
-; BE-NEXT:    clrldi r3, r3, 48
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 152(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f29, f1
-; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    std r3, 248(r1)
+; BE-NEXT:    mr r3, r26
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 168(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f28, f1
-; BE-NEXT:    clrldi r3, r3, 48
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 184(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f27, f1
-; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    std r3, 240(r1)
+; BE-NEXT:    mr r3, r27
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 200(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f26, f1
-; BE-NEXT:    clrldi r3, r3, 48
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 216(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f25, f1
-; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    std r3, 264(r1)
+; BE-NEXT:    mr r3, r28
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 232(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f24, f1
-; BE-NEXT:    clrldi r3, r3, 48
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 248(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f23, f1
-; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    std r3, 256(r1)
+; BE-NEXT:    mr r3, r29
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 264(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f22, f1
-; BE-NEXT:    clrldi r3, r3, 48
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 280(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f21, f1
-; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    std r3, 280(r1)
+; BE-NEXT:    mr r3, r31
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 296(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f20, f1
-; BE-NEXT:    clrldi r3, r3, 48
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 304(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f19, f1
-; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    std r3, 272(r1)
+; BE-NEXT:    mr r3, r14
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f19
-; BE-NEXT:    std r3, 328(r1)
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f20
-; BE-NEXT:    std r3, 320(r1)
-; BE-NEXT:    bl llrintf
+; BE-NEXT:    std r3, 296(r1)
+; BE-NEXT:    mr r3, r15
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f21
-; BE-NEXT:    std r3, 344(r1)
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f22
-; BE-NEXT:    std r3, 336(r1)
-; BE-NEXT:    bl llrintf
+; BE-NEXT:    std r3, 288(r1)
+; BE-NEXT:    ld r3, 112(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f23
-; BE-NEXT:    std r3, 360(r1)
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f24
-; BE-NEXT:    std r3, 352(r1)
-; BE-NEXT:    bl llrintf
+; BE-NEXT:    std r3, 312(r1)
+; BE-NEXT:    ld r3, 120(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f25
-; BE-NEXT:    std r3, 376(r1)
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f26
-; BE-NEXT:    std r3, 368(r1)
-; BE-NEXT:    bl llrintf
+; BE-NEXT:    std r3, 304(r1)
+; BE-NEXT:    ld r3, 128(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f27
-; BE-NEXT:    std r3, 392(r1)
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f28
-; BE-NEXT:    std r3, 384(r1)
-; BE-NEXT:    bl llrintf
+; BE-NEXT:    std r3, 328(r1)
+; BE-NEXT:    ld r3, 136(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f29
-; BE-NEXT:    std r3, 408(r1)
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f30
-; BE-NEXT:    std r3, 400(r1)
-; BE-NEXT:    bl llrintf
+; BE-NEXT:    std r3, 320(r1)
+; BE-NEXT:    ld r3, 144(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f31
-; BE-NEXT:    std r3, 424(r1)
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f14
-; BE-NEXT:    std r3, 416(r1)
-; BE-NEXT:    bl llrintf
+; BE-NEXT:    std r3, 344(r1)
+; BE-NEXT:    ld r3, 152(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f15
-; BE-NEXT:    std r3, 440(r1)
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f16
-; BE-NEXT:    std r3, 432(r1)
-; BE-NEXT:    bl llrintf
+; BE-NEXT:    std r3, 336(r1)
+; BE-NEXT:    ld r3, 160(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f17
-; BE-NEXT:    std r3, 456(r1)
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f18
-; BE-NEXT:    std r3, 448(r1)
-; BE-NEXT:    bl llrintf
+; BE-NEXT:    std r3, 360(r1)
+; BE-NEXT:    ld r3, 168(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 132(r1) # 4-byte Folded Reload
-; BE-NEXT:    std r3, 472(r1)
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 148(r1) # 4-byte Folded Reload
-; BE-NEXT:    std r3, 464(r1)
-; BE-NEXT:    bl llrintf
+; BE-NEXT:    std r3, 352(r1)
+; BE-NEXT:    ld r3, 176(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 164(r1) # 4-byte Folded Reload
-; BE-NEXT:    std r3, 488(r1)
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 180(r1) # 4-byte Folded Reload
-; BE-NEXT:    std r3, 480(r1)
-; BE-NEXT:    bl llrintf
+; BE-NEXT:    std r3, 376(r1)
+; BE-NEXT:    ld r3, 184(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 196(r1) # 4-byte Folded Reload
-; BE-NEXT:    std r3, 504(r1)
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 212(r1) # 4-byte Folded Reload
-; BE-NEXT:    std r3, 496(r1)
-; BE-NEXT:    bl llrintf
+; BE-NEXT:    std r3, 368(r1)
+; BE-NEXT:    ld r3, 192(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 228(r1) # 4-byte Folded Reload
-; BE-NEXT:    std r3, 520(r1)
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 244(r1) # 4-byte Folded Reload
-; BE-NEXT:    std r3, 512(r1)
-; BE-NEXT:    bl llrintf
+; BE-NEXT:    std r3, 392(r1)
+; BE-NEXT:    ld r3, 200(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 260(r1) # 4-byte Folded Reload
-; BE-NEXT:    std r3, 536(r1)
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 276(r1) # 4-byte Folded Reload
-; BE-NEXT:    std r3, 528(r1)
-; BE-NEXT:    bl llrintf
+; BE-NEXT:    std r3, 384(r1)
+; BE-NEXT:    ld r3, 208(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 292(r1) # 4-byte Folded Reload
-; BE-NEXT:    std r3, 552(r1)
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 312(r1) # 4-byte Folded Reload
-; BE-NEXT:    std r3, 544(r1)
-; BE-NEXT:    bl llrintf
+; BE-NEXT:    std r3, 408(r1)
+; BE-NEXT:    ld r3, 216(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 316(r1) # 4-byte Folded Reload
-; BE-NEXT:    std r3, 568(r1)
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    std r3, 560(r1)
-; BE-NEXT:    addi r3, r1, 320
+; BE-NEXT:    std r3, 400(r1)
+; BE-NEXT:    addi r3, r1, 416
 ; BE-NEXT:    lxvd2x vs0, 0, r3
-; BE-NEXT:    addi r3, r1, 336
+; BE-NEXT:    addi r3, r1, 432
 ; BE-NEXT:    lxvd2x vs1, 0, r3
-; BE-NEXT:    addi r3, r1, 352
+; BE-NEXT:    addi r3, r1, 448
 ; BE-NEXT:    lxvd2x vs2, 0, r3
-; BE-NEXT:    addi r3, r1, 368
+; BE-NEXT:    addi r3, r1, 464
 ; BE-NEXT:    lxvd2x vs3, 0, r3
-; BE-NEXT:    addi r3, r1, 384
+; BE-NEXT:    addi r3, r1, 224
 ; BE-NEXT:    lxvd2x vs4, 0, r3
-; BE-NEXT:    addi r3, r1, 400
+; BE-NEXT:    addi r3, r1, 240
 ; BE-NEXT:    lxvd2x vs5, 0, r3
-; BE-NEXT:    addi r3, r1, 416
+; BE-NEXT:    addi r3, r1, 256
 ; BE-NEXT:    lxvd2x vs6, 0, r3
-; BE-NEXT:    addi r3, r1, 432
+; BE-NEXT:    addi r3, r1, 272
 ; BE-NEXT:    lxvd2x vs7, 0, r3
-; BE-NEXT:    addi r3, r1, 448
+; BE-NEXT:    addi r3, r1, 288
 ; BE-NEXT:    lxvd2x vs8, 0, r3
-; BE-NEXT:    addi r3, r1, 464
+; BE-NEXT:    addi r3, r1, 304
 ; BE-NEXT:    lxvd2x vs9, 0, r3
-; BE-NEXT:    addi r3, r1, 480
+; BE-NEXT:    addi r3, r1, 320
 ; BE-NEXT:    lxvd2x vs10, 0, r3
-; BE-NEXT:    addi r3, r1, 496
+; BE-NEXT:    addi r3, r1, 336
 ; BE-NEXT:    lxvd2x vs11, 0, r3
-; BE-NEXT:    addi r3, r1, 512
+; BE-NEXT:    addi r3, r1, 352
 ; BE-NEXT:    lxvd2x vs12, 0, r3
-; BE-NEXT:    addi r3, r1, 528
+; BE-NEXT:    addi r3, r1, 368
 ; BE-NEXT:    lxvd2x vs13, 0, r3
-; BE-NEXT:    addi r3, r1, 544
+; BE-NEXT:    addi r3, r1, 384
 ; BE-NEXT:    lxvd2x v2, 0, r3
-; BE-NEXT:    addi r3, r1, 560
+; BE-NEXT:    addi r3, r1, 400
 ; BE-NEXT:    lxvd2x v3, 0, r3
 ; BE-NEXT:    li r3, 240
 ; BE-NEXT:    stxvd2x v3, r30, r3
@@ -2179,43 +1527,25 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind {
 ; BE-NEXT:    li r3, 16
 ; BE-NEXT:    stxvd2x vs1, r30, r3
 ; BE-NEXT:    stxvd2x vs0, 0, r30
-; BE-NEXT:    lfd f31, 856(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f30, 848(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f29, 840(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f28, 832(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f27, 824(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f26, 816(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f25, 808(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f24, 800(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f23, 792(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f22, 784(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f21, 776(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f20, 768(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f19, 760(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f18, 752(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f17, 744(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f16, 736(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f15, 728(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f14, 720(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r31, 712(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r30, 704(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r29, 696(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r28, 688(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r27, 680(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r26, 672(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r25, 664(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r24, 656(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r23, 648(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r22, 640(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r21, 632(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r20, 624(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r19, 616(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r18, 608(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r17, 600(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r16, 592(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r15, 584(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r14, 576(r1) # 8-byte Folded Reload
-; BE-NEXT:    addi r1, r1, 864
+; BE-NEXT:    ld r31, 616(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r30, 608(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r29, 600(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r28, 592(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r27, 584(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r26, 576(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r25, 568(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r24, 560(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r23, 552(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r22, 544(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r21, 536(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r20, 528(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r19, 520(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r18, 512(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r17, 504(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r16, 496(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r15, 488(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r14, 480(r1) # 8-byte Folded Reload
+; BE-NEXT:    addi r1, r1, 624
 ; BE-NEXT:    ld r0, 16(r1)
 ; BE-NEXT:    mtlr r0
 ; BE-NEXT:    blr
@@ -2223,508 +1553,334 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind {
 ; CHECK-LABEL: llrint_v32i64_v32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    stdu r1, -688(r1)
-; CHECK-NEXT:    li r4, 208
-; CHECK-NEXT:    std r0, 704(r1)
-; CHECK-NEXT:    std r14, 400(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r15, 408(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r16, 416(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r17, 424(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r18, 432(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r19, 440(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT:    li r4, 224
-; CHECK-NEXT:    std r20, 448(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r21, 456(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r22, 464(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r23, 472(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r24, 480(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r25, 488(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT:    li r4, 240
-; CHECK-NEXT:    std r26, 496(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r27, 504(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r28, 512(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r29, 520(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r30, 528(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    stdu r1, -576(r1)
+; CHECK-NEXT:    std r0, 592(r1)
+; CHECK-NEXT:    std r30, 560(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    mr r30, r3
-; CHECK-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT:    li r4, 256
-; CHECK-NEXT:    std r31, 536(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f14, 544(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f15, 552(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f16, 560(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f17, 568(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f18, 576(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT:    li r4, 272
-; CHECK-NEXT:    stfd f19, 584(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f20, 592(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f20, f2
-; CHECK-NEXT:    stfd f21, 600(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f21, f3
-; CHECK-NEXT:    stfd f22, 608(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f22, f4
-; CHECK-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT:    li r4, 288
-; CHECK-NEXT:    stfd f23, 616(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f23, f5
-; CHECK-NEXT:    stfd f24, 624(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f24, f6
-; CHECK-NEXT:    stfd f25, 632(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f25, f7
-; CHECK-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT:    li r4, 304
-; CHECK-NEXT:    stfd f26, 640(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f26, f8
-; CHECK-NEXT:    stfd f27, 648(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f27, f9
-; CHECK-NEXT:    stfd f28, 656(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f28, f10
-; CHECK-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT:    li r4, 320
-; CHECK-NEXT:    stfd f29, 664(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f29, f11
-; CHECK-NEXT:    stfd f30, 672(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f30, f12
-; CHECK-NEXT:    stfd f31, 680(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f31, f13
-; CHECK-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT:    li r4, 336
-; CHECK-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT:    li r4, 352
-; CHECK-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT:    li r4, 368
-; CHECK-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT:    li r4, 384
-; CHECK-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f20
+; CHECK-NEXT:    lhz r3, 864(r1)
+; CHECK-NEXT:    li r11, 240
+; CHECK-NEXT:    std r14, 432(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r19, 472(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r14, 744(r1)
+; CHECK-NEXT:    stxvd2x v20, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 256
+; CHECK-NEXT:    std r22, 496(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r22, 680(r1)
+; CHECK-NEXT:    std r3, 216(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r3, 856(r1)
+; CHECK-NEXT:    lhz r19, 672(r1)
+; CHECK-NEXT:    stxvd2x v21, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 272
+; CHECK-NEXT:    std r23, 504(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r23, 688(r1)
+; CHECK-NEXT:    stxvd2x v22, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    std r3, 184(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r3, 848(r1)
+; CHECK-NEXT:    li r11, 288
+; CHECK-NEXT:    std r25, 520(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r25, 696(r1)
+; CHECK-NEXT:    stxvd2x v23, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 304
+; CHECK-NEXT:    std r26, 528(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r27, 536(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r28, 544(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r28, 720(r1)
+; CHECK-NEXT:    lhz r27, 712(r1)
+; CHECK-NEXT:    lhz r26, 704(r1)
+; CHECK-NEXT:    stxvd2x v24, r1, r11 # 16-byte Folded Spill
 ; CHECK-NEXT:    std r3, 176(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f21
-; CHECK-NEXT:    std r3, 160(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f22
+; CHECK-NEXT:    lhz r3, 840(r1)
+; CHECK-NEXT:    li r11, 320
+; CHECK-NEXT:    std r29, 552(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r29, 728(r1)
+; CHECK-NEXT:    stxvd2x v25, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    std r3, 152(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r3, 832(r1)
+; CHECK-NEXT:    li r11, 336
+; CHECK-NEXT:    std r31, 568(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r31, 736(r1)
+; CHECK-NEXT:    stxvd2x v26, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 352
+; CHECK-NEXT:    std r15, 440(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r16, 448(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r17, 456(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r17, r6
+; CHECK-NEXT:    mr r16, r5
+; CHECK-NEXT:    stxvd2x v27, r1, r11 # 16-byte Folded Spill
 ; CHECK-NEXT:    std r3, 144(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f23
-; CHECK-NEXT:    std r3, 128(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f24
+; CHECK-NEXT:    lhz r3, 824(r1)
+; CHECK-NEXT:    li r11, 368
+; CHECK-NEXT:    std r18, 464(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r20, 480(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r20, r8
+; CHECK-NEXT:    mr r18, r7
+; CHECK-NEXT:    stxvd2x v28, r1, r11 # 16-byte Folded Spill
 ; CHECK-NEXT:    std r3, 120(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f25
+; CHECK-NEXT:    lhz r3, 816(r1)
+; CHECK-NEXT:    li r11, 384
+; CHECK-NEXT:    std r21, 488(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r24, 512(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r24, r10
+; CHECK-NEXT:    mr r21, r9
+; CHECK-NEXT:    stxvd2x v29, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 400
 ; CHECK-NEXT:    std r3, 112(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f26
+; CHECK-NEXT:    lhz r3, 808(r1)
+; CHECK-NEXT:    stxvd2x v30, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 416
 ; CHECK-NEXT:    std r3, 104(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f27
+; CHECK-NEXT:    lhz r3, 800(r1)
+; CHECK-NEXT:    stxvd2x v31, r1, r11 # 16-byte Folded Spill
 ; CHECK-NEXT:    std r3, 96(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f28
+; CHECK-NEXT:    lhz r3, 792(r1)
 ; CHECK-NEXT:    std r3, 88(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f29
+; CHECK-NEXT:    lhz r3, 784(r1)
 ; CHECK-NEXT:    std r3, 80(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f30
+; CHECK-NEXT:    lhz r3, 776(r1)
 ; CHECK-NEXT:    std r3, 72(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f31
+; CHECK-NEXT:    lhz r3, 768(r1)
 ; CHECK-NEXT:    std r3, 64(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 832(r1)
+; CHECK-NEXT:    lhz r3, 760(r1)
 ; CHECK-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 840(r1)
+; CHECK-NEXT:    lhz r3, 752(r1)
 ; CHECK-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 848(r1)
-; CHECK-NEXT:    mr r15, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 856(r1)
-; CHECK-NEXT:    mr r14, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 864(r1)
-; CHECK-NEXT:    mr r31, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 872(r1)
-; CHECK-NEXT:    mr r29, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 880(r1)
-; CHECK-NEXT:    mr r28, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 888(r1)
-; CHECK-NEXT:    mr r27, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 896(r1)
-; CHECK-NEXT:    mr r26, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 904(r1)
-; CHECK-NEXT:    mr r25, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 912(r1)
-; CHECK-NEXT:    mr r24, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 920(r1)
-; CHECK-NEXT:    mr r23, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 928(r1)
-; CHECK-NEXT:    mr r22, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 936(r1)
-; CHECK-NEXT:    mr r21, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 944(r1)
-; CHECK-NEXT:    mr r20, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 952(r1)
-; CHECK-NEXT:    mr r19, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 960(r1)
-; CHECK-NEXT:    mr r18, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 968(r1)
-; CHECK-NEXT:    mr r17, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 976(r1)
-; CHECK-NEXT:    mr r16, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    clrldi r3, r4, 48
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    li r3, 204
-; CHECK-NEXT:    stxsspx f1, r1, r3 # 4-byte Folded Spill
+; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mr r15, r3
 ; CHECK-NEXT:    clrldi r3, r16, 48
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    li r3, 200
-; CHECK-NEXT:    stxsspx f1, r1, r3 # 4-byte Folded Spill
+; CHECK-NEXT:    mtvsrd v31, r15
+; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 224
+; CHECK-NEXT:    xxmrghd vs0, vs0, v31
+; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    clrldi r3, r17, 48
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mr r17, r3
 ; CHECK-NEXT:    clrldi r3, r18, 48
-; CHECK-NEXT:    fmr f29, f1
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r19, 48
-; CHECK-NEXT:    fmr f28, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    mtvsrd v31, r17
+; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 192
+; CHECK-NEXT:    xxmrghd vs0, vs0, v31
+; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    clrldi r3, r20, 48
-; CHECK-NEXT:    fmr f27, f1
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r21, 48
-; CHECK-NEXT:    fmr f26, f1
-; CHECK-NEXT:    bl __extendhfsf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r22, 48
-; CHECK-NEXT:    fmr f25, f1
-; CHECK-NEXT:    bl __extendhfsf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r23, 48
-; CHECK-NEXT:    fmr f24, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r24, 48
-; CHECK-NEXT:    fmr f23, f1
+; CHECK-NEXT:    mr r20, r3
+; CHECK-NEXT:    clrldi r3, r21, 48
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r25, 48
-; CHECK-NEXT:    fmr f22, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    mtvsrd v31, r20
+; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r26, 48
-; CHECK-NEXT:    fmr f21, f1
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 160
+; CHECK-NEXT:    xxmrghd vs0, vs0, v31
+; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    mr r3, r19
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r27, 48
-; CHECK-NEXT:    fmr f20, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r28, 48
-; CHECK-NEXT:    fmr f19, f1
+; CHECK-NEXT:    mr r21, r3
+; CHECK-NEXT:    clrldi r3, r24, 48
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r29, 48
-; CHECK-NEXT:    fmr f18, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    mtvsrd v31, r21
+; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r31, 48
-; CHECK-NEXT:    fmr f17, f1
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 128
+; CHECK-NEXT:    xxmrghd vs0, v31, vs0
+; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    mr r3, r22
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r14, 48
-; CHECK-NEXT:    fmr f16, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r15, 48
-; CHECK-NEXT:    fmr f15, f1
+; CHECK-NEXT:    mr r24, r3
+; CHECK-NEXT:    mr r3, r23
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 48(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    fmr f14, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    mtvsrd v31, r24
+; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 56(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    fmr f30, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    mr r3, r25
+; CHECK-NEXT:    xxmrghd v27, vs0, v31
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 64(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    xxlor v30, f1, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 72(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    xxlor v29, f1, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    mr r25, r3
+; CHECK-NEXT:    mr r3, r26
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 80(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    xxlor v28, f1, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    mtvsrd v31, r25
+; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 88(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    xxlor v27, f1, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    mr r3, r27
+; CHECK-NEXT:    xxmrghd v26, vs0, v31
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 96(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    xxlor v26, f1, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 104(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    xxlor v25, f1, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    mr r27, r3
+; CHECK-NEXT:    mr r3, r28
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 112(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    xxlor v24, f1, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    mtvsrd v31, r27
+; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 120(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    xxlor v23, f1, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    mr r3, r29
+; CHECK-NEXT:    xxmrghd v25, vs0, v31
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 128(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    xxlor v22, f1, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 144(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    xxlor v21, f1, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    mr r29, r3
+; CHECK-NEXT:    mr r3, r31
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 160(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    xxlor v20, f1, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    mtvsrd v31, r29
+; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 176(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    fmr f31, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    mr r3, r14
+; CHECK-NEXT:    xxmrghd v24, vs0, v31
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f31
-; CHECK-NEXT:    mtvsrd v31, r3
-; CHECK-NEXT:    bl llrintf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    li r3, 176
-; CHECK-NEXT:    xxlor f1, v20, v20
-; CHECK-NEXT:    xxmrghd vs0, vs0, v31
-; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    mr r29, r3
+; CHECK-NEXT:    ld r3, 48(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    xxlor f1, v21, v21
-; CHECK-NEXT:    mtvsrd v31, r3
+; CHECK-NEXT:    mtvsrd v31, r29
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    li r3, 160
-; CHECK-NEXT:    xxlor f1, v22, v22
-; CHECK-NEXT:    xxmrghd vs0, vs0, v31
-; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    ld r3, 56(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v23, vs0, v31
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    xxlor f1, v23, v23
-; CHECK-NEXT:    mtvsrd v31, r3
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    li r3, 144
-; CHECK-NEXT:    xxlor f1, v24, v24
-; CHECK-NEXT:    xxmrghd vs0, vs0, v31
-; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    mr r29, r3
+; CHECK-NEXT:    ld r3, 64(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    xxlor f1, v25, v25
-; CHECK-NEXT:    mtvsrd v31, r3
+; CHECK-NEXT:    mtvsrd v31, r29
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    li r3, 128
-; CHECK-NEXT:    xxlor f1, v26, v26
-; CHECK-NEXT:    xxmrghd vs0, vs0, v31
-; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    ld r3, 72(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v22, vs0, v31
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    xxlor f1, v27, v27
-; CHECK-NEXT:    mtvsrd v31, r3
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxlor f1, v28, v28
-; CHECK-NEXT:    xxmrghd v27, vs0, v31
-; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    mr r29, r3
+; CHECK-NEXT:    ld r3, 80(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    xxlor f1, v29, v29
-; CHECK-NEXT:    mtvsrd v31, r3
+; CHECK-NEXT:    mtvsrd v31, r29
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxlor f1, v30, v30
-; CHECK-NEXT:    xxmrghd v29, vs0, v31
-; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    ld r3, 88(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v21, vs0, v31
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f30
-; CHECK-NEXT:    mtvsrd v31, r3
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f14
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v31, vs0, v31
-; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    mr r29, r3
+; CHECK-NEXT:    ld r3, 96(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f15
-; CHECK-NEXT:    mtvsrd v30, r3
+; CHECK-NEXT:    mtvsrd v31, r29
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f16
 ; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v30, vs0, v30
-; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    ld r3, 104(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v20, vs0, v31
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f17
-; CHECK-NEXT:    mtvsrd v28, r3
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f18
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v28, vs0, v28
-; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    mr r29, r3
+; CHECK-NEXT:    ld r3, 112(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f19
-; CHECK-NEXT:    mtvsrd v26, r3
+; CHECK-NEXT:    mtvsrd v31, r29
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f20
 ; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v26, vs0, v26
-; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    ld r3, 120(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v31, vs0, v31
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f21
-; CHECK-NEXT:    mtvsrd v24, r3
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f22
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v24, vs0, v24
-; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    mr r29, r3
+; CHECK-NEXT:    ld r3, 144(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f23
-; CHECK-NEXT:    mtvsrd v22, r3
+; CHECK-NEXT:    mtvsrd v30, r29
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f24
 ; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v22, vs0, v22
-; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    ld r3, 152(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v30, vs0, v30
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f25
-; CHECK-NEXT:    mtvsrd v20, r3
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f26
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v20, vs0, v20
-; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    mr r29, r3
+; CHECK-NEXT:    ld r3, 176(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f27
-; CHECK-NEXT:    mtvsrd v21, r3
+; CHECK-NEXT:    mtvsrd v29, r29
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f28
 ; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v21, vs0, v21
-; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    ld r3, 184(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v29, vs0, v29
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f29
-; CHECK-NEXT:    mtvsrd v23, r3
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    li r3, 200
-; CHECK-NEXT:    lxsspx f1, r1, r3 # 4-byte Folded Reload
-; CHECK-NEXT:    xxmrghd v23, vs0, v23
-; CHECK-NEXT:    bl llrintf
+; CHECK-NEXT:    mr r29, r3
+; CHECK-NEXT:    ld r3, 216(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    mtvsrd v25, r3
-; CHECK-NEXT:    li r3, 204
-; CHECK-NEXT:    lxsspx f1, r1, r3 # 4-byte Folded Reload
+; CHECK-NEXT:    mtvsrd v28, r29
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    li r3, 240
-; CHECK-NEXT:    xxswapd vs1, v23
+; CHECK-NEXT:    xxswapd vs1, v29
 ; CHECK-NEXT:    li r4, 128
-; CHECK-NEXT:    xxswapd vs2, v21
-; CHECK-NEXT:    xxswapd vs3, v31
-; CHECK-NEXT:    xxmrghd v2, vs0, v25
+; CHECK-NEXT:    xxswapd vs2, v30
+; CHECK-NEXT:    xxswapd vs3, v25
+; CHECK-NEXT:    xxmrghd v2, vs0, v28
 ; CHECK-NEXT:    xxswapd vs0, v2
 ; CHECK-NEXT:    stxvd2x vs0, r30, r3
 ; CHECK-NEXT:    li r3, 224
@@ -2732,35 +1888,35 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind {
 ; CHECK-NEXT:    li r3, 208
 ; CHECK-NEXT:    stxvd2x vs2, r30, r3
 ; CHECK-NEXT:    li r3, 192
-; CHECK-NEXT:    xxswapd vs0, v20
+; CHECK-NEXT:    xxswapd vs0, v31
 ; CHECK-NEXT:    stxvd2x vs0, r30, r3
 ; CHECK-NEXT:    li r3, 176
-; CHECK-NEXT:    xxswapd vs1, v22
+; CHECK-NEXT:    xxswapd vs1, v20
 ; CHECK-NEXT:    stxvd2x vs1, r30, r3
 ; CHECK-NEXT:    li r3, 160
-; CHECK-NEXT:    xxswapd vs2, v28
-; CHECK-NEXT:    xxswapd vs0, v24
+; CHECK-NEXT:    xxswapd vs2, v23
+; CHECK-NEXT:    xxswapd vs0, v21
 ; CHECK-NEXT:    stxvd2x vs0, r30, r3
 ; CHECK-NEXT:    li r3, 144
-; CHECK-NEXT:    xxswapd vs1, v26
+; CHECK-NEXT:    xxswapd vs1, v22
 ; CHECK-NEXT:    stxvd2x vs1, r30, r3
 ; CHECK-NEXT:    li r3, 128
 ; CHECK-NEXT:    stxvd2x vs2, r30, r3
 ; CHECK-NEXT:    li r3, 112
-; CHECK-NEXT:    xxswapd vs0, v30
+; CHECK-NEXT:    xxswapd vs0, v24
 ; CHECK-NEXT:    stxvd2x vs0, r30, r3
 ; CHECK-NEXT:    li r3, 96
 ; CHECK-NEXT:    stxvd2x vs3, r30, r3
 ; CHECK-NEXT:    li r3, 80
 ; CHECK-NEXT:    lxvd2x vs2, r1, r4 # 16-byte Folded Reload
-; CHECK-NEXT:    li r4, 144
-; CHECK-NEXT:    xxswapd vs1, v29
+; CHECK-NEXT:    li r4, 160
+; CHECK-NEXT:    xxswapd vs1, v26
 ; CHECK-NEXT:    stxvd2x vs1, r30, r3
 ; CHECK-NEXT:    li r3, 64
 ; CHECK-NEXT:    lxvd2x vs1, r1, r4 # 16-byte Folded Reload
-; CHECK-NEXT:    li r4, 160
+; CHECK-NEXT:    li r4, 192
 ; CHECK-NEXT:    lxvd2x vs3, r1, r4 # 16-byte Folded Reload
-; CHECK-NEXT:    li r4, 176
+; CHECK-NEXT:    li r4, 224
 ; CHECK-NEXT:    lxvd2x vs4, r1, r4 # 16-byte Folded Reload
 ; CHECK-NEXT:    xxswapd vs0, v27
 ; CHECK-NEXT:    stxvd2x vs0, r30, r3
@@ -2773,69 +1929,51 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind {
 ; CHECK-NEXT:    li r3, 16
 ; CHECK-NEXT:    xxswapd vs3, vs3
 ; CHECK-NEXT:    stxvd2x vs3, r30, r3
-; CHECK-NEXT:    li r3, 384
+; CHECK-NEXT:    li r3, 416
 ; CHECK-NEXT:    xxswapd vs4, vs4
 ; CHECK-NEXT:    stxvd2x vs4, 0, r30
 ; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    li r3, 368
-; CHECK-NEXT:    lfd f31, 680(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f30, 672(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f29, 664(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f28, 656(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f27, 648(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f26, 640(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f25, 632(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f24, 624(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f23, 616(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f22, 608(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f21, 600(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f20, 592(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f19, 584(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f18, 576(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f17, 568(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f16, 560(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    li r3, 400
+; CHECK-NEXT:    ld r31, 568(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r30, 560(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r29, 552(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r28, 544(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r27, 536(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r26, 528(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r25, 520(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r24, 512(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r23, 504(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r22, 496(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r21, 488(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r20, 480(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r19, 472(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r18, 464(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r17, 456(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r16, 448(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    li r3, 352
-; CHECK-NEXT:    lfd f15, 552(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f14, 544(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r31, 536(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r30, 528(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r29, 520(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r28, 512(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    li r3, 384
+; CHECK-NEXT:    ld r15, 440(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r14, 432(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    li r3, 336
-; CHECK-NEXT:    ld r27, 504(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r26, 496(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r25, 488(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r24, 480(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r23, 472(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r22, 464(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    li r3, 368
 ; CHECK-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    li r3, 320
-; CHECK-NEXT:    ld r21, 456(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r20, 448(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r19, 440(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r18, 432(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r17, 424(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r16, 416(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    li r3, 352
 ; CHECK-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    li r3, 304
-; CHECK-NEXT:    ld r15, 408(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r14, 400(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    li r3, 336
 ; CHECK-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    li r3, 288
+; CHECK-NEXT:    li r3, 320
 ; CHECK-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    li r3, 272
+; CHECK-NEXT:    li r3, 304
 ; CHECK-NEXT:    lxvd2x v24, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    li r3, 256
+; CHECK-NEXT:    li r3, 288
 ; CHECK-NEXT:    lxvd2x v23, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    li r3, 240
+; CHECK-NEXT:    li r3, 272
 ; CHECK-NEXT:    lxvd2x v22, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    li r3, 224
+; CHECK-NEXT:    li r3, 256
 ; CHECK-NEXT:    lxvd2x v21, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    li r3, 208
+; CHECK-NEXT:    li r3, 240
 ; CHECK-NEXT:    lxvd2x v20, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    addi r1, r1, 688
+; CHECK-NEXT:    addi r1, r1, 576
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
 ; CHECK-NEXT:    blr
@@ -2843,516 +1981,410 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind {
 ; FAST-LABEL: llrint_v32i64_v32f16:
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
-; FAST-NEXT:    stdu r1, -480(r1)
-; FAST-NEXT:    li r4, 128
-; FAST-NEXT:    std r0, 496(r1)
-; FAST-NEXT:    std r30, 320(r1) # 8-byte Folded Spill
+; FAST-NEXT:    stdu r1, -560(r1)
+; FAST-NEXT:    std r0, 576(r1)
+; FAST-NEXT:    std r30, 544(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    mr r30, r3
-; FAST-NEXT:    stfd f14, 336(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f15, 344(r1) # 8-byte Folded Spill
-; FAST-NEXT:    fmr f14, f5
-; FAST-NEXT:    stfd f16, 352(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT:    li r4, 144
-; FAST-NEXT:    fmr f16, f4
-; FAST-NEXT:    stfd f17, 360(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f18, 368(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f19, 376(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f20, 384(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f21, 392(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT:    li r4, 160
-; FAST-NEXT:    stfd f22, 400(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f23, 408(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f24, 416(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f25, 424(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f26, 432(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f27, 440(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT:    li r4, 176
-; FAST-NEXT:    xxlor v22, f3, f3
-; FAST-NEXT:    stfd f28, 448(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f29, 456(r1) # 8-byte Folded Spill
-; FAST-NEXT:    fmr f29, f9
-; FAST-NEXT:    stfd f30, 464(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f31, 472(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT:    li r4, 192
-; FAST-NEXT:    xxlor v23, f2, f2
-; FAST-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT:    li r4, 208
-; FAST-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT:    li r4, 224
-; FAST-NEXT:    xxlor v25, f13, f13
-; FAST-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT:    li r4, 240
-; FAST-NEXT:    xxlor v26, f12, f12
-; FAST-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT:    li r4, 256
-; FAST-NEXT:    xxlor v27, f11, f11
-; FAST-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT:    li r4, 272
-; FAST-NEXT:    xxlor v28, f10, f10
-; FAST-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT:    li r4, 288
-; FAST-NEXT:    xxlor v29, f8, f8
-; FAST-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT:    li r4, 304
-; FAST-NEXT:    xxlor v30, f7, f7
-; FAST-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT:    li r4, 44
-; FAST-NEXT:    xxlor v31, f6, f6
-; FAST-NEXT:    stxsspx f1, r1, r4 # 4-byte Folded Spill
-; FAST-NEXT:    lfs f1, 768(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    lhz r3, 848(r1)
+; FAST-NEXT:    li r11, 224
+; FAST-NEXT:    std r14, 416(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r15, 424(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r15, 736(r1)
+; FAST-NEXT:    stxvd2x v20, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 240
+; FAST-NEXT:    std r19, 456(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r14, 728(r1)
+; FAST-NEXT:    std r3, 184(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r3, 840(r1)
+; FAST-NEXT:    lhz r19, 656(r1)
+; FAST-NEXT:    stxvd2x v21, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 256
+; FAST-NEXT:    std r21, 472(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r21, 664(r1)
+; FAST-NEXT:    stxvd2x v22, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    std r3, 176(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r3, 832(r1)
+; FAST-NEXT:    li r11, 272
+; FAST-NEXT:    std r23, 488(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r23, 672(r1)
+; FAST-NEXT:    stxvd2x v23, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 288
+; FAST-NEXT:    std r24, 496(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r26, 512(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r27, 520(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r27, 696(r1)
+; FAST-NEXT:    lhz r26, 688(r1)
+; FAST-NEXT:    lhz r24, 680(r1)
+; FAST-NEXT:    stxvd2x v24, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    std r3, 152(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r3, 824(r1)
+; FAST-NEXT:    li r11, 304
+; FAST-NEXT:    std r28, 528(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r28, 704(r1)
+; FAST-NEXT:    stxvd2x v25, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    std r3, 144(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r3, 816(r1)
+; FAST-NEXT:    li r11, 320
+; FAST-NEXT:    std r29, 536(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r29, 712(r1)
+; FAST-NEXT:    stxvd2x v26, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 336
+; FAST-NEXT:    std r31, 552(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r31, 720(r1)
+; FAST-NEXT:    std r16, 432(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r17, 440(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r17, r6
+; FAST-NEXT:    mr r16, r5
+; FAST-NEXT:    stxvd2x v27, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    std r3, 136(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r3, 808(r1)
+; FAST-NEXT:    li r11, 352
+; FAST-NEXT:    std r18, 448(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r20, 464(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r20, r8
+; FAST-NEXT:    mr r18, r7
+; FAST-NEXT:    stxvd2x v28, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    std r3, 104(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r3, 800(r1)
+; FAST-NEXT:    li r11, 368
+; FAST-NEXT:    std r22, 480(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r25, 504(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r25, r10
+; FAST-NEXT:    mr r22, r9
+; FAST-NEXT:    stxvd2x v29, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 384
+; FAST-NEXT:    std r3, 96(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r3, 792(r1)
+; FAST-NEXT:    stxvd2x v30, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 400
+; FAST-NEXT:    std r3, 88(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r3, 784(r1)
+; FAST-NEXT:    stxvd2x v31, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    std r3, 80(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r3, 776(r1)
+; FAST-NEXT:    std r3, 72(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r3, 768(r1)
+; FAST-NEXT:    std r3, 64(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r3, 760(r1)
+; FAST-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r3, 752(r1)
+; FAST-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r3, 744(r1)
+; FAST-NEXT:    std r3, 40(r1) # 8-byte Folded Spill
+; FAST-NEXT:    clrldi r3, r4, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    li r3, 120
-; FAST-NEXT:    stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT:    lfs f1, 760(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    clrldi r3, r16, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    li r3, 112
-; FAST-NEXT:    stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT:    lfs f1, 752(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 208
+; FAST-NEXT:    xxmrghd vs0, vs0, v31
+; FAST-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    clrldi r3, r17, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    li r3, 104
-; FAST-NEXT:    stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT:    lfs f1, 744(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    clrldi r3, r18, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    li r3, 96
-; FAST-NEXT:    stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT:    lfs f1, 736(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 192
+; FAST-NEXT:    xxmrghd vs0, vs0, v31
+; FAST-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    clrldi r3, r20, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    li r3, 88
-; FAST-NEXT:    stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT:    lfs f1, 728(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    clrldi r3, r22, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    li r3, 80
-; FAST-NEXT:    stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT:    lfs f1, 720(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 160
+; FAST-NEXT:    xxmrghd vs0, vs0, v31
+; FAST-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    mr r3, r19
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    li r3, 72
-; FAST-NEXT:    stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT:    lfs f1, 712(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    clrldi r3, r25, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    li r3, 64
-; FAST-NEXT:    stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT:    lfs f1, 704(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 112
+; FAST-NEXT:    xxmrghd vs0, v31, vs0
+; FAST-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    mr r3, r21
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    li r3, 56
-; FAST-NEXT:    stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT:    lfs f1, 696(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    mr r3, r23
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    li r3, 48
-; FAST-NEXT:    stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT:    lfs f1, 688(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    mr r3, r24
+; FAST-NEXT:    xxmrghd v27, vs0, v31
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    xxlor v21, f1, f1
-; FAST-NEXT:    lfs f1, 680(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    mr r3, r26
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    xxlor v20, f1, f1
-; FAST-NEXT:    lfs f1, 672(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    mr r3, r27
+; FAST-NEXT:    xxmrghd v26, vs0, v31
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    xxlor v24, f1, f1
-; FAST-NEXT:    lfs f1, 664(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    mr r3, r28
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f31, f1
-; FAST-NEXT:    lfs f1, 656(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    mr r3, r29
+; FAST-NEXT:    xxmrghd v25, vs0, v31
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f30, f1
-; FAST-NEXT:    lfs f1, 648(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    mr r3, r31
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f28, f1
-; FAST-NEXT:    lfs f1, 640(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    mr r3, r14
+; FAST-NEXT:    xxmrghd v24, vs0, v31
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f27, f1
-; FAST-NEXT:    lfs f1, 632(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    mr r3, r15
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f26, f1
-; FAST-NEXT:    lfs f1, 624(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    ld r3, 40(r1) # 8-byte Folded Reload
+; FAST-NEXT:    xxmrghd v23, vs0, v31
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f25, f1
-; FAST-NEXT:    xxlor f1, v25, v25
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    ld r3, 48(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f24, f1
-; FAST-NEXT:    xxlor f1, v26, v26
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    ld r3, 56(r1) # 8-byte Folded Reload
+; FAST-NEXT:    xxmrghd v22, vs0, v31
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f23, f1
-; FAST-NEXT:    xxlor f1, v27, v27
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    ld r3, 64(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f22, f1
-; FAST-NEXT:    xxlor f1, v28, v28
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    ld r3, 72(r1) # 8-byte Folded Reload
+; FAST-NEXT:    xxmrghd v21, vs0, v31
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f21, f1
-; FAST-NEXT:    fmr f1, f29
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    ld r3, 80(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f20, f1
-; FAST-NEXT:    xxlor f1, v29, v29
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    ld r3, 88(r1) # 8-byte Folded Reload
+; FAST-NEXT:    xxmrghd v20, vs0, v31
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f19, f1
-; FAST-NEXT:    xxlor f1, v30, v30
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    ld r3, 96(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f18, f1
-; FAST-NEXT:    xxlor f1, v31, v31
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    ld r3, 104(r1) # 8-byte Folded Reload
+; FAST-NEXT:    xxmrghd v31, vs0, v31
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f29, f1
-; FAST-NEXT:    fmr f1, f14
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v30, r3
+; FAST-NEXT:    ld r3, 136(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f14, f1
-; FAST-NEXT:    fmr f1, f16
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    ld r3, 144(r1) # 8-byte Folded Reload
+; FAST-NEXT:    xxmrghd v30, vs0, v30
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f16, f1
-; FAST-NEXT:    xxlor f1, v22, v22
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v29, r3
+; FAST-NEXT:    ld r3, 152(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f17, f1
-; FAST-NEXT:    xxlor f1, v23, v23
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    ld r3, 176(r1) # 8-byte Folded Reload
+; FAST-NEXT:    xxmrghd v29, vs0, v29
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    li r3, 44
-; FAST-NEXT:    fmr f15, f1
-; FAST-NEXT:    lxsspx f1, r1, r3 # 4-byte Folded Reload
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v28, r3
+; FAST-NEXT:    ld r3, 184(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fctid f3, f15
-; FAST-NEXT:    fctid f4, f17
-; FAST-NEXT:    mffprd r3, f3
-; FAST-NEXT:    fctid f5, f16
-; FAST-NEXT:    fctid f6, f14
-; FAST-NEXT:    fctid f7, f18
-; FAST-NEXT:    fctid f8, f19
-; FAST-NEXT:    fctid f13, f1
-; FAST-NEXT:    fctid f9, f20
-; FAST-NEXT:    fctid f10, f22
-; FAST-NEXT:    fctid f11, f24
-; FAST-NEXT:    fctid f12, f25
-; FAST-NEXT:    fctid f2, f23
-; FAST-NEXT:    fctid f0, f21
-; FAST-NEXT:    mtvsrd v2, r3
-; FAST-NEXT:    mffprd r3, f4
-; FAST-NEXT:    mtvsrd v3, r3
-; FAST-NEXT:    mffprd r3, f5
-; FAST-NEXT:    mtfprd f5, r3
-; FAST-NEXT:    mffprd r3, f6
-; FAST-NEXT:    mtfprd f1, r3
-; FAST-NEXT:    mffprd r3, f7
-; FAST-NEXT:    mtfprd f6, r3
-; FAST-NEXT:    mffprd r3, f8
-; FAST-NEXT:    mtfprd f7, r3
-; FAST-NEXT:    mffprd r3, f9
-; FAST-NEXT:    mtfprd f3, r3
-; FAST-NEXT:    mffprd r3, f10
-; FAST-NEXT:    mtfprd f4, r3
-; FAST-NEXT:    mffprd r3, f11
-; FAST-NEXT:    fctid f11, f31
-; FAST-NEXT:    lfd f31, 56(r1) # 8-byte Folded Reload
-; FAST-NEXT:    mtfprd f8, r3
-; FAST-NEXT:    mffprd r3, f12
-; FAST-NEXT:    xxlor f12, v24, v24
-; FAST-NEXT:    fctid f31, f31
-; FAST-NEXT:    fctid f12, f12
-; FAST-NEXT:    mtfprd f9, r3
-; FAST-NEXT:    mffprd r3, f13
-; FAST-NEXT:    lfd f13, 48(r1) # 8-byte Folded Reload
-; FAST-NEXT:    mtfprd f10, r3
-; FAST-NEXT:    fctid f13, f13
-; FAST-NEXT:    xxmrghd v3, vs5, v3
-; FAST-NEXT:    fctid f5, f26
-; FAST-NEXT:    mffprd r3, f5
-; FAST-NEXT:    mtfprd f5, r3
-; FAST-NEXT:    xxmrghd v4, vs7, vs6
-; FAST-NEXT:    fctid f6, f27
-; FAST-NEXT:    fctid f7, f28
-; FAST-NEXT:    mffprd r3, f6
-; FAST-NEXT:    lfd f28, 96(r1) # 8-byte Folded Reload
-; FAST-NEXT:    fctid f28, f28
-; FAST-NEXT:    mtfprd f6, r3
-; FAST-NEXT:    mffprd r3, f7
-; FAST-NEXT:    mtfprd f7, r3
-; FAST-NEXT:    xxmrghd v2, v2, vs10
-; FAST-NEXT:    fctid f10, f30
-; FAST-NEXT:    mffprd r3, f10
-; FAST-NEXT:    lfd f30, 80(r1) # 8-byte Folded Reload
-; FAST-NEXT:    fctid f30, f30
-; FAST-NEXT:    mtfprd f10, r3
-; FAST-NEXT:    mffprd r3, f11
-; FAST-NEXT:    mtfprd f11, r3
-; FAST-NEXT:    mffprd r3, f12
-; FAST-NEXT:    mtfprd f12, r3
-; FAST-NEXT:    xxmrghd v5, vs12, vs11
-; FAST-NEXT:    xxlor f11, v20, v20
-; FAST-NEXT:    xxlor f12, v21, v21
-; FAST-NEXT:    fctid f11, f11
-; FAST-NEXT:    fctid f12, f12
-; FAST-NEXT:    mffprd r3, f11
-; FAST-NEXT:    mtfprd f11, r3
-; FAST-NEXT:    mffprd r3, f12
-; FAST-NEXT:    mtfprd f12, r3
-; FAST-NEXT:    mffprd r3, f13
-; FAST-NEXT:    mtfprd f13, r3
-; FAST-NEXT:    mffprd r3, f31
-; FAST-NEXT:    lfd f31, 64(r1) # 8-byte Folded Reload
-; FAST-NEXT:    fctid f31, f31
-; FAST-NEXT:    mtvsrd v0, r3
-; FAST-NEXT:    mffprd r3, f31
-; FAST-NEXT:    lfd f31, 72(r1) # 8-byte Folded Reload
-; FAST-NEXT:    mtvsrd v1, r3
-; FAST-NEXT:    mffprd r3, f30
-; FAST-NEXT:    lfd f30, 88(r1) # 8-byte Folded Reload
-; FAST-NEXT:    fctid f31, f31
-; FAST-NEXT:    mtvsrd v6, r3
-; FAST-NEXT:    mffprd r3, f28
-; FAST-NEXT:    lfd f28, 104(r1) # 8-byte Folded Reload
-; FAST-NEXT:    fctid f30, f30
-; FAST-NEXT:    fctid f28, f28
-; FAST-NEXT:    mtvsrd v7, r3
-; FAST-NEXT:    mffprd r3, f28
-; FAST-NEXT:    lfd f28, 112(r1) # 8-byte Folded Reload
-; FAST-NEXT:    fctid f28, f28
-; FAST-NEXT:    mtvsrd v8, r3
-; FAST-NEXT:    mffprd r3, f28
-; FAST-NEXT:    lfd f28, 120(r1) # 8-byte Folded Reload
-; FAST-NEXT:    fctid f28, f28
-; FAST-NEXT:    xxmrghd v10, vs12, vs11
-; FAST-NEXT:    xxmrghd v0, v0, vs13
-; FAST-NEXT:    xxswapd vs12, v0
-; FAST-NEXT:    xxmrghd v0, vs9, vs8
-; FAST-NEXT:    xxmrghd v7, v8, v7
-; FAST-NEXT:    mtvsrd v8, r3
-; FAST-NEXT:    mffprd r3, f28
-; FAST-NEXT:    mtvsrd v9, r3
-; FAST-NEXT:    mffprd r3, f30
-; FAST-NEXT:    xxswapd v7, v7
-; FAST-NEXT:    xxmrghd v8, v9, v8
-; FAST-NEXT:    mtvsrd v9, r3
-; FAST-NEXT:    mffprd r3, f31
-; FAST-NEXT:    xxswapd v8, v8
-; FAST-NEXT:    xxmrghd v6, v9, v6
-; FAST-NEXT:    mtvsrd v9, r3
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    xxswapd vs1, v29
+; FAST-NEXT:    li r4, 112
+; FAST-NEXT:    xxswapd vs2, v30
+; FAST-NEXT:    xxswapd vs3, v25
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
 ; FAST-NEXT:    li r3, 240
-; FAST-NEXT:    stxvd2x v8, r30, r3
+; FAST-NEXT:    xxmrghd v2, vs0, v28
+; FAST-NEXT:    xxswapd vs0, v2
+; FAST-NEXT:    stxvd2x vs0, r30, r3
 ; FAST-NEXT:    li r3, 224
-; FAST-NEXT:    stxvd2x v7, r30, r3
+; FAST-NEXT:    stxvd2x vs1, r30, r3
 ; FAST-NEXT:    li r3, 208
-; FAST-NEXT:    xxswapd vs11, v6
-; FAST-NEXT:    xxmrghd v6, vs10, vs7
-; FAST-NEXT:    stxvd2x vs11, r30, r3
+; FAST-NEXT:    stxvd2x vs2, r30, r3
 ; FAST-NEXT:    li r3, 192
-; FAST-NEXT:    xxmrghd v1, v9, v1
-; FAST-NEXT:    xxswapd vs11, v1
-; FAST-NEXT:    xxmrghd v1, vs6, vs5
-; FAST-NEXT:    xxswapd vs5, v10
-; FAST-NEXT:    xxswapd vs6, v5
-; FAST-NEXT:    stxvd2x vs11, r30, r3
+; FAST-NEXT:    xxswapd vs0, v31
+; FAST-NEXT:    stxvd2x vs0, r30, r3
 ; FAST-NEXT:    li r3, 176
-; FAST-NEXT:    stxvd2x vs12, r30, r3
+; FAST-NEXT:    xxswapd vs1, v20
+; FAST-NEXT:    stxvd2x vs1, r30, r3
 ; FAST-NEXT:    li r3, 160
-; FAST-NEXT:    stxvd2x vs5, r30, r3
+; FAST-NEXT:    xxswapd vs2, v23
+; FAST-NEXT:    xxswapd vs0, v21
+; FAST-NEXT:    stxvd2x vs0, r30, r3
 ; FAST-NEXT:    li r3, 144
-; FAST-NEXT:    stxvd2x vs6, r30, r3
-; FAST-NEXT:    mffprd r3, f2
-; FAST-NEXT:    mtfprd f7, r3
+; FAST-NEXT:    xxswapd vs1, v22
+; FAST-NEXT:    stxvd2x vs1, r30, r3
 ; FAST-NEXT:    li r3, 128
-; FAST-NEXT:    xxswapd vs5, v6
-; FAST-NEXT:    stxvd2x vs5, r30, r3
-; FAST-NEXT:    li r3, 112
-; FAST-NEXT:    xxswapd vs2, v1
-; FAST-NEXT:    xxswapd vs6, v0
 ; FAST-NEXT:    stxvd2x vs2, r30, r3
+; FAST-NEXT:    li r3, 112
+; FAST-NEXT:    xxswapd vs0, v24
+; FAST-NEXT:    stxvd2x vs0, r30, r3
 ; FAST-NEXT:    li r3, 96
-; FAST-NEXT:    fctid f2, f29
-; FAST-NEXT:    stxvd2x vs6, r30, r3
-; FAST-NEXT:    mffprd r3, f0
-; FAST-NEXT:    mtfprd f0, r3
-; FAST-NEXT:    mffprd r3, f2
-; FAST-NEXT:    mtfprd f2, r3
+; FAST-NEXT:    stxvd2x vs3, r30, r3
 ; FAST-NEXT:    li r3, 80
-; FAST-NEXT:    xxmrghd v5, vs7, vs4
-; FAST-NEXT:    xxswapd vs4, v2
-; FAST-NEXT:    xxmrghd v0, vs0, vs3
-; FAST-NEXT:    xxswapd vs0, v5
-; FAST-NEXT:    xxswapd vs3, v3
-; FAST-NEXT:    stxvd2x vs0, r30, r3
+; FAST-NEXT:    lxvd2x vs2, r1, r4 # 16-byte Folded Reload
+; FAST-NEXT:    li r4, 160
+; FAST-NEXT:    xxswapd vs1, v26
+; FAST-NEXT:    stxvd2x vs1, r30, r3
 ; FAST-NEXT:    li r3, 64
-; FAST-NEXT:    xxswapd vs0, v0
+; FAST-NEXT:    lxvd2x vs1, r1, r4 # 16-byte Folded Reload
+; FAST-NEXT:    li r4, 192
+; FAST-NEXT:    lxvd2x vs3, r1, r4 # 16-byte Folded Reload
+; FAST-NEXT:    li r4, 208
+; FAST-NEXT:    lxvd2x vs4, r1, r4 # 16-byte Folded Reload
+; FAST-NEXT:    xxswapd vs0, v27
 ; FAST-NEXT:    stxvd2x vs0, r30, r3
 ; FAST-NEXT:    li r3, 48
-; FAST-NEXT:    xxmrghd v5, vs2, vs1
-; FAST-NEXT:    xxswapd vs1, v4
-; FAST-NEXT:    stxvd2x vs1, r30, r3
-; FAST-NEXT:    li r3, 32
-; FAST-NEXT:    xxswapd vs2, v5
+; FAST-NEXT:    xxswapd vs2, vs2
 ; FAST-NEXT:    stxvd2x vs2, r30, r3
+; FAST-NEXT:    li r3, 32
+; FAST-NEXT:    xxswapd vs1, vs1
+; FAST-NEXT:    stxvd2x vs1, r30, r3
 ; FAST-NEXT:    li r3, 16
+; FAST-NEXT:    xxswapd vs3, vs3
 ; FAST-NEXT:    stxvd2x vs3, r30, r3
-; FAST-NEXT:    li r3, 304
+; FAST-NEXT:    li r3, 400
+; FAST-NEXT:    xxswapd vs4, vs4
 ; FAST-NEXT:    stxvd2x vs4, 0, r30
-; FAST-NEXT:    lfd f31, 472(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f30, 464(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f29, 456(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f28, 448(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f27, 440(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f26, 432(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f25, 424(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f24, 416(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f23, 408(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f22, 400(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f21, 392(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f20, 384(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f19, 376(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f18, 368(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f17, 360(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f16, 352(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f15, 344(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f14, 336(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT:    li r3, 288
-; FAST-NEXT:    ld r30, 320(r1) # 8-byte Folded Reload
+; FAST-NEXT:    li r3, 384
+; FAST-NEXT:    ld r31, 552(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r30, 544(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r29, 536(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r28, 528(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r27, 520(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r26, 512(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r25, 504(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r24, 496(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r23, 488(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r22, 480(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r21, 472(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r20, 464(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r19, 456(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r18, 448(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r17, 440(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r16, 432(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT:    li r3, 272
+; FAST-NEXT:    li r3, 368
+; FAST-NEXT:    ld r15, 424(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r14, 416(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT:    li r3, 256
+; FAST-NEXT:    li r3, 352
 ; FAST-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT:    li r3, 240
+; FAST-NEXT:    li r3, 336
 ; FAST-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT:    li r3, 224
+; FAST-NEXT:    li r3, 320
 ; FAST-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT:    li r3, 208
+; FAST-NEXT:    li r3, 304
 ; FAST-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT:    li r3, 192
+; FAST-NEXT:    li r3, 288
 ; FAST-NEXT:    lxvd2x v24, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT:    li r3, 176
+; FAST-NEXT:    li r3, 272
 ; FAST-NEXT:    lxvd2x v23, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT:    li r3, 160
+; FAST-NEXT:    li r3, 256
 ; FAST-NEXT:    lxvd2x v22, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT:    li r3, 144
+; FAST-NEXT:    li r3, 240
 ; FAST-NEXT:    lxvd2x v21, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT:    li r3, 128
+; FAST-NEXT:    li r3, 224
 ; FAST-NEXT:    lxvd2x v20, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT:    addi r1, r1, 480
+; FAST-NEXT:    addi r1, r1, 560
 ; FAST-NEXT:    ld r0, 16(r1)
 ; FAST-NEXT:    mtlr r0
 ; FAST-NEXT:    blr
diff --git a/llvm/test/CodeGen/PowerPC/vector-lrint.ll b/llvm/test/CodeGen/PowerPC/vector-lrint.ll
index f4375362f861c..d0a709c884a25 100644
--- a/llvm/test/CodeGen/PowerPC/vector-lrint.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-lrint.ll
@@ -33,10 +33,8 @@ define <1 x i64> @lrint_v1f16(<1 x half> %x) nounwind {
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -112(r1)
-; BE-NEXT:    std r0, 128(r1)
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
 ; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    std r0, 128(r1)
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
@@ -50,10 +48,8 @@ define <1 x i64> @lrint_v1f16(<1 x half> %x) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -32(r1)
-; CHECK-NEXT:    std r0, 48(r1)
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
 ; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    bl lrintf
@@ -67,10 +63,8 @@ define <1 x i64> @lrint_v1f16(<1 x half> %x) nounwind {
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
 ; FAST-NEXT:    stdu r1, -32(r1)
-; FAST-NEXT:    std r0, 48(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
 ; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    std r0, 48(r1)
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
 ; FAST-NEXT:    fctid f0, f1
@@ -88,37 +82,26 @@ define <2 x i64> @lrint_v2f16(<2 x half> %x) nounwind {
 ; BE-LABEL: lrint_v2f16:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
-; BE-NEXT:    stdu r1, -160(r1)
-; BE-NEXT:    std r0, 176(r1)
-; BE-NEXT:    stfd f31, 152(r1) # 8-byte Folded Spill
-; BE-NEXT:    fmr f31, f1
-; BE-NEXT:    fmr f1, f2
-; BE-NEXT:    std r30, 136(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f31
+; BE-NEXT:    stdu r1, -144(r1)
+; BE-NEXT:    std r0, 160(r1)
+; BE-NEXT:    std r30, 128(r1) # 8-byte Folded Spill
 ; BE-NEXT:    mr r30, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r3, 48
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r30, 48
-; BE-NEXT:    fmr f31, f1
+; BE-NEXT:    clrldi r3, r4, 48
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f31
 ; BE-NEXT:    std r3, 120(r1)
+; BE-NEXT:    clrldi r3, r30, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
 ; BE-NEXT:    std r3, 112(r1)
 ; BE-NEXT:    addi r3, r1, 112
-; BE-NEXT:    ld r30, 136(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f31, 152(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r30, 128(r1) # 8-byte Folded Reload
 ; BE-NEXT:    lxvd2x v2, 0, r3
-; BE-NEXT:    addi r1, r1, 160
+; BE-NEXT:    addi r1, r1, 144
 ; BE-NEXT:    ld r0, 16(r1)
 ; BE-NEXT:    mtlr r0
 ; BE-NEXT:    blr
@@ -127,35 +110,28 @@ define <2 x i64> @lrint_v2f16(<2 x half> %x) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -96(r1)
-; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    std r0, 112(r1)
-; CHECK-NEXT:    std r30, 72(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f31, 88(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f31, f2
-; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f31
-; CHECK-NEXT:    mr r30, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
+; CHECK-NEXT:    li r5, 48
 ; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    std r0, 112(r1)
+; CHECK-NEXT:    std r29, 72(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r30, 80(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r30, r4
+; CHECK-NEXT:    stxvd2x v31, r1, r5 # 16-byte Folded Spill
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mr r29, r3
 ; CHECK-NEXT:    clrldi r3, r30, 48
-; CHECK-NEXT:    fmr f31, f1
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    bl lrintf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f31
-; CHECK-NEXT:    mtvsrd v31, r3
+; CHECK-NEXT:    mtvsrd v31, r29
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    lfd f31, 88(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r30, 72(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r30, 80(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r29, 72(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    xxmrghd v2, vs0, v31
 ; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 96
@@ -166,35 +142,30 @@ define <2 x i64> @lrint_v2f16(<2 x half> %x) nounwind {
 ; FAST-LABEL: lrint_v2f16:
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
-; FAST-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stdu r1, -48(r1)
-; FAST-NEXT:    fmr f31, f1
-; FAST-NEXT:    fmr f1, f2
-; FAST-NEXT:    std r0, 64(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
+; FAST-NEXT:    stdu r1, -80(r1)
+; FAST-NEXT:    li r5, 48
 ; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    std r0, 96(r1)
+; FAST-NEXT:    std r30, 64(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r30, r4
+; FAST-NEXT:    stxvd2x v31, r1, r5 # 16-byte Folded Spill
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f30, f1
-; FAST-NEXT:    fmr f1, f31
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    clrldi r3, r30, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
 ; FAST-NEXT:    fctid f0, f1
-; FAST-NEXT:    fctid f1, f30
+; FAST-NEXT:    ld r30, 64(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    mffprd r3, f0
 ; FAST-NEXT:    mtfprd f0, r3
-; FAST-NEXT:    mffprd r3, f1
-; FAST-NEXT:    mtfprd f1, r3
-; FAST-NEXT:    xxmrghd v2, vs1, vs0
-; FAST-NEXT:    addi r1, r1, 48
+; FAST-NEXT:    li r3, 48
+; FAST-NEXT:    xxmrghd v2, vs0, v31
+; FAST-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    addi r1, r1, 80
 ; FAST-NEXT:    ld r0, 16(r1)
-; FAST-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    mtlr r0
 ; FAST-NEXT:    blr
   %a = call <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half> %x)
@@ -206,73 +177,46 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) nounwind {
 ; BE-LABEL: lrint_v4f16:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
-; BE-NEXT:    stdu r1, -208(r1)
-; BE-NEXT:    std r0, 224(r1)
-; BE-NEXT:    stfd f29, 184(r1) # 8-byte Folded Spill
-; BE-NEXT:    fmr f29, f1
-; BE-NEXT:    fmr f1, f2
-; BE-NEXT:    std r28, 152(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r29, 160(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r30, 168(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f30, 192(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f31, 200(r1) # 8-byte Folded Spill
-; BE-NEXT:    fmr f31, f4
-; BE-NEXT:    fmr f30, f3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f29
-; BE-NEXT:    mr r30, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f31
-; BE-NEXT:    mr r29, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f30
+; BE-NEXT:    stdu r1, -176(r1)
+; BE-NEXT:    std r0, 192(r1)
+; BE-NEXT:    std r28, 144(r1) # 8-byte Folded Spill
 ; BE-NEXT:    mr r28, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    clrldi r3, r4, 48
+; BE-NEXT:    std r29, 152(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r30, 160(r1) # 8-byte Folded Spill
+; BE-NEXT:    mr r30, r6
+; BE-NEXT:    mr r29, r5
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
+; BE-NEXT:    bl lrintf
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 120(r1)
 ; BE-NEXT:    clrldi r3, r28, 48
-; BE-NEXT:    fmr f31, f1
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r29, 48
-; BE-NEXT:    fmr f30, f1
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
+; BE-NEXT:    std r3, 112(r1)
 ; BE-NEXT:    clrldi r3, r30, 48
-; BE-NEXT:    fmr f29, f1
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f29
-; BE-NEXT:    std r3, 120(r1)
-; BE-NEXT:    bl lrintf
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f30
-; BE-NEXT:    std r3, 112(r1)
-; BE-NEXT:    bl lrintf
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f31
 ; BE-NEXT:    std r3, 136(r1)
+; BE-NEXT:    clrldi r3, r29, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
 ; BE-NEXT:    std r3, 128(r1)
 ; BE-NEXT:    addi r3, r1, 112
-; BE-NEXT:    ld r30, 168(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f31, 200(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f30, 192(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f29, 184(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r30, 160(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r29, 152(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r28, 144(r1) # 8-byte Folded Reload
 ; BE-NEXT:    lxvd2x v2, 0, r3
 ; BE-NEXT:    addi r3, r1, 128
-; BE-NEXT:    ld r29, 160(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r28, 152(r1) # 8-byte Folded Reload
 ; BE-NEXT:    lxvd2x v3, 0, r3
-; BE-NEXT:    addi r1, r1, 208
+; BE-NEXT:    addi r1, r1, 176
 ; BE-NEXT:    ld r0, 16(r1)
 ; BE-NEXT:    mtlr r0
 ; BE-NEXT:    blr
@@ -280,79 +224,57 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) nounwind {
 ; CHECK-LABEL: lrint_v4f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    stdu r1, -144(r1)
-; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    std r0, 160(r1)
-; CHECK-NEXT:    std r28, 88(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r29, 96(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r30, 104(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f29, 120(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f29, f2
-; CHECK-NEXT:    stfd f30, 128(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f30, f3
-; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    stfd f31, 136(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f31, f4
-; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f29
-; CHECK-NEXT:    mr r30, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f30
-; CHECK-NEXT:    mr r29, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f31
-; CHECK-NEXT:    mr r28, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
+; CHECK-NEXT:    stdu r1, -128(r1)
+; CHECK-NEXT:    li r7, 48
+; CHECK-NEXT:    std r0, 144(r1)
 ; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    std r27, 88(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r28, 96(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r29, 104(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r29, r5
+; CHECK-NEXT:    mr r28, r4
+; CHECK-NEXT:    stxvd2x v30, r1, r7 # 16-byte Folded Spill
+; CHECK-NEXT:    li r7, 64
+; CHECK-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r30, r6
+; CHECK-NEXT:    stxvd2x v31, r1, r7 # 16-byte Folded Spill
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r28, 48
-; CHECK-NEXT:    fmr f31, f1
-; CHECK-NEXT:    bl __extendhfsf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r29, 48
-; CHECK-NEXT:    fmr f30, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r30, 48
-; CHECK-NEXT:    fmr f29, f1
+; CHECK-NEXT:    mr r27, r3
+; CHECK-NEXT:    clrldi r3, r28, 48
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtvsrd v31, r27
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f29
-; CHECK-NEXT:    mtvsrd v31, r3
-; CHECK-NEXT:    bl lrintf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f30
 ; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    clrldi r3, r29, 48
 ; CHECK-NEXT:    xxmrghd v31, vs0, v31
+; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    nop
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f31
-; CHECK-NEXT:    mtvsrd v30, r3
+; CHECK-NEXT:    mr r29, r3
+; CHECK-NEXT:    clrldi r3, r30, 48
+; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtvsrd v30, r29
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    li r3, 64
 ; CHECK-NEXT:    vmr v2, v31
-; CHECK-NEXT:    lfd f31, 136(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f30, 128(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f29, 120(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r30, 104(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r29, 96(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r30, 112(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r29, 104(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r28, 96(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r27, 88(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    ld r28, 88(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    xxmrghd v3, vs0, v30
 ; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    addi r1, r1, 144
+; CHECK-NEXT:    addi r1, r1, 128
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
 ; CHECK-NEXT:    blr
@@ -360,63 +282,55 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) nounwind {
 ; FAST-LABEL: lrint_v4f16:
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
-; FAST-NEXT:    stfd f28, -32(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f29, -24(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stdu r1, -64(r1)
-; FAST-NEXT:    fmr f29, f1
-; FAST-NEXT:    fmr f1, f4
-; FAST-NEXT:    std r0, 80(r1)
-; FAST-NEXT:    fmr f31, f3
-; FAST-NEXT:    fmr f30, f2
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
+; FAST-NEXT:    stdu r1, -112(r1)
+; FAST-NEXT:    li r7, 48
+; FAST-NEXT:    std r0, 128(r1)
 ; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    std r28, 80(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r29, 88(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r30, 96(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r30, r6
+; FAST-NEXT:    mr r29, r5
+; FAST-NEXT:    stxvd2x v30, r1, r7 # 16-byte Folded Spill
+; FAST-NEXT:    li r7, 64
+; FAST-NEXT:    mr r28, r4
+; FAST-NEXT:    stxvd2x v31, r1, r7 # 16-byte Folded Spill
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f28, f1
-; FAST-NEXT:    fmr f1, f31
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    clrldi r3, r28, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f31, f1
-; FAST-NEXT:    fmr f1, f30
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    clrldi r3, r29, 48
+; FAST-NEXT:    xxmrghd v31, vs0, v31
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f30, f1
-; FAST-NEXT:    fmr f1, f29
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v30, r3
+; FAST-NEXT:    clrldi r3, r30, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fctid f0, f30
-; FAST-NEXT:    fctid f2, f31
-; FAST-NEXT:    mffprd r3, f0
-; FAST-NEXT:    fctid f1, f1
-; FAST-NEXT:    mtfprd f0, r3
-; FAST-NEXT:    mffprd r3, f2
-; FAST-NEXT:    mtfprd f2, r3
-; FAST-NEXT:    mffprd r3, f1
-; FAST-NEXT:    mtfprd f1, r3
-; FAST-NEXT:    xxmrghd v2, vs0, vs1
-; FAST-NEXT:    fctid f0, f28
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    vmr v2, v31
+; FAST-NEXT:    ld r30, 96(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r29, 88(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r28, 80(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    mffprd r3, f0
 ; FAST-NEXT:    mtfprd f0, r3
-; FAST-NEXT:    xxmrghd v3, vs0, vs2
-; FAST-NEXT:    addi r1, r1, 64
+; FAST-NEXT:    li r3, 64
+; FAST-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 48
+; FAST-NEXT:    xxmrghd v3, vs0, v30
+; FAST-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    addi r1, r1, 112
 ; FAST-NEXT:    ld r0, 16(r1)
-; FAST-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    mtlr r0
-; FAST-NEXT:    lfd f29, -24(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f28, -32(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    blr
   %a = call <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half> %x)
   ret <4 x i64> %a
@@ -427,145 +341,86 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) nounwind {
 ; BE-LABEL: lrint_v8f16:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
-; BE-NEXT:    stdu r1, -304(r1)
-; BE-NEXT:    std r0, 320(r1)
-; BE-NEXT:    stfd f25, 248(r1) # 8-byte Folded Spill
-; BE-NEXT:    fmr f25, f1
-; BE-NEXT:    fmr f1, f2
-; BE-NEXT:    std r24, 184(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r25, 192(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r26, 200(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r27, 208(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r28, 216(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r29, 224(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r30, 232(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f26, 256(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f27, 264(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f28, 272(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f29, 280(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f30, 288(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f31, 296(r1) # 8-byte Folded Spill
-; BE-NEXT:    fmr f31, f8
-; BE-NEXT:    fmr f30, f7
-; BE-NEXT:    fmr f29, f6
-; BE-NEXT:    fmr f28, f5
-; BE-NEXT:    fmr f27, f4
-; BE-NEXT:    fmr f26, f3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f25
-; BE-NEXT:    mr r30, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f27
-; BE-NEXT:    mr r29, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f26
-; BE-NEXT:    mr r28, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f29
-; BE-NEXT:    mr r27, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f28
-; BE-NEXT:    mr r26, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f31
-; BE-NEXT:    mr r25, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f30
+; BE-NEXT:    stdu r1, -240(r1)
+; BE-NEXT:    std r0, 256(r1)
+; BE-NEXT:    std r24, 176(r1) # 8-byte Folded Spill
 ; BE-NEXT:    mr r24, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    clrldi r3, r4, 48
+; BE-NEXT:    std r25, 184(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r26, 192(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r27, 200(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r28, 208(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r29, 216(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r30, 224(r1) # 8-byte Folded Spill
+; BE-NEXT:    mr r29, r10
+; BE-NEXT:    mr r30, r9
+; BE-NEXT:    mr r27, r8
+; BE-NEXT:    mr r28, r7
+; BE-NEXT:    mr r26, r6
+; BE-NEXT:    mr r25, r5
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
+; BE-NEXT:    bl lrintf
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 120(r1)
 ; BE-NEXT:    clrldi r3, r24, 48
-; BE-NEXT:    fmr f31, f1
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r25, 48
-; BE-NEXT:    fmr f30, f1
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
+; BE-NEXT:    std r3, 112(r1)
 ; BE-NEXT:    clrldi r3, r26, 48
-; BE-NEXT:    fmr f29, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r27, 48
-; BE-NEXT:    fmr f28, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r28, 48
-; BE-NEXT:    fmr f27, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r29, 48
-; BE-NEXT:    fmr f26, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r30, 48
-; BE-NEXT:    fmr f25, f1
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f25
-; BE-NEXT:    std r3, 120(r1)
-; BE-NEXT:    bl lrintf
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f26
-; BE-NEXT:    std r3, 112(r1)
-; BE-NEXT:    bl lrintf
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f27
 ; BE-NEXT:    std r3, 136(r1)
+; BE-NEXT:    clrldi r3, r25, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f28
 ; BE-NEXT:    std r3, 128(r1)
+; BE-NEXT:    clrldi r3, r27, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f29
 ; BE-NEXT:    std r3, 152(r1)
+; BE-NEXT:    clrldi r3, r28, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f30
 ; BE-NEXT:    std r3, 144(r1)
+; BE-NEXT:    clrldi r3, r29, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f31
 ; BE-NEXT:    std r3, 168(r1)
+; BE-NEXT:    clrldi r3, r30, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
 ; BE-NEXT:    std r3, 160(r1)
 ; BE-NEXT:    addi r3, r1, 112
-; BE-NEXT:    ld r30, 232(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f31, 296(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f30, 288(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f29, 280(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r30, 224(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r29, 216(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r28, 208(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r27, 200(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r26, 192(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r25, 184(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r24, 176(r1) # 8-byte Folded Reload
 ; BE-NEXT:    lxvd2x v2, 0, r3
 ; BE-NEXT:    addi r3, r1, 128
-; BE-NEXT:    lfd f28, 272(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f27, 264(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f26, 256(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r29, 224(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r28, 216(r1) # 8-byte Folded Reload
 ; BE-NEXT:    lxvd2x v3, 0, r3
 ; BE-NEXT:    addi r3, r1, 144
-; BE-NEXT:    lfd f25, 248(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r27, 208(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r26, 200(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r25, 192(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r24, 184(r1) # 8-byte Folded Reload
 ; BE-NEXT:    lxvd2x v4, 0, r3
 ; BE-NEXT:    addi r3, r1, 160
 ; BE-NEXT:    lxvd2x v5, 0, r3
-; BE-NEXT:    addi r1, r1, 304
+; BE-NEXT:    addi r1, r1, 240
 ; BE-NEXT:    ld r0, 16(r1)
 ; BE-NEXT:    mtlr r0
 ; BE-NEXT:    blr
@@ -573,159 +428,107 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) nounwind {
 ; CHECK-LABEL: lrint_v8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    stdu r1, -240(r1)
-; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    std r0, 256(r1)
-; CHECK-NEXT:    std r24, 120(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r25, 128(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r26, 136(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r27, 144(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r28, 152(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r29, 160(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    std r30, 168(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f25, 184(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f25, f2
-; CHECK-NEXT:    stfd f26, 192(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f26, f3
-; CHECK-NEXT:    stfd f27, 200(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f27, f4
-; CHECK-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 80
-; CHECK-NEXT:    stfd f28, 208(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f28, f5
-; CHECK-NEXT:    stfd f29, 216(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f29, f6
-; CHECK-NEXT:    stfd f30, 224(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f30, f7
-; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 96
-; CHECK-NEXT:    stfd f31, 232(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f31, f8
-; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f25
-; CHECK-NEXT:    mr r30, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f26
-; CHECK-NEXT:    mr r29, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f27
-; CHECK-NEXT:    mr r28, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f28
-; CHECK-NEXT:    mr r27, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f29
-; CHECK-NEXT:    mr r26, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f30
-; CHECK-NEXT:    mr r25, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f31
-; CHECK-NEXT:    mr r24, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
+; CHECK-NEXT:    stdu r1, -192(r1)
+; CHECK-NEXT:    li r11, 48
+; CHECK-NEXT:    std r0, 208(r1)
 ; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    std r23, 120(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r24, 128(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r25, 136(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r25, r5
+; CHECK-NEXT:    mr r24, r4
+; CHECK-NEXT:    stxvd2x v28, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 64
+; CHECK-NEXT:    std r26, 144(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r26, r6
+; CHECK-NEXT:    std r27, 152(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r28, 160(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r28, r8
+; CHECK-NEXT:    mr r27, r7
+; CHECK-NEXT:    stxvd2x v29, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 80
+; CHECK-NEXT:    std r29, 168(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r29, r9
+; CHECK-NEXT:    std r30, 176(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r30, r10
+; CHECK-NEXT:    stxvd2x v30, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 96
+; CHECK-NEXT:    stxvd2x v31, r1, r11 # 16-byte Folded Spill
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mr r23, r3
 ; CHECK-NEXT:    clrldi r3, r24, 48
-; CHECK-NEXT:    fmr f31, f1
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtvsrd v31, r23
+; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    clrldi r3, r25, 48
-; CHECK-NEXT:    fmr f30, f1
+; CHECK-NEXT:    xxmrghd v31, vs0, v31
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mr r25, r3
 ; CHECK-NEXT:    clrldi r3, r26, 48
-; CHECK-NEXT:    fmr f29, f1
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r27, 48
-; CHECK-NEXT:    fmr f28, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    mtvsrd v30, r25
+; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r28, 48
-; CHECK-NEXT:    fmr f27, f1
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    clrldi r3, r27, 48
+; CHECK-NEXT:    xxmrghd v30, vs0, v30
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r29, 48
-; CHECK-NEXT:    fmr f26, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r30, 48
-; CHECK-NEXT:    fmr f25, f1
+; CHECK-NEXT:    mr r27, r3
+; CHECK-NEXT:    clrldi r3, r28, 48
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtvsrd v29, r27
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f25
-; CHECK-NEXT:    mtvsrd v31, r3
-; CHECK-NEXT:    bl lrintf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f26
 ; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v31, vs0, v31
-; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    clrldi r3, r29, 48
+; CHECK-NEXT:    xxmrghd v29, vs0, v29
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f27
-; CHECK-NEXT:    mtvsrd v30, r3
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f28
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v30, vs0, v30
-; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    mr r29, r3
+; CHECK-NEXT:    clrldi r3, r30, 48
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f29
-; CHECK-NEXT:    mtvsrd v29, r3
-; CHECK-NEXT:    bl lrintf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f30
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v29, vs0, v29
-; CHECK-NEXT:    bl lrintf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f31
-; CHECK-NEXT:    mtvsrd v28, r3
+; CHECK-NEXT:    mtvsrd v28, r29
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    li r3, 96
 ; CHECK-NEXT:    vmr v2, v31
-; CHECK-NEXT:    lfd f31, 232(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r30, 176(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    vmr v3, v30
 ; CHECK-NEXT:    vmr v4, v29
-; CHECK-NEXT:    lfd f30, 224(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f29, 216(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r29, 168(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r28, 160(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 80
-; CHECK-NEXT:    lfd f28, 208(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f27, 200(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f26, 192(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f25, 184(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r30, 168(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r29, 160(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r27, 152(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r26, 144(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r25, 136(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r24, 128(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r23, 120(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    ld r28, 152(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r27, 144(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    xxmrghd v5, vs0, v28
-; CHECK-NEXT:    ld r26, 136(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r25, 128(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r24, 120(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    xxmrghd v5, vs0, v28
 ; CHECK-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    addi r1, r1, 240
+; CHECK-NEXT:    addi r1, r1, 192
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
 ; CHECK-NEXT:    blr
@@ -733,117 +536,103 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) nounwind {
 ; FAST-LABEL: lrint_v8f16:
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
-; FAST-NEXT:    stfd f24, -64(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f25, -56(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f26, -48(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f27, -40(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f28, -32(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f29, -24(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stdu r1, -96(r1)
-; FAST-NEXT:    fmr f24, f1
-; FAST-NEXT:    fmr f1, f8
-; FAST-NEXT:    std r0, 112(r1)
-; FAST-NEXT:    fmr f30, f7
-; FAST-NEXT:    fmr f29, f6
-; FAST-NEXT:    fmr f28, f5
-; FAST-NEXT:    fmr f27, f4
-; FAST-NEXT:    fmr f26, f3
-; FAST-NEXT:    fmr f25, f2
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
+; FAST-NEXT:    stdu r1, -176(r1)
+; FAST-NEXT:    li r11, 48
+; FAST-NEXT:    std r0, 192(r1)
 ; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    std r24, 112(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r25, 120(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r26, 128(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r26, r6
+; FAST-NEXT:    mr r25, r5
+; FAST-NEXT:    stxvd2x v28, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 64
+; FAST-NEXT:    std r27, 136(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r27, r7
+; FAST-NEXT:    std r28, 144(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r29, 152(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r29, r9
+; FAST-NEXT:    mr r28, r8
+; FAST-NEXT:    stxvd2x v29, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 80
+; FAST-NEXT:    std r30, 160(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r30, r10
+; FAST-NEXT:    mr r24, r4
+; FAST-NEXT:    stxvd2x v30, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 96
+; FAST-NEXT:    stxvd2x v31, r1, r11 # 16-byte Folded Spill
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f31, f1
-; FAST-NEXT:    fmr f1, f30
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    clrldi r3, r24, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f30, f1
-; FAST-NEXT:    fmr f1, f29
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    clrldi r3, r25, 48
+; FAST-NEXT:    xxmrghd v31, vs0, v31
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f29, f1
-; FAST-NEXT:    fmr f1, f28
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v30, r3
+; FAST-NEXT:    clrldi r3, r26, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f28, f1
-; FAST-NEXT:    fmr f1, f27
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    clrldi r3, r27, 48
+; FAST-NEXT:    xxmrghd v30, vs0, v30
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f27, f1
-; FAST-NEXT:    fmr f1, f26
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v29, r3
+; FAST-NEXT:    clrldi r3, r28, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f26, f1
-; FAST-NEXT:    fmr f1, f25
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    clrldi r3, r29, 48
+; FAST-NEXT:    xxmrghd v29, vs0, v29
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f25, f1
-; FAST-NEXT:    fmr f1, f24
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v28, r3
+; FAST-NEXT:    clrldi r3, r30, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fctid f0, f25
-; FAST-NEXT:    fctid f2, f26
-; FAST-NEXT:    mffprd r3, f0
-; FAST-NEXT:    fctid f3, f27
-; FAST-NEXT:    fctid f4, f28
-; FAST-NEXT:    fctid f5, f29
-; FAST-NEXT:    fctid f6, f30
-; FAST-NEXT:    fctid f1, f1
-; FAST-NEXT:    mtfprd f0, r3
-; FAST-NEXT:    mffprd r3, f2
-; FAST-NEXT:    mtfprd f2, r3
-; FAST-NEXT:    mffprd r3, f3
-; FAST-NEXT:    mtfprd f3, r3
-; FAST-NEXT:    mffprd r3, f4
-; FAST-NEXT:    mtfprd f4, r3
-; FAST-NEXT:    mffprd r3, f5
-; FAST-NEXT:    mtfprd f5, r3
-; FAST-NEXT:    mffprd r3, f6
-; FAST-NEXT:    mtfprd f6, r3
-; FAST-NEXT:    mffprd r3, f1
-; FAST-NEXT:    mtfprd f1, r3
-; FAST-NEXT:    xxmrghd v3, vs3, vs2
-; FAST-NEXT:    xxmrghd v4, vs5, vs4
-; FAST-NEXT:    xxmrghd v2, vs0, vs1
-; FAST-NEXT:    fctid f0, f31
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    vmr v2, v31
+; FAST-NEXT:    ld r30, 160(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r29, 152(r1) # 8-byte Folded Reload
+; FAST-NEXT:    vmr v3, v30
+; FAST-NEXT:    vmr v4, v29
+; FAST-NEXT:    ld r28, 144(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r27, 136(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r26, 128(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r25, 120(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    ld r24, 112(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    mtfprd f0, r3
-; FAST-NEXT:    xxmrghd v5, vs0, vs6
-; FAST-NEXT:    addi r1, r1, 96
+; FAST-NEXT:    li r3, 96
+; FAST-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 80
+; FAST-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 64
+; FAST-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 48
+; FAST-NEXT:    xxmrghd v5, vs0, v28
+; FAST-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    addi r1, r1, 176
 ; FAST-NEXT:    ld r0, 16(r1)
-; FAST-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    mtlr r0
-; FAST-NEXT:    lfd f29, -24(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f28, -32(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f27, -40(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f26, -48(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f25, -56(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f24, -64(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    blr
   %a = call <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half> %x)
   ret <8 x i64> %a
@@ -854,286 +643,166 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) nounwind {
 ; BE-LABEL: lrint_v16i64_v16f16:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
-; BE-NEXT:    stdu r1, -496(r1)
-; BE-NEXT:    std r0, 512(r1)
-; BE-NEXT:    stfd f20, 400(r1) # 8-byte Folded Spill
-; BE-NEXT:    fmr f20, f1
-; BE-NEXT:    fmr f1, f2
-; BE-NEXT:    std r16, 248(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r17, 256(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r18, 264(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r19, 272(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r20, 280(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r21, 288(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r22, 296(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r23, 304(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r24, 312(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r25, 320(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r26, 328(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r27, 336(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r28, 344(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r29, 352(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r30, 360(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f17, 376(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f18, 384(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f19, 392(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f21, 408(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f22, 416(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f23, 424(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f24, 432(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f25, 440(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f26, 448(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f27, 456(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f28, 464(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f29, 472(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f30, 480(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f31, 488(r1) # 8-byte Folded Spill
-; BE-NEXT:    fmr f31, f13
-; BE-NEXT:    fmr f29, f12
-; BE-NEXT:    fmr f30, f11
-; BE-NEXT:    fmr f28, f10
-; BE-NEXT:    fmr f27, f9
-; BE-NEXT:    fmr f26, f8
-; BE-NEXT:    fmr f25, f7
-; BE-NEXT:    fmr f24, f6
-; BE-NEXT:    fmr f23, f5
-; BE-NEXT:    fmr f22, f4
-; BE-NEXT:    fmr f21, f3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f20
-; BE-NEXT:    mr r30, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f22
-; BE-NEXT:    mr r29, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f21
-; BE-NEXT:    mr r28, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f24
-; BE-NEXT:    mr r27, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f23
-; BE-NEXT:    mr r26, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f26
-; BE-NEXT:    mr r25, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f25
+; BE-NEXT:    stdu r1, -368(r1)
+; BE-NEXT:    std r0, 384(r1)
+; BE-NEXT:    std r24, 304(r1) # 8-byte Folded Spill
 ; BE-NEXT:    mr r24, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f28
-; BE-NEXT:    mr r23, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f27
-; BE-NEXT:    mr r22, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f29
-; BE-NEXT:    mr r21, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f30
-; BE-NEXT:    mr r20, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 652(r1)
-; BE-NEXT:    mr r19, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f31
-; BE-NEXT:    mr r18, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 668(r1)
-; BE-NEXT:    mr r17, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 660(r1)
-; BE-NEXT:    mr r16, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r3, 48
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r16, 48
-; BE-NEXT:    fmr f31, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r17, 48
-; BE-NEXT:    fmr f30, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r18, 48
-; BE-NEXT:    fmr f29, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r19, 48
-; BE-NEXT:    fmr f28, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r20, 48
-; BE-NEXT:    fmr f27, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r21, 48
-; BE-NEXT:    fmr f26, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r22, 48
-; BE-NEXT:    fmr f25, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r23, 48
-; BE-NEXT:    fmr f24, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r24, 48
-; BE-NEXT:    fmr f23, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r25, 48
-; BE-NEXT:    fmr f22, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r26, 48
-; BE-NEXT:    fmr f21, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r27, 48
-; BE-NEXT:    fmr f20, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r28, 48
-; BE-NEXT:    fmr f19, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r29, 48
-; BE-NEXT:    fmr f18, f1
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r30, 48
-; BE-NEXT:    fmr f17, f1
+; BE-NEXT:    lhz r3, 494(r1)
+; BE-NEXT:    std r16, 240(r1) # 8-byte Folded Spill
+; BE-NEXT:    lhz r16, 486(r1)
+; BE-NEXT:    std r17, 248(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r18, 256(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r19, 264(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r20, 272(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r21, 280(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r22, 288(r1) # 8-byte Folded Spill
+; BE-NEXT:    lhz r22, 534(r1)
+; BE-NEXT:    lhz r21, 542(r1)
+; BE-NEXT:    lhz r20, 518(r1)
+; BE-NEXT:    lhz r19, 526(r1)
+; BE-NEXT:    lhz r18, 502(r1)
+; BE-NEXT:    lhz r17, 510(r1)
+; BE-NEXT:    std r23, 296(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r25, 312(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r26, 320(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r27, 328(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r28, 336(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r29, 344(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r30, 352(r1) # 8-byte Folded Spill
+; BE-NEXT:    mr r29, r10
+; BE-NEXT:    mr r30, r9
+; BE-NEXT:    mr r27, r8
+; BE-NEXT:    mr r28, r7
+; BE-NEXT:    mr r25, r6
+; BE-NEXT:    mr r26, r5
+; BE-NEXT:    mr r23, r4
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f17
 ; BE-NEXT:    std r3, 120(r1)
+; BE-NEXT:    mr r3, r16
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f18
 ; BE-NEXT:    std r3, 112(r1)
+; BE-NEXT:    mr r3, r17
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f19
 ; BE-NEXT:    std r3, 136(r1)
+; BE-NEXT:    mr r3, r18
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f20
 ; BE-NEXT:    std r3, 128(r1)
+; BE-NEXT:    mr r3, r19
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f21
 ; BE-NEXT:    std r3, 152(r1)
+; BE-NEXT:    mr r3, r20
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f22
 ; BE-NEXT:    std r3, 144(r1)
+; BE-NEXT:    mr r3, r21
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f23
 ; BE-NEXT:    std r3, 168(r1)
+; BE-NEXT:    mr r3, r22
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f24
 ; BE-NEXT:    std r3, 160(r1)
+; BE-NEXT:    clrldi r3, r23, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f25
 ; BE-NEXT:    std r3, 184(r1)
+; BE-NEXT:    clrldi r3, r24, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f26
 ; BE-NEXT:    std r3, 176(r1)
+; BE-NEXT:    clrldi r3, r25, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f27
 ; BE-NEXT:    std r3, 200(r1)
+; BE-NEXT:    clrldi r3, r26, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f28
 ; BE-NEXT:    std r3, 192(r1)
+; BE-NEXT:    clrldi r3, r27, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f29
 ; BE-NEXT:    std r3, 216(r1)
+; BE-NEXT:    clrldi r3, r28, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f30
 ; BE-NEXT:    std r3, 208(r1)
+; BE-NEXT:    clrldi r3, r29, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f31
 ; BE-NEXT:    std r3, 232(r1)
+; BE-NEXT:    clrldi r3, r30, 48
+; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
 ; BE-NEXT:    std r3, 224(r1)
 ; BE-NEXT:    addi r3, r1, 112
-; BE-NEXT:    ld r30, 360(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f31, 488(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f30, 480(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f29, 472(r1) # 8-byte Folded Reload
-; BE-NEXT:    lxvd2x v2, 0, r3
+; BE-NEXT:    ld r30, 352(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r29, 344(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r28, 336(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r27, 328(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r26, 320(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r25, 312(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r24, 304(r1) # 8-byte Folded Reload
+; BE-NEXT:    lxvd2x v6, 0, r3
 ; BE-NEXT:    addi r3, r1, 128
-; BE-NEXT:    lfd f28, 464(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f27, 456(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f26, 448(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r29, 352(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r28, 344(r1) # 8-byte Folded Reload
-; BE-NEXT:    lxvd2x v3, 0, r3
+; BE-NEXT:    ld r23, 296(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r22, 288(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r21, 280(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r20, 272(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r19, 264(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r18, 256(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r17, 248(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r16, 240(r1) # 8-byte Folded Reload
+; BE-NEXT:    lxvd2x v7, 0, r3
 ; BE-NEXT:    addi r3, r1, 144
-; BE-NEXT:    lfd f25, 440(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f24, 432(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f23, 424(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r27, 336(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r26, 328(r1) # 8-byte Folded Reload
-; BE-NEXT:    lxvd2x v4, 0, r3
+; BE-NEXT:    lxvd2x v8, 0, r3
 ; BE-NEXT:    addi r3, r1, 160
-; BE-NEXT:    lfd f22, 416(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f21, 408(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f20, 400(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r25, 320(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r24, 312(r1) # 8-byte Folded Reload
-; BE-NEXT:    lxvd2x v5, 0, r3
+; BE-NEXT:    lxvd2x v9, 0, r3
 ; BE-NEXT:    addi r3, r1, 176
-; BE-NEXT:    lfd f19, 392(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f18, 384(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f17, 376(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r23, 304(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r22, 296(r1) # 8-byte Folded Reload
-; BE-NEXT:    lxvd2x v6, 0, r3
+; BE-NEXT:    lxvd2x v2, 0, r3
 ; BE-NEXT:    addi r3, r1, 192
-; BE-NEXT:    ld r21, 288(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r20, 280(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r19, 272(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r18, 264(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r17, 256(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r16, 248(r1) # 8-byte Folded Reload
-; BE-NEXT:    lxvd2x v7, 0, r3
+; BE-NEXT:    lxvd2x v3, 0, r3
 ; BE-NEXT:    addi r3, r1, 208
-; BE-NEXT:    lxvd2x v8, 0, r3
+; BE-NEXT:    lxvd2x v4, 0, r3
 ; BE-NEXT:    addi r3, r1, 224
-; BE-NEXT:    lxvd2x v9, 0, r3
-; BE-NEXT:    addi r1, r1, 496
+; BE-NEXT:    lxvd2x v5, 0, r3
+; BE-NEXT:    addi r1, r1, 368
 ; BE-NEXT:    ld r0, 16(r1)
 ; BE-NEXT:    mtlr r0
 ; BE-NEXT:    blr
@@ -1141,316 +810,207 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) nounwind {
 ; CHECK-LABEL: lrint_v16i64_v16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    stdu r1, -432(r1)
-; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    std r0, 448(r1)
-; CHECK-NEXT:    std r16, 184(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r17, 192(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r18, 200(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r19, 208(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r20, 216(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r21, 224(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxvd2x v24, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    std r22, 232(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r23, 240(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r24, 248(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r25, 256(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r26, 264(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r27, 272(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxvd2x v25, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 80
-; CHECK-NEXT:    std r28, 280(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r29, 288(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r30, 296(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f17, 312(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f18, 320(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f19, 328(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxvd2x v26, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 96
-; CHECK-NEXT:    stfd f20, 336(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f20, f2
-; CHECK-NEXT:    stfd f21, 344(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f21, f3
-; CHECK-NEXT:    stfd f22, 352(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f22, f4
-; CHECK-NEXT:    stxvd2x v27, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 112
-; CHECK-NEXT:    stfd f23, 360(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f23, f5
-; CHECK-NEXT:    stfd f24, 368(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f24, f6
-; CHECK-NEXT:    stfd f25, 376(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f25, f7
-; CHECK-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 128
-; CHECK-NEXT:    stfd f26, 384(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f26, f8
-; CHECK-NEXT:    stfd f27, 392(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f27, f9
-; CHECK-NEXT:    stfd f28, 400(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f28, f10
-; CHECK-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 144
-; CHECK-NEXT:    stfd f29, 408(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f29, f11
-; CHECK-NEXT:    stfd f30, 416(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f30, f12
-; CHECK-NEXT:    stfd f31, 424(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f31, f13
-; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 160
-; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f20
-; CHECK-NEXT:    mr r30, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f21
-; CHECK-NEXT:    mr r29, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f22
-; CHECK-NEXT:    mr r28, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f23
-; CHECK-NEXT:    mr r27, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f24
-; CHECK-NEXT:    mr r26, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f25
-; CHECK-NEXT:    mr r25, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f26
-; CHECK-NEXT:    mr r24, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f27
+; CHECK-NEXT:    stdu r1, -320(r1)
+; CHECK-NEXT:    li r11, 48
+; CHECK-NEXT:    std r0, 336(r1)
+; CHECK-NEXT:    std r23, 248(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    mr r23, r3
-; CHECK-NEXT:    bl __truncsfhf2
+; CHECK-NEXT:    lhz r3, 416(r1)
+; CHECK-NEXT:    std r16, 192(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r17, 200(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r17, 432(r1)
+; CHECK-NEXT:    stxvd2x v24, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 64
+; CHECK-NEXT:    std r18, 208(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r18, 440(r1)
+; CHECK-NEXT:    std r19, 216(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r19, 448(r1)
+; CHECK-NEXT:    lhz r16, 424(r1)
+; CHECK-NEXT:    stxvd2x v25, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 80
+; CHECK-NEXT:    std r20, 224(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r20, 456(r1)
+; CHECK-NEXT:    stxvd2x v26, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 96
+; CHECK-NEXT:    std r21, 232(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r22, 240(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r22, 472(r1)
+; CHECK-NEXT:    lhz r21, 464(r1)
+; CHECK-NEXT:    stxvd2x v27, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 112
+; CHECK-NEXT:    std r15, 184(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r24, 256(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r25, 264(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r25, r5
+; CHECK-NEXT:    mr r24, r4
+; CHECK-NEXT:    stxvd2x v28, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 128
+; CHECK-NEXT:    std r26, 272(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r27, 280(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r28, 288(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r29, 296(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r29, r9
+; CHECK-NEXT:    mr r28, r8
+; CHECK-NEXT:    mr r27, r7
+; CHECK-NEXT:    mr r26, r6
+; CHECK-NEXT:    stxvd2x v29, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 144
+; CHECK-NEXT:    std r30, 304(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r30, r10
+; CHECK-NEXT:    stxvd2x v30, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 160
+; CHECK-NEXT:    stxvd2x v31, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f28
-; CHECK-NEXT:    mr r22, r3
-; CHECK-NEXT:    bl __truncsfhf2
+; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f29
-; CHECK-NEXT:    mr r21, r3
-; CHECK-NEXT:    bl __truncsfhf2
+; CHECK-NEXT:    mr r15, r3
+; CHECK-NEXT:    mr r3, r16
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f30
-; CHECK-NEXT:    mr r20, r3
-; CHECK-NEXT:    bl __truncsfhf2
+; CHECK-NEXT:    mtvsrd v31, r15
+; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f31
-; CHECK-NEXT:    mr r19, r3
-; CHECK-NEXT:    bl __truncsfhf2
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    mr r3, r17
+; CHECK-NEXT:    xxmrghd v31, vs0, v31
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 568(r1)
-; CHECK-NEXT:    mr r18, r3
-; CHECK-NEXT:    bl __truncsfhf2
+; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 576(r1)
 ; CHECK-NEXT:    mr r17, r3
-; CHECK-NEXT:    bl __truncsfhf2
+; CHECK-NEXT:    mr r3, r18
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 584(r1)
-; CHECK-NEXT:    mr r16, r3
-; CHECK-NEXT:    bl __truncsfhf2
+; CHECK-NEXT:    mtvsrd v30, r17
+; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    mr r3, r19
+; CHECK-NEXT:    xxmrghd v30, vs0, v30
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r16, 48
-; CHECK-NEXT:    fmr f31, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r17, 48
-; CHECK-NEXT:    fmr f30, f1
+; CHECK-NEXT:    mr r19, r3
+; CHECK-NEXT:    mr r3, r20
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r18, 48
-; CHECK-NEXT:    fmr f29, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    mtvsrd v29, r19
+; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r19, 48
-; CHECK-NEXT:    fmr f28, f1
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    mr r3, r21
+; CHECK-NEXT:    xxmrghd v29, vs0, v29
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r20, 48
-; CHECK-NEXT:    fmr f27, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r21, 48
-; CHECK-NEXT:    fmr f26, f1
+; CHECK-NEXT:    mr r21, r3
+; CHECK-NEXT:    mr r3, r22
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r22, 48
-; CHECK-NEXT:    fmr f25, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    mtvsrd v28, r21
+; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    clrldi r3, r23, 48
-; CHECK-NEXT:    fmr f24, f1
+; CHECK-NEXT:    xxmrghd v28, vs0, v28
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mr r23, r3
 ; CHECK-NEXT:    clrldi r3, r24, 48
-; CHECK-NEXT:    fmr f23, f1
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtvsrd v27, r23
+; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    clrldi r3, r25, 48
-; CHECK-NEXT:    fmr f22, f1
+; CHECK-NEXT:    xxmrghd v27, vs0, v27
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r26, 48
-; CHECK-NEXT:    fmr f21, f1
-; CHECK-NEXT:    bl __extendhfsf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r27, 48
-; CHECK-NEXT:    fmr f20, f1
-; CHECK-NEXT:    bl __extendhfsf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r28, 48
-; CHECK-NEXT:    fmr f19, f1
-; CHECK-NEXT:    bl __extendhfsf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r29, 48
-; CHECK-NEXT:    fmr f18, f1
-; CHECK-NEXT:    bl __extendhfsf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r30, 48
-; CHECK-NEXT:    fmr f17, f1
-; CHECK-NEXT:    bl __extendhfsf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    bl lrintf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f17
-; CHECK-NEXT:    mtvsrd v31, r3
-; CHECK-NEXT:    bl lrintf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f18
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v31, vs0, v31
-; CHECK-NEXT:    bl lrintf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f19
-; CHECK-NEXT:    mtvsrd v30, r3
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f20
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v30, vs0, v30
-; CHECK-NEXT:    bl lrintf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f21
-; CHECK-NEXT:    mtvsrd v29, r3
-; CHECK-NEXT:    bl lrintf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f22
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v29, vs0, v29
-; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    mr r25, r3
+; CHECK-NEXT:    clrldi r3, r26, 48
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f23
-; CHECK-NEXT:    mtvsrd v28, r3
+; CHECK-NEXT:    mtvsrd v26, r25
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f24
 ; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v28, vs0, v28
-; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    clrldi r3, r27, 48
+; CHECK-NEXT:    xxmrghd v26, vs0, v26
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f25
-; CHECK-NEXT:    mtvsrd v27, r3
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f26
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v27, vs0, v27
-; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    mr r27, r3
+; CHECK-NEXT:    clrldi r3, r28, 48
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f27
-; CHECK-NEXT:    mtvsrd v26, r3
+; CHECK-NEXT:    mtvsrd v25, r27
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f28
 ; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v26, vs0, v26
-; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    clrldi r3, r29, 48
+; CHECK-NEXT:    xxmrghd v25, vs0, v25
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f29
-; CHECK-NEXT:    mtvsrd v25, r3
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f30
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v25, vs0, v25
-; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    mr r29, r3
+; CHECK-NEXT:    clrldi r3, r30, 48
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f31
-; CHECK-NEXT:    mtvsrd v24, r3
+; CHECK-NEXT:    mtvsrd v24, r29
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    li r3, 160
-; CHECK-NEXT:    vmr v2, v31
-; CHECK-NEXT:    lfd f31, 424(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    vmr v3, v30
-; CHECK-NEXT:    vmr v4, v29
-; CHECK-NEXT:    lfd f30, 416(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f29, 408(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    vmr v6, v31
+; CHECK-NEXT:    ld r30, 304(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    vmr v7, v30
+; CHECK-NEXT:    vmr v8, v29
+; CHECK-NEXT:    ld r29, 296(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r28, 288(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 144
-; CHECK-NEXT:    vmr v5, v28
-; CHECK-NEXT:    vmr v6, v27
-; CHECK-NEXT:    vmr v7, v26
-; CHECK-NEXT:    vmr v8, v25
-; CHECK-NEXT:    lfd f28, 400(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f27, 392(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    vmr v9, v28
+; CHECK-NEXT:    vmr v2, v27
+; CHECK-NEXT:    vmr v3, v26
+; CHECK-NEXT:    vmr v4, v25
+; CHECK-NEXT:    ld r27, 280(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r26, 272(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 128
-; CHECK-NEXT:    lfd f26, 384(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f25, 376(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    xxmrghd v9, vs0, v24
-; CHECK-NEXT:    lfd f24, 368(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f23, 360(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f22, 352(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r25, 264(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r24, 256(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v5, vs0, v24
+; CHECK-NEXT:    ld r23, 248(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r22, 240(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r21, 232(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 112
-; CHECK-NEXT:    lfd f21, 344(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r30, 296(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f20, 336(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f19, 328(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r29, 288(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r28, 280(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r20, 224(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r19, 216(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r18, 208(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r17, 200(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r16, 192(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r15, 184(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 96
-; CHECK-NEXT:    lfd f18, 320(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r27, 272(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f17, 312(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r26, 264(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r25, 256(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r24, 248(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 80
-; CHECK-NEXT:    ld r23, 240(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r22, 232(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r21, 224(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r20, 216(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r19, 208(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r18, 200(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    ld r17, 192(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r16, 184(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
 ; CHECK-NEXT:    lxvd2x v24, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    addi r1, r1, 432
+; CHECK-NEXT:    addi r1, r1, 320
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
 ; CHECK-NEXT:    blr
@@ -1458,223 +1018,199 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) nounwind {
 ; FAST-LABEL: lrint_v16i64_v16f16:
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
-; FAST-NEXT:    stfd f16, -128(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f17, -120(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f18, -112(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f19, -104(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f20, -96(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f21, -88(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f22, -80(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f23, -72(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f24, -64(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f25, -56(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f26, -48(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f27, -40(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f28, -32(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f29, -24(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stdu r1, -160(r1)
-; FAST-NEXT:    fmr f26, f1
-; FAST-NEXT:    lfs f1, 312(r1)
-; FAST-NEXT:    std r0, 176(r1)
-; FAST-NEXT:    fmr f28, f13
-; FAST-NEXT:    fmr f27, f12
-; FAST-NEXT:    fmr f24, f11
-; FAST-NEXT:    fmr f21, f10
-; FAST-NEXT:    fmr f19, f9
-; FAST-NEXT:    fmr f18, f8
-; FAST-NEXT:    fmr f17, f7
-; FAST-NEXT:    fmr f16, f6
-; FAST-NEXT:    fmr f20, f5
-; FAST-NEXT:    fmr f22, f4
-; FAST-NEXT:    fmr f23, f3
-; FAST-NEXT:    fmr f25, f2
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    stdu r1, -304(r1)
+; FAST-NEXT:    li r11, 48
+; FAST-NEXT:    std r0, 320(r1)
+; FAST-NEXT:    std r23, 232(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r23, r3
+; FAST-NEXT:    lhz r3, 400(r1)
+; FAST-NEXT:    std r16, 176(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r17, 184(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r17, 416(r1)
+; FAST-NEXT:    stxvd2x v24, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 64
+; FAST-NEXT:    std r18, 192(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r18, 424(r1)
+; FAST-NEXT:    std r19, 200(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r19, 432(r1)
+; FAST-NEXT:    lhz r16, 408(r1)
+; FAST-NEXT:    stxvd2x v25, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 80
+; FAST-NEXT:    std r20, 208(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r20, 440(r1)
+; FAST-NEXT:    stxvd2x v26, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 96
+; FAST-NEXT:    std r21, 216(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r22, 224(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r22, 456(r1)
+; FAST-NEXT:    lhz r21, 448(r1)
+; FAST-NEXT:    stxvd2x v27, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 112
+; FAST-NEXT:    std r24, 240(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r25, 248(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r26, 256(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r26, r6
+; FAST-NEXT:    mr r25, r5
+; FAST-NEXT:    mr r24, r4
+; FAST-NEXT:    stxvd2x v28, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 128
+; FAST-NEXT:    std r27, 264(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r28, 272(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r29, 280(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r30, 288(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r30, r10
+; FAST-NEXT:    mr r29, r9
+; FAST-NEXT:    mr r28, r8
+; FAST-NEXT:    mr r27, r7
+; FAST-NEXT:    stxvd2x v29, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 144
+; FAST-NEXT:    stxvd2x v30, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 160
+; FAST-NEXT:    stxvd2x v31, r1, r11 # 16-byte Folded Spill
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f31, f1
-; FAST-NEXT:    lfs f1, 304(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    mr r3, r16
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f30, f1
-; FAST-NEXT:    lfs f1, 296(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    mr r3, r17
+; FAST-NEXT:    xxmrghd v31, vs0, v31
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f29, f1
-; FAST-NEXT:    fmr f1, f28
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v30, r3
+; FAST-NEXT:    mr r3, r18
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f28, f1
-; FAST-NEXT:    fmr f1, f27
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    mr r3, r19
+; FAST-NEXT:    xxmrghd v30, vs0, v30
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f27, f1
-; FAST-NEXT:    fmr f1, f24
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v29, r3
+; FAST-NEXT:    mr r3, r20
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f24, f1
-; FAST-NEXT:    fmr f1, f21
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    mr r3, r21
+; FAST-NEXT:    xxmrghd v29, vs0, v29
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f21, f1
-; FAST-NEXT:    fmr f1, f19
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v28, r3
+; FAST-NEXT:    mr r3, r22
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f19, f1
-; FAST-NEXT:    fmr f1, f18
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    clrldi r3, r23, 48
+; FAST-NEXT:    xxmrghd v28, vs0, v28
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f18, f1
-; FAST-NEXT:    fmr f1, f17
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v27, r3
+; FAST-NEXT:    clrldi r3, r24, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f17, f1
-; FAST-NEXT:    fmr f1, f16
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    clrldi r3, r25, 48
+; FAST-NEXT:    xxmrghd v27, vs0, v27
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f16, f1
-; FAST-NEXT:    fmr f1, f20
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v26, r3
+; FAST-NEXT:    clrldi r3, r26, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f20, f1
-; FAST-NEXT:    fmr f1, f22
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    clrldi r3, r27, 48
+; FAST-NEXT:    xxmrghd v26, vs0, v26
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f22, f1
-; FAST-NEXT:    fmr f1, f23
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v25, r3
+; FAST-NEXT:    clrldi r3, r28, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f23, f1
-; FAST-NEXT:    fmr f1, f25
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    clrldi r3, r29, 48
+; FAST-NEXT:    xxmrghd v25, vs0, v25
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f25, f1
-; FAST-NEXT:    fmr f1, f26
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v24, r3
+; FAST-NEXT:    clrldi r3, r30, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fctid f0, f25
-; FAST-NEXT:    fctid f2, f23
-; FAST-NEXT:    mffprd r3, f0
-; FAST-NEXT:    fctid f3, f22
-; FAST-NEXT:    fctid f4, f20
-; FAST-NEXT:    fctid f5, f16
-; FAST-NEXT:    fctid f6, f17
-; FAST-NEXT:    fctid f7, f18
-; FAST-NEXT:    fctid f8, f19
-; FAST-NEXT:    fctid f9, f21
-; FAST-NEXT:    fctid f10, f24
-; FAST-NEXT:    fctid f1, f1
-; FAST-NEXT:    mtfprd f0, r3
-; FAST-NEXT:    mffprd r3, f2
-; FAST-NEXT:    mtfprd f2, r3
-; FAST-NEXT:    mffprd r3, f3
-; FAST-NEXT:    mtfprd f3, r3
-; FAST-NEXT:    mffprd r3, f4
-; FAST-NEXT:    mtfprd f4, r3
-; FAST-NEXT:    mffprd r3, f5
-; FAST-NEXT:    mtfprd f5, r3
-; FAST-NEXT:    mffprd r3, f6
-; FAST-NEXT:    mtfprd f6, r3
-; FAST-NEXT:    mffprd r3, f7
-; FAST-NEXT:    mtfprd f7, r3
-; FAST-NEXT:    mffprd r3, f8
-; FAST-NEXT:    mtfprd f8, r3
-; FAST-NEXT:    mffprd r3, f9
-; FAST-NEXT:    mtfprd f9, r3
-; FAST-NEXT:    mffprd r3, f10
-; FAST-NEXT:    mtfprd f10, r3
-; FAST-NEXT:    mffprd r3, f1
-; FAST-NEXT:    mtfprd f1, r3
-; FAST-NEXT:    xxmrghd v3, vs3, vs2
-; FAST-NEXT:    xxmrghd v4, vs5, vs4
-; FAST-NEXT:    xxmrghd v5, vs7, vs6
-; FAST-NEXT:    xxmrghd v6, vs9, vs8
-; FAST-NEXT:    xxmrghd v2, vs0, vs1
-; FAST-NEXT:    fctid f0, f27
-; FAST-NEXT:    fctid f1, f29
-; FAST-NEXT:    mffprd r3, f0
-; FAST-NEXT:    mtfprd f0, r3
-; FAST-NEXT:    xxmrghd v7, vs0, vs10
-; FAST-NEXT:    fctid f0, f28
-; FAST-NEXT:    mffprd r3, f0
-; FAST-NEXT:    mtfprd f0, r3
-; FAST-NEXT:    mffprd r3, f1
-; FAST-NEXT:    mtfprd f1, r3
-; FAST-NEXT:    xxmrghd v8, vs1, vs0
-; FAST-NEXT:    fctid f0, f30
-; FAST-NEXT:    fctid f1, f31
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    vmr v6, v31
+; FAST-NEXT:    ld r30, 288(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r29, 280(r1) # 8-byte Folded Reload
+; FAST-NEXT:    vmr v7, v30
+; FAST-NEXT:    vmr v8, v29
+; FAST-NEXT:    ld r28, 272(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r27, 264(r1) # 8-byte Folded Reload
+; FAST-NEXT:    vmr v9, v28
+; FAST-NEXT:    vmr v2, v27
+; FAST-NEXT:    ld r26, 256(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r25, 248(r1) # 8-byte Folded Reload
+; FAST-NEXT:    vmr v3, v26
+; FAST-NEXT:    vmr v4, v25
+; FAST-NEXT:    ld r24, 240(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r23, 232(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r22, 224(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r21, 216(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    ld r20, 208(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r19, 200(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r18, 192(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r17, 184(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    mtfprd f0, r3
-; FAST-NEXT:    mffprd r3, f1
-; FAST-NEXT:    mtfprd f1, r3
-; FAST-NEXT:    xxmrghd v9, vs1, vs0
-; FAST-NEXT:    addi r1, r1, 160
+; FAST-NEXT:    li r3, 160
+; FAST-NEXT:    ld r16, 176(r1) # 8-byte Folded Reload
+; FAST-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 144
+; FAST-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 128
+; FAST-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 112
+; FAST-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 96
+; FAST-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 80
+; FAST-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 64
+; FAST-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 48
+; FAST-NEXT:    xxmrghd v5, vs0, v24
+; FAST-NEXT:    lxvd2x v24, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    addi r1, r1, 304
 ; FAST-NEXT:    ld r0, 16(r1)
-; FAST-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    mtlr r0
-; FAST-NEXT:    lfd f29, -24(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f28, -32(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f27, -40(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f26, -48(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f25, -56(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f24, -64(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f23, -72(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f22, -80(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f21, -88(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f20, -96(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f19, -104(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f18, -112(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f17, -120(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f16, -128(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    blr
   %a = call <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half> %x)
   ret <16 x i64> %a
@@ -1685,483 +1221,295 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) nounwind {
 ; BE-LABEL: lrint_v32i64_v32f16:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
-; BE-NEXT:    stdu r1, -864(r1)
-; BE-NEXT:    std r0, 880(r1)
-; BE-NEXT:    stfd f20, 768(r1) # 8-byte Folded Spill
-; BE-NEXT:    fmr f20, f1
-; BE-NEXT:    fmr f1, f2
-; BE-NEXT:    std r14, 576(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r15, 584(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r16, 592(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r17, 600(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r18, 608(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r19, 616(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r20, 624(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r21, 632(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r22, 640(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r23, 648(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r24, 656(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r25, 664(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r26, 672(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r27, 680(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r28, 688(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r29, 696(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r30, 704(r1) # 8-byte Folded Spill
-; BE-NEXT:    std r31, 712(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f14, 720(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f15, 728(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f16, 736(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f17, 744(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f18, 752(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f19, 760(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f21, 776(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f22, 784(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f23, 792(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f24, 800(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f25, 808(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f26, 816(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f27, 824(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f28, 832(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f29, 840(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f30, 848(r1) # 8-byte Folded Spill
-; BE-NEXT:    stfd f31, 856(r1) # 8-byte Folded Spill
-; BE-NEXT:    fmr f31, f13
+; BE-NEXT:    stdu r1, -624(r1)
+; BE-NEXT:    std r0, 640(r1)
+; BE-NEXT:    std r30, 608(r1) # 8-byte Folded Spill
 ; BE-NEXT:    mr r30, r3
-; BE-NEXT:    fmr f29, f12
-; BE-NEXT:    fmr f30, f11
-; BE-NEXT:    fmr f28, f10
-; BE-NEXT:    fmr f27, f9
-; BE-NEXT:    fmr f26, f8
-; BE-NEXT:    fmr f25, f7
-; BE-NEXT:    fmr f24, f6
-; BE-NEXT:    fmr f23, f5
-; BE-NEXT:    fmr f22, f4
-; BE-NEXT:    fmr f21, f3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f20
-; BE-NEXT:    std r3, 304(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f22
-; BE-NEXT:    std r3, 296(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f21
-; BE-NEXT:    std r3, 280(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f24
-; BE-NEXT:    std r3, 264(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f23
-; BE-NEXT:    std r3, 248(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f26
-; BE-NEXT:    std r3, 232(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f25
+; BE-NEXT:    lhz r3, 926(r1)
+; BE-NEXT:    std r14, 480(r1) # 8-byte Folded Spill
+; BE-NEXT:    lhz r14, 822(r1)
+; BE-NEXT:    std r15, 488(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r19, 520(r1) # 8-byte Folded Spill
 ; BE-NEXT:    std r3, 216(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f28
+; BE-NEXT:    lhz r3, 934(r1)
+; BE-NEXT:    lhz r15, 814(r1)
+; BE-NEXT:    lhz r19, 742(r1)
+; BE-NEXT:    std r22, 544(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r23, 552(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r25, 568(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r26, 576(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r3, 208(r1) # 8-byte Folded Spill
+; BE-NEXT:    lhz r3, 910(r1)
+; BE-NEXT:    lhz r26, 766(r1)
+; BE-NEXT:    lhz r25, 774(r1)
+; BE-NEXT:    std r27, 584(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r28, 592(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r29, 600(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r31, 616(r1) # 8-byte Folded Spill
 ; BE-NEXT:    std r3, 200(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f27
+; BE-NEXT:    lhz r3, 918(r1)
+; BE-NEXT:    lhz r31, 798(r1)
+; BE-NEXT:    lhz r29, 806(r1)
+; BE-NEXT:    lhz r28, 782(r1)
+; BE-NEXT:    lhz r27, 790(r1)
+; BE-NEXT:    lhz r23, 750(r1)
+; BE-NEXT:    lhz r22, 758(r1)
+; BE-NEXT:    std r16, 496(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r17, 504(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r3, 192(r1) # 8-byte Folded Spill
+; BE-NEXT:    lhz r3, 894(r1)
+; BE-NEXT:    mr r17, r7
+; BE-NEXT:    mr r16, r4
+; BE-NEXT:    std r18, 512(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r20, 528(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r21, 536(r1) # 8-byte Folded Spill
+; BE-NEXT:    std r24, 560(r1) # 8-byte Folded Spill
 ; BE-NEXT:    std r3, 184(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f29
+; BE-NEXT:    lhz r3, 902(r1)
+; BE-NEXT:    mr r24, r10
+; BE-NEXT:    mr r20, r9
+; BE-NEXT:    mr r21, r8
+; BE-NEXT:    mr r18, r6
+; BE-NEXT:    std r3, 176(r1) # 8-byte Folded Spill
+; BE-NEXT:    lhz r3, 878(r1)
 ; BE-NEXT:    std r3, 168(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f30
+; BE-NEXT:    lhz r3, 886(r1)
+; BE-NEXT:    std r3, 160(r1) # 8-byte Folded Spill
+; BE-NEXT:    lhz r3, 862(r1)
 ; BE-NEXT:    std r3, 152(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1028(r1)
+; BE-NEXT:    lhz r3, 870(r1)
+; BE-NEXT:    std r3, 144(r1) # 8-byte Folded Spill
+; BE-NEXT:    lhz r3, 846(r1)
 ; BE-NEXT:    std r3, 136(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f31
+; BE-NEXT:    lhz r3, 854(r1)
+; BE-NEXT:    std r3, 128(r1) # 8-byte Folded Spill
+; BE-NEXT:    lhz r3, 830(r1)
 ; BE-NEXT:    std r3, 120(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1044(r1)
+; BE-NEXT:    lhz r3, 838(r1)
 ; BE-NEXT:    std r3, 112(r1) # 8-byte Folded Spill
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1036(r1)
-; BE-NEXT:    mr r15, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1060(r1)
-; BE-NEXT:    mr r14, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1052(r1)
-; BE-NEXT:    mr r31, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1076(r1)
-; BE-NEXT:    mr r29, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1068(r1)
-; BE-NEXT:    mr r28, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1092(r1)
-; BE-NEXT:    mr r27, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1084(r1)
-; BE-NEXT:    mr r26, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1108(r1)
-; BE-NEXT:    mr r25, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1100(r1)
-; BE-NEXT:    mr r24, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1124(r1)
-; BE-NEXT:    mr r23, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1116(r1)
-; BE-NEXT:    mr r22, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1140(r1)
-; BE-NEXT:    mr r21, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1132(r1)
-; BE-NEXT:    mr r20, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1156(r1)
-; BE-NEXT:    mr r19, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1148(r1)
-; BE-NEXT:    mr r18, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1172(r1)
-; BE-NEXT:    mr r17, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 1164(r1)
-; BE-NEXT:    mr r16, r3
-; BE-NEXT:    bl __truncsfhf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    clrldi r3, r5, 48
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
+; BE-NEXT:    bl lrintf
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 424(r1)
 ; BE-NEXT:    clrldi r3, r16, 48
-; BE-NEXT:    stfs f1, 316(r1) # 4-byte Folded Spill
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
+; BE-NEXT:    bl lrintf
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 416(r1)
 ; BE-NEXT:    clrldi r3, r17, 48
-; BE-NEXT:    stfs f1, 312(r1) # 4-byte Folded Spill
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
+; BE-NEXT:    bl lrintf
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 440(r1)
 ; BE-NEXT:    clrldi r3, r18, 48
-; BE-NEXT:    stfs f1, 292(r1) # 4-byte Folded Spill
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r19, 48
-; BE-NEXT:    stfs f1, 276(r1) # 4-byte Folded Spill
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
+; BE-NEXT:    std r3, 432(r1)
 ; BE-NEXT:    clrldi r3, r20, 48
-; BE-NEXT:    stfs f1, 260(r1) # 4-byte Folded Spill
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r21, 48
-; BE-NEXT:    stfs f1, 244(r1) # 4-byte Folded Spill
-; BE-NEXT:    bl __extendhfsf2
-; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r22, 48
-; BE-NEXT:    stfs f1, 228(r1) # 4-byte Folded Spill
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r23, 48
-; BE-NEXT:    stfs f1, 212(r1) # 4-byte Folded Spill
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r24, 48
-; BE-NEXT:    stfs f1, 196(r1) # 4-byte Folded Spill
+; BE-NEXT:    std r3, 456(r1)
+; BE-NEXT:    clrldi r3, r21, 48
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r25, 48
-; BE-NEXT:    stfs f1, 180(r1) # 4-byte Folded Spill
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r26, 48
-; BE-NEXT:    stfs f1, 164(r1) # 4-byte Folded Spill
+; BE-NEXT:    std r3, 448(r1)
+; BE-NEXT:    mr r3, r19
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r27, 48
-; BE-NEXT:    stfs f1, 148(r1) # 4-byte Folded Spill
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r28, 48
-; BE-NEXT:    stfs f1, 132(r1) # 4-byte Folded Spill
+; BE-NEXT:    std r3, 472(r1)
+; BE-NEXT:    clrldi r3, r24, 48
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r29, 48
-; BE-NEXT:    fmr f18, f1
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r31, 48
-; BE-NEXT:    fmr f17, f1
+; BE-NEXT:    std r3, 464(r1)
+; BE-NEXT:    mr r3, r22
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r14, 48
-; BE-NEXT:    fmr f16, f1
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    clrldi r3, r15, 48
-; BE-NEXT:    fmr f15, f1
+; BE-NEXT:    std r3, 232(r1)
+; BE-NEXT:    mr r3, r23
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 112(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f14, f1
-; BE-NEXT:    clrldi r3, r3, 48
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 120(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f31, f1
-; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    std r3, 224(r1)
+; BE-NEXT:    mr r3, r25
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 136(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f30, f1
-; BE-NEXT:    clrldi r3, r3, 48
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 152(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f29, f1
-; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    std r3, 248(r1)
+; BE-NEXT:    mr r3, r26
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 168(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f28, f1
-; BE-NEXT:    clrldi r3, r3, 48
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 184(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f27, f1
-; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    std r3, 240(r1)
+; BE-NEXT:    mr r3, r27
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 200(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f26, f1
-; BE-NEXT:    clrldi r3, r3, 48
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 216(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f25, f1
-; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    std r3, 264(r1)
+; BE-NEXT:    mr r3, r28
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 232(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f24, f1
-; BE-NEXT:    clrldi r3, r3, 48
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 248(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f23, f1
-; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    std r3, 256(r1)
+; BE-NEXT:    mr r3, r29
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 264(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f22, f1
-; BE-NEXT:    clrldi r3, r3, 48
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 280(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f21, f1
-; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    std r3, 280(r1)
+; BE-NEXT:    mr r3, r31
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 296(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f20, f1
-; BE-NEXT:    clrldi r3, r3, 48
-; BE-NEXT:    bl __extendhfsf2
+; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    ld r3, 304(r1) # 8-byte Folded Reload
-; BE-NEXT:    fmr f19, f1
-; BE-NEXT:    clrldi r3, r3, 48
+; BE-NEXT:    std r3, 272(r1)
+; BE-NEXT:    mr r3, r14
 ; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f19
-; BE-NEXT:    std r3, 328(r1)
-; BE-NEXT:    bl lrintf
-; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f20
-; BE-NEXT:    std r3, 320(r1)
-; BE-NEXT:    bl lrintf
+; BE-NEXT:    std r3, 296(r1)
+; BE-NEXT:    mr r3, r15
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f21
-; BE-NEXT:    std r3, 344(r1)
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f22
-; BE-NEXT:    std r3, 336(r1)
-; BE-NEXT:    bl lrintf
+; BE-NEXT:    std r3, 288(r1)
+; BE-NEXT:    ld r3, 112(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f23
-; BE-NEXT:    std r3, 360(r1)
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f24
-; BE-NEXT:    std r3, 352(r1)
-; BE-NEXT:    bl lrintf
+; BE-NEXT:    std r3, 312(r1)
+; BE-NEXT:    ld r3, 120(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f25
-; BE-NEXT:    std r3, 376(r1)
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f26
-; BE-NEXT:    std r3, 368(r1)
-; BE-NEXT:    bl lrintf
+; BE-NEXT:    std r3, 304(r1)
+; BE-NEXT:    ld r3, 128(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f27
-; BE-NEXT:    std r3, 392(r1)
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f28
-; BE-NEXT:    std r3, 384(r1)
-; BE-NEXT:    bl lrintf
+; BE-NEXT:    std r3, 328(r1)
+; BE-NEXT:    ld r3, 136(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f29
-; BE-NEXT:    std r3, 408(r1)
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f30
-; BE-NEXT:    std r3, 400(r1)
-; BE-NEXT:    bl lrintf
+; BE-NEXT:    std r3, 320(r1)
+; BE-NEXT:    ld r3, 144(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f31
-; BE-NEXT:    std r3, 424(r1)
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f14
-; BE-NEXT:    std r3, 416(r1)
-; BE-NEXT:    bl lrintf
+; BE-NEXT:    std r3, 344(r1)
+; BE-NEXT:    ld r3, 152(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f15
-; BE-NEXT:    std r3, 440(r1)
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f16
-; BE-NEXT:    std r3, 432(r1)
-; BE-NEXT:    bl lrintf
+; BE-NEXT:    std r3, 336(r1)
+; BE-NEXT:    ld r3, 160(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f17
-; BE-NEXT:    std r3, 456(r1)
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    fmr f1, f18
-; BE-NEXT:    std r3, 448(r1)
-; BE-NEXT:    bl lrintf
+; BE-NEXT:    std r3, 360(r1)
+; BE-NEXT:    ld r3, 168(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 132(r1) # 4-byte Folded Reload
-; BE-NEXT:    std r3, 472(r1)
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 148(r1) # 4-byte Folded Reload
-; BE-NEXT:    std r3, 464(r1)
-; BE-NEXT:    bl lrintf
+; BE-NEXT:    std r3, 352(r1)
+; BE-NEXT:    ld r3, 176(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 164(r1) # 4-byte Folded Reload
-; BE-NEXT:    std r3, 488(r1)
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 180(r1) # 4-byte Folded Reload
-; BE-NEXT:    std r3, 480(r1)
-; BE-NEXT:    bl lrintf
+; BE-NEXT:    std r3, 376(r1)
+; BE-NEXT:    ld r3, 184(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 196(r1) # 4-byte Folded Reload
-; BE-NEXT:    std r3, 504(r1)
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 212(r1) # 4-byte Folded Reload
-; BE-NEXT:    std r3, 496(r1)
-; BE-NEXT:    bl lrintf
+; BE-NEXT:    std r3, 368(r1)
+; BE-NEXT:    ld r3, 192(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 228(r1) # 4-byte Folded Reload
-; BE-NEXT:    std r3, 520(r1)
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 244(r1) # 4-byte Folded Reload
-; BE-NEXT:    std r3, 512(r1)
-; BE-NEXT:    bl lrintf
+; BE-NEXT:    std r3, 392(r1)
+; BE-NEXT:    ld r3, 200(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 260(r1) # 4-byte Folded Reload
-; BE-NEXT:    std r3, 536(r1)
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 276(r1) # 4-byte Folded Reload
-; BE-NEXT:    std r3, 528(r1)
-; BE-NEXT:    bl lrintf
+; BE-NEXT:    std r3, 384(r1)
+; BE-NEXT:    ld r3, 208(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 292(r1) # 4-byte Folded Reload
-; BE-NEXT:    std r3, 552(r1)
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 312(r1) # 4-byte Folded Reload
-; BE-NEXT:    std r3, 544(r1)
-; BE-NEXT:    bl lrintf
+; BE-NEXT:    std r3, 408(r1)
+; BE-NEXT:    ld r3, 216(r1) # 8-byte Folded Reload
+; BE-NEXT:    bl __extendhfsf2
 ; BE-NEXT:    nop
-; BE-NEXT:    lfs f1, 316(r1) # 4-byte Folded Reload
-; BE-NEXT:    std r3, 568(r1)
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
-; BE-NEXT:    std r3, 560(r1)
-; BE-NEXT:    addi r3, r1, 320
+; BE-NEXT:    std r3, 400(r1)
+; BE-NEXT:    addi r3, r1, 416
 ; BE-NEXT:    lxvd2x vs0, 0, r3
-; BE-NEXT:    addi r3, r1, 336
+; BE-NEXT:    addi r3, r1, 432
 ; BE-NEXT:    lxvd2x vs1, 0, r3
-; BE-NEXT:    addi r3, r1, 352
+; BE-NEXT:    addi r3, r1, 448
 ; BE-NEXT:    lxvd2x vs2, 0, r3
-; BE-NEXT:    addi r3, r1, 368
+; BE-NEXT:    addi r3, r1, 464
 ; BE-NEXT:    lxvd2x vs3, 0, r3
-; BE-NEXT:    addi r3, r1, 384
+; BE-NEXT:    addi r3, r1, 224
 ; BE-NEXT:    lxvd2x vs4, 0, r3
-; BE-NEXT:    addi r3, r1, 400
+; BE-NEXT:    addi r3, r1, 240
 ; BE-NEXT:    lxvd2x vs5, 0, r3
-; BE-NEXT:    addi r3, r1, 416
+; BE-NEXT:    addi r3, r1, 256
 ; BE-NEXT:    lxvd2x vs6, 0, r3
-; BE-NEXT:    addi r3, r1, 432
+; BE-NEXT:    addi r3, r1, 272
 ; BE-NEXT:    lxvd2x vs7, 0, r3
-; BE-NEXT:    addi r3, r1, 448
+; BE-NEXT:    addi r3, r1, 288
 ; BE-NEXT:    lxvd2x vs8, 0, r3
-; BE-NEXT:    addi r3, r1, 464
+; BE-NEXT:    addi r3, r1, 304
 ; BE-NEXT:    lxvd2x vs9, 0, r3
-; BE-NEXT:    addi r3, r1, 480
+; BE-NEXT:    addi r3, r1, 320
 ; BE-NEXT:    lxvd2x vs10, 0, r3
-; BE-NEXT:    addi r3, r1, 496
+; BE-NEXT:    addi r3, r1, 336
 ; BE-NEXT:    lxvd2x vs11, 0, r3
-; BE-NEXT:    addi r3, r1, 512
+; BE-NEXT:    addi r3, r1, 352
 ; BE-NEXT:    lxvd2x vs12, 0, r3
-; BE-NEXT:    addi r3, r1, 528
+; BE-NEXT:    addi r3, r1, 368
 ; BE-NEXT:    lxvd2x vs13, 0, r3
-; BE-NEXT:    addi r3, r1, 544
+; BE-NEXT:    addi r3, r1, 384
 ; BE-NEXT:    lxvd2x v2, 0, r3
-; BE-NEXT:    addi r3, r1, 560
+; BE-NEXT:    addi r3, r1, 400
 ; BE-NEXT:    lxvd2x v3, 0, r3
 ; BE-NEXT:    li r3, 240
 ; BE-NEXT:    stxvd2x v3, r30, r3
@@ -2194,43 +1542,25 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) nounwind {
 ; BE-NEXT:    li r3, 16
 ; BE-NEXT:    stxvd2x vs1, r30, r3
 ; BE-NEXT:    stxvd2x vs0, 0, r30
-; BE-NEXT:    lfd f31, 856(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f30, 848(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f29, 840(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f28, 832(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f27, 824(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f26, 816(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f25, 808(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f24, 800(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f23, 792(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f22, 784(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f21, 776(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f20, 768(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f19, 760(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f18, 752(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f17, 744(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f16, 736(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f15, 728(r1) # 8-byte Folded Reload
-; BE-NEXT:    lfd f14, 720(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r31, 712(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r30, 704(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r29, 696(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r28, 688(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r27, 680(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r26, 672(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r25, 664(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r24, 656(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r23, 648(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r22, 640(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r21, 632(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r20, 624(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r19, 616(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r18, 608(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r17, 600(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r16, 592(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r15, 584(r1) # 8-byte Folded Reload
-; BE-NEXT:    ld r14, 576(r1) # 8-byte Folded Reload
-; BE-NEXT:    addi r1, r1, 864
+; BE-NEXT:    ld r31, 616(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r30, 608(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r29, 600(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r28, 592(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r27, 584(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r26, 576(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r25, 568(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r24, 560(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r23, 552(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r22, 544(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r21, 536(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r20, 528(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r19, 520(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r18, 512(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r17, 504(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r16, 496(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r15, 488(r1) # 8-byte Folded Reload
+; BE-NEXT:    ld r14, 480(r1) # 8-byte Folded Reload
+; BE-NEXT:    addi r1, r1, 624
 ; BE-NEXT:    ld r0, 16(r1)
 ; BE-NEXT:    mtlr r0
 ; BE-NEXT:    blr
@@ -2238,508 +1568,334 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) nounwind {
 ; CHECK-LABEL: lrint_v32i64_v32f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    stdu r1, -688(r1)
-; CHECK-NEXT:    li r4, 208
-; CHECK-NEXT:    std r0, 704(r1)
-; CHECK-NEXT:    std r14, 400(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r15, 408(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r16, 416(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r17, 424(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r18, 432(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r19, 440(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT:    li r4, 224
-; CHECK-NEXT:    std r20, 448(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r21, 456(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r22, 464(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r23, 472(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r24, 480(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r25, 488(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT:    li r4, 240
-; CHECK-NEXT:    std r26, 496(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r27, 504(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r28, 512(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r29, 520(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r30, 528(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    stdu r1, -576(r1)
+; CHECK-NEXT:    std r0, 592(r1)
+; CHECK-NEXT:    std r30, 560(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    mr r30, r3
-; CHECK-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT:    li r4, 256
-; CHECK-NEXT:    std r31, 536(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f14, 544(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f15, 552(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f16, 560(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f17, 568(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f18, 576(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT:    li r4, 272
-; CHECK-NEXT:    stfd f19, 584(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f20, 592(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f20, f2
-; CHECK-NEXT:    stfd f21, 600(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f21, f3
-; CHECK-NEXT:    stfd f22, 608(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f22, f4
-; CHECK-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT:    li r4, 288
-; CHECK-NEXT:    stfd f23, 616(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f23, f5
-; CHECK-NEXT:    stfd f24, 624(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f24, f6
-; CHECK-NEXT:    stfd f25, 632(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f25, f7
-; CHECK-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT:    li r4, 304
-; CHECK-NEXT:    stfd f26, 640(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f26, f8
-; CHECK-NEXT:    stfd f27, 648(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f27, f9
-; CHECK-NEXT:    stfd f28, 656(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f28, f10
-; CHECK-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT:    li r4, 320
-; CHECK-NEXT:    stfd f29, 664(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f29, f11
-; CHECK-NEXT:    stfd f30, 672(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f30, f12
-; CHECK-NEXT:    stfd f31, 680(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    fmr f31, f13
-; CHECK-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT:    li r4, 336
-; CHECK-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT:    li r4, 352
-; CHECK-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT:    li r4, 368
-; CHECK-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT:    li r4, 384
-; CHECK-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f20
+; CHECK-NEXT:    lhz r3, 864(r1)
+; CHECK-NEXT:    li r11, 240
+; CHECK-NEXT:    std r14, 432(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r19, 472(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r14, 744(r1)
+; CHECK-NEXT:    stxvd2x v20, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 256
+; CHECK-NEXT:    std r22, 496(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r22, 680(r1)
+; CHECK-NEXT:    std r3, 216(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r3, 856(r1)
+; CHECK-NEXT:    lhz r19, 672(r1)
+; CHECK-NEXT:    stxvd2x v21, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 272
+; CHECK-NEXT:    std r23, 504(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r23, 688(r1)
+; CHECK-NEXT:    stxvd2x v22, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    std r3, 184(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r3, 848(r1)
+; CHECK-NEXT:    li r11, 288
+; CHECK-NEXT:    std r25, 520(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r25, 696(r1)
+; CHECK-NEXT:    stxvd2x v23, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 304
+; CHECK-NEXT:    std r26, 528(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r27, 536(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r28, 544(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r28, 720(r1)
+; CHECK-NEXT:    lhz r27, 712(r1)
+; CHECK-NEXT:    lhz r26, 704(r1)
+; CHECK-NEXT:    stxvd2x v24, r1, r11 # 16-byte Folded Spill
 ; CHECK-NEXT:    std r3, 176(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f21
-; CHECK-NEXT:    std r3, 160(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f22
+; CHECK-NEXT:    lhz r3, 840(r1)
+; CHECK-NEXT:    li r11, 320
+; CHECK-NEXT:    std r29, 552(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r29, 728(r1)
+; CHECK-NEXT:    stxvd2x v25, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    std r3, 152(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r3, 832(r1)
+; CHECK-NEXT:    li r11, 336
+; CHECK-NEXT:    std r31, 568(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lhz r31, 736(r1)
+; CHECK-NEXT:    stxvd2x v26, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 352
+; CHECK-NEXT:    std r15, 440(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r16, 448(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r17, 456(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r17, r6
+; CHECK-NEXT:    mr r16, r5
+; CHECK-NEXT:    stxvd2x v27, r1, r11 # 16-byte Folded Spill
 ; CHECK-NEXT:    std r3, 144(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f23
-; CHECK-NEXT:    std r3, 128(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f24
+; CHECK-NEXT:    lhz r3, 824(r1)
+; CHECK-NEXT:    li r11, 368
+; CHECK-NEXT:    std r18, 464(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r20, 480(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r20, r8
+; CHECK-NEXT:    mr r18, r7
+; CHECK-NEXT:    stxvd2x v28, r1, r11 # 16-byte Folded Spill
 ; CHECK-NEXT:    std r3, 120(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f25
+; CHECK-NEXT:    lhz r3, 816(r1)
+; CHECK-NEXT:    li r11, 384
+; CHECK-NEXT:    std r21, 488(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r24, 512(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r24, r10
+; CHECK-NEXT:    mr r21, r9
+; CHECK-NEXT:    stxvd2x v29, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 400
 ; CHECK-NEXT:    std r3, 112(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f26
+; CHECK-NEXT:    lhz r3, 808(r1)
+; CHECK-NEXT:    stxvd2x v30, r1, r11 # 16-byte Folded Spill
+; CHECK-NEXT:    li r11, 416
 ; CHECK-NEXT:    std r3, 104(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f27
+; CHECK-NEXT:    lhz r3, 800(r1)
+; CHECK-NEXT:    stxvd2x v31, r1, r11 # 16-byte Folded Spill
 ; CHECK-NEXT:    std r3, 96(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f28
+; CHECK-NEXT:    lhz r3, 792(r1)
 ; CHECK-NEXT:    std r3, 88(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f29
+; CHECK-NEXT:    lhz r3, 784(r1)
 ; CHECK-NEXT:    std r3, 80(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f30
+; CHECK-NEXT:    lhz r3, 776(r1)
 ; CHECK-NEXT:    std r3, 72(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f31
+; CHECK-NEXT:    lhz r3, 768(r1)
 ; CHECK-NEXT:    std r3, 64(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 832(r1)
+; CHECK-NEXT:    lhz r3, 760(r1)
 ; CHECK-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 840(r1)
+; CHECK-NEXT:    lhz r3, 752(r1)
 ; CHECK-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 848(r1)
-; CHECK-NEXT:    mr r15, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 856(r1)
-; CHECK-NEXT:    mr r14, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 864(r1)
-; CHECK-NEXT:    mr r31, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 872(r1)
-; CHECK-NEXT:    mr r29, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 880(r1)
-; CHECK-NEXT:    mr r28, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 888(r1)
-; CHECK-NEXT:    mr r27, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 896(r1)
-; CHECK-NEXT:    mr r26, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 904(r1)
-; CHECK-NEXT:    mr r25, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 912(r1)
-; CHECK-NEXT:    mr r24, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 920(r1)
-; CHECK-NEXT:    mr r23, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 928(r1)
-; CHECK-NEXT:    mr r22, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 936(r1)
-; CHECK-NEXT:    mr r21, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 944(r1)
-; CHECK-NEXT:    mr r20, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 952(r1)
-; CHECK-NEXT:    mr r19, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 960(r1)
-; CHECK-NEXT:    mr r18, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 968(r1)
-; CHECK-NEXT:    mr r17, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    lfs f1, 976(r1)
-; CHECK-NEXT:    mr r16, r3
-; CHECK-NEXT:    bl __truncsfhf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    clrldi r3, r4, 48
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    li r3, 204
-; CHECK-NEXT:    stxsspx f1, r1, r3 # 4-byte Folded Spill
+; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mr r15, r3
 ; CHECK-NEXT:    clrldi r3, r16, 48
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    li r3, 200
-; CHECK-NEXT:    stxsspx f1, r1, r3 # 4-byte Folded Spill
+; CHECK-NEXT:    mtvsrd v31, r15
+; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 224
+; CHECK-NEXT:    xxmrghd vs0, vs0, v31
+; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    clrldi r3, r17, 48
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mr r17, r3
 ; CHECK-NEXT:    clrldi r3, r18, 48
-; CHECK-NEXT:    fmr f29, f1
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r19, 48
-; CHECK-NEXT:    fmr f28, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    mtvsrd v31, r17
+; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 192
+; CHECK-NEXT:    xxmrghd vs0, vs0, v31
+; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    clrldi r3, r20, 48
-; CHECK-NEXT:    fmr f27, f1
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r21, 48
-; CHECK-NEXT:    fmr f26, f1
-; CHECK-NEXT:    bl __extendhfsf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r22, 48
-; CHECK-NEXT:    fmr f25, f1
-; CHECK-NEXT:    bl __extendhfsf2
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r23, 48
-; CHECK-NEXT:    fmr f24, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r24, 48
-; CHECK-NEXT:    fmr f23, f1
+; CHECK-NEXT:    mr r20, r3
+; CHECK-NEXT:    clrldi r3, r21, 48
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r25, 48
-; CHECK-NEXT:    fmr f22, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    mtvsrd v31, r20
+; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r26, 48
-; CHECK-NEXT:    fmr f21, f1
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 160
+; CHECK-NEXT:    xxmrghd vs0, vs0, v31
+; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    mr r3, r19
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r27, 48
-; CHECK-NEXT:    fmr f20, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r28, 48
-; CHECK-NEXT:    fmr f19, f1
+; CHECK-NEXT:    mr r21, r3
+; CHECK-NEXT:    clrldi r3, r24, 48
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r29, 48
-; CHECK-NEXT:    fmr f18, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    mtvsrd v31, r21
+; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r31, 48
-; CHECK-NEXT:    fmr f17, f1
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 128
+; CHECK-NEXT:    xxmrghd vs0, v31, vs0
+; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    mr r3, r22
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r14, 48
-; CHECK-NEXT:    fmr f16, f1
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    clrldi r3, r15, 48
-; CHECK-NEXT:    fmr f15, f1
+; CHECK-NEXT:    mr r24, r3
+; CHECK-NEXT:    mr r3, r23
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 48(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    fmr f14, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    mtvsrd v31, r24
+; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 56(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    fmr f30, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    mr r3, r25
+; CHECK-NEXT:    xxmrghd v27, vs0, v31
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 64(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    xxlor v30, f1, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 72(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    xxlor v29, f1, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    mr r25, r3
+; CHECK-NEXT:    mr r3, r26
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 80(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    xxlor v28, f1, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    mtvsrd v31, r25
+; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 88(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    xxlor v27, f1, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    mr r3, r27
+; CHECK-NEXT:    xxmrghd v26, vs0, v31
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 96(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    xxlor v26, f1, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 104(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    xxlor v25, f1, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    mr r27, r3
+; CHECK-NEXT:    mr r3, r28
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 112(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    xxlor v24, f1, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    mtvsrd v31, r27
+; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 120(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    xxlor v23, f1, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    mr r3, r29
+; CHECK-NEXT:    xxmrghd v25, vs0, v31
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 128(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    xxlor v22, f1, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 144(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    xxlor v21, f1, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    mr r29, r3
+; CHECK-NEXT:    mr r3, r31
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 160(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    xxlor v20, f1, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
-; CHECK-NEXT:    bl __extendhfsf2
+; CHECK-NEXT:    mtvsrd v31, r29
+; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    ld r3, 176(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    fmr f31, f1
-; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    mr r3, r14
+; CHECK-NEXT:    xxmrghd v24, vs0, v31
 ; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f31
-; CHECK-NEXT:    mtvsrd v31, r3
-; CHECK-NEXT:    bl lrintf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    li r3, 176
-; CHECK-NEXT:    xxlor f1, v20, v20
-; CHECK-NEXT:    xxmrghd vs0, vs0, v31
-; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    mr r29, r3
+; CHECK-NEXT:    ld r3, 48(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    xxlor f1, v21, v21
-; CHECK-NEXT:    mtvsrd v31, r3
+; CHECK-NEXT:    mtvsrd v31, r29
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    li r3, 160
-; CHECK-NEXT:    xxlor f1, v22, v22
-; CHECK-NEXT:    xxmrghd vs0, vs0, v31
-; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    ld r3, 56(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v23, vs0, v31
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    xxlor f1, v23, v23
-; CHECK-NEXT:    mtvsrd v31, r3
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    li r3, 144
-; CHECK-NEXT:    xxlor f1, v24, v24
-; CHECK-NEXT:    xxmrghd vs0, vs0, v31
-; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    mr r29, r3
+; CHECK-NEXT:    ld r3, 64(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    xxlor f1, v25, v25
-; CHECK-NEXT:    mtvsrd v31, r3
+; CHECK-NEXT:    mtvsrd v31, r29
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    li r3, 128
-; CHECK-NEXT:    xxlor f1, v26, v26
-; CHECK-NEXT:    xxmrghd vs0, vs0, v31
-; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    ld r3, 72(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v22, vs0, v31
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    xxlor f1, v27, v27
-; CHECK-NEXT:    mtvsrd v31, r3
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxlor f1, v28, v28
-; CHECK-NEXT:    xxmrghd v27, vs0, v31
-; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    mr r29, r3
+; CHECK-NEXT:    ld r3, 80(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    xxlor f1, v29, v29
-; CHECK-NEXT:    mtvsrd v31, r3
+; CHECK-NEXT:    mtvsrd v31, r29
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxlor f1, v30, v30
-; CHECK-NEXT:    xxmrghd v29, vs0, v31
-; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    ld r3, 88(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v21, vs0, v31
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f30
-; CHECK-NEXT:    mtvsrd v31, r3
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f14
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v31, vs0, v31
-; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    mr r29, r3
+; CHECK-NEXT:    ld r3, 96(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f15
-; CHECK-NEXT:    mtvsrd v30, r3
+; CHECK-NEXT:    mtvsrd v31, r29
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f16
 ; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v30, vs0, v30
-; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    ld r3, 104(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v20, vs0, v31
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f17
-; CHECK-NEXT:    mtvsrd v28, r3
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f18
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v28, vs0, v28
-; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    mr r29, r3
+; CHECK-NEXT:    ld r3, 112(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f19
-; CHECK-NEXT:    mtvsrd v26, r3
+; CHECK-NEXT:    mtvsrd v31, r29
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f20
 ; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v26, vs0, v26
-; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    ld r3, 120(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v31, vs0, v31
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f21
-; CHECK-NEXT:    mtvsrd v24, r3
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f22
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v24, vs0, v24
-; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    mr r29, r3
+; CHECK-NEXT:    ld r3, 144(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f23
-; CHECK-NEXT:    mtvsrd v22, r3
+; CHECK-NEXT:    mtvsrd v30, r29
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f24
 ; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v22, vs0, v22
-; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    ld r3, 152(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v30, vs0, v30
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f25
-; CHECK-NEXT:    mtvsrd v20, r3
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f26
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v20, vs0, v20
-; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    mr r29, r3
+; CHECK-NEXT:    ld r3, 176(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f27
-; CHECK-NEXT:    mtvsrd v21, r3
+; CHECK-NEXT:    mtvsrd v29, r29
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f28
 ; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    xxmrghd v21, vs0, v21
-; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    ld r3, 184(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v29, vs0, v29
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    fmr f1, f29
-; CHECK-NEXT:    mtvsrd v23, r3
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    li r3, 200
-; CHECK-NEXT:    lxsspx f1, r1, r3 # 4-byte Folded Reload
-; CHECK-NEXT:    xxmrghd v23, vs0, v23
-; CHECK-NEXT:    bl lrintf
+; CHECK-NEXT:    mr r29, r3
+; CHECK-NEXT:    ld r3, 216(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    bl __extendhfsf2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    mtvsrd v25, r3
-; CHECK-NEXT:    li r3, 204
-; CHECK-NEXT:    lxsspx f1, r1, r3 # 4-byte Folded Reload
+; CHECK-NEXT:    mtvsrd v28, r29
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    li r3, 240
-; CHECK-NEXT:    xxswapd vs1, v23
+; CHECK-NEXT:    xxswapd vs1, v29
 ; CHECK-NEXT:    li r4, 128
-; CHECK-NEXT:    xxswapd vs2, v21
-; CHECK-NEXT:    xxswapd vs3, v31
-; CHECK-NEXT:    xxmrghd v2, vs0, v25
+; CHECK-NEXT:    xxswapd vs2, v30
+; CHECK-NEXT:    xxswapd vs3, v25
+; CHECK-NEXT:    xxmrghd v2, vs0, v28
 ; CHECK-NEXT:    xxswapd vs0, v2
 ; CHECK-NEXT:    stxvd2x vs0, r30, r3
 ; CHECK-NEXT:    li r3, 224
@@ -2747,35 +1903,35 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) nounwind {
 ; CHECK-NEXT:    li r3, 208
 ; CHECK-NEXT:    stxvd2x vs2, r30, r3
 ; CHECK-NEXT:    li r3, 192
-; CHECK-NEXT:    xxswapd vs0, v20
+; CHECK-NEXT:    xxswapd vs0, v31
 ; CHECK-NEXT:    stxvd2x vs0, r30, r3
 ; CHECK-NEXT:    li r3, 176
-; CHECK-NEXT:    xxswapd vs1, v22
+; CHECK-NEXT:    xxswapd vs1, v20
 ; CHECK-NEXT:    stxvd2x vs1, r30, r3
 ; CHECK-NEXT:    li r3, 160
-; CHECK-NEXT:    xxswapd vs2, v28
-; CHECK-NEXT:    xxswapd vs0, v24
+; CHECK-NEXT:    xxswapd vs2, v23
+; CHECK-NEXT:    xxswapd vs0, v21
 ; CHECK-NEXT:    stxvd2x vs0, r30, r3
 ; CHECK-NEXT:    li r3, 144
-; CHECK-NEXT:    xxswapd vs1, v26
+; CHECK-NEXT:    xxswapd vs1, v22
 ; CHECK-NEXT:    stxvd2x vs1, r30, r3
 ; CHECK-NEXT:    li r3, 128
 ; CHECK-NEXT:    stxvd2x vs2, r30, r3
 ; CHECK-NEXT:    li r3, 112
-; CHECK-NEXT:    xxswapd vs0, v30
+; CHECK-NEXT:    xxswapd vs0, v24
 ; CHECK-NEXT:    stxvd2x vs0, r30, r3
 ; CHECK-NEXT:    li r3, 96
 ; CHECK-NEXT:    stxvd2x vs3, r30, r3
 ; CHECK-NEXT:    li r3, 80
 ; CHECK-NEXT:    lxvd2x vs2, r1, r4 # 16-byte Folded Reload
-; CHECK-NEXT:    li r4, 144
-; CHECK-NEXT:    xxswapd vs1, v29
+; CHECK-NEXT:    li r4, 160
+; CHECK-NEXT:    xxswapd vs1, v26
 ; CHECK-NEXT:    stxvd2x vs1, r30, r3
 ; CHECK-NEXT:    li r3, 64
 ; CHECK-NEXT:    lxvd2x vs1, r1, r4 # 16-byte Folded Reload
-; CHECK-NEXT:    li r4, 160
+; CHECK-NEXT:    li r4, 192
 ; CHECK-NEXT:    lxvd2x vs3, r1, r4 # 16-byte Folded Reload
-; CHECK-NEXT:    li r4, 176
+; CHECK-NEXT:    li r4, 224
 ; CHECK-NEXT:    lxvd2x vs4, r1, r4 # 16-byte Folded Reload
 ; CHECK-NEXT:    xxswapd vs0, v27
 ; CHECK-NEXT:    stxvd2x vs0, r30, r3
@@ -2788,69 +1944,51 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) nounwind {
 ; CHECK-NEXT:    li r3, 16
 ; CHECK-NEXT:    xxswapd vs3, vs3
 ; CHECK-NEXT:    stxvd2x vs3, r30, r3
-; CHECK-NEXT:    li r3, 384
+; CHECK-NEXT:    li r3, 416
 ; CHECK-NEXT:    xxswapd vs4, vs4
 ; CHECK-NEXT:    stxvd2x vs4, 0, r30
 ; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    li r3, 368
-; CHECK-NEXT:    lfd f31, 680(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f30, 672(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f29, 664(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f28, 656(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f27, 648(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f26, 640(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f25, 632(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f24, 624(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f23, 616(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f22, 608(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f21, 600(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f20, 592(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f19, 584(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f18, 576(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f17, 568(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f16, 560(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    li r3, 400
+; CHECK-NEXT:    ld r31, 568(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r30, 560(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r29, 552(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r28, 544(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r27, 536(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r26, 528(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r25, 520(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r24, 512(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r23, 504(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r22, 496(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r21, 488(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r20, 480(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r19, 472(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r18, 464(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r17, 456(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r16, 448(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    li r3, 352
-; CHECK-NEXT:    lfd f15, 552(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd f14, 544(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r31, 536(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r30, 528(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r29, 520(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r28, 512(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    li r3, 384
+; CHECK-NEXT:    ld r15, 440(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r14, 432(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    li r3, 336
-; CHECK-NEXT:    ld r27, 504(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r26, 496(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r25, 488(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r24, 480(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r23, 472(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r22, 464(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    li r3, 368
 ; CHECK-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    li r3, 320
-; CHECK-NEXT:    ld r21, 456(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r20, 448(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r19, 440(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r18, 432(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r17, 424(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r16, 416(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    li r3, 352
 ; CHECK-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    li r3, 304
-; CHECK-NEXT:    ld r15, 408(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r14, 400(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    li r3, 336
 ; CHECK-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    li r3, 288
+; CHECK-NEXT:    li r3, 320
 ; CHECK-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    li r3, 272
+; CHECK-NEXT:    li r3, 304
 ; CHECK-NEXT:    lxvd2x v24, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    li r3, 256
+; CHECK-NEXT:    li r3, 288
 ; CHECK-NEXT:    lxvd2x v23, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    li r3, 240
+; CHECK-NEXT:    li r3, 272
 ; CHECK-NEXT:    lxvd2x v22, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    li r3, 224
+; CHECK-NEXT:    li r3, 256
 ; CHECK-NEXT:    lxvd2x v21, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    li r3, 208
+; CHECK-NEXT:    li r3, 240
 ; CHECK-NEXT:    lxvd2x v20, r1, r3 # 16-byte Folded Reload
-; CHECK-NEXT:    addi r1, r1, 688
+; CHECK-NEXT:    addi r1, r1, 576
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
 ; CHECK-NEXT:    blr
@@ -2858,516 +1996,410 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) nounwind {
 ; FAST-LABEL: lrint_v32i64_v32f16:
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
-; FAST-NEXT:    stdu r1, -480(r1)
-; FAST-NEXT:    li r4, 128
-; FAST-NEXT:    std r0, 496(r1)
-; FAST-NEXT:    std r30, 320(r1) # 8-byte Folded Spill
+; FAST-NEXT:    stdu r1, -560(r1)
+; FAST-NEXT:    std r0, 576(r1)
+; FAST-NEXT:    std r30, 544(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    mr r30, r3
-; FAST-NEXT:    stfd f14, 336(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f15, 344(r1) # 8-byte Folded Spill
-; FAST-NEXT:    fmr f14, f5
-; FAST-NEXT:    stfd f16, 352(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT:    li r4, 144
-; FAST-NEXT:    fmr f16, f4
-; FAST-NEXT:    stfd f17, 360(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f18, 368(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f19, 376(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f20, 384(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f21, 392(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT:    li r4, 160
-; FAST-NEXT:    stfd f22, 400(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f23, 408(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f24, 416(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f25, 424(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f26, 432(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f27, 440(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT:    li r4, 176
-; FAST-NEXT:    xxlor v22, f3, f3
-; FAST-NEXT:    stfd f28, 448(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f29, 456(r1) # 8-byte Folded Spill
-; FAST-NEXT:    fmr f29, f9
-; FAST-NEXT:    stfd f30, 464(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f31, 472(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT:    li r4, 192
-; FAST-NEXT:    xxlor v23, f2, f2
-; FAST-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT:    li r4, 208
-; FAST-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT:    li r4, 224
-; FAST-NEXT:    xxlor v25, f13, f13
-; FAST-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT:    li r4, 240
-; FAST-NEXT:    xxlor v26, f12, f12
-; FAST-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT:    li r4, 256
-; FAST-NEXT:    xxlor v27, f11, f11
-; FAST-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT:    li r4, 272
-; FAST-NEXT:    xxlor v28, f10, f10
-; FAST-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT:    li r4, 288
-; FAST-NEXT:    xxlor v29, f8, f8
-; FAST-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT:    li r4, 304
-; FAST-NEXT:    xxlor v30, f7, f7
-; FAST-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
-; FAST-NEXT:    li r4, 44
-; FAST-NEXT:    xxlor v31, f6, f6
-; FAST-NEXT:    stxsspx f1, r1, r4 # 4-byte Folded Spill
-; FAST-NEXT:    lfs f1, 768(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    lhz r3, 848(r1)
+; FAST-NEXT:    li r11, 224
+; FAST-NEXT:    std r14, 416(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r15, 424(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r15, 736(r1)
+; FAST-NEXT:    stxvd2x v20, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 240
+; FAST-NEXT:    std r19, 456(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r14, 728(r1)
+; FAST-NEXT:    std r3, 184(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r3, 840(r1)
+; FAST-NEXT:    lhz r19, 656(r1)
+; FAST-NEXT:    stxvd2x v21, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 256
+; FAST-NEXT:    std r21, 472(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r21, 664(r1)
+; FAST-NEXT:    stxvd2x v22, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    std r3, 176(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r3, 832(r1)
+; FAST-NEXT:    li r11, 272
+; FAST-NEXT:    std r23, 488(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r23, 672(r1)
+; FAST-NEXT:    stxvd2x v23, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 288
+; FAST-NEXT:    std r24, 496(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r26, 512(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r27, 520(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r27, 696(r1)
+; FAST-NEXT:    lhz r26, 688(r1)
+; FAST-NEXT:    lhz r24, 680(r1)
+; FAST-NEXT:    stxvd2x v24, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    std r3, 152(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r3, 824(r1)
+; FAST-NEXT:    li r11, 304
+; FAST-NEXT:    std r28, 528(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r28, 704(r1)
+; FAST-NEXT:    stxvd2x v25, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    std r3, 144(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r3, 816(r1)
+; FAST-NEXT:    li r11, 320
+; FAST-NEXT:    std r29, 536(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r29, 712(r1)
+; FAST-NEXT:    stxvd2x v26, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 336
+; FAST-NEXT:    std r31, 552(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r31, 720(r1)
+; FAST-NEXT:    std r16, 432(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r17, 440(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r17, r6
+; FAST-NEXT:    mr r16, r5
+; FAST-NEXT:    stxvd2x v27, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    std r3, 136(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r3, 808(r1)
+; FAST-NEXT:    li r11, 352
+; FAST-NEXT:    std r18, 448(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r20, 464(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r20, r8
+; FAST-NEXT:    mr r18, r7
+; FAST-NEXT:    stxvd2x v28, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    std r3, 104(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r3, 800(r1)
+; FAST-NEXT:    li r11, 368
+; FAST-NEXT:    std r22, 480(r1) # 8-byte Folded Spill
+; FAST-NEXT:    std r25, 504(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r25, r10
+; FAST-NEXT:    mr r22, r9
+; FAST-NEXT:    stxvd2x v29, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 384
+; FAST-NEXT:    std r3, 96(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r3, 792(r1)
+; FAST-NEXT:    stxvd2x v30, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    li r11, 400
+; FAST-NEXT:    std r3, 88(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r3, 784(r1)
+; FAST-NEXT:    stxvd2x v31, r1, r11 # 16-byte Folded Spill
+; FAST-NEXT:    std r3, 80(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r3, 776(r1)
+; FAST-NEXT:    std r3, 72(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r3, 768(r1)
+; FAST-NEXT:    std r3, 64(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r3, 760(r1)
+; FAST-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r3, 752(r1)
+; FAST-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
+; FAST-NEXT:    lhz r3, 744(r1)
+; FAST-NEXT:    std r3, 40(r1) # 8-byte Folded Spill
+; FAST-NEXT:    clrldi r3, r4, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    li r3, 120
-; FAST-NEXT:    stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT:    lfs f1, 760(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    clrldi r3, r16, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    li r3, 112
-; FAST-NEXT:    stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT:    lfs f1, 752(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 208
+; FAST-NEXT:    xxmrghd vs0, vs0, v31
+; FAST-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    clrldi r3, r17, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    li r3, 104
-; FAST-NEXT:    stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT:    lfs f1, 744(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    clrldi r3, r18, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    li r3, 96
-; FAST-NEXT:    stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT:    lfs f1, 736(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 192
+; FAST-NEXT:    xxmrghd vs0, vs0, v31
+; FAST-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    clrldi r3, r20, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    li r3, 88
-; FAST-NEXT:    stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT:    lfs f1, 728(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    clrldi r3, r22, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    li r3, 80
-; FAST-NEXT:    stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT:    lfs f1, 720(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 160
+; FAST-NEXT:    xxmrghd vs0, vs0, v31
+; FAST-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    mr r3, r19
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    li r3, 72
-; FAST-NEXT:    stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT:    lfs f1, 712(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    clrldi r3, r25, 48
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    li r3, 64
-; FAST-NEXT:    stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT:    lfs f1, 704(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 112
+; FAST-NEXT:    xxmrghd vs0, v31, vs0
+; FAST-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    mr r3, r21
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    li r3, 56
-; FAST-NEXT:    stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT:    lfs f1, 696(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    mr r3, r23
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    li r3, 48
-; FAST-NEXT:    stxsdx f1, r1, r3 # 8-byte Folded Spill
-; FAST-NEXT:    lfs f1, 688(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    mr r3, r24
+; FAST-NEXT:    xxmrghd v27, vs0, v31
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    xxlor v21, f1, f1
-; FAST-NEXT:    lfs f1, 680(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    mr r3, r26
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    xxlor v20, f1, f1
-; FAST-NEXT:    lfs f1, 672(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    mr r3, r27
+; FAST-NEXT:    xxmrghd v26, vs0, v31
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    xxlor v24, f1, f1
-; FAST-NEXT:    lfs f1, 664(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    mr r3, r28
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f31, f1
-; FAST-NEXT:    lfs f1, 656(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    mr r3, r29
+; FAST-NEXT:    xxmrghd v25, vs0, v31
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f30, f1
-; FAST-NEXT:    lfs f1, 648(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    mr r3, r31
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f28, f1
-; FAST-NEXT:    lfs f1, 640(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    mr r3, r14
+; FAST-NEXT:    xxmrghd v24, vs0, v31
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f27, f1
-; FAST-NEXT:    lfs f1, 632(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    mr r3, r15
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f26, f1
-; FAST-NEXT:    lfs f1, 624(r1)
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    ld r3, 40(r1) # 8-byte Folded Reload
+; FAST-NEXT:    xxmrghd v23, vs0, v31
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f25, f1
-; FAST-NEXT:    xxlor f1, v25, v25
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    ld r3, 48(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f24, f1
-; FAST-NEXT:    xxlor f1, v26, v26
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    ld r3, 56(r1) # 8-byte Folded Reload
+; FAST-NEXT:    xxmrghd v22, vs0, v31
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f23, f1
-; FAST-NEXT:    xxlor f1, v27, v27
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    ld r3, 64(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f22, f1
-; FAST-NEXT:    xxlor f1, v28, v28
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    ld r3, 72(r1) # 8-byte Folded Reload
+; FAST-NEXT:    xxmrghd v21, vs0, v31
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f21, f1
-; FAST-NEXT:    fmr f1, f29
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    ld r3, 80(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f20, f1
-; FAST-NEXT:    xxlor f1, v29, v29
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    ld r3, 88(r1) # 8-byte Folded Reload
+; FAST-NEXT:    xxmrghd v20, vs0, v31
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f19, f1
-; FAST-NEXT:    xxlor f1, v30, v30
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    ld r3, 96(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f18, f1
-; FAST-NEXT:    xxlor f1, v31, v31
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    ld r3, 104(r1) # 8-byte Folded Reload
+; FAST-NEXT:    xxmrghd v31, vs0, v31
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f29, f1
-; FAST-NEXT:    fmr f1, f14
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v30, r3
+; FAST-NEXT:    ld r3, 136(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f14, f1
-; FAST-NEXT:    fmr f1, f16
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    ld r3, 144(r1) # 8-byte Folded Reload
+; FAST-NEXT:    xxmrghd v30, vs0, v30
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f16, f1
-; FAST-NEXT:    xxlor f1, v22, v22
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v29, r3
+; FAST-NEXT:    ld r3, 152(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fmr f17, f1
-; FAST-NEXT:    xxlor f1, v23, v23
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    ld r3, 176(r1) # 8-byte Folded Reload
+; FAST-NEXT:    xxmrghd v29, vs0, v29
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    li r3, 44
-; FAST-NEXT:    fmr f15, f1
-; FAST-NEXT:    lxsspx f1, r1, r3 # 4-byte Folded Reload
-; FAST-NEXT:    bl __truncsfhf2
-; FAST-NEXT:    nop
-; FAST-NEXT:    clrldi r3, r3, 48
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtvsrd v28, r3
+; FAST-NEXT:    ld r3, 184(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    bl __extendhfsf2
 ; FAST-NEXT:    nop
-; FAST-NEXT:    fctid f3, f15
-; FAST-NEXT:    fctid f4, f17
-; FAST-NEXT:    mffprd r3, f3
-; FAST-NEXT:    fctid f5, f16
-; FAST-NEXT:    fctid f6, f14
-; FAST-NEXT:    fctid f7, f18
-; FAST-NEXT:    fctid f8, f19
-; FAST-NEXT:    fctid f13, f1
-; FAST-NEXT:    fctid f9, f20
-; FAST-NEXT:    fctid f10, f22
-; FAST-NEXT:    fctid f11, f24
-; FAST-NEXT:    fctid f12, f25
-; FAST-NEXT:    fctid f2, f23
-; FAST-NEXT:    fctid f0, f21
-; FAST-NEXT:    mtvsrd v2, r3
-; FAST-NEXT:    mffprd r3, f4
-; FAST-NEXT:    mtvsrd v3, r3
-; FAST-NEXT:    mffprd r3, f5
-; FAST-NEXT:    mtfprd f5, r3
-; FAST-NEXT:    mffprd r3, f6
-; FAST-NEXT:    mtfprd f1, r3
-; FAST-NEXT:    mffprd r3, f7
-; FAST-NEXT:    mtfprd f6, r3
-; FAST-NEXT:    mffprd r3, f8
-; FAST-NEXT:    mtfprd f7, r3
-; FAST-NEXT:    mffprd r3, f9
-; FAST-NEXT:    mtfprd f3, r3
-; FAST-NEXT:    mffprd r3, f10
-; FAST-NEXT:    mtfprd f4, r3
-; FAST-NEXT:    mffprd r3, f11
-; FAST-NEXT:    fctid f11, f31
-; FAST-NEXT:    lfd f31, 56(r1) # 8-byte Folded Reload
-; FAST-NEXT:    mtfprd f8, r3
-; FAST-NEXT:    mffprd r3, f12
-; FAST-NEXT:    xxlor f12, v24, v24
-; FAST-NEXT:    fctid f31, f31
-; FAST-NEXT:    fctid f12, f12
-; FAST-NEXT:    mtfprd f9, r3
-; FAST-NEXT:    mffprd r3, f13
-; FAST-NEXT:    lfd f13, 48(r1) # 8-byte Folded Reload
-; FAST-NEXT:    mtfprd f10, r3
-; FAST-NEXT:    fctid f13, f13
-; FAST-NEXT:    xxmrghd v3, vs5, v3
-; FAST-NEXT:    fctid f5, f26
-; FAST-NEXT:    mffprd r3, f5
-; FAST-NEXT:    mtfprd f5, r3
-; FAST-NEXT:    xxmrghd v4, vs7, vs6
-; FAST-NEXT:    fctid f6, f27
-; FAST-NEXT:    fctid f7, f28
-; FAST-NEXT:    mffprd r3, f6
-; FAST-NEXT:    lfd f28, 96(r1) # 8-byte Folded Reload
-; FAST-NEXT:    fctid f28, f28
-; FAST-NEXT:    mtfprd f6, r3
-; FAST-NEXT:    mffprd r3, f7
-; FAST-NEXT:    mtfprd f7, r3
-; FAST-NEXT:    xxmrghd v2, v2, vs10
-; FAST-NEXT:    fctid f10, f30
-; FAST-NEXT:    mffprd r3, f10
-; FAST-NEXT:    lfd f30, 80(r1) # 8-byte Folded Reload
-; FAST-NEXT:    fctid f30, f30
-; FAST-NEXT:    mtfprd f10, r3
-; FAST-NEXT:    mffprd r3, f11
-; FAST-NEXT:    mtfprd f11, r3
-; FAST-NEXT:    mffprd r3, f12
-; FAST-NEXT:    mtfprd f12, r3
-; FAST-NEXT:    xxmrghd v5, vs12, vs11
-; FAST-NEXT:    xxlor f11, v20, v20
-; FAST-NEXT:    xxlor f12, v21, v21
-; FAST-NEXT:    fctid f11, f11
-; FAST-NEXT:    fctid f12, f12
-; FAST-NEXT:    mffprd r3, f11
-; FAST-NEXT:    mtfprd f11, r3
-; FAST-NEXT:    mffprd r3, f12
-; FAST-NEXT:    mtfprd f12, r3
-; FAST-NEXT:    mffprd r3, f13
-; FAST-NEXT:    mtfprd f13, r3
-; FAST-NEXT:    mffprd r3, f31
-; FAST-NEXT:    lfd f31, 64(r1) # 8-byte Folded Reload
-; FAST-NEXT:    fctid f31, f31
-; FAST-NEXT:    mtvsrd v0, r3
-; FAST-NEXT:    mffprd r3, f31
-; FAST-NEXT:    lfd f31, 72(r1) # 8-byte Folded Reload
-; FAST-NEXT:    mtvsrd v1, r3
-; FAST-NEXT:    mffprd r3, f30
-; FAST-NEXT:    lfd f30, 88(r1) # 8-byte Folded Reload
-; FAST-NEXT:    fctid f31, f31
-; FAST-NEXT:    mtvsrd v6, r3
-; FAST-NEXT:    mffprd r3, f28
-; FAST-NEXT:    lfd f28, 104(r1) # 8-byte Folded Reload
-; FAST-NEXT:    fctid f30, f30
-; FAST-NEXT:    fctid f28, f28
-; FAST-NEXT:    mtvsrd v7, r3
-; FAST-NEXT:    mffprd r3, f28
-; FAST-NEXT:    lfd f28, 112(r1) # 8-byte Folded Reload
-; FAST-NEXT:    fctid f28, f28
-; FAST-NEXT:    mtvsrd v8, r3
-; FAST-NEXT:    mffprd r3, f28
-; FAST-NEXT:    lfd f28, 120(r1) # 8-byte Folded Reload
-; FAST-NEXT:    fctid f28, f28
-; FAST-NEXT:    xxmrghd v10, vs12, vs11
-; FAST-NEXT:    xxmrghd v0, v0, vs13
-; FAST-NEXT:    xxswapd vs12, v0
-; FAST-NEXT:    xxmrghd v0, vs9, vs8
-; FAST-NEXT:    xxmrghd v7, v8, v7
-; FAST-NEXT:    mtvsrd v8, r3
-; FAST-NEXT:    mffprd r3, f28
-; FAST-NEXT:    mtvsrd v9, r3
-; FAST-NEXT:    mffprd r3, f30
-; FAST-NEXT:    xxswapd v7, v7
-; FAST-NEXT:    xxmrghd v8, v9, v8
-; FAST-NEXT:    mtvsrd v9, r3
-; FAST-NEXT:    mffprd r3, f31
-; FAST-NEXT:    xxswapd v8, v8
-; FAST-NEXT:    xxmrghd v6, v9, v6
-; FAST-NEXT:    mtvsrd v9, r3
+; FAST-NEXT:    fctid f0, f1
+; FAST-NEXT:    xxswapd vs1, v29
+; FAST-NEXT:    li r4, 112
+; FAST-NEXT:    xxswapd vs2, v30
+; FAST-NEXT:    xxswapd vs3, v25
+; FAST-NEXT:    mffprd r3, f0
+; FAST-NEXT:    mtfprd f0, r3
 ; FAST-NEXT:    li r3, 240
-; FAST-NEXT:    stxvd2x v8, r30, r3
+; FAST-NEXT:    xxmrghd v2, vs0, v28
+; FAST-NEXT:    xxswapd vs0, v2
+; FAST-NEXT:    stxvd2x vs0, r30, r3
 ; FAST-NEXT:    li r3, 224
-; FAST-NEXT:    stxvd2x v7, r30, r3
+; FAST-NEXT:    stxvd2x vs1, r30, r3
 ; FAST-NEXT:    li r3, 208
-; FAST-NEXT:    xxswapd vs11, v6
-; FAST-NEXT:    xxmrghd v6, vs10, vs7
-; FAST-NEXT:    stxvd2x vs11, r30, r3
+; FAST-NEXT:    stxvd2x vs2, r30, r3
 ; FAST-NEXT:    li r3, 192
-; FAST-NEXT:    xxmrghd v1, v9, v1
-; FAST-NEXT:    xxswapd vs11, v1
-; FAST-NEXT:    xxmrghd v1, vs6, vs5
-; FAST-NEXT:    xxswapd vs5, v10
-; FAST-NEXT:    xxswapd vs6, v5
-; FAST-NEXT:    stxvd2x vs11, r30, r3
+; FAST-NEXT:    xxswapd vs0, v31
+; FAST-NEXT:    stxvd2x vs0, r30, r3
 ; FAST-NEXT:    li r3, 176
-; FAST-NEXT:    stxvd2x vs12, r30, r3
+; FAST-NEXT:    xxswapd vs1, v20
+; FAST-NEXT:    stxvd2x vs1, r30, r3
 ; FAST-NEXT:    li r3, 160
-; FAST-NEXT:    stxvd2x vs5, r30, r3
+; FAST-NEXT:    xxswapd vs2, v23
+; FAST-NEXT:    xxswapd vs0, v21
+; FAST-NEXT:    stxvd2x vs0, r30, r3
 ; FAST-NEXT:    li r3, 144
-; FAST-NEXT:    stxvd2x vs6, r30, r3
-; FAST-NEXT:    mffprd r3, f2
-; FAST-NEXT:    mtfprd f7, r3
+; FAST-NEXT:    xxswapd vs1, v22
+; FAST-NEXT:    stxvd2x vs1, r30, r3
 ; FAST-NEXT:    li r3, 128
-; FAST-NEXT:    xxswapd vs5, v6
-; FAST-NEXT:    stxvd2x vs5, r30, r3
-; FAST-NEXT:    li r3, 112
-; FAST-NEXT:    xxswapd vs2, v1
-; FAST-NEXT:    xxswapd vs6, v0
 ; FAST-NEXT:    stxvd2x vs2, r30, r3
+; FAST-NEXT:    li r3, 112
+; FAST-NEXT:    xxswapd vs0, v24
+; FAST-NEXT:    stxvd2x vs0, r30, r3
 ; FAST-NEXT:    li r3, 96
-; FAST-NEXT:    fctid f2, f29
-; FAST-NEXT:    stxvd2x vs6, r30, r3
-; FAST-NEXT:    mffprd r3, f0
-; FAST-NEXT:    mtfprd f0, r3
-; FAST-NEXT:    mffprd r3, f2
-; FAST-NEXT:    mtfprd f2, r3
+; FAST-NEXT:    stxvd2x vs3, r30, r3
 ; FAST-NEXT:    li r3, 80
-; FAST-NEXT:    xxmrghd v5, vs7, vs4
-; FAST-NEXT:    xxswapd vs4, v2
-; FAST-NEXT:    xxmrghd v0, vs0, vs3
-; FAST-NEXT:    xxswapd vs0, v5
-; FAST-NEXT:    xxswapd vs3, v3
-; FAST-NEXT:    stxvd2x vs0, r30, r3
+; FAST-NEXT:    lxvd2x vs2, r1, r4 # 16-byte Folded Reload
+; FAST-NEXT:    li r4, 160
+; FAST-NEXT:    xxswapd vs1, v26
+; FAST-NEXT:    stxvd2x vs1, r30, r3
 ; FAST-NEXT:    li r3, 64
-; FAST-NEXT:    xxswapd vs0, v0
+; FAST-NEXT:    lxvd2x vs1, r1, r4 # 16-byte Folded Reload
+; FAST-NEXT:    li r4, 192
+; FAST-NEXT:    lxvd2x vs3, r1, r4 # 16-byte Folded Reload
+; FAST-NEXT:    li r4, 208
+; FAST-NEXT:    lxvd2x vs4, r1, r4 # 16-byte Folded Reload
+; FAST-NEXT:    xxswapd vs0, v27
 ; FAST-NEXT:    stxvd2x vs0, r30, r3
 ; FAST-NEXT:    li r3, 48
-; FAST-NEXT:    xxmrghd v5, vs2, vs1
-; FAST-NEXT:    xxswapd vs1, v4
-; FAST-NEXT:    stxvd2x vs1, r30, r3
-; FAST-NEXT:    li r3, 32
-; FAST-NEXT:    xxswapd vs2, v5
+; FAST-NEXT:    xxswapd vs2, vs2
 ; FAST-NEXT:    stxvd2x vs2, r30, r3
+; FAST-NEXT:    li r3, 32
+; FAST-NEXT:    xxswapd vs1, vs1
+; FAST-NEXT:    stxvd2x vs1, r30, r3
 ; FAST-NEXT:    li r3, 16
+; FAST-NEXT:    xxswapd vs3, vs3
 ; FAST-NEXT:    stxvd2x vs3, r30, r3
-; FAST-NEXT:    li r3, 304
+; FAST-NEXT:    li r3, 400
+; FAST-NEXT:    xxswapd vs4, vs4
 ; FAST-NEXT:    stxvd2x vs4, 0, r30
-; FAST-NEXT:    lfd f31, 472(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f30, 464(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f29, 456(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f28, 448(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f27, 440(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f26, 432(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f25, 424(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f24, 416(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f23, 408(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f22, 400(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f21, 392(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f20, 384(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f19, 376(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f18, 368(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f17, 360(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f16, 352(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f15, 344(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lfd f14, 336(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT:    li r3, 288
-; FAST-NEXT:    ld r30, 320(r1) # 8-byte Folded Reload
+; FAST-NEXT:    li r3, 384
+; FAST-NEXT:    ld r31, 552(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r30, 544(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r29, 536(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r28, 528(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r27, 520(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r26, 512(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r25, 504(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r24, 496(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r23, 488(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r22, 480(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r21, 472(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r20, 464(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r19, 456(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r18, 448(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r17, 440(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r16, 432(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT:    li r3, 272
+; FAST-NEXT:    li r3, 368
+; FAST-NEXT:    ld r15, 424(r1) # 8-byte Folded Reload
+; FAST-NEXT:    ld r14, 416(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT:    li r3, 256
+; FAST-NEXT:    li r3, 352
 ; FAST-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT:    li r3, 240
+; FAST-NEXT:    li r3, 336
 ; FAST-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT:    li r3, 224
+; FAST-NEXT:    li r3, 320
 ; FAST-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT:    li r3, 208
+; FAST-NEXT:    li r3, 304
 ; FAST-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT:    li r3, 192
+; FAST-NEXT:    li r3, 288
 ; FAST-NEXT:    lxvd2x v24, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT:    li r3, 176
+; FAST-NEXT:    li r3, 272
 ; FAST-NEXT:    lxvd2x v23, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT:    li r3, 160
+; FAST-NEXT:    li r3, 256
 ; FAST-NEXT:    lxvd2x v22, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT:    li r3, 144
+; FAST-NEXT:    li r3, 240
 ; FAST-NEXT:    lxvd2x v21, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT:    li r3, 128
+; FAST-NEXT:    li r3, 224
 ; FAST-NEXT:    lxvd2x v20, r1, r3 # 16-byte Folded Reload
-; FAST-NEXT:    addi r1, r1, 480
+; FAST-NEXT:    addi r1, r1, 560
 ; FAST-NEXT:    ld r0, 16(r1)
 ; FAST-NEXT:    mtlr r0
 ; FAST-NEXT:    blr



More information about the llvm-commits mailing list