[llvm] r347090 - [PowerPC][NFC] Add tests for vector fp <-> int conversions

Nemanja Ivanovic via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 16 12:24:10 PST 2018


Author: nemanjai
Date: Fri Nov 16 12:24:10 2018
New Revision: 347090

URL: http://llvm.org/viewvc/llvm-project?rev=347090&view=rev
Log:
[PowerPC][NFC] Add tests for vector fp <-> int conversions

This NFC patch just adds test cases for conversions that currently
require scalarization of vectors. An upcoming patch will change
the legalization for these, and it is more suitable for the review
to show the differences in code gen rather than just the new code gen.

Added:
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp_to_i_8byte_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_i_to_fp_8byte_elts.ll

Added: llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll?rev=347090&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll Fri Nov 16 12:24:10 2018
@@ -0,0 +1,1474 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P8
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P9
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE
+
+define i32 @test2elt(i64 %a.coerce) local_unnamed_addr #0 {
+; CHECK-P8-LABEL: test2elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 3
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mfvsrwz r4, f0
+; CHECK-P8-NEXT:    mfvsrwz r3, f1
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xxsldwi vs1, v2, v2, 3
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    li r3, 0
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrd f0, r3
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mfvsrwz r4, f1
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    li r3, 0
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i64 %a.coerce to <2 x float>
+  %1 = fptoui <2 x float> %0 to <2 x i16>
+  %2 = bitcast <2 x i16> %1 to i32
+  ret i32 %2
+}
+
+define i64 @test4elt(<4 x float> %a) local_unnamed_addr #1 {
+; CHECK-P8-LABEL: test4elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-P8-NEXT:    xscvspdpn f1, v2
+; CHECK-P8-NEXT:    xxswapd vs2, v2
+; CHECK-P8-NEXT:    xxsldwi vs3, v2, v2, 1
+; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xscvspdpn f2, vs2
+; CHECK-P8-NEXT:    xscvspdpn f3, vs3
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    mfvsrwz r3, f1
+; CHECK-P8-NEXT:    mtvsrd f1, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    mfvsrwz r4, f2
+; CHECK-P8-NEXT:    xxswapd v4, vs1
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f3
+; CHECK-P8-NEXT:    mtvsrd f2, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    mtvsrd f3, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs2
+; CHECK-P8-NEXT:    xxswapd v5, vs3
+; CHECK-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-P8-NEXT:    vmrglh v3, v4, v5
+; CHECK-P8-NEXT:    vmrglw v2, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test4elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-P9-NEXT:    xxswapd vs1, v2
+; CHECK-P9-NEXT:    xxsldwi vs2, v2, v2, 1
+; CHECK-P9-NEXT:    xscvspdpn f3, v2
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mfvsrwz r5, f3
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mfvsrwz r4, f1
+; CHECK-P9-NEXT:    mfvsrwz r6, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r5
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f3, r6
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xxswapd v5, vs3
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    vmrglh v3, v4, v5
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-BE-NEXT:    xxswapd vs1, v2
+; CHECK-BE-NEXT:    xxsldwi vs2, v2, v2, 1
+; CHECK-BE-NEXT:    xscvspdpn f3, v2
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mfvsrwz r5, f3
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    mfvsrwz r4, f1
+; CHECK-BE-NEXT:    mfvsrwz r6, f2
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    sldi r6, r6, 48
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    mtvsrd v5, r6
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    vmrghh v3, v4, v5
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    mfvsrd r3, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = fptoui <4 x float> %a to <4 x i16>
+  %1 = bitcast <4 x i16> %0 to i64
+  ret i64 %1
+}
+
+define <8 x i16> @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 {
+; CHECK-P8-LABEL: test8elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lvx v5, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs1, v2
+; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-P8-NEXT:    xxsldwi vs2, v5, v5, 3
+; CHECK-P8-NEXT:    xscvspdpn f4, v5
+; CHECK-P8-NEXT:    xxswapd vs3, v5
+; CHECK-P8-NEXT:    xxsldwi vs5, v5, v5, 1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xscvspdpn f2, vs2
+; CHECK-P8-NEXT:    xscvspdpn f3, vs3
+; CHECK-P8-NEXT:    xscvspdpn f5, vs5
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    mfvsrwz r4, f4
+; CHECK-P8-NEXT:    mfvsrwz r6, f1
+; CHECK-P8-NEXT:    mfvsrwz r5, f0
+; CHECK-P8-NEXT:    mtvsrd f1, r6
+; CHECK-P8-NEXT:    mtvsrd f0, r5
+; CHECK-P8-NEXT:    xxswapd v4, vs1
+; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 1
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    xscvspdpn f0, v2
+; CHECK-P8-NEXT:    mtvsrd f4, r4
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    mfvsrwz r4, f2
+; CHECK-P8-NEXT:    xxswapd v1, vs4
+; CHECK-P8-NEXT:    vmrglh v2, v4, v3
+; CHECK-P8-NEXT:    mtvsrd f2, r4
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mfvsrwz r4, f5
+; CHECK-P8-NEXT:    xxswapd v5, vs2
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f1
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    mtvsrd f1, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f3
+; CHECK-P8-NEXT:    mtvsrd f3, r4
+; CHECK-P8-NEXT:    xxswapd v4, vs1
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v6, vs3
+; CHECK-P8-NEXT:    xxswapd v0, vs0
+; CHECK-P8-NEXT:    vmrglh v3, v3, v4
+; CHECK-P8-NEXT:    vmrglh v4, v0, v5
+; CHECK-P8-NEXT:    vmrglh v5, v1, v6
+; CHECK-P8-NEXT:    vmrglw v2, v3, v2
+; CHECK-P8-NEXT:    vmrglw v3, v5, v4
+; CHECK-P8-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 16(r3)
+; CHECK-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-P9-NEXT:    xxswapd vs3, vs1
+; CHECK-P9-NEXT:    xxsldwi vs4, vs1, vs1, 1
+; CHECK-P9-NEXT:    xxsldwi vs5, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxswapd vs6, vs0
+; CHECK-P9-NEXT:    xxsldwi vs7, vs0, vs0, 1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xscvspdpn f4, vs4
+; CHECK-P9-NEXT:    xscvspdpn f5, vs5
+; CHECK-P9-NEXT:    xscvspdpn f6, vs6
+; CHECK-P9-NEXT:    xscvspdpn f7, vs7
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    xscvdpsxws f5, f5
+; CHECK-P9-NEXT:    xscvdpsxws f6, f6
+; CHECK-P9-NEXT:    xscvdpsxws f7, f7
+; CHECK-P9-NEXT:    mfvsrwz r5, f1
+; CHECK-P9-NEXT:    mfvsrwz r9, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mfvsrwz r4, f3
+; CHECK-P9-NEXT:    mfvsrwz r6, f4
+; CHECK-P9-NEXT:    mfvsrwz r7, f5
+; CHECK-P9-NEXT:    mfvsrwz r8, f6
+; CHECK-P9-NEXT:    mfvsrwz r10, f7
+; CHECK-P9-NEXT:    mtvsrd f2, r5
+; CHECK-P9-NEXT:    mtvsrd f6, r9
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f3, r6
+; CHECK-P9-NEXT:    mtvsrd f4, r7
+; CHECK-P9-NEXT:    mtvsrd f5, r8
+; CHECK-P9-NEXT:    mtvsrd f7, r10
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    xxswapd v6, vs6
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xxswapd v5, vs3
+; CHECK-P9-NEXT:    xxswapd v0, vs4
+; CHECK-P9-NEXT:    xxswapd v1, vs5
+; CHECK-P9-NEXT:    xxswapd v7, vs7
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    vmrglh v3, v4, v5
+; CHECK-P9-NEXT:    vmrglh v4, v1, v0
+; CHECK-P9-NEXT:    vmrglh v5, v6, v7
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    vmrglw v3, v5, v4
+; CHECK-P9-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-BE-NEXT:    xxswapd vs3, vs1
+; CHECK-BE-NEXT:    xxsldwi vs4, vs1, vs1, 1
+; CHECK-BE-NEXT:    xxsldwi vs5, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxswapd vs6, vs0
+; CHECK-BE-NEXT:    xxsldwi vs7, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvspdpn f5, vs5
+; CHECK-BE-NEXT:    xscvspdpn f6, vs6
+; CHECK-BE-NEXT:    xscvspdpn f7, vs7
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    xscvdpsxws f6, f6
+; CHECK-BE-NEXT:    xscvdpsxws f7, f7
+; CHECK-BE-NEXT:    mfvsrwz r5, f1
+; CHECK-BE-NEXT:    mfvsrwz r9, f0
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    sldi r9, r9, 48
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    mfvsrwz r4, f3
+; CHECK-BE-NEXT:    mfvsrwz r6, f4
+; CHECK-BE-NEXT:    mfvsrwz r7, f5
+; CHECK-BE-NEXT:    mfvsrwz r8, f6
+; CHECK-BE-NEXT:    mfvsrwz r10, f7
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    mtvsrd v6, r9
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    sldi r6, r6, 48
+; CHECK-BE-NEXT:    sldi r7, r7, 48
+; CHECK-BE-NEXT:    sldi r8, r8, 48
+; CHECK-BE-NEXT:    sldi r10, r10, 48
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    mtvsrd v5, r6
+; CHECK-BE-NEXT:    mtvsrd v0, r7
+; CHECK-BE-NEXT:    mtvsrd v1, r8
+; CHECK-BE-NEXT:    mtvsrd v7, r10
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    vmrghh v3, v4, v5
+; CHECK-BE-NEXT:    vmrghh v4, v1, v0
+; CHECK-BE-NEXT:    vmrghh v5, v6, v7
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    vmrghw v3, v5, v4
+; CHECK-BE-NEXT:    xxmrghd v2, v3, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <8 x float>, <8 x float>* %0, align 32
+  %1 = fptoui <8 x float> %a to <8 x i16>
+  ret <8 x i16> %1
+}
+
+define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x float>* nocapture readonly) local_unnamed_addr #3 {
+; CHECK-P8-LABEL: test16elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    lvx v5, 0, r4
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lvx v2, r4, r6
+; CHECK-P8-NEXT:    lvx v3, r4, r5
+; CHECK-P8-NEXT:    li r6, 48
+; CHECK-P8-NEXT:    xscvspdpn f0, v5
+; CHECK-P8-NEXT:    xxsldwi vs1, v5, v5, 3
+; CHECK-P8-NEXT:    lvx v4, r4, r6
+; CHECK-P8-NEXT:    xscvspdpn f4, v2
+; CHECK-P8-NEXT:    xxsldwi vs5, v5, v5, 1
+; CHECK-P8-NEXT:    xscvspdpn f2, v3
+; CHECK-P8-NEXT:    xxswapd vs3, v5
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xxswapd vs8, v3
+; CHECK-P8-NEXT:    xscvspdpn f6, v4
+; CHECK-P8-NEXT:    xxsldwi vs7, v3, v3, 3
+; CHECK-P8-NEXT:    xscvspdpn f5, vs5
+; CHECK-P8-NEXT:    xxsldwi vs10, v2, v2, 3
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xxsldwi vs9, v3, v3, 1
+; CHECK-P8-NEXT:    xscvspdpn f3, vs3
+; CHECK-P8-NEXT:    xxsldwi vs12, v2, v2, 1
+; CHECK-P8-NEXT:    xscvspdpn f8, vs8
+; CHECK-P8-NEXT:    xxswapd vs11, v2
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    xxswapd v2, v4
+; CHECK-P8-NEXT:    xscvspdpn f7, vs7
+; CHECK-P8-NEXT:    xxsldwi vs13, v4, v4, 3
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xxsldwi v3, v4, v4, 1
+; CHECK-P8-NEXT:    xscvspdpn f10, vs10
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvspdpn f9, vs9
+; CHECK-P8-NEXT:    xscvdpsxws f6, f6
+; CHECK-P8-NEXT:    xscvspdpn f12, vs12
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    mfvsrwz r4, f0
+; CHECK-P8-NEXT:    xscvspdpn f11, vs11
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    xscvspdpn v2, v2
+; CHECK-P8-NEXT:    xscvdpsxws f8, f8
+; CHECK-P8-NEXT:    mtvsrd f0, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f4
+; CHECK-P8-NEXT:    xscvdpsxws f7, f7
+; CHECK-P8-NEXT:    mfvsrwz r6, f2
+; CHECK-P8-NEXT:    xscvspdpn f13, vs13
+; CHECK-P8-NEXT:    xscvspdpn v3, v3
+; CHECK-P8-NEXT:    xscvdpsxws f10, f10
+; CHECK-P8-NEXT:    mtvsrd f4, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f1
+; CHECK-P8-NEXT:    xscvdpsxws f9, f9
+; CHECK-P8-NEXT:    mtvsrd f2, r6
+; CHECK-P8-NEXT:    mfvsrwz r6, f6
+; CHECK-P8-NEXT:    xscvdpsxws f12, f12
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f5
+; CHECK-P8-NEXT:    xscvdpsxws f11, f11
+; CHECK-P8-NEXT:    xxswapd v4, vs1
+; CHECK-P8-NEXT:    mtvsrd f6, r6
+; CHECK-P8-NEXT:    mfvsrwz r6, f3
+; CHECK-P8-NEXT:    xscvdpsxws v2, v2
+; CHECK-P8-NEXT:    xxswapd v9, vs6
+; CHECK-P8-NEXT:    mtvsrd f5, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f8
+; CHECK-P8-NEXT:    mtvsrd f3, r6
+; CHECK-P8-NEXT:    xxswapd v0, vs5
+; CHECK-P8-NEXT:    mfvsrwz r6, f7
+; CHECK-P8-NEXT:    xscvdpsxws f13, f13
+; CHECK-P8-NEXT:    xxswapd v5, vs3
+; CHECK-P8-NEXT:    xscvdpsxws v3, v3
+; CHECK-P8-NEXT:    mtvsrd f8, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f10
+; CHECK-P8-NEXT:    mtvsrd f7, r6
+; CHECK-P8-NEXT:    mfvsrwz r6, f9
+; CHECK-P8-NEXT:    mtvsrd f10, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f12
+; CHECK-P8-NEXT:    mtvsrd f9, r6
+; CHECK-P8-NEXT:    xxswapd v6, vs10
+; CHECK-P8-NEXT:    mfvsrwz r6, f11
+; CHECK-P8-NEXT:    mtvsrd f12, r4
+; CHECK-P8-NEXT:    xxswapd v1, vs9
+; CHECK-P8-NEXT:    mfvsrwz r4, v2
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    mtvsrd f11, r6
+; CHECK-P8-NEXT:    mfvsrwz r6, f13
+; CHECK-P8-NEXT:    mtvsrd f0, r4
+; CHECK-P8-NEXT:    xxswapd v7, vs11
+; CHECK-P8-NEXT:    mfvsrwz r4, v3
+; CHECK-P8-NEXT:    vmrglh v3, v5, v4
+; CHECK-P8-NEXT:    xxswapd v4, vs7
+; CHECK-P8-NEXT:    vmrglh v2, v2, v0
+; CHECK-P8-NEXT:    xxswapd v5, vs8
+; CHECK-P8-NEXT:    xxswapd v0, vs2
+; CHECK-P8-NEXT:    mtvsrd f13, r6
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    xxswapd v8, vs0
+; CHECK-P8-NEXT:    vmrglh v4, v5, v4
+; CHECK-P8-NEXT:    vmrglh v5, v0, v1
+; CHECK-P8-NEXT:    xxswapd v1, vs4
+; CHECK-P8-NEXT:    vmrglh v0, v7, v6
+; CHECK-P8-NEXT:    xxswapd v6, vs12
+; CHECK-P8-NEXT:    xxswapd v7, vs13
+; CHECK-P8-NEXT:    xxswapd v10, vs1
+; CHECK-P8-NEXT:    vmrglw v2, v2, v3
+; CHECK-P8-NEXT:    vmrglh v1, v1, v6
+; CHECK-P8-NEXT:    vmrglh v6, v8, v7
+; CHECK-P8-NEXT:    vmrglh v7, v9, v10
+; CHECK-P8-NEXT:    vmrglw v3, v5, v4
+; CHECK-P8-NEXT:    vmrglw v4, v1, v0
+; CHECK-P8-NEXT:    vmrglw v5, v7, v6
+; CHECK-P8-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P8-NEXT:    stvx v2, 0, r3
+; CHECK-P8-NEXT:    xxmrgld v3, v5, v4
+; CHECK-P8-NEXT:    stvx v3, r3, r5
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs2, 16(r4)
+; CHECK-P9-NEXT:    lxv vs3, 0(r4)
+; CHECK-P9-NEXT:    lxv vs0, 48(r4)
+; CHECK-P9-NEXT:    lxv vs1, 32(r4)
+; CHECK-P9-NEXT:    std r25, -56(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    xxsldwi vs4, vs3, vs3, 3
+; CHECK-P9-NEXT:    xxswapd vs5, vs3
+; CHECK-P9-NEXT:    xxsldwi vs6, vs3, vs3, 1
+; CHECK-P9-NEXT:    xxsldwi vs7, vs2, vs2, 3
+; CHECK-P9-NEXT:    xxswapd vs8, vs2
+; CHECK-P9-NEXT:    xxsldwi vs9, vs2, vs2, 1
+; CHECK-P9-NEXT:    xxsldwi vs10, vs1, vs1, 3
+; CHECK-P9-NEXT:    xxswapd vs11, vs1
+; CHECK-P9-NEXT:    xxsldwi vs12, vs1, vs1, 1
+; CHECK-P9-NEXT:    xxsldwi vs13, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 1
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvspdpn f4, vs4
+; CHECK-P9-NEXT:    xscvspdpn f5, vs5
+; CHECK-P9-NEXT:    xscvspdpn f6, vs6
+; CHECK-P9-NEXT:    xscvspdpn f7, vs7
+; CHECK-P9-NEXT:    xscvspdpn f8, vs8
+; CHECK-P9-NEXT:    xscvspdpn f9, vs9
+; CHECK-P9-NEXT:    xscvspdpn f10, vs10
+; CHECK-P9-NEXT:    xscvspdpn f11, vs11
+; CHECK-P9-NEXT:    xscvspdpn f12, vs12
+; CHECK-P9-NEXT:    xscvspdpn f13, vs13
+; CHECK-P9-NEXT:    xscvspdpn v2, v2
+; CHECK-P9-NEXT:    xscvspdpn v3, v3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    xscvdpsxws f5, f5
+; CHECK-P9-NEXT:    xscvdpsxws f6, f6
+; CHECK-P9-NEXT:    xscvdpsxws f7, f7
+; CHECK-P9-NEXT:    xscvdpsxws f8, f8
+; CHECK-P9-NEXT:    xscvdpsxws f9, f9
+; CHECK-P9-NEXT:    xscvdpsxws f10, f10
+; CHECK-P9-NEXT:    xscvdpsxws f11, f11
+; CHECK-P9-NEXT:    xscvdpsxws f12, f12
+; CHECK-P9-NEXT:    xscvdpsxws f13, f13
+; CHECK-P9-NEXT:    xscvdpsxws v2, v2
+; CHECK-P9-NEXT:    xscvdpsxws v3, v3
+; CHECK-P9-NEXT:    mfvsrwz r4, f3
+; CHECK-P9-NEXT:    mfvsrwz r5, f2
+; CHECK-P9-NEXT:    mfvsrwz r12, f1
+; CHECK-P9-NEXT:    mfvsrwz r0, f0
+; CHECK-P9-NEXT:    mfvsrwz r6, f4
+; CHECK-P9-NEXT:    mfvsrwz r7, f5
+; CHECK-P9-NEXT:    mfvsrwz r8, f6
+; CHECK-P9-NEXT:    mfvsrwz r9, f7
+; CHECK-P9-NEXT:    mfvsrwz r10, f8
+; CHECK-P9-NEXT:    mfvsrwz r11, f9
+; CHECK-P9-NEXT:    mfvsrwz r30, f10
+; CHECK-P9-NEXT:    mfvsrwz r29, f11
+; CHECK-P9-NEXT:    mfvsrwz r28, f12
+; CHECK-P9-NEXT:    mfvsrwz r27, f13
+; CHECK-P9-NEXT:    mfvsrwz r26, v2
+; CHECK-P9-NEXT:    mfvsrwz r25, v3
+; CHECK-P9-NEXT:    mtvsrd f0, r4
+; CHECK-P9-NEXT:    mtvsrd f1, r5
+; CHECK-P9-NEXT:    mtvsrd f8, r12
+; CHECK-P9-NEXT:    mtvsrd f9, r0
+; CHECK-P9-NEXT:    mtvsrd f2, r6
+; CHECK-P9-NEXT:    mtvsrd f3, r7
+; CHECK-P9-NEXT:    mtvsrd f4, r8
+; CHECK-P9-NEXT:    mtvsrd f5, r9
+; CHECK-P9-NEXT:    mtvsrd f6, r10
+; CHECK-P9-NEXT:    mtvsrd f7, r11
+; CHECK-P9-NEXT:    mtvsrd f10, r30
+; CHECK-P9-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrd f11, r29
+; CHECK-P9-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrd f12, r28
+; CHECK-P9-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrd f13, r27
+; CHECK-P9-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrd v2, r26
+; CHECK-P9-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrd v3, r25
+; CHECK-P9-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xxswapd v5, vs2
+; CHECK-P9-NEXT:    xxswapd v0, vs3
+; CHECK-P9-NEXT:    xxswapd v1, vs4
+; CHECK-P9-NEXT:    xxswapd v6, vs5
+; CHECK-P9-NEXT:    xxswapd v7, vs6
+; CHECK-P9-NEXT:    xxswapd v8, vs1
+; CHECK-P9-NEXT:    xxswapd v9, vs7
+; CHECK-P9-NEXT:    xxswapd v10, vs10
+; CHECK-P9-NEXT:    xxswapd v11, vs11
+; CHECK-P9-NEXT:    xxswapd v12, vs8
+; CHECK-P9-NEXT:    xxswapd v13, vs12
+; CHECK-P9-NEXT:    xxswapd v14, vs13
+; CHECK-P9-NEXT:    xxswapd v2, v2
+; CHECK-P9-NEXT:    xxswapd v15, vs9
+; CHECK-P9-NEXT:    xxswapd v3, v3
+; CHECK-P9-NEXT:    vmrglh v5, v0, v5
+; CHECK-P9-NEXT:    vmrglh v4, v4, v1
+; CHECK-P9-NEXT:    vmrglh v0, v7, v6
+; CHECK-P9-NEXT:    vmrglh v1, v8, v9
+; CHECK-P9-NEXT:    vmrglh v6, v11, v10
+; CHECK-P9-NEXT:    vmrglh v7, v12, v13
+; CHECK-P9-NEXT:    vmrglh v2, v2, v14
+; CHECK-P9-NEXT:    vmrglh v3, v15, v3
+; CHECK-P9-NEXT:    vmrglw v4, v4, v5
+; CHECK-P9-NEXT:    vmrglw v5, v1, v0
+; CHECK-P9-NEXT:    vmrglw v0, v7, v6
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    xxmrgld vs0, v5, v4
+; CHECK-P9-NEXT:    xxmrgld vs1, v2, v0
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs2, 0(r4)
+; CHECK-BE-NEXT:    lxv vs3, 16(r4)
+; CHECK-BE-NEXT:    lxv vs0, 32(r4)
+; CHECK-BE-NEXT:    lxv vs1, 48(r4)
+; CHECK-BE-NEXT:    std r25, -56(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    xxsldwi vs4, vs3, vs3, 3
+; CHECK-BE-NEXT:    xxswapd vs5, vs3
+; CHECK-BE-NEXT:    xxsldwi vs6, vs3, vs3, 1
+; CHECK-BE-NEXT:    xxsldwi vs7, vs2, vs2, 3
+; CHECK-BE-NEXT:    xxswapd vs8, vs2
+; CHECK-BE-NEXT:    xxsldwi vs9, vs2, vs2, 1
+; CHECK-BE-NEXT:    xxsldwi vs10, vs1, vs1, 3
+; CHECK-BE-NEXT:    xxswapd vs11, vs1
+; CHECK-BE-NEXT:    xxsldwi vs12, vs1, vs1, 1
+; CHECK-BE-NEXT:    xxsldwi vs13, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxswapd v2, vs0
+; CHECK-BE-NEXT:    xxsldwi v3, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvspdpn f5, vs5
+; CHECK-BE-NEXT:    xscvspdpn f6, vs6
+; CHECK-BE-NEXT:    xscvspdpn f7, vs7
+; CHECK-BE-NEXT:    xscvspdpn f8, vs8
+; CHECK-BE-NEXT:    xscvspdpn f9, vs9
+; CHECK-BE-NEXT:    xscvspdpn f10, vs10
+; CHECK-BE-NEXT:    xscvspdpn f11, vs11
+; CHECK-BE-NEXT:    xscvspdpn f12, vs12
+; CHECK-BE-NEXT:    xscvspdpn f13, vs13
+; CHECK-BE-NEXT:    xscvspdpn v2, v2
+; CHECK-BE-NEXT:    xscvspdpn v3, v3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    xscvdpsxws f6, f6
+; CHECK-BE-NEXT:    xscvdpsxws f7, f7
+; CHECK-BE-NEXT:    xscvdpsxws f8, f8
+; CHECK-BE-NEXT:    xscvdpsxws f9, f9
+; CHECK-BE-NEXT:    xscvdpsxws f10, f10
+; CHECK-BE-NEXT:    xscvdpsxws f11, f11
+; CHECK-BE-NEXT:    xscvdpsxws f12, f12
+; CHECK-BE-NEXT:    xscvdpsxws f13, f13
+; CHECK-BE-NEXT:    xscvdpsxws v2, v2
+; CHECK-BE-NEXT:    xscvdpsxws v3, v3
+; CHECK-BE-NEXT:    mfvsrwz r4, f3
+; CHECK-BE-NEXT:    mfvsrwz r5, f2
+; CHECK-BE-NEXT:    mfvsrwz r12, f1
+; CHECK-BE-NEXT:    mfvsrwz r0, f0
+; CHECK-BE-NEXT:    mfvsrwz r6, f4
+; CHECK-BE-NEXT:    mfvsrwz r7, f5
+; CHECK-BE-NEXT:    mfvsrwz r8, f6
+; CHECK-BE-NEXT:    mfvsrwz r9, f7
+; CHECK-BE-NEXT:    mfvsrwz r10, f8
+; CHECK-BE-NEXT:    mfvsrwz r11, f9
+; CHECK-BE-NEXT:    mfvsrwz r30, f10
+; CHECK-BE-NEXT:    mfvsrwz r29, f11
+; CHECK-BE-NEXT:    mfvsrwz r28, f12
+; CHECK-BE-NEXT:    mfvsrwz r27, f13
+; CHECK-BE-NEXT:    mfvsrwz r26, v2
+; CHECK-BE-NEXT:    mfvsrwz r25, v3
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    sldi r12, r12, 48
+; CHECK-BE-NEXT:    sldi r0, r0, 48
+; CHECK-BE-NEXT:    sldi r6, r6, 48
+; CHECK-BE-NEXT:    sldi r7, r7, 48
+; CHECK-BE-NEXT:    sldi r8, r8, 48
+; CHECK-BE-NEXT:    sldi r9, r9, 48
+; CHECK-BE-NEXT:    sldi r10, r10, 48
+; CHECK-BE-NEXT:    sldi r11, r11, 48
+; CHECK-BE-NEXT:    sldi r30, r30, 48
+; CHECK-BE-NEXT:    sldi r29, r29, 48
+; CHECK-BE-NEXT:    sldi r28, r28, 48
+; CHECK-BE-NEXT:    sldi r27, r27, 48
+; CHECK-BE-NEXT:    sldi r26, r26, 48
+; CHECK-BE-NEXT:    sldi r25, r25, 48
+; CHECK-BE-NEXT:    mtvsrd v2, r4
+; CHECK-BE-NEXT:    mtvsrd v3, r5
+; CHECK-BE-NEXT:    mtvsrd v10, r12
+; CHECK-BE-NEXT:    mtvsrd v14, r0
+; CHECK-BE-NEXT:    mtvsrd v4, r6
+; CHECK-BE-NEXT:    mtvsrd v5, r7
+; CHECK-BE-NEXT:    mtvsrd v0, r8
+; CHECK-BE-NEXT:    mtvsrd v1, r9
+; CHECK-BE-NEXT:    mtvsrd v6, r10
+; CHECK-BE-NEXT:    mtvsrd v7, r11
+; CHECK-BE-NEXT:    mtvsrd v8, r30
+; CHECK-BE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v9, r29
+; CHECK-BE-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v11, r28
+; CHECK-BE-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v12, r27
+; CHECK-BE-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v13, r26
+; CHECK-BE-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v15, r25
+; CHECK-BE-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    vmrghh v4, v5, v4
+; CHECK-BE-NEXT:    vmrghh v2, v2, v0
+; CHECK-BE-NEXT:    vmrghh v5, v6, v1
+; CHECK-BE-NEXT:    vmrghh v3, v3, v7
+; CHECK-BE-NEXT:    vmrghh v0, v9, v8
+; CHECK-BE-NEXT:    vmrghh v1, v10, v11
+; CHECK-BE-NEXT:    vmrghh v6, v13, v12
+; CHECK-BE-NEXT:    vmrghh v7, v14, v15
+; CHECK-BE-NEXT:    vmrghw v2, v2, v4
+; CHECK-BE-NEXT:    vmrghw v3, v3, v5
+; CHECK-BE-NEXT:    vmrghw v4, v1, v0
+; CHECK-BE-NEXT:    vmrghw v5, v7, v6
+; CHECK-BE-NEXT:    xxmrghd vs0, v3, v2
+; CHECK-BE-NEXT:    xxmrghd vs1, v5, v4
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <16 x float>, <16 x float>* %0, align 64
+  %1 = fptoui <16 x float> %a to <16 x i16>
+  store <16 x i16> %1, <16 x i16>* %agg.result, align 32
+  ret void
+}
+
+define i32 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 { ; signed <2 x float> -> <2 x i16>; vector goes in as an i64 and comes back as an i32
+; CHECK-P8-LABEL: test2elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 3
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mfvsrwz r4, f0
+; CHECK-P8-NEXT:    mfvsrwz r3, f1
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xxsldwi vs1, v2, v2, 3
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    li r3, 0
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrd f0, r3
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mfvsrwz r4, f1
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    li r3, 0
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i64 %a.coerce to <2 x float> ; reinterpret the 64-bit argument as two floats
+  %1 = fptosi <2 x float> %0 to <2 x i16> ; the signed fp -> i16 conversion under test
+  %2 = bitcast <2 x i16> %1 to i32 ; pack both i16 results into the 32-bit return value
+  ret i32 %2
+}
+
+define i64 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 { ; signed <4 x float> -> <4 x i16>; the four i16 results are returned packed in an i64
+; CHECK-P8-LABEL: test4elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-P8-NEXT:    xscvspdpn f1, v2
+; CHECK-P8-NEXT:    xxswapd vs2, v2
+; CHECK-P8-NEXT:    xxsldwi vs3, v2, v2, 1
+; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xscvspdpn f2, vs2
+; CHECK-P8-NEXT:    xscvspdpn f3, vs3
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    mfvsrwz r3, f1
+; CHECK-P8-NEXT:    mtvsrd f1, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    mfvsrwz r4, f2
+; CHECK-P8-NEXT:    xxswapd v4, vs1
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f3
+; CHECK-P8-NEXT:    mtvsrd f2, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    mtvsrd f3, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs2
+; CHECK-P8-NEXT:    xxswapd v5, vs3
+; CHECK-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-P8-NEXT:    vmrglh v3, v4, v5
+; CHECK-P8-NEXT:    vmrglw v2, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test4elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-P9-NEXT:    xxswapd vs1, v2
+; CHECK-P9-NEXT:    xxsldwi vs2, v2, v2, 1
+; CHECK-P9-NEXT:    xscvspdpn f3, v2
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mfvsrwz r5, f3
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mfvsrwz r4, f1
+; CHECK-P9-NEXT:    mfvsrwz r6, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r5
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f3, r6
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xxswapd v5, vs3
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    vmrglh v3, v4, v5
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-BE-NEXT:    xxswapd vs1, v2
+; CHECK-BE-NEXT:    xxsldwi vs2, v2, v2, 1
+; CHECK-BE-NEXT:    xscvspdpn f3, v2
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mfvsrwz r5, f3
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    mfvsrwz r4, f1
+; CHECK-BE-NEXT:    mfvsrwz r6, f2
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    sldi r6, r6, 48
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    mtvsrd v5, r6
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    vmrghh v3, v4, v5
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    mfvsrd r3, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = fptosi <4 x float> %a to <4 x i16> ; the signed fp -> i16 conversion under test
+  %1 = bitcast <4 x i16> %0 to i64 ; pack the four i16 results into the 64-bit return value
+  ret i64 %1
+}
+
+define <8 x i16> @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr #2 { ; signed <8 x float> -> <8 x i16>; input is loaded through the unnamed pointer argument
+; CHECK-P8-LABEL: test8elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lvx v5, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs1, v2
+; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-P8-NEXT:    xxsldwi vs2, v5, v5, 3
+; CHECK-P8-NEXT:    xscvspdpn f4, v5
+; CHECK-P8-NEXT:    xxswapd vs3, v5
+; CHECK-P8-NEXT:    xxsldwi vs5, v5, v5, 1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xscvspdpn f2, vs2
+; CHECK-P8-NEXT:    xscvspdpn f3, vs3
+; CHECK-P8-NEXT:    xscvspdpn f5, vs5
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    mfvsrwz r4, f4
+; CHECK-P8-NEXT:    mfvsrwz r6, f1
+; CHECK-P8-NEXT:    mfvsrwz r5, f0
+; CHECK-P8-NEXT:    mtvsrd f1, r6
+; CHECK-P8-NEXT:    mtvsrd f0, r5
+; CHECK-P8-NEXT:    xxswapd v4, vs1
+; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 1
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    xscvspdpn f0, v2
+; CHECK-P8-NEXT:    mtvsrd f4, r4
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    mfvsrwz r4, f2
+; CHECK-P8-NEXT:    xxswapd v1, vs4
+; CHECK-P8-NEXT:    vmrglh v2, v4, v3
+; CHECK-P8-NEXT:    mtvsrd f2, r4
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mfvsrwz r4, f5
+; CHECK-P8-NEXT:    xxswapd v5, vs2
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f1
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    mtvsrd f1, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f3
+; CHECK-P8-NEXT:    mtvsrd f3, r4
+; CHECK-P8-NEXT:    xxswapd v4, vs1
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v6, vs3
+; CHECK-P8-NEXT:    xxswapd v0, vs0
+; CHECK-P8-NEXT:    vmrglh v3, v3, v4
+; CHECK-P8-NEXT:    vmrglh v4, v0, v5
+; CHECK-P8-NEXT:    vmrglh v5, v1, v6
+; CHECK-P8-NEXT:    vmrglw v2, v3, v2
+; CHECK-P8-NEXT:    vmrglw v3, v5, v4
+; CHECK-P8-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 16(r3)
+; CHECK-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-P9-NEXT:    xxswapd vs3, vs1
+; CHECK-P9-NEXT:    xxsldwi vs4, vs1, vs1, 1
+; CHECK-P9-NEXT:    xxsldwi vs5, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxswapd vs6, vs0
+; CHECK-P9-NEXT:    xxsldwi vs7, vs0, vs0, 1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xscvspdpn f4, vs4
+; CHECK-P9-NEXT:    xscvspdpn f5, vs5
+; CHECK-P9-NEXT:    xscvspdpn f6, vs6
+; CHECK-P9-NEXT:    xscvspdpn f7, vs7
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    xscvdpsxws f5, f5
+; CHECK-P9-NEXT:    xscvdpsxws f6, f6
+; CHECK-P9-NEXT:    xscvdpsxws f7, f7
+; CHECK-P9-NEXT:    mfvsrwz r5, f1
+; CHECK-P9-NEXT:    mfvsrwz r9, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mfvsrwz r4, f3
+; CHECK-P9-NEXT:    mfvsrwz r6, f4
+; CHECK-P9-NEXT:    mfvsrwz r7, f5
+; CHECK-P9-NEXT:    mfvsrwz r8, f6
+; CHECK-P9-NEXT:    mfvsrwz r10, f7
+; CHECK-P9-NEXT:    mtvsrd f2, r5
+; CHECK-P9-NEXT:    mtvsrd f6, r9
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f3, r6
+; CHECK-P9-NEXT:    mtvsrd f4, r7
+; CHECK-P9-NEXT:    mtvsrd f5, r8
+; CHECK-P9-NEXT:    mtvsrd f7, r10
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    xxswapd v6, vs6
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xxswapd v5, vs3
+; CHECK-P9-NEXT:    xxswapd v0, vs4
+; CHECK-P9-NEXT:    xxswapd v1, vs5
+; CHECK-P9-NEXT:    xxswapd v7, vs7
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    vmrglh v3, v4, v5
+; CHECK-P9-NEXT:    vmrglh v4, v1, v0
+; CHECK-P9-NEXT:    vmrglh v5, v6, v7
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    vmrglw v3, v5, v4
+; CHECK-P9-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-BE-NEXT:    xxswapd vs3, vs1
+; CHECK-BE-NEXT:    xxsldwi vs4, vs1, vs1, 1
+; CHECK-BE-NEXT:    xxsldwi vs5, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxswapd vs6, vs0
+; CHECK-BE-NEXT:    xxsldwi vs7, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvspdpn f5, vs5
+; CHECK-BE-NEXT:    xscvspdpn f6, vs6
+; CHECK-BE-NEXT:    xscvspdpn f7, vs7
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    xscvdpsxws f6, f6
+; CHECK-BE-NEXT:    xscvdpsxws f7, f7
+; CHECK-BE-NEXT:    mfvsrwz r5, f1
+; CHECK-BE-NEXT:    mfvsrwz r9, f0
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    sldi r9, r9, 48
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    mfvsrwz r4, f3
+; CHECK-BE-NEXT:    mfvsrwz r6, f4
+; CHECK-BE-NEXT:    mfvsrwz r7, f5
+; CHECK-BE-NEXT:    mfvsrwz r8, f6
+; CHECK-BE-NEXT:    mfvsrwz r10, f7
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    mtvsrd v6, r9
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    sldi r6, r6, 48
+; CHECK-BE-NEXT:    sldi r7, r7, 48
+; CHECK-BE-NEXT:    sldi r8, r8, 48
+; CHECK-BE-NEXT:    sldi r10, r10, 48
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    mtvsrd v5, r6
+; CHECK-BE-NEXT:    mtvsrd v0, r7
+; CHECK-BE-NEXT:    mtvsrd v1, r8
+; CHECK-BE-NEXT:    mtvsrd v7, r10
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    vmrghh v3, v4, v5
+; CHECK-BE-NEXT:    vmrghh v4, v1, v0
+; CHECK-BE-NEXT:    vmrghh v5, v6, v7
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    vmrghw v3, v5, v4
+; CHECK-BE-NEXT:    xxmrghd v2, v3, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <8 x float>, <8 x float>* %0, align 32 ; %0 is the unnamed pointer parameter
+  %1 = fptosi <8 x float> %a to <8 x i16> ; the signed fp -> i16 conversion under test
+  ret <8 x i16> %1
+}
+
+define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <16 x float>* nocapture readonly) local_unnamed_addr #3 { ; signed <16 x float> -> <16 x i16>; input is loaded from the unnamed pointer, result is stored through the sret pointer
+; CHECK-P8-LABEL: test16elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    lvx v5, 0, r4
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lvx v2, r4, r6
+; CHECK-P8-NEXT:    lvx v3, r4, r5
+; CHECK-P8-NEXT:    li r6, 48
+; CHECK-P8-NEXT:    xscvspdpn f0, v5
+; CHECK-P8-NEXT:    xxsldwi vs1, v5, v5, 3
+; CHECK-P8-NEXT:    lvx v4, r4, r6
+; CHECK-P8-NEXT:    xscvspdpn f4, v2
+; CHECK-P8-NEXT:    xxsldwi vs5, v5, v5, 1
+; CHECK-P8-NEXT:    xscvspdpn f2, v3
+; CHECK-P8-NEXT:    xxswapd vs3, v5
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xxswapd vs8, v3
+; CHECK-P8-NEXT:    xscvspdpn f6, v4
+; CHECK-P8-NEXT:    xxsldwi vs7, v3, v3, 3
+; CHECK-P8-NEXT:    xscvspdpn f5, vs5
+; CHECK-P8-NEXT:    xxsldwi vs10, v2, v2, 3
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xxsldwi vs9, v3, v3, 1
+; CHECK-P8-NEXT:    xscvspdpn f3, vs3
+; CHECK-P8-NEXT:    xxsldwi vs12, v2, v2, 1
+; CHECK-P8-NEXT:    xscvspdpn f8, vs8
+; CHECK-P8-NEXT:    xxswapd vs11, v2
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    xxswapd v2, v4
+; CHECK-P8-NEXT:    xscvspdpn f7, vs7
+; CHECK-P8-NEXT:    xxsldwi vs13, v4, v4, 3
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xxsldwi v3, v4, v4, 1
+; CHECK-P8-NEXT:    xscvspdpn f10, vs10
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvspdpn f9, vs9
+; CHECK-P8-NEXT:    xscvdpsxws f6, f6
+; CHECK-P8-NEXT:    xscvspdpn f12, vs12
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    mfvsrwz r4, f0
+; CHECK-P8-NEXT:    xscvspdpn f11, vs11
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    xscvspdpn v2, v2
+; CHECK-P8-NEXT:    xscvdpsxws f8, f8
+; CHECK-P8-NEXT:    mtvsrd f0, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f4
+; CHECK-P8-NEXT:    xscvdpsxws f7, f7
+; CHECK-P8-NEXT:    mfvsrwz r6, f2
+; CHECK-P8-NEXT:    xscvspdpn f13, vs13
+; CHECK-P8-NEXT:    xscvspdpn v3, v3
+; CHECK-P8-NEXT:    xscvdpsxws f10, f10
+; CHECK-P8-NEXT:    mtvsrd f4, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f1
+; CHECK-P8-NEXT:    xscvdpsxws f9, f9
+; CHECK-P8-NEXT:    mtvsrd f2, r6
+; CHECK-P8-NEXT:    mfvsrwz r6, f6
+; CHECK-P8-NEXT:    xscvdpsxws f12, f12
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f5
+; CHECK-P8-NEXT:    xscvdpsxws f11, f11
+; CHECK-P8-NEXT:    xxswapd v4, vs1
+; CHECK-P8-NEXT:    mtvsrd f6, r6
+; CHECK-P8-NEXT:    mfvsrwz r6, f3
+; CHECK-P8-NEXT:    xscvdpsxws v2, v2
+; CHECK-P8-NEXT:    xxswapd v9, vs6
+; CHECK-P8-NEXT:    mtvsrd f5, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f8
+; CHECK-P8-NEXT:    mtvsrd f3, r6
+; CHECK-P8-NEXT:    xxswapd v0, vs5
+; CHECK-P8-NEXT:    mfvsrwz r6, f7
+; CHECK-P8-NEXT:    xscvdpsxws f13, f13
+; CHECK-P8-NEXT:    xxswapd v5, vs3
+; CHECK-P8-NEXT:    xscvdpsxws v3, v3
+; CHECK-P8-NEXT:    mtvsrd f8, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f10
+; CHECK-P8-NEXT:    mtvsrd f7, r6
+; CHECK-P8-NEXT:    mfvsrwz r6, f9
+; CHECK-P8-NEXT:    mtvsrd f10, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f12
+; CHECK-P8-NEXT:    mtvsrd f9, r6
+; CHECK-P8-NEXT:    xxswapd v6, vs10
+; CHECK-P8-NEXT:    mfvsrwz r6, f11
+; CHECK-P8-NEXT:    mtvsrd f12, r4
+; CHECK-P8-NEXT:    xxswapd v1, vs9
+; CHECK-P8-NEXT:    mfvsrwz r4, v2
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    mtvsrd f11, r6
+; CHECK-P8-NEXT:    mfvsrwz r6, f13
+; CHECK-P8-NEXT:    mtvsrd f0, r4
+; CHECK-P8-NEXT:    xxswapd v7, vs11
+; CHECK-P8-NEXT:    mfvsrwz r4, v3
+; CHECK-P8-NEXT:    vmrglh v3, v5, v4
+; CHECK-P8-NEXT:    xxswapd v4, vs7
+; CHECK-P8-NEXT:    vmrglh v2, v2, v0
+; CHECK-P8-NEXT:    xxswapd v5, vs8
+; CHECK-P8-NEXT:    xxswapd v0, vs2
+; CHECK-P8-NEXT:    mtvsrd f13, r6
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    xxswapd v8, vs0
+; CHECK-P8-NEXT:    vmrglh v4, v5, v4
+; CHECK-P8-NEXT:    vmrglh v5, v0, v1
+; CHECK-P8-NEXT:    xxswapd v1, vs4
+; CHECK-P8-NEXT:    vmrglh v0, v7, v6
+; CHECK-P8-NEXT:    xxswapd v6, vs12
+; CHECK-P8-NEXT:    xxswapd v7, vs13
+; CHECK-P8-NEXT:    xxswapd v10, vs1
+; CHECK-P8-NEXT:    vmrglw v2, v2, v3
+; CHECK-P8-NEXT:    vmrglh v1, v1, v6
+; CHECK-P8-NEXT:    vmrglh v6, v8, v7
+; CHECK-P8-NEXT:    vmrglh v7, v9, v10
+; CHECK-P8-NEXT:    vmrglw v3, v5, v4
+; CHECK-P8-NEXT:    vmrglw v4, v1, v0
+; CHECK-P8-NEXT:    vmrglw v5, v7, v6
+; CHECK-P8-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P8-NEXT:    stvx v2, 0, r3
+; CHECK-P8-NEXT:    xxmrgld v3, v5, v4
+; CHECK-P8-NEXT:    stvx v3, r3, r5
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs2, 16(r4)
+; CHECK-P9-NEXT:    lxv vs3, 0(r4)
+; CHECK-P9-NEXT:    lxv vs0, 48(r4)
+; CHECK-P9-NEXT:    lxv vs1, 32(r4)
+; CHECK-P9-NEXT:    std r25, -56(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    xxsldwi vs4, vs3, vs3, 3
+; CHECK-P9-NEXT:    xxswapd vs5, vs3
+; CHECK-P9-NEXT:    xxsldwi vs6, vs3, vs3, 1
+; CHECK-P9-NEXT:    xxsldwi vs7, vs2, vs2, 3
+; CHECK-P9-NEXT:    xxswapd vs8, vs2
+; CHECK-P9-NEXT:    xxsldwi vs9, vs2, vs2, 1
+; CHECK-P9-NEXT:    xxsldwi vs10, vs1, vs1, 3
+; CHECK-P9-NEXT:    xxswapd vs11, vs1
+; CHECK-P9-NEXT:    xxsldwi vs12, vs1, vs1, 1
+; CHECK-P9-NEXT:    xxsldwi vs13, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 1
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvspdpn f4, vs4
+; CHECK-P9-NEXT:    xscvspdpn f5, vs5
+; CHECK-P9-NEXT:    xscvspdpn f6, vs6
+; CHECK-P9-NEXT:    xscvspdpn f7, vs7
+; CHECK-P9-NEXT:    xscvspdpn f8, vs8
+; CHECK-P9-NEXT:    xscvspdpn f9, vs9
+; CHECK-P9-NEXT:    xscvspdpn f10, vs10
+; CHECK-P9-NEXT:    xscvspdpn f11, vs11
+; CHECK-P9-NEXT:    xscvspdpn f12, vs12
+; CHECK-P9-NEXT:    xscvspdpn f13, vs13
+; CHECK-P9-NEXT:    xscvspdpn v2, v2
+; CHECK-P9-NEXT:    xscvspdpn v3, v3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    xscvdpsxws f5, f5
+; CHECK-P9-NEXT:    xscvdpsxws f6, f6
+; CHECK-P9-NEXT:    xscvdpsxws f7, f7
+; CHECK-P9-NEXT:    xscvdpsxws f8, f8
+; CHECK-P9-NEXT:    xscvdpsxws f9, f9
+; CHECK-P9-NEXT:    xscvdpsxws f10, f10
+; CHECK-P9-NEXT:    xscvdpsxws f11, f11
+; CHECK-P9-NEXT:    xscvdpsxws f12, f12
+; CHECK-P9-NEXT:    xscvdpsxws f13, f13
+; CHECK-P9-NEXT:    xscvdpsxws v2, v2
+; CHECK-P9-NEXT:    xscvdpsxws v3, v3
+; CHECK-P9-NEXT:    mfvsrwz r4, f3
+; CHECK-P9-NEXT:    mfvsrwz r5, f2
+; CHECK-P9-NEXT:    mfvsrwz r12, f1
+; CHECK-P9-NEXT:    mfvsrwz r0, f0
+; CHECK-P9-NEXT:    mfvsrwz r6, f4
+; CHECK-P9-NEXT:    mfvsrwz r7, f5
+; CHECK-P9-NEXT:    mfvsrwz r8, f6
+; CHECK-P9-NEXT:    mfvsrwz r9, f7
+; CHECK-P9-NEXT:    mfvsrwz r10, f8
+; CHECK-P9-NEXT:    mfvsrwz r11, f9
+; CHECK-P9-NEXT:    mfvsrwz r30, f10
+; CHECK-P9-NEXT:    mfvsrwz r29, f11
+; CHECK-P9-NEXT:    mfvsrwz r28, f12
+; CHECK-P9-NEXT:    mfvsrwz r27, f13
+; CHECK-P9-NEXT:    mfvsrwz r26, v2
+; CHECK-P9-NEXT:    mfvsrwz r25, v3
+; CHECK-P9-NEXT:    mtvsrd f0, r4
+; CHECK-P9-NEXT:    mtvsrd f1, r5
+; CHECK-P9-NEXT:    mtvsrd f8, r12
+; CHECK-P9-NEXT:    mtvsrd f9, r0
+; CHECK-P9-NEXT:    mtvsrd f2, r6
+; CHECK-P9-NEXT:    mtvsrd f3, r7
+; CHECK-P9-NEXT:    mtvsrd f4, r8
+; CHECK-P9-NEXT:    mtvsrd f5, r9
+; CHECK-P9-NEXT:    mtvsrd f6, r10
+; CHECK-P9-NEXT:    mtvsrd f7, r11
+; CHECK-P9-NEXT:    mtvsrd f10, r30
+; CHECK-P9-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrd f11, r29
+; CHECK-P9-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrd f12, r28
+; CHECK-P9-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrd f13, r27
+; CHECK-P9-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrd v2, r26
+; CHECK-P9-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrd v3, r25
+; CHECK-P9-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xxswapd v5, vs2
+; CHECK-P9-NEXT:    xxswapd v0, vs3
+; CHECK-P9-NEXT:    xxswapd v1, vs4
+; CHECK-P9-NEXT:    xxswapd v6, vs5
+; CHECK-P9-NEXT:    xxswapd v7, vs6
+; CHECK-P9-NEXT:    xxswapd v8, vs1
+; CHECK-P9-NEXT:    xxswapd v9, vs7
+; CHECK-P9-NEXT:    xxswapd v10, vs10
+; CHECK-P9-NEXT:    xxswapd v11, vs11
+; CHECK-P9-NEXT:    xxswapd v12, vs8
+; CHECK-P9-NEXT:    xxswapd v13, vs12
+; CHECK-P9-NEXT:    xxswapd v14, vs13
+; CHECK-P9-NEXT:    xxswapd v2, v2
+; CHECK-P9-NEXT:    xxswapd v15, vs9
+; CHECK-P9-NEXT:    xxswapd v3, v3
+; CHECK-P9-NEXT:    vmrglh v5, v0, v5
+; CHECK-P9-NEXT:    vmrglh v4, v4, v1
+; CHECK-P9-NEXT:    vmrglh v0, v7, v6
+; CHECK-P9-NEXT:    vmrglh v1, v8, v9
+; CHECK-P9-NEXT:    vmrglh v6, v11, v10
+; CHECK-P9-NEXT:    vmrglh v7, v12, v13
+; CHECK-P9-NEXT:    vmrglh v2, v2, v14
+; CHECK-P9-NEXT:    vmrglh v3, v15, v3
+; CHECK-P9-NEXT:    vmrglw v4, v4, v5
+; CHECK-P9-NEXT:    vmrglw v5, v1, v0
+; CHECK-P9-NEXT:    vmrglw v0, v7, v6
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    xxmrgld vs0, v5, v4
+; CHECK-P9-NEXT:    xxmrgld vs1, v2, v0
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs2, 0(r4)
+; CHECK-BE-NEXT:    lxv vs3, 16(r4)
+; CHECK-BE-NEXT:    lxv vs0, 32(r4)
+; CHECK-BE-NEXT:    lxv vs1, 48(r4)
+; CHECK-BE-NEXT:    std r25, -56(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    xxsldwi vs4, vs3, vs3, 3
+; CHECK-BE-NEXT:    xxswapd vs5, vs3
+; CHECK-BE-NEXT:    xxsldwi vs6, vs3, vs3, 1
+; CHECK-BE-NEXT:    xxsldwi vs7, vs2, vs2, 3
+; CHECK-BE-NEXT:    xxswapd vs8, vs2
+; CHECK-BE-NEXT:    xxsldwi vs9, vs2, vs2, 1
+; CHECK-BE-NEXT:    xxsldwi vs10, vs1, vs1, 3
+; CHECK-BE-NEXT:    xxswapd vs11, vs1
+; CHECK-BE-NEXT:    xxsldwi vs12, vs1, vs1, 1
+; CHECK-BE-NEXT:    xxsldwi vs13, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxswapd v2, vs0
+; CHECK-BE-NEXT:    xxsldwi v3, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvspdpn f5, vs5
+; CHECK-BE-NEXT:    xscvspdpn f6, vs6
+; CHECK-BE-NEXT:    xscvspdpn f7, vs7
+; CHECK-BE-NEXT:    xscvspdpn f8, vs8
+; CHECK-BE-NEXT:    xscvspdpn f9, vs9
+; CHECK-BE-NEXT:    xscvspdpn f10, vs10
+; CHECK-BE-NEXT:    xscvspdpn f11, vs11
+; CHECK-BE-NEXT:    xscvspdpn f12, vs12
+; CHECK-BE-NEXT:    xscvspdpn f13, vs13
+; CHECK-BE-NEXT:    xscvspdpn v2, v2
+; CHECK-BE-NEXT:    xscvspdpn v3, v3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    xscvdpsxws f6, f6
+; CHECK-BE-NEXT:    xscvdpsxws f7, f7
+; CHECK-BE-NEXT:    xscvdpsxws f8, f8
+; CHECK-BE-NEXT:    xscvdpsxws f9, f9
+; CHECK-BE-NEXT:    xscvdpsxws f10, f10
+; CHECK-BE-NEXT:    xscvdpsxws f11, f11
+; CHECK-BE-NEXT:    xscvdpsxws f12, f12
+; CHECK-BE-NEXT:    xscvdpsxws f13, f13
+; CHECK-BE-NEXT:    xscvdpsxws v2, v2
+; CHECK-BE-NEXT:    xscvdpsxws v3, v3
+; CHECK-BE-NEXT:    mfvsrwz r4, f3
+; CHECK-BE-NEXT:    mfvsrwz r5, f2
+; CHECK-BE-NEXT:    mfvsrwz r12, f1
+; CHECK-BE-NEXT:    mfvsrwz r0, f0
+; CHECK-BE-NEXT:    mfvsrwz r6, f4
+; CHECK-BE-NEXT:    mfvsrwz r7, f5
+; CHECK-BE-NEXT:    mfvsrwz r8, f6
+; CHECK-BE-NEXT:    mfvsrwz r9, f7
+; CHECK-BE-NEXT:    mfvsrwz r10, f8
+; CHECK-BE-NEXT:    mfvsrwz r11, f9
+; CHECK-BE-NEXT:    mfvsrwz r30, f10
+; CHECK-BE-NEXT:    mfvsrwz r29, f11
+; CHECK-BE-NEXT:    mfvsrwz r28, f12
+; CHECK-BE-NEXT:    mfvsrwz r27, f13
+; CHECK-BE-NEXT:    mfvsrwz r26, v2
+; CHECK-BE-NEXT:    mfvsrwz r25, v3
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    sldi r12, r12, 48
+; CHECK-BE-NEXT:    sldi r0, r0, 48
+; CHECK-BE-NEXT:    sldi r6, r6, 48
+; CHECK-BE-NEXT:    sldi r7, r7, 48
+; CHECK-BE-NEXT:    sldi r8, r8, 48
+; CHECK-BE-NEXT:    sldi r9, r9, 48
+; CHECK-BE-NEXT:    sldi r10, r10, 48
+; CHECK-BE-NEXT:    sldi r11, r11, 48
+; CHECK-BE-NEXT:    sldi r30, r30, 48
+; CHECK-BE-NEXT:    sldi r29, r29, 48
+; CHECK-BE-NEXT:    sldi r28, r28, 48
+; CHECK-BE-NEXT:    sldi r27, r27, 48
+; CHECK-BE-NEXT:    sldi r26, r26, 48
+; CHECK-BE-NEXT:    sldi r25, r25, 48
+; CHECK-BE-NEXT:    mtvsrd v2, r4
+; CHECK-BE-NEXT:    mtvsrd v3, r5
+; CHECK-BE-NEXT:    mtvsrd v10, r12
+; CHECK-BE-NEXT:    mtvsrd v14, r0
+; CHECK-BE-NEXT:    mtvsrd v4, r6
+; CHECK-BE-NEXT:    mtvsrd v5, r7
+; CHECK-BE-NEXT:    mtvsrd v0, r8
+; CHECK-BE-NEXT:    mtvsrd v1, r9
+; CHECK-BE-NEXT:    mtvsrd v6, r10
+; CHECK-BE-NEXT:    mtvsrd v7, r11
+; CHECK-BE-NEXT:    mtvsrd v8, r30
+; CHECK-BE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v9, r29
+; CHECK-BE-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v11, r28
+; CHECK-BE-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v12, r27
+; CHECK-BE-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v13, r26
+; CHECK-BE-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v15, r25
+; CHECK-BE-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    vmrghh v4, v5, v4
+; CHECK-BE-NEXT:    vmrghh v2, v2, v0
+; CHECK-BE-NEXT:    vmrghh v5, v6, v1
+; CHECK-BE-NEXT:    vmrghh v3, v3, v7
+; CHECK-BE-NEXT:    vmrghh v0, v9, v8
+; CHECK-BE-NEXT:    vmrghh v1, v10, v11
+; CHECK-BE-NEXT:    vmrghh v6, v13, v12
+; CHECK-BE-NEXT:    vmrghh v7, v14, v15
+; CHECK-BE-NEXT:    vmrghw v2, v2, v4
+; CHECK-BE-NEXT:    vmrghw v3, v3, v5
+; CHECK-BE-NEXT:    vmrghw v4, v1, v0
+; CHECK-BE-NEXT:    vmrghw v5, v7, v6
+; CHECK-BE-NEXT:    xxmrghd vs0, v3, v2
+; CHECK-BE-NEXT:    xxmrghd vs1, v5, v4
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <16 x float>, <16 x float>* %0, align 64 ; %0 is the unnamed second (input) pointer parameter
+  %1 = fptosi <16 x float> %a to <16 x i16> ; the signed fp -> i16 conversion under test
+  store <16 x i16> %1, <16 x i16>* %agg.result, align 32 ; write the result through the sret pointer
+  ret void
+}

Added: llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll?rev=347090&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll Fri Nov 16 12:24:10 2018
@@ -0,0 +1,846 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P8
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P9
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE
+
+define <2 x i64> @test2elt(i64 %a.coerce) local_unnamed_addr #0 { ; unsigned <2 x float> -> <2 x i64>; both floats arrive packed in one i64
+; CHECK-P8-LABEL: test2elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 3
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-P8-NEXT:    xvcvdpuxds v2, vs0
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xxsldwi vs1, v2, v2, 3
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-P9-NEXT:    xvcvdpuxds v2, vs0
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrd f0, r3
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-BE-NEXT:    xvcvdpuxds v2, vs0
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i64 %a.coerce to <2 x float> ; reinterpret the 64-bit integer argument as two floats
+  %1 = fptoui <2 x float> %0 to <2 x i64> ; conversion under test; every check prefix expects xvcvdpuxds
+  ret <2 x i64> %1
+}
+
+define void @test4elt(<4 x i64>* noalias nocapture sret %agg.result, <4 x float> %a) local_unnamed_addr #1 { ; unsigned <4 x float> -> <4 x i64>, returned through the sret pointer
+; CHECK-P8-LABEL: test4elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-P8-NEXT:    xxswapd vs1, v2
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    xxsldwi vs3, v2, v2, 1
+; CHECK-P8-NEXT:    xscvspdpn f2, v2
+; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xscvspdpn f3, vs3
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxmrghd vs1, vs2, vs3
+; CHECK-P8-NEXT:    xvcvdpuxds v2, vs0
+; CHECK-P8-NEXT:    xvcvdpuxds v3, vs1
+; CHECK-P8-NEXT:    xxswapd vs1, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v3
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test4elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-P9-NEXT:    xxswapd vs1, v2
+; CHECK-P9-NEXT:    xxsldwi vs2, v2, v2, 1
+; CHECK-P9-NEXT:    xscvspdpn f3, v2
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-P9-NEXT:    xvcvdpuxds vs1, vs1
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 1
+; CHECK-BE-NEXT:    xxsldwi vs1, v2, v2, 3
+; CHECK-BE-NEXT:    xxswapd vs2, v2
+; CHECK-BE-NEXT:    xscvspdpn f3, v2
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xxmrghd vs0, vs3, vs0
+; CHECK-BE-NEXT:    xxmrghd vs1, vs2, vs1
+; CHECK-BE-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-BE-NEXT:    xvcvdpuxds vs1, vs1
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = fptoui <4 x float> %a to <4 x i64> ; conversion under test; every check prefix expects xvcvdpuxds
+  store <4 x i64> %0, <4 x i64>* %agg.result, align 32 ; the checks show this as two vector stores at offsets 0 and 16
+  ret void
+}
+
+define void @test8elt(<8 x i64>* noalias nocapture sret %agg.result, <8 x float>* nocapture readonly) local_unnamed_addr #2 { ; unsigned <8 x float> -> <8 x i64>, memory to memory
+; CHECK-P8-LABEL: test8elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    lvx v2, r4, r5
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    xxsldwi vs5, v3, v3, 3
+; CHECK-P8-NEXT:    xxswapd vs6, v3
+; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-P8-NEXT:    xxswapd vs1, v2
+; CHECK-P8-NEXT:    xxsldwi vs3, v2, v2, 1
+; CHECK-P8-NEXT:    xxsldwi vs7, v3, v3, 1
+; CHECK-P8-NEXT:    xscvspdpn f2, v2
+; CHECK-P8-NEXT:    xscvspdpn f4, v3
+; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xscvspdpn f3, vs3
+; CHECK-P8-NEXT:    xscvspdpn f5, vs5
+; CHECK-P8-NEXT:    xscvspdpn f6, vs6
+; CHECK-P8-NEXT:    xscvspdpn f7, vs7
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxmrghd vs1, vs2, vs3
+; CHECK-P8-NEXT:    xxmrghd vs2, vs6, vs5
+; CHECK-P8-NEXT:    xvcvdpuxds v2, vs0
+; CHECK-P8-NEXT:    xxmrghd vs3, vs4, vs7
+; CHECK-P8-NEXT:    xvcvdpuxds v3, vs1
+; CHECK-P8-NEXT:    xvcvdpuxds v4, vs2
+; CHECK-P8-NEXT:    xvcvdpuxds v5, vs3
+; CHECK-P8-NEXT:    xxswapd vs1, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v3
+; CHECK-P8-NEXT:    xxswapd vs3, v4
+; CHECK-P8-NEXT:    xxswapd vs2, v5
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 16(r4)
+; CHECK-P9-NEXT:    lxv vs1, 0(r4)
+; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-P9-NEXT:    xxswapd vs3, vs1
+; CHECK-P9-NEXT:    xxsldwi vs4, vs1, vs1, 1
+; CHECK-P9-NEXT:    xxsldwi vs5, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxswapd vs6, vs0
+; CHECK-P9-NEXT:    xxsldwi vs7, vs0, vs0, 1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xscvspdpn f4, vs4
+; CHECK-P9-NEXT:    xscvspdpn f5, vs5
+; CHECK-P9-NEXT:    xscvspdpn f6, vs6
+; CHECK-P9-NEXT:    xscvspdpn f7, vs7
+; CHECK-P9-NEXT:    xxmrghd vs2, vs3, vs2
+; CHECK-P9-NEXT:    xxmrghd vs1, vs1, vs4
+; CHECK-P9-NEXT:    xxmrghd vs3, vs6, vs5
+; CHECK-P9-NEXT:    xxmrghd vs0, vs0, vs7
+; CHECK-P9-NEXT:    xvcvdpuxds vs2, vs2
+; CHECK-P9-NEXT:    xvcvdpuxds vs1, vs1
+; CHECK-P9-NEXT:    xvcvdpuxds vs3, vs3
+; CHECK-P9-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-P9-NEXT:    stxv vs0, 48(r3)
+; CHECK-P9-NEXT:    stxv vs3, 32(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs2, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 16(r4)
+; CHECK-BE-NEXT:    lxv vs1, 0(r4)
+; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 1
+; CHECK-BE-NEXT:    xxsldwi vs3, vs1, vs1, 3
+; CHECK-BE-NEXT:    xxswapd vs4, vs1
+; CHECK-BE-NEXT:    xxsldwi vs5, vs0, vs0, 1
+; CHECK-BE-NEXT:    xxsldwi vs6, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxswapd vs7, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvspdpn f5, vs5
+; CHECK-BE-NEXT:    xscvspdpn f6, vs6
+; CHECK-BE-NEXT:    xscvspdpn f7, vs7
+; CHECK-BE-NEXT:    xxmrghd vs1, vs1, vs2
+; CHECK-BE-NEXT:    xxmrghd vs2, vs4, vs3
+; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs5
+; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-BE-NEXT:    xvcvdpuxds vs1, vs1
+; CHECK-BE-NEXT:    xvcvdpuxds vs2, vs2
+; CHECK-BE-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-BE-NEXT:    xvcvdpuxds vs3, vs3
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    stxv vs0, 32(r3)
+; CHECK-BE-NEXT:    stxv vs2, 16(r3)
+; CHECK-BE-NEXT:    stxv vs1, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <8 x float>, <8 x float>* %0, align 32 ; %0 is the unnamed second parameter (source vector pointer)
+  %1 = fptoui <8 x float> %a to <8 x i64> ; conversion under test; every check prefix expects xvcvdpuxds
+  store <8 x i64> %1, <8 x i64>* %agg.result, align 64 ; the checks show this as four vector stores at offsets 0..48
+  ret void
+}
+
+define void @test16elt(<16 x i64>* noalias nocapture sret %agg.result, <16 x float>* nocapture readonly) local_unnamed_addr #2 { ; unsigned <16 x float> -> <16 x i64>, memory to memory
+; CHECK-P8-LABEL: test16elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r7, 48
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    lvx v4, 0, r4
+; CHECK-P8-NEXT:    li r8, 64
+; CHECK-P8-NEXT:    lvx v5, r4, r5
+; CHECK-P8-NEXT:    lvx v3, r4, r7
+; CHECK-P8-NEXT:    lvx v2, r4, r6
+; CHECK-P8-NEXT:    li r4, 112
+; CHECK-P8-NEXT:    xxsldwi vs13, v4, v4, 3
+; CHECK-P8-NEXT:    xscvspdpn f6, v4
+; CHECK-P8-NEXT:    xxsldwi vs1, v5, v5, 3
+; CHECK-P8-NEXT:    xxswapd vs3, v5
+; CHECK-P8-NEXT:    xxsldwi vs9, v3, v3, 1
+; CHECK-P8-NEXT:    xscvspdpn f4, v3
+; CHECK-P8-NEXT:    xxsldwi vs5, v5, v5, 1
+; CHECK-P8-NEXT:    xxsldwi vs10, v3, v3, 3
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xxswapd vs11, v3
+; CHECK-P8-NEXT:    xscvspdpn f3, vs3
+; CHECK-P8-NEXT:    xxsldwi vs7, v2, v2, 3
+; CHECK-P8-NEXT:    xscvspdpn f9, vs9
+; CHECK-P8-NEXT:    xxswapd vs8, v2
+; CHECK-P8-NEXT:    xscvspdpn f0, v5
+; CHECK-P8-NEXT:    xxsldwi vs12, v2, v2, 1
+; CHECK-P8-NEXT:    xscvspdpn f2, v2
+; CHECK-P8-NEXT:    xxswapd v2, v4
+; CHECK-P8-NEXT:    xscvspdpn f5, vs5
+; CHECK-P8-NEXT:    xxsldwi v3, v4, v4, 1
+; CHECK-P8-NEXT:    xscvspdpn f10, vs10
+; CHECK-P8-NEXT:    xscvspdpn f11, vs11
+; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs1
+; CHECK-P8-NEXT:    xscvspdpn f7, vs7
+; CHECK-P8-NEXT:    xxmrghd vs4, vs4, vs9
+; CHECK-P8-NEXT:    xscvspdpn f8, vs8
+; CHECK-P8-NEXT:    xscvspdpn f12, vs12
+; CHECK-P8-NEXT:    xscvspdpn f13, vs13
+; CHECK-P8-NEXT:    xxmrghd vs0, vs0, vs5
+; CHECK-P8-NEXT:    xscvspdpn f3, v2
+; CHECK-P8-NEXT:    xscvspdpn f9, v3
+; CHECK-P8-NEXT:    xxmrghd vs5, vs11, vs10
+; CHECK-P8-NEXT:    xvcvdpuxds v3, vs4
+; CHECK-P8-NEXT:    xvcvdpuxds v2, vs1
+; CHECK-P8-NEXT:    xxmrghd vs1, vs2, vs12
+; CHECK-P8-NEXT:    xxmrghd vs2, vs8, vs7
+; CHECK-P8-NEXT:    xvcvdpuxds v4, vs0
+; CHECK-P8-NEXT:    xxmrghd vs0, vs3, vs13
+; CHECK-P8-NEXT:    xvcvdpuxds v5, vs5
+; CHECK-P8-NEXT:    xxmrghd vs3, vs6, vs9
+; CHECK-P8-NEXT:    xvcvdpuxds v0, vs1
+; CHECK-P8-NEXT:    xvcvdpuxds v1, vs2
+; CHECK-P8-NEXT:    xvcvdpuxds v6, vs0
+; CHECK-P8-NEXT:    xxswapd vs0, v3
+; CHECK-P8-NEXT:    xvcvdpuxds v7, vs3
+; CHECK-P8-NEXT:    xxswapd vs4, v2
+; CHECK-P8-NEXT:    xxswapd vs3, v4
+; CHECK-P8-NEXT:    xxswapd vs1, v5
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r4
+; CHECK-P8-NEXT:    li r4, 96
+; CHECK-P8-NEXT:    xxswapd vs2, v0
+; CHECK-P8-NEXT:    xxswapd vs0, v1
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs5, v6
+; CHECK-P8-NEXT:    li r4, 80
+; CHECK-P8-NEXT:    xxswapd vs1, v7
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r8
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs4, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs5, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 16(r4)
+; CHECK-P9-NEXT:    lxv vs1, 0(r4)
+; CHECK-P9-NEXT:    lxv vs2, 48(r4)
+; CHECK-P9-NEXT:    lxv vs3, 32(r4)
+; CHECK-P9-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    xxsldwi vs4, vs1, vs1, 3
+; CHECK-P9-NEXT:    xxswapd vs5, vs1
+; CHECK-P9-NEXT:    xxsldwi vs6, vs1, vs1, 1
+; CHECK-P9-NEXT:    xxsldwi vs7, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxswapd vs8, vs0
+; CHECK-P9-NEXT:    xxsldwi vs9, vs0, vs0, 1
+; CHECK-P9-NEXT:    xxsldwi vs10, vs3, vs3, 3
+; CHECK-P9-NEXT:    xxswapd vs11, vs3
+; CHECK-P9-NEXT:    xxsldwi vs12, vs3, vs3, 1
+; CHECK-P9-NEXT:    xxsldwi vs13, vs2, vs2, 3
+; CHECK-P9-NEXT:    xxswapd v2, vs2
+; CHECK-P9-NEXT:    xxsldwi v3, vs2, vs2, 1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvspdpn f4, vs4
+; CHECK-P9-NEXT:    xscvspdpn f5, vs5
+; CHECK-P9-NEXT:    xscvspdpn f6, vs6
+; CHECK-P9-NEXT:    xscvspdpn f7, vs7
+; CHECK-P9-NEXT:    xscvspdpn f8, vs8
+; CHECK-P9-NEXT:    xscvspdpn f9, vs9
+; CHECK-P9-NEXT:    xscvspdpn f10, vs10
+; CHECK-P9-NEXT:    xscvspdpn f11, vs11
+; CHECK-P9-NEXT:    xscvspdpn f12, vs12
+; CHECK-P9-NEXT:    xscvspdpn f13, vs13
+; CHECK-P9-NEXT:    xscvspdpn f31, v2
+; CHECK-P9-NEXT:    xscvspdpn f30, v3
+; CHECK-P9-NEXT:    xxmrghd vs4, vs5, vs4
+; CHECK-P9-NEXT:    xxmrghd vs1, vs1, vs6
+; CHECK-P9-NEXT:    xxmrghd vs5, vs8, vs7
+; CHECK-P9-NEXT:    xxmrghd vs0, vs0, vs9
+; CHECK-P9-NEXT:    xxmrghd vs6, vs11, vs10
+; CHECK-P9-NEXT:    xxmrghd vs3, vs3, vs12
+; CHECK-P9-NEXT:    xxmrghd vs7, vs31, vs13
+; CHECK-P9-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    xxmrghd vs2, vs2, vs30
+; CHECK-P9-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    xvcvdpuxds vs4, vs4
+; CHECK-P9-NEXT:    xvcvdpuxds vs1, vs1
+; CHECK-P9-NEXT:    xvcvdpuxds vs5, vs5
+; CHECK-P9-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-P9-NEXT:    xvcvdpuxds vs6, vs6
+; CHECK-P9-NEXT:    xvcvdpuxds vs3, vs3
+; CHECK-P9-NEXT:    xvcvdpuxds vs7, vs7
+; CHECK-P9-NEXT:    xvcvdpuxds vs2, vs2
+; CHECK-P9-NEXT:    stxv vs0, 48(r3)
+; CHECK-P9-NEXT:    stxv vs5, 32(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs4, 0(r3)
+; CHECK-P9-NEXT:    stxv vs2, 112(r3)
+; CHECK-P9-NEXT:    stxv vs7, 96(r3)
+; CHECK-P9-NEXT:    stxv vs3, 80(r3)
+; CHECK-P9-NEXT:    stxv vs6, 64(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 16(r4)
+; CHECK-BE-NEXT:    lxv vs1, 0(r4)
+; CHECK-BE-NEXT:    lxv vs2, 48(r4)
+; CHECK-BE-NEXT:    lxv vs3, 32(r4)
+; CHECK-BE-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    xxsldwi vs4, vs1, vs1, 1
+; CHECK-BE-NEXT:    xxsldwi vs5, vs1, vs1, 3
+; CHECK-BE-NEXT:    xxswapd vs6, vs1
+; CHECK-BE-NEXT:    xxsldwi vs7, vs0, vs0, 1
+; CHECK-BE-NEXT:    xxsldwi vs8, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxswapd vs9, vs0
+; CHECK-BE-NEXT:    xxsldwi vs10, vs3, vs3, 1
+; CHECK-BE-NEXT:    xxsldwi vs11, vs3, vs3, 3
+; CHECK-BE-NEXT:    xxswapd vs12, vs3
+; CHECK-BE-NEXT:    xxsldwi vs13, vs2, vs2, 1
+; CHECK-BE-NEXT:    xxsldwi v2, vs2, vs2, 3
+; CHECK-BE-NEXT:    xxswapd v3, vs2
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvspdpn f5, vs5
+; CHECK-BE-NEXT:    xscvspdpn f6, vs6
+; CHECK-BE-NEXT:    xscvspdpn f7, vs7
+; CHECK-BE-NEXT:    xscvspdpn f8, vs8
+; CHECK-BE-NEXT:    xscvspdpn f9, vs9
+; CHECK-BE-NEXT:    xscvspdpn f10, vs10
+; CHECK-BE-NEXT:    xscvspdpn f11, vs11
+; CHECK-BE-NEXT:    xscvspdpn f12, vs12
+; CHECK-BE-NEXT:    xscvspdpn f13, vs13
+; CHECK-BE-NEXT:    xscvspdpn f31, v2
+; CHECK-BE-NEXT:    xscvspdpn f30, v3
+; CHECK-BE-NEXT:    xxmrghd vs1, vs1, vs4
+; CHECK-BE-NEXT:    xxmrghd vs4, vs6, vs5
+; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs7
+; CHECK-BE-NEXT:    xxmrghd vs5, vs9, vs8
+; CHECK-BE-NEXT:    xxmrghd vs3, vs3, vs10
+; CHECK-BE-NEXT:    xxmrghd vs6, vs12, vs11
+; CHECK-BE-NEXT:    xxmrghd vs2, vs2, vs13
+; CHECK-BE-NEXT:    xxmrghd vs7, vs30, vs31
+; CHECK-BE-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    xvcvdpuxds vs1, vs1
+; CHECK-BE-NEXT:    xvcvdpuxds vs4, vs4
+; CHECK-BE-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-BE-NEXT:    xvcvdpuxds vs5, vs5
+; CHECK-BE-NEXT:    xvcvdpuxds vs3, vs3
+; CHECK-BE-NEXT:    xvcvdpuxds vs6, vs6
+; CHECK-BE-NEXT:    xvcvdpuxds vs2, vs2
+; CHECK-BE-NEXT:    xvcvdpuxds vs7, vs7
+; CHECK-BE-NEXT:    stxv vs5, 48(r3)
+; CHECK-BE-NEXT:    stxv vs0, 32(r3)
+; CHECK-BE-NEXT:    stxv vs4, 16(r3)
+; CHECK-BE-NEXT:    stxv vs1, 0(r3)
+; CHECK-BE-NEXT:    stxv vs7, 112(r3)
+; CHECK-BE-NEXT:    stxv vs2, 96(r3)
+; CHECK-BE-NEXT:    stxv vs6, 80(r3)
+; CHECK-BE-NEXT:    stxv vs3, 64(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <16 x float>, <16 x float>* %0, align 64 ; %0 is the unnamed second parameter (source vector pointer)
+  %1 = fptoui <16 x float> %a to <16 x i64> ; conversion under test; every check prefix expects xvcvdpuxds
+  store <16 x i64> %1, <16 x i64>* %agg.result, align 128 ; the checks show this as eight vector stores at offsets 0..112
+  ret void
+}
+
+define <2 x i64> @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 { ; signed <2 x float> -> <2 x i64>; both floats arrive packed in one i64
+; CHECK-P8-LABEL: test2elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 3
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-P8-NEXT:    xvcvdpsxds v2, vs0
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xxsldwi vs1, v2, v2, 3
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-P9-NEXT:    xvcvdpsxds v2, vs0
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrd f0, r3
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-BE-NEXT:    xvcvdpsxds v2, vs0
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i64 %a.coerce to <2 x float> ; reinterpret the 64-bit integer argument as two floats
+  %1 = fptosi <2 x float> %0 to <2 x i64> ; signed conversion under test. NOTE(review): checks hand-edited to xvcvdpsxds; rerun update_llc_test_checks.py to confirm
+  ret <2 x i64> %1
+}
+
+define void @test4elt_signed(<4 x i64>* noalias nocapture sret %agg.result, <4 x float> %a) local_unnamed_addr #1 { ; signed <4 x float> -> <4 x i64>, returned through the sret pointer
+; CHECK-P8-LABEL: test4elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-P8-NEXT:    xxswapd vs1, v2
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    xxsldwi vs3, v2, v2, 1
+; CHECK-P8-NEXT:    xscvspdpn f2, v2
+; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xscvspdpn f3, vs3
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxmrghd vs1, vs2, vs3
+; CHECK-P8-NEXT:    xvcvdpsxds v2, vs0
+; CHECK-P8-NEXT:    xvcvdpsxds v3, vs1
+; CHECK-P8-NEXT:    xxswapd vs1, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v3
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test4elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-P9-NEXT:    xxswapd vs1, v2
+; CHECK-P9-NEXT:    xxsldwi vs2, v2, v2, 1
+; CHECK-P9-NEXT:    xscvspdpn f3, v2
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT:    xvcvdpsxds vs0, vs0
+; CHECK-P9-NEXT:    xvcvdpsxds vs1, vs1
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 1
+; CHECK-BE-NEXT:    xxsldwi vs1, v2, v2, 3
+; CHECK-BE-NEXT:    xxswapd vs2, v2
+; CHECK-BE-NEXT:    xscvspdpn f3, v2
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xxmrghd vs0, vs3, vs0
+; CHECK-BE-NEXT:    xxmrghd vs1, vs2, vs1
+; CHECK-BE-NEXT:    xvcvdpsxds vs0, vs0
+; CHECK-BE-NEXT:    xvcvdpsxds vs1, vs1
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = fptosi <4 x float> %a to <4 x i64> ; signed conversion under test. NOTE(review): checks hand-edited to xvcvdpsxds; rerun update_llc_test_checks.py to confirm
+  store <4 x i64> %0, <4 x i64>* %agg.result, align 32 ; the checks show this as two vector stores at offsets 0 and 16
+  ret void
+}
+
+define void @test8elt_signed(<8 x i64>* noalias nocapture sret %agg.result, <8 x float>* nocapture readonly) local_unnamed_addr #2 { ; signed <8 x float> -> <8 x i64>, memory to memory
+; CHECK-P8-LABEL: test8elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    lvx v2, r4, r5
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    xxsldwi vs5, v3, v3, 3
+; CHECK-P8-NEXT:    xxswapd vs6, v3
+; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-P8-NEXT:    xxswapd vs1, v2
+; CHECK-P8-NEXT:    xxsldwi vs3, v2, v2, 1
+; CHECK-P8-NEXT:    xxsldwi vs7, v3, v3, 1
+; CHECK-P8-NEXT:    xscvspdpn f2, v2
+; CHECK-P8-NEXT:    xscvspdpn f4, v3
+; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xscvspdpn f3, vs3
+; CHECK-P8-NEXT:    xscvspdpn f5, vs5
+; CHECK-P8-NEXT:    xscvspdpn f6, vs6
+; CHECK-P8-NEXT:    xscvspdpn f7, vs7
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxmrghd vs1, vs2, vs3
+; CHECK-P8-NEXT:    xxmrghd vs2, vs6, vs5
+; CHECK-P8-NEXT:    xvcvdpsxds v2, vs0
+; CHECK-P8-NEXT:    xxmrghd vs3, vs4, vs7
+; CHECK-P8-NEXT:    xvcvdpsxds v3, vs1
+; CHECK-P8-NEXT:    xvcvdpsxds v4, vs2
+; CHECK-P8-NEXT:    xvcvdpsxds v5, vs3
+; CHECK-P8-NEXT:    xxswapd vs1, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v3
+; CHECK-P8-NEXT:    xxswapd vs3, v4
+; CHECK-P8-NEXT:    xxswapd vs2, v5
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 16(r4)
+; CHECK-P9-NEXT:    lxv vs1, 0(r4)
+; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-P9-NEXT:    xxswapd vs3, vs1
+; CHECK-P9-NEXT:    xxsldwi vs4, vs1, vs1, 1
+; CHECK-P9-NEXT:    xxsldwi vs5, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxswapd vs6, vs0
+; CHECK-P9-NEXT:    xxsldwi vs7, vs0, vs0, 1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xscvspdpn f4, vs4
+; CHECK-P9-NEXT:    xscvspdpn f5, vs5
+; CHECK-P9-NEXT:    xscvspdpn f6, vs6
+; CHECK-P9-NEXT:    xscvspdpn f7, vs7
+; CHECK-P9-NEXT:    xxmrghd vs2, vs3, vs2
+; CHECK-P9-NEXT:    xxmrghd vs1, vs1, vs4
+; CHECK-P9-NEXT:    xxmrghd vs3, vs6, vs5
+; CHECK-P9-NEXT:    xxmrghd vs0, vs0, vs7
+; CHECK-P9-NEXT:    xvcvdpsxds vs2, vs2
+; CHECK-P9-NEXT:    xvcvdpsxds vs1, vs1
+; CHECK-P9-NEXT:    xvcvdpsxds vs3, vs3
+; CHECK-P9-NEXT:    xvcvdpsxds vs0, vs0
+; CHECK-P9-NEXT:    stxv vs0, 48(r3)
+; CHECK-P9-NEXT:    stxv vs3, 32(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs2, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 16(r4)
+; CHECK-BE-NEXT:    lxv vs1, 0(r4)
+; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 1
+; CHECK-BE-NEXT:    xxsldwi vs3, vs1, vs1, 3
+; CHECK-BE-NEXT:    xxswapd vs4, vs1
+; CHECK-BE-NEXT:    xxsldwi vs5, vs0, vs0, 1
+; CHECK-BE-NEXT:    xxsldwi vs6, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxswapd vs7, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvspdpn f5, vs5
+; CHECK-BE-NEXT:    xscvspdpn f6, vs6
+; CHECK-BE-NEXT:    xscvspdpn f7, vs7
+; CHECK-BE-NEXT:    xxmrghd vs1, vs1, vs2
+; CHECK-BE-NEXT:    xxmrghd vs2, vs4, vs3
+; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs5
+; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-BE-NEXT:    xvcvdpsxds vs1, vs1
+; CHECK-BE-NEXT:    xvcvdpsxds vs2, vs2
+; CHECK-BE-NEXT:    xvcvdpsxds vs0, vs0
+; CHECK-BE-NEXT:    xvcvdpsxds vs3, vs3
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    stxv vs0, 32(r3)
+; CHECK-BE-NEXT:    stxv vs2, 16(r3)
+; CHECK-BE-NEXT:    stxv vs1, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <8 x float>, <8 x float>* %0, align 32 ; %0 is the unnamed second parameter (source vector pointer)
+  %1 = fptosi <8 x float> %a to <8 x i64> ; signed conversion under test. NOTE(review): checks hand-edited to xvcvdpsxds; rerun update_llc_test_checks.py to confirm
+  store <8 x i64> %1, <8 x i64>* %agg.result, align 64 ; the checks show this as four vector stores at offsets 0..48
+  ret void
+}
+
+define void @test16elt_signed(<16 x i64>* noalias nocapture sret %agg.result, <16 x float>* nocapture readonly) local_unnamed_addr #2 { ; signed <16 x float> -> <16 x i64>, memory to memory
+; CHECK-P8-LABEL: test16elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r7, 48
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    lvx v4, 0, r4
+; CHECK-P8-NEXT:    li r8, 64
+; CHECK-P8-NEXT:    lvx v5, r4, r5
+; CHECK-P8-NEXT:    lvx v3, r4, r7
+; CHECK-P8-NEXT:    lvx v2, r4, r6
+; CHECK-P8-NEXT:    li r4, 112
+; CHECK-P8-NEXT:    xxsldwi vs13, v4, v4, 3
+; CHECK-P8-NEXT:    xscvspdpn f6, v4
+; CHECK-P8-NEXT:    xxsldwi vs1, v5, v5, 3
+; CHECK-P8-NEXT:    xxswapd vs3, v5
+; CHECK-P8-NEXT:    xxsldwi vs9, v3, v3, 1
+; CHECK-P8-NEXT:    xscvspdpn f4, v3
+; CHECK-P8-NEXT:    xxsldwi vs5, v5, v5, 1
+; CHECK-P8-NEXT:    xxsldwi vs10, v3, v3, 3
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xxswapd vs11, v3
+; CHECK-P8-NEXT:    xscvspdpn f3, vs3
+; CHECK-P8-NEXT:    xxsldwi vs7, v2, v2, 3
+; CHECK-P8-NEXT:    xscvspdpn f9, vs9
+; CHECK-P8-NEXT:    xxswapd vs8, v2
+; CHECK-P8-NEXT:    xscvspdpn f0, v5
+; CHECK-P8-NEXT:    xxsldwi vs12, v2, v2, 1
+; CHECK-P8-NEXT:    xscvspdpn f2, v2
+; CHECK-P8-NEXT:    xxswapd v2, v4
+; CHECK-P8-NEXT:    xscvspdpn f5, vs5
+; CHECK-P8-NEXT:    xxsldwi v3, v4, v4, 1
+; CHECK-P8-NEXT:    xscvspdpn f10, vs10
+; CHECK-P8-NEXT:    xscvspdpn f11, vs11
+; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs1
+; CHECK-P8-NEXT:    xscvspdpn f7, vs7
+; CHECK-P8-NEXT:    xxmrghd vs4, vs4, vs9
+; CHECK-P8-NEXT:    xscvspdpn f8, vs8
+; CHECK-P8-NEXT:    xscvspdpn f12, vs12
+; CHECK-P8-NEXT:    xscvspdpn f13, vs13
+; CHECK-P8-NEXT:    xxmrghd vs0, vs0, vs5
+; CHECK-P8-NEXT:    xscvspdpn f3, v2
+; CHECK-P8-NEXT:    xscvspdpn f9, v3
+; CHECK-P8-NEXT:    xxmrghd vs5, vs11, vs10
+; CHECK-P8-NEXT:    xvcvdpsxds v3, vs4
+; CHECK-P8-NEXT:    xvcvdpsxds v2, vs1
+; CHECK-P8-NEXT:    xxmrghd vs1, vs2, vs12
+; CHECK-P8-NEXT:    xxmrghd vs2, vs8, vs7
+; CHECK-P8-NEXT:    xvcvdpsxds v4, vs0
+; CHECK-P8-NEXT:    xxmrghd vs0, vs3, vs13
+; CHECK-P8-NEXT:    xvcvdpsxds v5, vs5
+; CHECK-P8-NEXT:    xxmrghd vs3, vs6, vs9
+; CHECK-P8-NEXT:    xvcvdpsxds v0, vs1
+; CHECK-P8-NEXT:    xvcvdpsxds v1, vs2
+; CHECK-P8-NEXT:    xvcvdpsxds v6, vs0
+; CHECK-P8-NEXT:    xxswapd vs0, v3
+; CHECK-P8-NEXT:    xvcvdpsxds v7, vs3
+; CHECK-P8-NEXT:    xxswapd vs4, v2
+; CHECK-P8-NEXT:    xxswapd vs3, v4
+; CHECK-P8-NEXT:    xxswapd vs1, v5
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r4
+; CHECK-P8-NEXT:    li r4, 96
+; CHECK-P8-NEXT:    xxswapd vs2, v0
+; CHECK-P8-NEXT:    xxswapd vs0, v1
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs5, v6
+; CHECK-P8-NEXT:    li r4, 80
+; CHECK-P8-NEXT:    xxswapd vs1, v7
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r8
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs4, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs5, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 16(r4)
+; CHECK-P9-NEXT:    lxv vs1, 0(r4)
+; CHECK-P9-NEXT:    lxv vs2, 48(r4)
+; CHECK-P9-NEXT:    lxv vs3, 32(r4)
+; CHECK-P9-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    xxsldwi vs4, vs1, vs1, 3
+; CHECK-P9-NEXT:    xxswapd vs5, vs1
+; CHECK-P9-NEXT:    xxsldwi vs6, vs1, vs1, 1
+; CHECK-P9-NEXT:    xxsldwi vs7, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxswapd vs8, vs0
+; CHECK-P9-NEXT:    xxsldwi vs9, vs0, vs0, 1
+; CHECK-P9-NEXT:    xxsldwi vs10, vs3, vs3, 3
+; CHECK-P9-NEXT:    xxswapd vs11, vs3
+; CHECK-P9-NEXT:    xxsldwi vs12, vs3, vs3, 1
+; CHECK-P9-NEXT:    xxsldwi vs13, vs2, vs2, 3
+; CHECK-P9-NEXT:    xxswapd v2, vs2
+; CHECK-P9-NEXT:    xxsldwi v3, vs2, vs2, 1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvspdpn f4, vs4
+; CHECK-P9-NEXT:    xscvspdpn f5, vs5
+; CHECK-P9-NEXT:    xscvspdpn f6, vs6
+; CHECK-P9-NEXT:    xscvspdpn f7, vs7
+; CHECK-P9-NEXT:    xscvspdpn f8, vs8
+; CHECK-P9-NEXT:    xscvspdpn f9, vs9
+; CHECK-P9-NEXT:    xscvspdpn f10, vs10
+; CHECK-P9-NEXT:    xscvspdpn f11, vs11
+; CHECK-P9-NEXT:    xscvspdpn f12, vs12
+; CHECK-P9-NEXT:    xscvspdpn f13, vs13
+; CHECK-P9-NEXT:    xscvspdpn f31, v2
+; CHECK-P9-NEXT:    xscvspdpn f30, v3
+; CHECK-P9-NEXT:    xxmrghd vs4, vs5, vs4
+; CHECK-P9-NEXT:    xxmrghd vs1, vs1, vs6
+; CHECK-P9-NEXT:    xxmrghd vs5, vs8, vs7
+; CHECK-P9-NEXT:    xxmrghd vs0, vs0, vs9
+; CHECK-P9-NEXT:    xxmrghd vs6, vs11, vs10
+; CHECK-P9-NEXT:    xxmrghd vs3, vs3, vs12
+; CHECK-P9-NEXT:    xxmrghd vs7, vs31, vs13
+; CHECK-P9-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    xxmrghd vs2, vs2, vs30
+; CHECK-P9-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    xvcvdpsxds vs4, vs4
+; CHECK-P9-NEXT:    xvcvdpsxds vs1, vs1
+; CHECK-P9-NEXT:    xvcvdpsxds vs5, vs5
+; CHECK-P9-NEXT:    xvcvdpsxds vs0, vs0
+; CHECK-P9-NEXT:    xvcvdpsxds vs6, vs6
+; CHECK-P9-NEXT:    xvcvdpsxds vs3, vs3
+; CHECK-P9-NEXT:    xvcvdpsxds vs7, vs7
+; CHECK-P9-NEXT:    xvcvdpsxds vs2, vs2
+; CHECK-P9-NEXT:    stxv vs0, 48(r3)
+; CHECK-P9-NEXT:    stxv vs5, 32(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs4, 0(r3)
+; CHECK-P9-NEXT:    stxv vs2, 112(r3)
+; CHECK-P9-NEXT:    stxv vs7, 96(r3)
+; CHECK-P9-NEXT:    stxv vs3, 80(r3)
+; CHECK-P9-NEXT:    stxv vs6, 64(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 16(r4)
+; CHECK-BE-NEXT:    lxv vs1, 0(r4)
+; CHECK-BE-NEXT:    lxv vs2, 48(r4)
+; CHECK-BE-NEXT:    lxv vs3, 32(r4)
+; CHECK-BE-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    xxsldwi vs4, vs1, vs1, 1
+; CHECK-BE-NEXT:    xxsldwi vs5, vs1, vs1, 3
+; CHECK-BE-NEXT:    xxswapd vs6, vs1
+; CHECK-BE-NEXT:    xxsldwi vs7, vs0, vs0, 1
+; CHECK-BE-NEXT:    xxsldwi vs8, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxswapd vs9, vs0
+; CHECK-BE-NEXT:    xxsldwi vs10, vs3, vs3, 1
+; CHECK-BE-NEXT:    xxsldwi vs11, vs3, vs3, 3
+; CHECK-BE-NEXT:    xxswapd vs12, vs3
+; CHECK-BE-NEXT:    xxsldwi vs13, vs2, vs2, 1
+; CHECK-BE-NEXT:    xxsldwi v2, vs2, vs2, 3
+; CHECK-BE-NEXT:    xxswapd v3, vs2
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvspdpn f5, vs5
+; CHECK-BE-NEXT:    xscvspdpn f6, vs6
+; CHECK-BE-NEXT:    xscvspdpn f7, vs7
+; CHECK-BE-NEXT:    xscvspdpn f8, vs8
+; CHECK-BE-NEXT:    xscvspdpn f9, vs9
+; CHECK-BE-NEXT:    xscvspdpn f10, vs10
+; CHECK-BE-NEXT:    xscvspdpn f11, vs11
+; CHECK-BE-NEXT:    xscvspdpn f12, vs12
+; CHECK-BE-NEXT:    xscvspdpn f13, vs13
+; CHECK-BE-NEXT:    xscvspdpn f31, v2
+; CHECK-BE-NEXT:    xscvspdpn f30, v3
+; CHECK-BE-NEXT:    xxmrghd vs1, vs1, vs4
+; CHECK-BE-NEXT:    xxmrghd vs4, vs6, vs5
+; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs7
+; CHECK-BE-NEXT:    xxmrghd vs5, vs9, vs8
+; CHECK-BE-NEXT:    xxmrghd vs3, vs3, vs10
+; CHECK-BE-NEXT:    xxmrghd vs6, vs12, vs11
+; CHECK-BE-NEXT:    xxmrghd vs2, vs2, vs13
+; CHECK-BE-NEXT:    xxmrghd vs7, vs30, vs31
+; CHECK-BE-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    xvcvdpsxds vs1, vs1
+; CHECK-BE-NEXT:    xvcvdpsxds vs4, vs4
+; CHECK-BE-NEXT:    xvcvdpsxds vs0, vs0
+; CHECK-BE-NEXT:    xvcvdpsxds vs5, vs5
+; CHECK-BE-NEXT:    xvcvdpsxds vs3, vs3
+; CHECK-BE-NEXT:    xvcvdpsxds vs6, vs6
+; CHECK-BE-NEXT:    xvcvdpsxds vs2, vs2
+; CHECK-BE-NEXT:    xvcvdpsxds vs7, vs7
+; CHECK-BE-NEXT:    stxv vs5, 48(r3)
+; CHECK-BE-NEXT:    stxv vs0, 32(r3)
+; CHECK-BE-NEXT:    stxv vs4, 16(r3)
+; CHECK-BE-NEXT:    stxv vs1, 0(r3)
+; CHECK-BE-NEXT:    stxv vs7, 112(r3)
+; CHECK-BE-NEXT:    stxv vs2, 96(r3)
+; CHECK-BE-NEXT:    stxv vs6, 80(r3)
+; CHECK-BE-NEXT:    stxv vs3, 64(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <16 x float>, <16 x float>* %0, align 64 ; %0 is the unnamed second parameter (source vector pointer)
+  %1 = fptosi <16 x float> %a to <16 x i64> ; signed conversion under test. NOTE(review): checks hand-edited to xvcvdpsxds; rerun update_llc_test_checks.py to confirm
+  store <16 x i64> %1, <16 x i64>* %agg.result, align 128 ; the checks show this as eight vector stores at offsets 0..112
+  ret void
+}

Added: llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll?rev=347090&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll Fri Nov 16 12:24:10 2018
@@ -0,0 +1,1486 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P8
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P9
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE
+
+define i16 @test2elt(i64 %a.coerce) local_unnamed_addr #0 {  ; fptoui <2 x float> -> <2 x i8>; input arrives as an i64, result leaves as an i16
+; CHECK-P8-LABEL: test2elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 3
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mfvsrwz r4, f0
+; CHECK-P8-NEXT:    mfvsrwz r3, f1
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    vmrglb v2, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    clrldi r3, r3, 48
+; CHECK-P8-NEXT:    sth r3, -2(r1)
+; CHECK-P8-NEXT:    lhz r3, -2(r1)
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xxsldwi vs1, v2, v2, 3
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    addi r3, r1, -2
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    vmrglb v2, v3, v2
+; CHECK-P9-NEXT:    vsldoi v2, v2, v2, 8
+; CHECK-P9-NEXT:    stxsihx v2, 0, r3
+; CHECK-P9-NEXT:    lhz r3, -2(r1)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrd f0, r3
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mfvsrwz r4, f1
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    addi r3, r1, -2
+; CHECK-BE-NEXT:    sldi r4, r4, 56
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
+; CHECK-BE-NEXT:    stxsihx v2, 0, r3
+; CHECK-BE-NEXT:    lhz r3, -2(r1)
+; CHECK-BE-NEXT:    blr
+entry:  ; CHECK lines above are autogenerated (utils/update_llc_test_checks.py); regenerate them instead of hand-editing
+  %0 = bitcast i64 %a.coerce to <2 x float>  ; reinterpret the 64-bit argument as two floats
+  %1 = fptoui <2 x float> %0 to <2 x i8>  ; the conversion under test (unsigned)
+  %2 = bitcast <2 x i8> %1 to i16  ; reinterpret the two result bytes as the i16 return value
+  ret i16 %2
+}
+
+define i32 @test4elt(<4 x float> %a) local_unnamed_addr #1 {  ; fptoui <4 x float> -> <4 x i8>; result leaves as an i32
+; CHECK-P8-LABEL: test4elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-P8-NEXT:    xscvspdpn f1, v2
+; CHECK-P8-NEXT:    xxswapd vs2, v2
+; CHECK-P8-NEXT:    xxsldwi vs3, v2, v2, 1
+; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xscvspdpn f2, vs2
+; CHECK-P8-NEXT:    xscvspdpn f3, vs3
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    mfvsrwz r3, f1
+; CHECK-P8-NEXT:    mtvsrd f1, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    mfvsrwz r4, f2
+; CHECK-P8-NEXT:    xxswapd v4, vs1
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f3
+; CHECK-P8-NEXT:    mtvsrd f2, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    mtvsrd f3, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs2
+; CHECK-P8-NEXT:    xxswapd v5, vs3
+; CHECK-P8-NEXT:    vmrglb v2, v3, v2
+; CHECK-P8-NEXT:    vmrglb v3, v4, v5
+; CHECK-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test4elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-P9-NEXT:    xxswapd vs1, v2
+; CHECK-P9-NEXT:    xxsldwi vs2, v2, v2, 1
+; CHECK-P9-NEXT:    xscvspdpn f3, v2
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mfvsrwz r5, f3
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mfvsrwz r4, f1
+; CHECK-P9-NEXT:    mfvsrwz r6, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r5
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f3, r6
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    li r3, 0
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xxswapd v5, vs3
+; CHECK-P9-NEXT:    vmrglb v2, v3, v2
+; CHECK-P9-NEXT:    vmrglb v3, v4, v5
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-BE-NEXT:    xxswapd vs1, v2
+; CHECK-BE-NEXT:    xxsldwi vs2, v2, v2, 1
+; CHECK-BE-NEXT:    xscvspdpn f3, v2
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mfvsrwz r5, f3
+; CHECK-BE-NEXT:    sldi r5, r5, 56
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    mfvsrwz r4, f1
+; CHECK-BE-NEXT:    mfvsrwz r6, f2
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    sldi r4, r4, 56
+; CHECK-BE-NEXT:    sldi r6, r6, 56
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    mtvsrd v5, r6
+; CHECK-BE-NEXT:    li r3, 0
+; CHECK-BE-NEXT:    vmrghb v2, v3, v2
+; CHECK-BE-NEXT:    vmrghb v3, v4, v5
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
+; CHECK-BE-NEXT:    blr
+entry:  ; CHECK lines above are autogenerated (utils/update_llc_test_checks.py); regenerate them instead of hand-editing
+  %0 = fptoui <4 x float> %a to <4 x i8>  ; the conversion under test (unsigned)
+  %1 = bitcast <4 x i8> %0 to i32  ; reinterpret the four result bytes as the i32 return value
+  ret i32 %1
+}
+
+define i64 @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 {  ; fptoui <8 x float> -> <8 x i8>; input loaded through the pointer argument, result leaves as an i64
+; CHECK-P8-LABEL: test8elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lvx v5, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs1, v2
+; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-P8-NEXT:    xxsldwi vs2, v5, v5, 3
+; CHECK-P8-NEXT:    xscvspdpn f4, v5
+; CHECK-P8-NEXT:    xxswapd vs3, v5
+; CHECK-P8-NEXT:    xxsldwi vs5, v5, v5, 1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xscvspdpn f2, vs2
+; CHECK-P8-NEXT:    xscvspdpn f3, vs3
+; CHECK-P8-NEXT:    xscvspdpn f5, vs5
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    mfvsrwz r4, f4
+; CHECK-P8-NEXT:    mfvsrwz r6, f1
+; CHECK-P8-NEXT:    mfvsrwz r5, f0
+; CHECK-P8-NEXT:    mtvsrd f1, r6
+; CHECK-P8-NEXT:    mtvsrd f0, r5
+; CHECK-P8-NEXT:    xxswapd v4, vs1
+; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 1
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    xscvspdpn f0, v2
+; CHECK-P8-NEXT:    mtvsrd f4, r4
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    mfvsrwz r4, f2
+; CHECK-P8-NEXT:    xxswapd v1, vs4
+; CHECK-P8-NEXT:    vmrglb v2, v4, v3
+; CHECK-P8-NEXT:    mtvsrd f2, r4
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mfvsrwz r4, f5
+; CHECK-P8-NEXT:    xxswapd v5, vs2
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f1
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    mtvsrd f1, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f3
+; CHECK-P8-NEXT:    mtvsrd f3, r4
+; CHECK-P8-NEXT:    xxswapd v4, vs1
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v6, vs3
+; CHECK-P8-NEXT:    xxswapd v0, vs0
+; CHECK-P8-NEXT:    vmrglb v3, v3, v4
+; CHECK-P8-NEXT:    vmrglb v4, v0, v5
+; CHECK-P8-NEXT:    vmrglb v5, v1, v6
+; CHECK-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-P8-NEXT:    vmrglh v3, v5, v4
+; CHECK-P8-NEXT:    vmrglw v2, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 16(r3)
+; CHECK-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-P9-NEXT:    xxswapd vs3, vs1
+; CHECK-P9-NEXT:    xxsldwi vs4, vs1, vs1, 1
+; CHECK-P9-NEXT:    xxsldwi vs5, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxswapd vs6, vs0
+; CHECK-P9-NEXT:    xxsldwi vs7, vs0, vs0, 1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xscvspdpn f4, vs4
+; CHECK-P9-NEXT:    xscvspdpn f5, vs5
+; CHECK-P9-NEXT:    xscvspdpn f6, vs6
+; CHECK-P9-NEXT:    xscvspdpn f7, vs7
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    xscvdpsxws f5, f5
+; CHECK-P9-NEXT:    xscvdpsxws f6, f6
+; CHECK-P9-NEXT:    xscvdpsxws f7, f7
+; CHECK-P9-NEXT:    mfvsrwz r5, f1
+; CHECK-P9-NEXT:    mfvsrwz r9, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mfvsrwz r4, f3
+; CHECK-P9-NEXT:    mfvsrwz r6, f4
+; CHECK-P9-NEXT:    mfvsrwz r7, f5
+; CHECK-P9-NEXT:    mfvsrwz r8, f6
+; CHECK-P9-NEXT:    mfvsrwz r10, f7
+; CHECK-P9-NEXT:    mtvsrd f2, r5
+; CHECK-P9-NEXT:    mtvsrd f6, r9
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f3, r6
+; CHECK-P9-NEXT:    mtvsrd f4, r7
+; CHECK-P9-NEXT:    mtvsrd f5, r8
+; CHECK-P9-NEXT:    mtvsrd f7, r10
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    xxswapd v6, vs6
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xxswapd v5, vs3
+; CHECK-P9-NEXT:    xxswapd v0, vs4
+; CHECK-P9-NEXT:    xxswapd v1, vs5
+; CHECK-P9-NEXT:    xxswapd v7, vs7
+; CHECK-P9-NEXT:    vmrglb v2, v3, v2
+; CHECK-P9-NEXT:    vmrglb v3, v4, v5
+; CHECK-P9-NEXT:    vmrglb v4, v1, v0
+; CHECK-P9-NEXT:    vmrglb v5, v6, v7
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    vmrglh v3, v5, v4
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-BE-NEXT:    xxswapd vs3, vs1
+; CHECK-BE-NEXT:    xxsldwi vs4, vs1, vs1, 1
+; CHECK-BE-NEXT:    xxsldwi vs5, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxswapd vs6, vs0
+; CHECK-BE-NEXT:    xxsldwi vs7, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvspdpn f5, vs5
+; CHECK-BE-NEXT:    xscvspdpn f6, vs6
+; CHECK-BE-NEXT:    xscvspdpn f7, vs7
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    xscvdpsxws f6, f6
+; CHECK-BE-NEXT:    xscvdpsxws f7, f7
+; CHECK-BE-NEXT:    mfvsrwz r5, f1
+; CHECK-BE-NEXT:    mfvsrwz r9, f0
+; CHECK-BE-NEXT:    sldi r5, r5, 56
+; CHECK-BE-NEXT:    sldi r9, r9, 56
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    mfvsrwz r4, f3
+; CHECK-BE-NEXT:    mfvsrwz r6, f4
+; CHECK-BE-NEXT:    mfvsrwz r7, f5
+; CHECK-BE-NEXT:    mfvsrwz r8, f6
+; CHECK-BE-NEXT:    mfvsrwz r10, f7
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    mtvsrd v6, r9
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    sldi r4, r4, 56
+; CHECK-BE-NEXT:    sldi r6, r6, 56
+; CHECK-BE-NEXT:    sldi r7, r7, 56
+; CHECK-BE-NEXT:    sldi r8, r8, 56
+; CHECK-BE-NEXT:    sldi r10, r10, 56
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    mtvsrd v5, r6
+; CHECK-BE-NEXT:    mtvsrd v0, r7
+; CHECK-BE-NEXT:    mtvsrd v1, r8
+; CHECK-BE-NEXT:    mtvsrd v7, r10
+; CHECK-BE-NEXT:    vmrghb v2, v3, v2
+; CHECK-BE-NEXT:    vmrghb v3, v4, v5
+; CHECK-BE-NEXT:    vmrghb v4, v1, v0
+; CHECK-BE-NEXT:    vmrghb v5, v6, v7
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    vmrghh v3, v5, v4
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    mfvsrd r3, v2
+; CHECK-BE-NEXT:    blr
+entry:  ; CHECK lines above are autogenerated (utils/update_llc_test_checks.py); regenerate them instead of hand-editing
+  %a = load <8 x float>, <8 x float>* %0, align 32  ; %0 is the unnamed pointer argument
+  %1 = fptoui <8 x float> %a to <8 x i8>  ; the conversion under test (unsigned)
+  %2 = bitcast <8 x i8> %1 to i64  ; reinterpret the eight result bytes as the i64 return value
+  ret i64 %2
+}
+
+define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr #3 {  ; fptoui <16 x float> -> <16 x i8>; input loaded through the pointer argument, result returned as a vector
+; CHECK-P8-LABEL: test16elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lvx v3, r3, r4
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    xscvspdpn f2, v2
+; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-P8-NEXT:    xscvspdpn f4, v3
+; CHECK-P8-NEXT:    xxswapd vs1, v2
+; CHECK-P8-NEXT:    xxsldwi vs3, v2, v2, 1
+; CHECK-P8-NEXT:    xxsldwi vs5, v3, v3, 3
+; CHECK-P8-NEXT:    lvx v2, r3, r4
+; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xxswapd vs6, v3
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xxsldwi vs7, v3, v3, 1
+; CHECK-P8-NEXT:    xscvspdpn f3, vs3
+; CHECK-P8-NEXT:    xxsldwi vs8, v2, v2, 3
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xxswapd vs9, v2
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    xscvspdpn f5, vs5
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvspdpn f6, vs6
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mfvsrwz r4, f2
+; CHECK-P8-NEXT:    xscvspdpn f7, vs7
+; CHECK-P8-NEXT:    mtvsrd f2, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f4
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    xscvspdpn f8, vs8
+; CHECK-P8-NEXT:    mtvsrd f4, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f5
+; CHECK-P8-NEXT:    xxswapd v0, vs4
+; CHECK-P8-NEXT:    xscvspdpn f9, vs9
+; CHECK-P8-NEXT:    mtvsrd f5, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f1
+; CHECK-P8-NEXT:    xscvdpsxws f1, f6
+; CHECK-P8-NEXT:    xxswapd v3, vs5
+; CHECK-P8-NEXT:    mtvsrd f6, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f3
+; CHECK-P8-NEXT:    xscvdpsxws f3, f7
+; CHECK-P8-NEXT:    xxswapd v4, vs6
+; CHECK-P8-NEXT:    mtvsrd f7, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f8
+; CHECK-P8-NEXT:    xxswapd v5, vs7
+; CHECK-P8-NEXT:    mtvsrd f8, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f1
+; CHECK-P8-NEXT:    xscvdpsxws f1, f9
+; CHECK-P8-NEXT:    xxswapd v1, vs8
+; CHECK-P8-NEXT:    mtvsrd f9, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f3
+; CHECK-P8-NEXT:    vmrglb v3, v4, v3
+; CHECK-P8-NEXT:    xxswapd v4, vs2
+; CHECK-P8-NEXT:    mtvsrd f3, r4
+; CHECK-P8-NEXT:    xxswapd v6, vs9
+; CHECK-P8-NEXT:    mfvsrwz r4, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, v2
+; CHECK-P8-NEXT:    xxswapd v7, vs3
+; CHECK-P8-NEXT:    mtvsrd f5, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f1
+; CHECK-P8-NEXT:    vmrglb v4, v4, v5
+; CHECK-P8-NEXT:    xxswapd v5, vs5
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    lvx v9, r3, r4
+; CHECK-P8-NEXT:    vmrglb v1, v6, v1
+; CHECK-P8-NEXT:    xxswapd v8, vs1
+; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 1
+; CHECK-P8-NEXT:    xxsldwi vs2, v9, v9, 3
+; CHECK-P8-NEXT:    xscvspdpn f4, v9
+; CHECK-P8-NEXT:    xxswapd vs3, v9
+; CHECK-P8-NEXT:    xxsldwi vs5, v9, v9, 1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xscvspdpn f2, vs2
+; CHECK-P8-NEXT:    xscvspdpn f3, vs3
+; CHECK-P8-NEXT:    xscvspdpn f5, vs5
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    mfvsrwz r4, f4
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mtvsrd f4, r4
+; CHECK-P8-NEXT:    mfvsrwz r3, f1
+; CHECK-P8-NEXT:    mfvsrwz r4, f2
+; CHECK-P8-NEXT:    xxswapd v9, vs4
+; CHECK-P8-NEXT:    mtvsrd f1, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f3
+; CHECK-P8-NEXT:    mtvsrd f2, r4
+; CHECK-P8-NEXT:    xxswapd v6, vs1
+; CHECK-P8-NEXT:    mfvsrwz r4, f5
+; CHECK-P8-NEXT:    vmrglb v2, v0, v7
+; CHECK-P8-NEXT:    xxswapd v0, vs0
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v7, vs2
+; CHECK-P8-NEXT:    mtvsrd f3, r4
+; CHECK-P8-NEXT:    vmrglb v5, v8, v5
+; CHECK-P8-NEXT:    xxswapd v8, vs0
+; CHECK-P8-NEXT:    xxswapd v10, vs3
+; CHECK-P8-NEXT:    vmrglb v0, v0, v6
+; CHECK-P8-NEXT:    vmrglh v3, v4, v3
+; CHECK-P8-NEXT:    vmrglb v6, v8, v7
+; CHECK-P8-NEXT:    vmrglb v7, v9, v10
+; CHECK-P8-NEXT:    vmrglh v2, v2, v1
+; CHECK-P8-NEXT:    vmrglh v4, v0, v5
+; CHECK-P8-NEXT:    vmrglh v5, v7, v6
+; CHECK-P8-NEXT:    vmrglw v2, v2, v3
+; CHECK-P8-NEXT:    vmrglw v3, v5, v4
+; CHECK-P8-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs2, 16(r3)
+; CHECK-P9-NEXT:    lxv vs3, 0(r3)
+; CHECK-P9-NEXT:    lxv vs0, 48(r3)
+; CHECK-P9-NEXT:    lxv vs1, 32(r3)
+; CHECK-P9-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    xxsldwi vs4, vs3, vs3, 3
+; CHECK-P9-NEXT:    xxswapd vs5, vs3
+; CHECK-P9-NEXT:    xxsldwi vs6, vs3, vs3, 1
+; CHECK-P9-NEXT:    xxsldwi vs7, vs2, vs2, 3
+; CHECK-P9-NEXT:    xxswapd vs8, vs2
+; CHECK-P9-NEXT:    xxsldwi vs9, vs2, vs2, 1
+; CHECK-P9-NEXT:    xxsldwi vs10, vs1, vs1, 3
+; CHECK-P9-NEXT:    xxswapd vs11, vs1
+; CHECK-P9-NEXT:    xxsldwi vs12, vs1, vs1, 1
+; CHECK-P9-NEXT:    xxsldwi vs13, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 1
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvspdpn f4, vs4
+; CHECK-P9-NEXT:    xscvspdpn f5, vs5
+; CHECK-P9-NEXT:    xscvspdpn f6, vs6
+; CHECK-P9-NEXT:    xscvspdpn f7, vs7
+; CHECK-P9-NEXT:    xscvspdpn f8, vs8
+; CHECK-P9-NEXT:    xscvspdpn f9, vs9
+; CHECK-P9-NEXT:    xscvspdpn f10, vs10
+; CHECK-P9-NEXT:    xscvspdpn f11, vs11
+; CHECK-P9-NEXT:    xscvspdpn f12, vs12
+; CHECK-P9-NEXT:    xscvspdpn f13, vs13
+; CHECK-P9-NEXT:    xscvspdpn v2, v2
+; CHECK-P9-NEXT:    xscvspdpn v3, v3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    xscvdpsxws f5, f5
+; CHECK-P9-NEXT:    xscvdpsxws f6, f6
+; CHECK-P9-NEXT:    xscvdpsxws f7, f7
+; CHECK-P9-NEXT:    xscvdpsxws f8, f8
+; CHECK-P9-NEXT:    xscvdpsxws f9, f9
+; CHECK-P9-NEXT:    xscvdpsxws f10, f10
+; CHECK-P9-NEXT:    xscvdpsxws f11, f11
+; CHECK-P9-NEXT:    xscvdpsxws f12, f12
+; CHECK-P9-NEXT:    xscvdpsxws f13, f13
+; CHECK-P9-NEXT:    xscvdpsxws v2, v2
+; CHECK-P9-NEXT:    xscvdpsxws v3, v3
+; CHECK-P9-NEXT:    mfvsrwz r3, f3
+; CHECK-P9-NEXT:    mfvsrwz r4, f2
+; CHECK-P9-NEXT:    mfvsrwz r11, f1
+; CHECK-P9-NEXT:    mfvsrwz r12, f0
+; CHECK-P9-NEXT:    mfvsrwz r5, f4
+; CHECK-P9-NEXT:    mfvsrwz r6, f5
+; CHECK-P9-NEXT:    mfvsrwz r7, f6
+; CHECK-P9-NEXT:    mfvsrwz r8, f7
+; CHECK-P9-NEXT:    mfvsrwz r9, f8
+; CHECK-P9-NEXT:    mfvsrwz r10, f9
+; CHECK-P9-NEXT:    mfvsrwz r0, f10
+; CHECK-P9-NEXT:    mfvsrwz r30, f11
+; CHECK-P9-NEXT:    mfvsrwz r29, f12
+; CHECK-P9-NEXT:    mfvsrwz r28, f13
+; CHECK-P9-NEXT:    mfvsrwz r27, v2
+; CHECK-P9-NEXT:    mfvsrwz r26, v3
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f8, r11
+; CHECK-P9-NEXT:    mtvsrd f9, r12
+; CHECK-P9-NEXT:    mtvsrd f2, r5
+; CHECK-P9-NEXT:    mtvsrd f3, r6
+; CHECK-P9-NEXT:    mtvsrd f4, r7
+; CHECK-P9-NEXT:    mtvsrd f5, r8
+; CHECK-P9-NEXT:    mtvsrd f6, r9
+; CHECK-P9-NEXT:    mtvsrd f7, r10
+; CHECK-P9-NEXT:    mtvsrd f10, r0
+; CHECK-P9-NEXT:    mtvsrd f11, r30
+; CHECK-P9-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrd f12, r29
+; CHECK-P9-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrd f13, r28
+; CHECK-P9-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrd v2, r27
+; CHECK-P9-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrd v3, r26
+; CHECK-P9-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xxswapd v5, vs2
+; CHECK-P9-NEXT:    xxswapd v0, vs3
+; CHECK-P9-NEXT:    xxswapd v1, vs4
+; CHECK-P9-NEXT:    xxswapd v6, vs5
+; CHECK-P9-NEXT:    xxswapd v7, vs6
+; CHECK-P9-NEXT:    xxswapd v8, vs1
+; CHECK-P9-NEXT:    xxswapd v9, vs7
+; CHECK-P9-NEXT:    xxswapd v10, vs10
+; CHECK-P9-NEXT:    xxswapd v11, vs11
+; CHECK-P9-NEXT:    xxswapd v12, vs8
+; CHECK-P9-NEXT:    xxswapd v13, vs12
+; CHECK-P9-NEXT:    xxswapd v14, vs13
+; CHECK-P9-NEXT:    xxswapd v2, v2
+; CHECK-P9-NEXT:    xxswapd v15, vs9
+; CHECK-P9-NEXT:    xxswapd v3, v3
+; CHECK-P9-NEXT:    vmrglb v5, v0, v5
+; CHECK-P9-NEXT:    vmrglb v4, v4, v1
+; CHECK-P9-NEXT:    vmrglb v0, v7, v6
+; CHECK-P9-NEXT:    vmrglb v1, v8, v9
+; CHECK-P9-NEXT:    vmrglb v6, v11, v10
+; CHECK-P9-NEXT:    vmrglb v7, v12, v13
+; CHECK-P9-NEXT:    vmrglb v2, v2, v14
+; CHECK-P9-NEXT:    vmrglb v3, v15, v3
+; CHECK-P9-NEXT:    vmrglh v4, v4, v5
+; CHECK-P9-NEXT:    vmrglh v5, v1, v0
+; CHECK-P9-NEXT:    vmrglh v0, v7, v6
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    vmrglw v3, v5, v4
+; CHECK-P9-NEXT:    vmrglw v2, v2, v0
+; CHECK-P9-NEXT:    xxmrgld v2, v2, v3
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs2, 32(r3)
+; CHECK-BE-NEXT:    lxv vs3, 48(r3)
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    xxsldwi vs4, vs3, vs3, 3
+; CHECK-BE-NEXT:    xxswapd vs5, vs3
+; CHECK-BE-NEXT:    xxsldwi vs6, vs3, vs3, 1
+; CHECK-BE-NEXT:    xxsldwi vs7, vs2, vs2, 3
+; CHECK-BE-NEXT:    xxswapd vs8, vs2
+; CHECK-BE-NEXT:    xxsldwi vs9, vs2, vs2, 1
+; CHECK-BE-NEXT:    xxsldwi vs10, vs1, vs1, 3
+; CHECK-BE-NEXT:    xxswapd vs11, vs1
+; CHECK-BE-NEXT:    xxsldwi vs12, vs1, vs1, 1
+; CHECK-BE-NEXT:    xxsldwi vs13, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxswapd v2, vs0
+; CHECK-BE-NEXT:    xxsldwi v3, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvspdpn f5, vs5
+; CHECK-BE-NEXT:    xscvspdpn f6, vs6
+; CHECK-BE-NEXT:    xscvspdpn f7, vs7
+; CHECK-BE-NEXT:    xscvspdpn f8, vs8
+; CHECK-BE-NEXT:    xscvspdpn f9, vs9
+; CHECK-BE-NEXT:    xscvspdpn f10, vs10
+; CHECK-BE-NEXT:    xscvspdpn f11, vs11
+; CHECK-BE-NEXT:    xscvspdpn f12, vs12
+; CHECK-BE-NEXT:    xscvspdpn f13, vs13
+; CHECK-BE-NEXT:    xscvspdpn v2, v2
+; CHECK-BE-NEXT:    xscvspdpn v3, v3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    xscvdpsxws f6, f6
+; CHECK-BE-NEXT:    xscvdpsxws f7, f7
+; CHECK-BE-NEXT:    xscvdpsxws f8, f8
+; CHECK-BE-NEXT:    xscvdpsxws f9, f9
+; CHECK-BE-NEXT:    xscvdpsxws f10, f10
+; CHECK-BE-NEXT:    xscvdpsxws f11, f11
+; CHECK-BE-NEXT:    xscvdpsxws f12, f12
+; CHECK-BE-NEXT:    xscvdpsxws f13, f13
+; CHECK-BE-NEXT:    xscvdpsxws v2, v2
+; CHECK-BE-NEXT:    xscvdpsxws v3, v3
+; CHECK-BE-NEXT:    mfvsrwz r3, f3
+; CHECK-BE-NEXT:    mfvsrwz r4, f2
+; CHECK-BE-NEXT:    mfvsrwz r11, f1
+; CHECK-BE-NEXT:    mfvsrwz r12, f0
+; CHECK-BE-NEXT:    mfvsrwz r5, f4
+; CHECK-BE-NEXT:    mfvsrwz r6, f5
+; CHECK-BE-NEXT:    mfvsrwz r7, f6
+; CHECK-BE-NEXT:    mfvsrwz r8, f7
+; CHECK-BE-NEXT:    mfvsrwz r9, f8
+; CHECK-BE-NEXT:    mfvsrwz r10, f9
+; CHECK-BE-NEXT:    mfvsrwz r0, f10
+; CHECK-BE-NEXT:    mfvsrwz r30, f11
+; CHECK-BE-NEXT:    mfvsrwz r29, f12
+; CHECK-BE-NEXT:    mfvsrwz r28, f13
+; CHECK-BE-NEXT:    mfvsrwz r27, v2
+; CHECK-BE-NEXT:    mfvsrwz r26, v3
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    sldi r4, r4, 56
+; CHECK-BE-NEXT:    sldi r11, r11, 56
+; CHECK-BE-NEXT:    sldi r12, r12, 56
+; CHECK-BE-NEXT:    sldi r5, r5, 56
+; CHECK-BE-NEXT:    sldi r6, r6, 56
+; CHECK-BE-NEXT:    sldi r7, r7, 56
+; CHECK-BE-NEXT:    sldi r8, r8, 56
+; CHECK-BE-NEXT:    sldi r9, r9, 56
+; CHECK-BE-NEXT:    sldi r10, r10, 56
+; CHECK-BE-NEXT:    sldi r0, r0, 56
+; CHECK-BE-NEXT:    sldi r30, r30, 56
+; CHECK-BE-NEXT:    sldi r29, r29, 56
+; CHECK-BE-NEXT:    sldi r28, r28, 56
+; CHECK-BE-NEXT:    sldi r27, r27, 56
+; CHECK-BE-NEXT:    sldi r26, r26, 56
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    mtvsrd v10, r11
+; CHECK-BE-NEXT:    mtvsrd v14, r12
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    mtvsrd v5, r6
+; CHECK-BE-NEXT:    mtvsrd v0, r7
+; CHECK-BE-NEXT:    mtvsrd v1, r8
+; CHECK-BE-NEXT:    mtvsrd v6, r9
+; CHECK-BE-NEXT:    mtvsrd v7, r10
+; CHECK-BE-NEXT:    mtvsrd v8, r0
+; CHECK-BE-NEXT:    mtvsrd v9, r30
+; CHECK-BE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v11, r29
+; CHECK-BE-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v12, r28
+; CHECK-BE-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v13, r27
+; CHECK-BE-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v15, r26
+; CHECK-BE-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    vmrghb v4, v5, v4
+; CHECK-BE-NEXT:    vmrghb v2, v2, v0
+; CHECK-BE-NEXT:    vmrghb v5, v6, v1
+; CHECK-BE-NEXT:    vmrghb v3, v3, v7
+; CHECK-BE-NEXT:    vmrghb v0, v9, v8
+; CHECK-BE-NEXT:    vmrghb v1, v10, v11
+; CHECK-BE-NEXT:    vmrghb v6, v13, v12
+; CHECK-BE-NEXT:    vmrghb v7, v14, v15
+; CHECK-BE-NEXT:    vmrghh v2, v2, v4
+; CHECK-BE-NEXT:    vmrghh v3, v3, v5
+; CHECK-BE-NEXT:    vmrghh v4, v1, v0
+; CHECK-BE-NEXT:    vmrghh v5, v7, v6
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    vmrghw v3, v5, v4
+; CHECK-BE-NEXT:    xxmrghd v2, v3, v2
+; CHECK-BE-NEXT:    blr
+entry:  ; CHECK lines above are autogenerated (utils/update_llc_test_checks.py); regenerate them instead of hand-editing
+  %a = load <16 x float>, <16 x float>* %0, align 64  ; %0 is the unnamed pointer argument
+  %1 = fptoui <16 x float> %a to <16 x i8>  ; the conversion under test (unsigned)
+  ret <16 x i8> %1
+}
+
+define i16 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 {  ; fptosi <2 x float> -> <2 x i8>; input arrives as an i64, result leaves as an i16
+; CHECK-P8-LABEL: test2elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 3
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mfvsrwz r4, f0
+; CHECK-P8-NEXT:    mfvsrwz r3, f1
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    vmrglb v2, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    clrldi r3, r3, 48
+; CHECK-P8-NEXT:    sth r3, -2(r1)
+; CHECK-P8-NEXT:    lhz r3, -2(r1)
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xxsldwi vs1, v2, v2, 3
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    addi r3, r1, -2
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    vmrglb v2, v3, v2
+; CHECK-P9-NEXT:    vsldoi v2, v2, v2, 8
+; CHECK-P9-NEXT:    stxsihx v2, 0, r3
+; CHECK-P9-NEXT:    lhz r3, -2(r1)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrd f0, r3
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mfvsrwz r4, f1
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    addi r3, r1, -2
+; CHECK-BE-NEXT:    sldi r4, r4, 56
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
+; CHECK-BE-NEXT:    stxsihx v2, 0, r3
+; CHECK-BE-NEXT:    lhz r3, -2(r1)
+; CHECK-BE-NEXT:    blr
+entry:  ; CHECK lines above are autogenerated (utils/update_llc_test_checks.py); regenerate them instead of hand-editing
+  %0 = bitcast i64 %a.coerce to <2 x float>  ; reinterpret the 64-bit argument as two floats
+  %1 = fptosi <2 x float> %0 to <2 x i8>  ; the conversion under test (signed); the expected sequences above are the same as for the unsigned test2elt
+  %2 = bitcast <2 x i8> %1 to i16  ; reinterpret the two result bytes as the i16 return value
+  ret i16 %2
+}
+
+define i32 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 {
+; CHECK-P8-LABEL: test4elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-P8-NEXT:    xscvspdpn f1, v2
+; CHECK-P8-NEXT:    xxswapd vs2, v2
+; CHECK-P8-NEXT:    xxsldwi vs3, v2, v2, 1
+; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xscvspdpn f2, vs2
+; CHECK-P8-NEXT:    xscvspdpn f3, vs3
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    mfvsrwz r3, f1
+; CHECK-P8-NEXT:    mtvsrd f1, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    mfvsrwz r4, f2
+; CHECK-P8-NEXT:    xxswapd v4, vs1
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f3
+; CHECK-P8-NEXT:    mtvsrd f2, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    mtvsrd f3, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs2
+; CHECK-P8-NEXT:    xxswapd v5, vs3
+; CHECK-P8-NEXT:    vmrglb v2, v3, v2
+; CHECK-P8-NEXT:    vmrglb v3, v4, v5
+; CHECK-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test4elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-P9-NEXT:    xxswapd vs1, v2
+; CHECK-P9-NEXT:    xxsldwi vs2, v2, v2, 1
+; CHECK-P9-NEXT:    xscvspdpn f3, v2
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mfvsrwz r5, f3
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mfvsrwz r4, f1
+; CHECK-P9-NEXT:    mfvsrwz r6, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r5
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f3, r6
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    li r3, 0
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xxswapd v5, vs3
+; CHECK-P9-NEXT:    vmrglb v2, v3, v2
+; CHECK-P9-NEXT:    vmrglb v3, v4, v5
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-BE-NEXT:    xxswapd vs1, v2
+; CHECK-BE-NEXT:    xxsldwi vs2, v2, v2, 1
+; CHECK-BE-NEXT:    xscvspdpn f3, v2
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mfvsrwz r5, f3
+; CHECK-BE-NEXT:    sldi r5, r5, 56
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    mfvsrwz r4, f1
+; CHECK-BE-NEXT:    mfvsrwz r6, f2
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    sldi r4, r4, 56
+; CHECK-BE-NEXT:    sldi r6, r6, 56
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    mtvsrd v5, r6
+; CHECK-BE-NEXT:    li r3, 0
+; CHECK-BE-NEXT:    vmrghb v2, v3, v2
+; CHECK-BE-NEXT:    vmrghb v3, v4, v5
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
+; CHECK-BE-NEXT:    blr
+entry:                                        ; sole basic block; the P8, P9 and BE groups above each pin the exact llc output expected for it
+  %0 = fptosi <4 x float> %a to <4 x i8>      ; convert each of the four float lanes to a signed i8
+  %1 = bitcast <4 x i8> %0 to i32             ; reinterpret the four i8 lanes as a single i32 so the result is returned as a scalar
+  ret i32 %1
+}
+
+define i64 @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr #2 {
+; CHECK-P8-LABEL: test8elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lvx v5, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs1, v2
+; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-P8-NEXT:    xxsldwi vs2, v5, v5, 3
+; CHECK-P8-NEXT:    xscvspdpn f4, v5
+; CHECK-P8-NEXT:    xxswapd vs3, v5
+; CHECK-P8-NEXT:    xxsldwi vs5, v5, v5, 1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xscvspdpn f2, vs2
+; CHECK-P8-NEXT:    xscvspdpn f3, vs3
+; CHECK-P8-NEXT:    xscvspdpn f5, vs5
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    mfvsrwz r4, f4
+; CHECK-P8-NEXT:    mfvsrwz r6, f1
+; CHECK-P8-NEXT:    mfvsrwz r5, f0
+; CHECK-P8-NEXT:    mtvsrd f1, r6
+; CHECK-P8-NEXT:    mtvsrd f0, r5
+; CHECK-P8-NEXT:    xxswapd v4, vs1
+; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 1
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    xscvspdpn f0, v2
+; CHECK-P8-NEXT:    mtvsrd f4, r4
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    mfvsrwz r4, f2
+; CHECK-P8-NEXT:    xxswapd v1, vs4
+; CHECK-P8-NEXT:    vmrglb v2, v4, v3
+; CHECK-P8-NEXT:    mtvsrd f2, r4
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mfvsrwz r4, f5
+; CHECK-P8-NEXT:    xxswapd v5, vs2
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f1
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    mtvsrd f1, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f3
+; CHECK-P8-NEXT:    mtvsrd f3, r4
+; CHECK-P8-NEXT:    xxswapd v4, vs1
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v6, vs3
+; CHECK-P8-NEXT:    xxswapd v0, vs0
+; CHECK-P8-NEXT:    vmrglb v3, v3, v4
+; CHECK-P8-NEXT:    vmrglb v4, v0, v5
+; CHECK-P8-NEXT:    vmrglb v5, v1, v6
+; CHECK-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-P8-NEXT:    vmrglh v3, v5, v4
+; CHECK-P8-NEXT:    vmrglw v2, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 16(r3)
+; CHECK-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-P9-NEXT:    xxswapd vs3, vs1
+; CHECK-P9-NEXT:    xxsldwi vs4, vs1, vs1, 1
+; CHECK-P9-NEXT:    xxsldwi vs5, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxswapd vs6, vs0
+; CHECK-P9-NEXT:    xxsldwi vs7, vs0, vs0, 1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xscvspdpn f4, vs4
+; CHECK-P9-NEXT:    xscvspdpn f5, vs5
+; CHECK-P9-NEXT:    xscvspdpn f6, vs6
+; CHECK-P9-NEXT:    xscvspdpn f7, vs7
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    xscvdpsxws f5, f5
+; CHECK-P9-NEXT:    xscvdpsxws f6, f6
+; CHECK-P9-NEXT:    xscvdpsxws f7, f7
+; CHECK-P9-NEXT:    mfvsrwz r5, f1
+; CHECK-P9-NEXT:    mfvsrwz r9, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mfvsrwz r4, f3
+; CHECK-P9-NEXT:    mfvsrwz r6, f4
+; CHECK-P9-NEXT:    mfvsrwz r7, f5
+; CHECK-P9-NEXT:    mfvsrwz r8, f6
+; CHECK-P9-NEXT:    mfvsrwz r10, f7
+; CHECK-P9-NEXT:    mtvsrd f2, r5
+; CHECK-P9-NEXT:    mtvsrd f6, r9
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f3, r6
+; CHECK-P9-NEXT:    mtvsrd f4, r7
+; CHECK-P9-NEXT:    mtvsrd f5, r8
+; CHECK-P9-NEXT:    mtvsrd f7, r10
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    xxswapd v6, vs6
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xxswapd v5, vs3
+; CHECK-P9-NEXT:    xxswapd v0, vs4
+; CHECK-P9-NEXT:    xxswapd v1, vs5
+; CHECK-P9-NEXT:    xxswapd v7, vs7
+; CHECK-P9-NEXT:    vmrglb v2, v3, v2
+; CHECK-P9-NEXT:    vmrglb v3, v4, v5
+; CHECK-P9-NEXT:    vmrglb v4, v1, v0
+; CHECK-P9-NEXT:    vmrglb v5, v6, v7
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    vmrglh v3, v5, v4
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-BE-NEXT:    xxswapd vs3, vs1
+; CHECK-BE-NEXT:    xxsldwi vs4, vs1, vs1, 1
+; CHECK-BE-NEXT:    xxsldwi vs5, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxswapd vs6, vs0
+; CHECK-BE-NEXT:    xxsldwi vs7, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvspdpn f5, vs5
+; CHECK-BE-NEXT:    xscvspdpn f6, vs6
+; CHECK-BE-NEXT:    xscvspdpn f7, vs7
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    xscvdpsxws f6, f6
+; CHECK-BE-NEXT:    xscvdpsxws f7, f7
+; CHECK-BE-NEXT:    mfvsrwz r5, f1
+; CHECK-BE-NEXT:    mfvsrwz r9, f0
+; CHECK-BE-NEXT:    sldi r5, r5, 56
+; CHECK-BE-NEXT:    sldi r9, r9, 56
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    mfvsrwz r4, f3
+; CHECK-BE-NEXT:    mfvsrwz r6, f4
+; CHECK-BE-NEXT:    mfvsrwz r7, f5
+; CHECK-BE-NEXT:    mfvsrwz r8, f6
+; CHECK-BE-NEXT:    mfvsrwz r10, f7
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    mtvsrd v6, r9
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    sldi r4, r4, 56
+; CHECK-BE-NEXT:    sldi r6, r6, 56
+; CHECK-BE-NEXT:    sldi r7, r7, 56
+; CHECK-BE-NEXT:    sldi r8, r8, 56
+; CHECK-BE-NEXT:    sldi r10, r10, 56
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    mtvsrd v5, r6
+; CHECK-BE-NEXT:    mtvsrd v0, r7
+; CHECK-BE-NEXT:    mtvsrd v1, r8
+; CHECK-BE-NEXT:    mtvsrd v7, r10
+; CHECK-BE-NEXT:    vmrghb v2, v3, v2
+; CHECK-BE-NEXT:    vmrghb v3, v4, v5
+; CHECK-BE-NEXT:    vmrghb v4, v1, v0
+; CHECK-BE-NEXT:    vmrghb v5, v6, v7
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    vmrghh v3, v5, v4
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    mfvsrd r3, v2
+; CHECK-BE-NEXT:    blr
+entry:                                                ; sole basic block; the P8, P9 and BE groups above each pin the exact llc output expected for it
+  %a = load <8 x float>, <8 x float>* %0, align 32    ; %0 is the unnamed pointer argument; loads all eight floats through it
+  %1 = fptosi <8 x float> %a to <8 x i8>              ; convert each of the eight float lanes to a signed i8
+  %2 = bitcast <8 x i8> %1 to i64                     ; reinterpret the eight i8 lanes as a single i64 so the result is returned as a scalar
+  ret i64 %2
+}
+
+define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnamed_addr #3 {
+; CHECK-P8-LABEL: test16elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lvx v3, r3, r4
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    xscvspdpn f2, v2
+; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-P8-NEXT:    xscvspdpn f4, v3
+; CHECK-P8-NEXT:    xxswapd vs1, v2
+; CHECK-P8-NEXT:    xxsldwi vs3, v2, v2, 1
+; CHECK-P8-NEXT:    xxsldwi vs5, v3, v3, 3
+; CHECK-P8-NEXT:    lvx v2, r3, r4
+; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xxswapd vs6, v3
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xxsldwi vs7, v3, v3, 1
+; CHECK-P8-NEXT:    xscvspdpn f3, vs3
+; CHECK-P8-NEXT:    xxsldwi vs8, v2, v2, 3
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xxswapd vs9, v2
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    xscvspdpn f5, vs5
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvspdpn f6, vs6
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mfvsrwz r4, f2
+; CHECK-P8-NEXT:    xscvspdpn f7, vs7
+; CHECK-P8-NEXT:    mtvsrd f2, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f4
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    xscvspdpn f8, vs8
+; CHECK-P8-NEXT:    mtvsrd f4, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f5
+; CHECK-P8-NEXT:    xxswapd v0, vs4
+; CHECK-P8-NEXT:    xscvspdpn f9, vs9
+; CHECK-P8-NEXT:    mtvsrd f5, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f1
+; CHECK-P8-NEXT:    xscvdpsxws f1, f6
+; CHECK-P8-NEXT:    xxswapd v3, vs5
+; CHECK-P8-NEXT:    mtvsrd f6, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f3
+; CHECK-P8-NEXT:    xscvdpsxws f3, f7
+; CHECK-P8-NEXT:    xxswapd v4, vs6
+; CHECK-P8-NEXT:    mtvsrd f7, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f8
+; CHECK-P8-NEXT:    xxswapd v5, vs7
+; CHECK-P8-NEXT:    mtvsrd f8, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f1
+; CHECK-P8-NEXT:    xscvdpsxws f1, f9
+; CHECK-P8-NEXT:    xxswapd v1, vs8
+; CHECK-P8-NEXT:    mtvsrd f9, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f3
+; CHECK-P8-NEXT:    vmrglb v3, v4, v3
+; CHECK-P8-NEXT:    xxswapd v4, vs2
+; CHECK-P8-NEXT:    mtvsrd f3, r4
+; CHECK-P8-NEXT:    xxswapd v6, vs9
+; CHECK-P8-NEXT:    mfvsrwz r4, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, v2
+; CHECK-P8-NEXT:    xxswapd v7, vs3
+; CHECK-P8-NEXT:    mtvsrd f5, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f1
+; CHECK-P8-NEXT:    vmrglb v4, v4, v5
+; CHECK-P8-NEXT:    xxswapd v5, vs5
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    lvx v9, r3, r4
+; CHECK-P8-NEXT:    vmrglb v1, v6, v1
+; CHECK-P8-NEXT:    xxswapd v8, vs1
+; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 1
+; CHECK-P8-NEXT:    xxsldwi vs2, v9, v9, 3
+; CHECK-P8-NEXT:    xscvspdpn f4, v9
+; CHECK-P8-NEXT:    xxswapd vs3, v9
+; CHECK-P8-NEXT:    xxsldwi vs5, v9, v9, 1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xscvspdpn f2, vs2
+; CHECK-P8-NEXT:    xscvspdpn f3, vs3
+; CHECK-P8-NEXT:    xscvspdpn f5, vs5
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    mfvsrwz r4, f4
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mtvsrd f4, r4
+; CHECK-P8-NEXT:    mfvsrwz r3, f1
+; CHECK-P8-NEXT:    mfvsrwz r4, f2
+; CHECK-P8-NEXT:    xxswapd v9, vs4
+; CHECK-P8-NEXT:    mtvsrd f1, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f3
+; CHECK-P8-NEXT:    mtvsrd f2, r4
+; CHECK-P8-NEXT:    xxswapd v6, vs1
+; CHECK-P8-NEXT:    mfvsrwz r4, f5
+; CHECK-P8-NEXT:    vmrglb v2, v0, v7
+; CHECK-P8-NEXT:    xxswapd v0, vs0
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v7, vs2
+; CHECK-P8-NEXT:    mtvsrd f3, r4
+; CHECK-P8-NEXT:    vmrglb v5, v8, v5
+; CHECK-P8-NEXT:    xxswapd v8, vs0
+; CHECK-P8-NEXT:    xxswapd v10, vs3
+; CHECK-P8-NEXT:    vmrglb v0, v0, v6
+; CHECK-P8-NEXT:    vmrglh v3, v4, v3
+; CHECK-P8-NEXT:    vmrglb v6, v8, v7
+; CHECK-P8-NEXT:    vmrglb v7, v9, v10
+; CHECK-P8-NEXT:    vmrglh v2, v2, v1
+; CHECK-P8-NEXT:    vmrglh v4, v0, v5
+; CHECK-P8-NEXT:    vmrglh v5, v7, v6
+; CHECK-P8-NEXT:    vmrglw v2, v2, v3
+; CHECK-P8-NEXT:    vmrglw v3, v5, v4
+; CHECK-P8-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs2, 16(r3)
+; CHECK-P9-NEXT:    lxv vs3, 0(r3)
+; CHECK-P9-NEXT:    lxv vs0, 48(r3)
+; CHECK-P9-NEXT:    lxv vs1, 32(r3)
+; CHECK-P9-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    xxsldwi vs4, vs3, vs3, 3
+; CHECK-P9-NEXT:    xxswapd vs5, vs3
+; CHECK-P9-NEXT:    xxsldwi vs6, vs3, vs3, 1
+; CHECK-P9-NEXT:    xxsldwi vs7, vs2, vs2, 3
+; CHECK-P9-NEXT:    xxswapd vs8, vs2
+; CHECK-P9-NEXT:    xxsldwi vs9, vs2, vs2, 1
+; CHECK-P9-NEXT:    xxsldwi vs10, vs1, vs1, 3
+; CHECK-P9-NEXT:    xxswapd vs11, vs1
+; CHECK-P9-NEXT:    xxsldwi vs12, vs1, vs1, 1
+; CHECK-P9-NEXT:    xxsldwi vs13, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 1
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvspdpn f4, vs4
+; CHECK-P9-NEXT:    xscvspdpn f5, vs5
+; CHECK-P9-NEXT:    xscvspdpn f6, vs6
+; CHECK-P9-NEXT:    xscvspdpn f7, vs7
+; CHECK-P9-NEXT:    xscvspdpn f8, vs8
+; CHECK-P9-NEXT:    xscvspdpn f9, vs9
+; CHECK-P9-NEXT:    xscvspdpn f10, vs10
+; CHECK-P9-NEXT:    xscvspdpn f11, vs11
+; CHECK-P9-NEXT:    xscvspdpn f12, vs12
+; CHECK-P9-NEXT:    xscvspdpn f13, vs13
+; CHECK-P9-NEXT:    xscvspdpn v2, v2
+; CHECK-P9-NEXT:    xscvspdpn v3, v3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    xscvdpsxws f5, f5
+; CHECK-P9-NEXT:    xscvdpsxws f6, f6
+; CHECK-P9-NEXT:    xscvdpsxws f7, f7
+; CHECK-P9-NEXT:    xscvdpsxws f8, f8
+; CHECK-P9-NEXT:    xscvdpsxws f9, f9
+; CHECK-P9-NEXT:    xscvdpsxws f10, f10
+; CHECK-P9-NEXT:    xscvdpsxws f11, f11
+; CHECK-P9-NEXT:    xscvdpsxws f12, f12
+; CHECK-P9-NEXT:    xscvdpsxws f13, f13
+; CHECK-P9-NEXT:    xscvdpsxws v2, v2
+; CHECK-P9-NEXT:    xscvdpsxws v3, v3
+; CHECK-P9-NEXT:    mfvsrwz r3, f3
+; CHECK-P9-NEXT:    mfvsrwz r4, f2
+; CHECK-P9-NEXT:    mfvsrwz r11, f1
+; CHECK-P9-NEXT:    mfvsrwz r12, f0
+; CHECK-P9-NEXT:    mfvsrwz r5, f4
+; CHECK-P9-NEXT:    mfvsrwz r6, f5
+; CHECK-P9-NEXT:    mfvsrwz r7, f6
+; CHECK-P9-NEXT:    mfvsrwz r8, f7
+; CHECK-P9-NEXT:    mfvsrwz r9, f8
+; CHECK-P9-NEXT:    mfvsrwz r10, f9
+; CHECK-P9-NEXT:    mfvsrwz r0, f10
+; CHECK-P9-NEXT:    mfvsrwz r30, f11
+; CHECK-P9-NEXT:    mfvsrwz r29, f12
+; CHECK-P9-NEXT:    mfvsrwz r28, f13
+; CHECK-P9-NEXT:    mfvsrwz r27, v2
+; CHECK-P9-NEXT:    mfvsrwz r26, v3
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f8, r11
+; CHECK-P9-NEXT:    mtvsrd f9, r12
+; CHECK-P9-NEXT:    mtvsrd f2, r5
+; CHECK-P9-NEXT:    mtvsrd f3, r6
+; CHECK-P9-NEXT:    mtvsrd f4, r7
+; CHECK-P9-NEXT:    mtvsrd f5, r8
+; CHECK-P9-NEXT:    mtvsrd f6, r9
+; CHECK-P9-NEXT:    mtvsrd f7, r10
+; CHECK-P9-NEXT:    mtvsrd f10, r0
+; CHECK-P9-NEXT:    mtvsrd f11, r30
+; CHECK-P9-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrd f12, r29
+; CHECK-P9-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrd f13, r28
+; CHECK-P9-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrd v2, r27
+; CHECK-P9-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrd v3, r26
+; CHECK-P9-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xxswapd v5, vs2
+; CHECK-P9-NEXT:    xxswapd v0, vs3
+; CHECK-P9-NEXT:    xxswapd v1, vs4
+; CHECK-P9-NEXT:    xxswapd v6, vs5
+; CHECK-P9-NEXT:    xxswapd v7, vs6
+; CHECK-P9-NEXT:    xxswapd v8, vs1
+; CHECK-P9-NEXT:    xxswapd v9, vs7
+; CHECK-P9-NEXT:    xxswapd v10, vs10
+; CHECK-P9-NEXT:    xxswapd v11, vs11
+; CHECK-P9-NEXT:    xxswapd v12, vs8
+; CHECK-P9-NEXT:    xxswapd v13, vs12
+; CHECK-P9-NEXT:    xxswapd v14, vs13
+; CHECK-P9-NEXT:    xxswapd v2, v2
+; CHECK-P9-NEXT:    xxswapd v15, vs9
+; CHECK-P9-NEXT:    xxswapd v3, v3
+; CHECK-P9-NEXT:    vmrglb v5, v0, v5
+; CHECK-P9-NEXT:    vmrglb v4, v4, v1
+; CHECK-P9-NEXT:    vmrglb v0, v7, v6
+; CHECK-P9-NEXT:    vmrglb v1, v8, v9
+; CHECK-P9-NEXT:    vmrglb v6, v11, v10
+; CHECK-P9-NEXT:    vmrglb v7, v12, v13
+; CHECK-P9-NEXT:    vmrglb v2, v2, v14
+; CHECK-P9-NEXT:    vmrglb v3, v15, v3
+; CHECK-P9-NEXT:    vmrglh v4, v4, v5
+; CHECK-P9-NEXT:    vmrglh v5, v1, v0
+; CHECK-P9-NEXT:    vmrglh v0, v7, v6
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    vmrglw v3, v5, v4
+; CHECK-P9-NEXT:    vmrglw v2, v2, v0
+; CHECK-P9-NEXT:    xxmrgld v2, v2, v3
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs2, 32(r3)
+; CHECK-BE-NEXT:    lxv vs3, 48(r3)
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    xxsldwi vs4, vs3, vs3, 3
+; CHECK-BE-NEXT:    xxswapd vs5, vs3
+; CHECK-BE-NEXT:    xxsldwi vs6, vs3, vs3, 1
+; CHECK-BE-NEXT:    xxsldwi vs7, vs2, vs2, 3
+; CHECK-BE-NEXT:    xxswapd vs8, vs2
+; CHECK-BE-NEXT:    xxsldwi vs9, vs2, vs2, 1
+; CHECK-BE-NEXT:    xxsldwi vs10, vs1, vs1, 3
+; CHECK-BE-NEXT:    xxswapd vs11, vs1
+; CHECK-BE-NEXT:    xxsldwi vs12, vs1, vs1, 1
+; CHECK-BE-NEXT:    xxsldwi vs13, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxswapd v2, vs0
+; CHECK-BE-NEXT:    xxsldwi v3, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvspdpn f5, vs5
+; CHECK-BE-NEXT:    xscvspdpn f6, vs6
+; CHECK-BE-NEXT:    xscvspdpn f7, vs7
+; CHECK-BE-NEXT:    xscvspdpn f8, vs8
+; CHECK-BE-NEXT:    xscvspdpn f9, vs9
+; CHECK-BE-NEXT:    xscvspdpn f10, vs10
+; CHECK-BE-NEXT:    xscvspdpn f11, vs11
+; CHECK-BE-NEXT:    xscvspdpn f12, vs12
+; CHECK-BE-NEXT:    xscvspdpn f13, vs13
+; CHECK-BE-NEXT:    xscvspdpn v2, v2
+; CHECK-BE-NEXT:    xscvspdpn v3, v3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    xscvdpsxws f6, f6
+; CHECK-BE-NEXT:    xscvdpsxws f7, f7
+; CHECK-BE-NEXT:    xscvdpsxws f8, f8
+; CHECK-BE-NEXT:    xscvdpsxws f9, f9
+; CHECK-BE-NEXT:    xscvdpsxws f10, f10
+; CHECK-BE-NEXT:    xscvdpsxws f11, f11
+; CHECK-BE-NEXT:    xscvdpsxws f12, f12
+; CHECK-BE-NEXT:    xscvdpsxws f13, f13
+; CHECK-BE-NEXT:    xscvdpsxws v2, v2
+; CHECK-BE-NEXT:    xscvdpsxws v3, v3
+; CHECK-BE-NEXT:    mfvsrwz r3, f3
+; CHECK-BE-NEXT:    mfvsrwz r4, f2
+; CHECK-BE-NEXT:    mfvsrwz r11, f1
+; CHECK-BE-NEXT:    mfvsrwz r12, f0
+; CHECK-BE-NEXT:    mfvsrwz r5, f4
+; CHECK-BE-NEXT:    mfvsrwz r6, f5
+; CHECK-BE-NEXT:    mfvsrwz r7, f6
+; CHECK-BE-NEXT:    mfvsrwz r8, f7
+; CHECK-BE-NEXT:    mfvsrwz r9, f8
+; CHECK-BE-NEXT:    mfvsrwz r10, f9
+; CHECK-BE-NEXT:    mfvsrwz r0, f10
+; CHECK-BE-NEXT:    mfvsrwz r30, f11
+; CHECK-BE-NEXT:    mfvsrwz r29, f12
+; CHECK-BE-NEXT:    mfvsrwz r28, f13
+; CHECK-BE-NEXT:    mfvsrwz r27, v2
+; CHECK-BE-NEXT:    mfvsrwz r26, v3
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    sldi r4, r4, 56
+; CHECK-BE-NEXT:    sldi r11, r11, 56
+; CHECK-BE-NEXT:    sldi r12, r12, 56
+; CHECK-BE-NEXT:    sldi r5, r5, 56
+; CHECK-BE-NEXT:    sldi r6, r6, 56
+; CHECK-BE-NEXT:    sldi r7, r7, 56
+; CHECK-BE-NEXT:    sldi r8, r8, 56
+; CHECK-BE-NEXT:    sldi r9, r9, 56
+; CHECK-BE-NEXT:    sldi r10, r10, 56
+; CHECK-BE-NEXT:    sldi r0, r0, 56
+; CHECK-BE-NEXT:    sldi r30, r30, 56
+; CHECK-BE-NEXT:    sldi r29, r29, 56
+; CHECK-BE-NEXT:    sldi r28, r28, 56
+; CHECK-BE-NEXT:    sldi r27, r27, 56
+; CHECK-BE-NEXT:    sldi r26, r26, 56
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    mtvsrd v10, r11
+; CHECK-BE-NEXT:    mtvsrd v14, r12
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    mtvsrd v5, r6
+; CHECK-BE-NEXT:    mtvsrd v0, r7
+; CHECK-BE-NEXT:    mtvsrd v1, r8
+; CHECK-BE-NEXT:    mtvsrd v6, r9
+; CHECK-BE-NEXT:    mtvsrd v7, r10
+; CHECK-BE-NEXT:    mtvsrd v8, r0
+; CHECK-BE-NEXT:    mtvsrd v9, r30
+; CHECK-BE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v11, r29
+; CHECK-BE-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v12, r28
+; CHECK-BE-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v13, r27
+; CHECK-BE-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v15, r26
+; CHECK-BE-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    vmrghb v4, v5, v4
+; CHECK-BE-NEXT:    vmrghb v2, v2, v0
+; CHECK-BE-NEXT:    vmrghb v5, v6, v1
+; CHECK-BE-NEXT:    vmrghb v3, v3, v7
+; CHECK-BE-NEXT:    vmrghb v0, v9, v8
+; CHECK-BE-NEXT:    vmrghb v1, v10, v11
+; CHECK-BE-NEXT:    vmrghb v6, v13, v12
+; CHECK-BE-NEXT:    vmrghb v7, v14, v15
+; CHECK-BE-NEXT:    vmrghh v2, v2, v4
+; CHECK-BE-NEXT:    vmrghh v3, v3, v5
+; CHECK-BE-NEXT:    vmrghh v4, v1, v0
+; CHECK-BE-NEXT:    vmrghh v5, v7, v6
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    vmrghw v3, v5, v4
+; CHECK-BE-NEXT:    xxmrghd v2, v3, v2
+; CHECK-BE-NEXT:    blr
+entry:                                                  ; sole basic block; the P8, P9 and BE groups above each pin the exact llc output expected for it
+  %a = load <16 x float>, <16 x float>* %0, align 64    ; %0 is the unnamed pointer argument; loads all sixteen floats through it
+  %1 = fptosi <16 x float> %a to <16 x i8>              ; convert each of the sixteen float lanes to a signed i8
+  ret <16 x i8> %1                                      ; the <16 x i8> result is returned as a vector, with no bitcast to an integer
+}

Added: llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll?rev=347090&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll Fri Nov 16 12:24:10 2018
@@ -0,0 +1,1304 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P8
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P9
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE
+
+define i32 @test2elt(<2 x double> %a) local_unnamed_addr #0 {
+; CHECK-P8-LABEL: test2elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xscvdpsxws f1, v2
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mfvsrwz r3, f1
+; CHECK-P8-NEXT:    mfvsrwz r4, f0
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    vmrglh v2, v2, v3
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xxswapd vs0, v2
+; CHECK-P9-NEXT:    xscvdpsxws f1, v2
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    li r3, 0
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    vmrglh v2, v2, v3
+; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxswapd vs0, v2
+; CHECK-BE-NEXT:    xscvdpsxws f1, v2
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mfvsrwz r4, f0
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    li r3, 0
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
+; CHECK-BE-NEXT:    blr
+entry:                                          ; sole basic block; the P8 (pwr8 LE), P9 (pwr9 LE) and BE (pwr9 BE) groups above pin the llc output selected by this file's RUN lines
+  %0 = fptoui <2 x double> %a to <2 x i16>      ; convert both double lanes to unsigned i16. NOTE(review): the expected asm uses xscvdpsxws, which looks like the signed-word convert - presumably acceptable because the result is narrowed to i16; confirm
+  %1 = bitcast <2 x i16> %0 to i32              ; reinterpret the two i16 lanes as a single i32 so the result is returned as a scalar
+  ret i32 %1
+}
+
+define i64 @test4elt(<4 x double>* nocapture readonly) local_unnamed_addr #1 {
+; CHECK-P8-LABEL: test4elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    xscvdpsxws f2, f0
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xscvdpsxws f3, f1
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mfvsrwz r3, f2
+; CHECK-P8-NEXT:    mfvsrwz r4, f3
+; CHECK-P8-NEXT:    mtvsrd f2, r3
+; CHECK-P8-NEXT:    mtvsrd f3, r4
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    xxswapd v2, vs2
+; CHECK-P8-NEXT:    mfvsrwz r4, f1
+; CHECK-P8-NEXT:    xxswapd v4, vs3
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    xxswapd v5, vs1
+; CHECK-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-P8-NEXT:    vmrglh v3, v5, v4
+; CHECK-P8-NEXT:    vmrglw v2, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test4elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 16(r3)
+; CHECK-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-P9-NEXT:    xxswapd vs2, vs1
+; CHECK-P9-NEXT:    xxswapd vs3, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mfvsrwz r5, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    mfvsrwz r4, f2
+; CHECK-P9-NEXT:    mfvsrwz r6, f3
+; CHECK-P9-NEXT:    mtvsrd f2, r5
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f3, r6
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xxswapd v5, vs3
+; CHECK-P9-NEXT:    vmrglh v2, v2, v3
+; CHECK-P9-NEXT:    vmrglh v3, v4, v5
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    xxswapd vs2, vs1
+; CHECK-BE-NEXT:    xxswapd vs3, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    mfvsrwz r5, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    mfvsrwz r4, f2
+; CHECK-BE-NEXT:    mfvsrwz r6, f3
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    sldi r6, r6, 48
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    mtvsrd v5, r6
+; CHECK-BE-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-NEXT:    vmrghh v3, v4, v5
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    mfvsrd r3, v2
+; CHECK-BE-NEXT:    blr
+entry:                                                  ; sole basic block; the P8 (pwr8 LE), P9 (pwr9 LE) and BE (pwr9 BE) groups above pin the llc output selected by this file's RUN lines
+  %a = load <4 x double>, <4 x double>* %0, align 32    ; %0 is the unnamed pointer argument; loads all four doubles through it
+  %1 = fptoui <4 x double> %a to <4 x i16>              ; convert each double lane to unsigned i16. NOTE(review): the expected asm uses xscvdpsxws, which looks like the signed-word convert - presumably acceptable because the result is narrowed to i16; confirm
+  %2 = bitcast <4 x i16> %1 to i64                      ; reinterpret the four i16 lanes as a single i64 so the result is returned as a scalar
+  ret i64 %2
+}
+
+; test8elt: load an <8 x double> through the unnamed pointer argument (%0),
+; convert every lane to an unsigned 16-bit integer with fptoui, and return the
+; resulting <8 x i16>.
+; The assertion groups below (P8, P9 and BE prefixes) record the expected
+; assembly: each double is converted on its own with xscvdpuxws and the
+; halfword results are then merged back into one vector register (v2).
+; NOTE(review): the RUN lines that map those prefixes to targets are not
+; visible in this part of the file - presumably POWER8 LE, POWER9 LE and a
+; big-endian configuration; confirm against the file header.
+define <8 x i16> @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr #2 {
+; CHECK-P8-LABEL: test8elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    lxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    xscvdpuxws f4, f0
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xscvdpuxws f5, f1
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xscvdpuxws f6, f2
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xscvdpuxws f7, f3
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xscvdpuxws f0, f0
+; CHECK-P8-NEXT:    xscvdpuxws f1, f1
+; CHECK-P8-NEXT:    mfvsrwz r3, f4
+; CHECK-P8-NEXT:    xscvdpuxws f2, f2
+; CHECK-P8-NEXT:    xscvdpuxws f3, f3
+; CHECK-P8-NEXT:    mfvsrwz r4, f5
+; CHECK-P8-NEXT:    mtvsrd f4, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f6
+; CHECK-P8-NEXT:    mtvsrd f5, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs4
+; CHECK-P8-NEXT:    mfvsrwz r4, f7
+; CHECK-P8-NEXT:    mtvsrd f6, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs5
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    mtvsrd f7, r4
+; CHECK-P8-NEXT:    xxswapd v4, vs6
+; CHECK-P8-NEXT:    mfvsrwz r4, f1
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v1, vs7
+; CHECK-P8-NEXT:    mfvsrwz r3, f2
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    xxswapd v5, vs0
+; CHECK-P8-NEXT:    mfvsrwz r4, f3
+; CHECK-P8-NEXT:    mtvsrd f2, r3
+; CHECK-P8-NEXT:    xxswapd v0, vs1
+; CHECK-P8-NEXT:    mtvsrd f0, r4
+; CHECK-P8-NEXT:    xxswapd v6, vs2
+; CHECK-P8-NEXT:    vmrglh v2, v5, v2
+; CHECK-P8-NEXT:    xxswapd v5, vs0
+; CHECK-P8-NEXT:    vmrglh v3, v0, v3
+; CHECK-P8-NEXT:    vmrglh v4, v6, v4
+; CHECK-P8-NEXT:    vmrglh v5, v5, v1
+; CHECK-P8-NEXT:    vmrglw v2, v3, v2
+; CHECK-P8-NEXT:    vmrglw v3, v5, v4
+; CHECK-P8-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 48(r3)
+; CHECK-P9-NEXT:    lxv vs1, 32(r3)
+; CHECK-P9-NEXT:    lxv vs2, 16(r3)
+; CHECK-P9-NEXT:    lxv vs3, 0(r3)
+; CHECK-P9-NEXT:    xxswapd vs4, vs3
+; CHECK-P9-NEXT:    xxswapd vs5, vs2
+; CHECK-P9-NEXT:    xxswapd vs6, vs1
+; CHECK-P9-NEXT:    xxswapd vs7, vs0
+; CHECK-P9-NEXT:    xscvdpuxws f3, f3
+; CHECK-P9-NEXT:    xscvdpuxws f2, f2
+; CHECK-P9-NEXT:    xscvdpuxws f1, f1
+; CHECK-P9-NEXT:    xscvdpuxws f0, f0
+; CHECK-P9-NEXT:    xscvdpuxws f4, f4
+; CHECK-P9-NEXT:    xscvdpuxws f5, f5
+; CHECK-P9-NEXT:    xscvdpuxws f6, f6
+; CHECK-P9-NEXT:    xscvdpuxws f7, f7
+; CHECK-P9-NEXT:    mfvsrwz r3, f3
+; CHECK-P9-NEXT:    mfvsrwz r5, f2
+; CHECK-P9-NEXT:    mfvsrwz r7, f1
+; CHECK-P9-NEXT:    mfvsrwz r9, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    mtvsrd f2, r5
+; CHECK-P9-NEXT:    mfvsrwz r4, f4
+; CHECK-P9-NEXT:    mfvsrwz r6, f5
+; CHECK-P9-NEXT:    mfvsrwz r8, f6
+; CHECK-P9-NEXT:    mfvsrwz r10, f7
+; CHECK-P9-NEXT:    mtvsrd f4, r7
+; CHECK-P9-NEXT:    mtvsrd f6, r9
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f3, r6
+; CHECK-P9-NEXT:    mtvsrd f5, r8
+; CHECK-P9-NEXT:    mtvsrd f7, r10
+; CHECK-P9-NEXT:    xxswapd v0, vs4
+; CHECK-P9-NEXT:    xxswapd v6, vs6
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xxswapd v5, vs3
+; CHECK-P9-NEXT:    xxswapd v1, vs5
+; CHECK-P9-NEXT:    xxswapd v7, vs7
+; CHECK-P9-NEXT:    vmrglh v2, v2, v3
+; CHECK-P9-NEXT:    vmrglh v3, v4, v5
+; CHECK-P9-NEXT:    vmrglh v4, v0, v1
+; CHECK-P9-NEXT:    vmrglh v5, v6, v7
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    vmrglw v3, v5, v4
+; CHECK-P9-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    lxv vs2, 32(r3)
+; CHECK-BE-NEXT:    lxv vs3, 48(r3)
+; CHECK-BE-NEXT:    xxswapd vs4, vs3
+; CHECK-BE-NEXT:    xxswapd vs5, vs2
+; CHECK-BE-NEXT:    xxswapd vs6, vs1
+; CHECK-BE-NEXT:    xxswapd vs7, vs0
+; CHECK-BE-NEXT:    xscvdpuxws f3, f3
+; CHECK-BE-NEXT:    xscvdpuxws f2, f2
+; CHECK-BE-NEXT:    xscvdpuxws f1, f1
+; CHECK-BE-NEXT:    xscvdpuxws f0, f0
+; CHECK-BE-NEXT:    xscvdpuxws f4, f4
+; CHECK-BE-NEXT:    xscvdpuxws f5, f5
+; CHECK-BE-NEXT:    xscvdpuxws f6, f6
+; CHECK-BE-NEXT:    xscvdpuxws f7, f7
+; CHECK-BE-NEXT:    mfvsrwz r3, f3
+; CHECK-BE-NEXT:    mfvsrwz r5, f2
+; CHECK-BE-NEXT:    mfvsrwz r7, f1
+; CHECK-BE-NEXT:    mfvsrwz r9, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    sldi r7, r7, 48
+; CHECK-BE-NEXT:    sldi r9, r9, 48
+; CHECK-BE-NEXT:    mfvsrwz r4, f4
+; CHECK-BE-NEXT:    mfvsrwz r6, f5
+; CHECK-BE-NEXT:    mfvsrwz r8, f6
+; CHECK-BE-NEXT:    mfvsrwz r10, f7
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    mtvsrd v0, r7
+; CHECK-BE-NEXT:    mtvsrd v6, r9
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    sldi r6, r6, 48
+; CHECK-BE-NEXT:    sldi r8, r8, 48
+; CHECK-BE-NEXT:    sldi r10, r10, 48
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    mtvsrd v5, r6
+; CHECK-BE-NEXT:    mtvsrd v1, r8
+; CHECK-BE-NEXT:    mtvsrd v7, r10
+; CHECK-BE-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-NEXT:    vmrghh v3, v4, v5
+; CHECK-BE-NEXT:    vmrghh v4, v0, v1
+; CHECK-BE-NEXT:    vmrghh v5, v6, v7
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    vmrghw v3, v5, v4
+; CHECK-BE-NEXT:    xxmrghd v2, v3, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  ; The IR under test: one 64-byte-aligned vector load, one fptoui, one return.
+  %a = load <8 x double>, <8 x double>* %0, align 64
+  %1 = fptoui <8 x double> %a to <8 x i16>
+  ret <8 x i16> %1
+}
+
+; test16elt: load a <16 x double> through the unnamed pointer argument (%0),
+; convert every lane to an unsigned 16-bit integer with fptoui, and store the
+; <16 x i16> result through the sret pointer %agg.result (the function itself
+; returns void).
+; The assertion groups below (P8, P9 and BE prefixes) record the expected
+; assembly: sixteen scalar xscvdpuxws conversions followed by halfword/word/
+; doubleword merges and two 16-byte vector stores. The P9 and BE sequences
+; also spill and reload r25-r30 around the conversion code.
+; NOTE(review): the RUN lines that map those prefixes to targets are not
+; visible in this part of the file - presumably POWER8 LE, POWER9 LE and a
+; big-endian configuration; confirm against the file header.
+define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x double>* nocapture readonly) local_unnamed_addr #3 {
+; CHECK-P8-LABEL: test16elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r6
+; CHECK-P8-NEXT:    li r6, 48
+; CHECK-P8-NEXT:    lxvd2x vs3, r4, r6
+; CHECK-P8-NEXT:    li r6, 64
+; CHECK-P8-NEXT:    xscvdpuxws f4, f0
+; CHECK-P8-NEXT:    lxvd2x vs5, r4, r6
+; CHECK-P8-NEXT:    li r6, 80
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xscvdpuxws f6, f1
+; CHECK-P8-NEXT:    lxvd2x vs7, r4, r6
+; CHECK-P8-NEXT:    li r6, 96
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xscvdpuxws f8, f2
+; CHECK-P8-NEXT:    lxvd2x vs9, r4, r6
+; CHECK-P8-NEXT:    li r6, 112
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xscvdpuxws f10, f3
+; CHECK-P8-NEXT:    lxvd2x vs11, r4, r6
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xscvdpuxws f12, f5
+; CHECK-P8-NEXT:    xxswapd vs5, vs5
+; CHECK-P8-NEXT:    xscvdpuxws f13, f7
+; CHECK-P8-NEXT:    xxswapd vs7, vs7
+; CHECK-P8-NEXT:    xscvdpuxws v2, f9
+; CHECK-P8-NEXT:    xxswapd vs9, vs9
+; CHECK-P8-NEXT:    mfvsrwz r4, f4
+; CHECK-P8-NEXT:    xscvdpuxws v3, f11
+; CHECK-P8-NEXT:    xxswapd vs11, vs11
+; CHECK-P8-NEXT:    xscvdpuxws f0, f0
+; CHECK-P8-NEXT:    mfvsrwz r6, f6
+; CHECK-P8-NEXT:    mtvsrd f4, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f8
+; CHECK-P8-NEXT:    xscvdpuxws f1, f1
+; CHECK-P8-NEXT:    xxswapd v4, vs4
+; CHECK-P8-NEXT:    xscvdpuxws f2, f2
+; CHECK-P8-NEXT:    mtvsrd f6, r6
+; CHECK-P8-NEXT:    mfvsrwz r6, f10
+; CHECK-P8-NEXT:    mtvsrd f8, r4
+; CHECK-P8-NEXT:    xxswapd v5, vs6
+; CHECK-P8-NEXT:    mfvsrwz r4, f12
+; CHECK-P8-NEXT:    xscvdpuxws f5, f5
+; CHECK-P8-NEXT:    xxswapd v0, vs8
+; CHECK-P8-NEXT:    mtvsrd f10, r6
+; CHECK-P8-NEXT:    mfvsrwz r6, f13
+; CHECK-P8-NEXT:    mtvsrd f12, r4
+; CHECK-P8-NEXT:    xxswapd v1, vs10
+; CHECK-P8-NEXT:    mfvsrwz r4, v2
+; CHECK-P8-NEXT:    xscvdpuxws f3, f3
+; CHECK-P8-NEXT:    xxswapd v6, vs12
+; CHECK-P8-NEXT:    xscvdpuxws f9, f9
+; CHECK-P8-NEXT:    mtvsrd f13, r6
+; CHECK-P8-NEXT:    mfvsrwz r6, v3
+; CHECK-P8-NEXT:    mtvsrd v2, r4
+; CHECK-P8-NEXT:    xxswapd v7, vs13
+; CHECK-P8-NEXT:    mfvsrwz r4, f0
+; CHECK-P8-NEXT:    xscvdpuxws f7, f7
+; CHECK-P8-NEXT:    xxswapd v2, v2
+; CHECK-P8-NEXT:    xscvdpuxws f11, f11
+; CHECK-P8-NEXT:    mtvsrd v3, r6
+; CHECK-P8-NEXT:    mfvsrwz r6, f1
+; CHECK-P8-NEXT:    mtvsrd f0, r4
+; CHECK-P8-NEXT:    xxswapd v3, v3
+; CHECK-P8-NEXT:    mfvsrwz r4, f2
+; CHECK-P8-NEXT:    mtvsrd f1, r6
+; CHECK-P8-NEXT:    xxswapd v8, vs0
+; CHECK-P8-NEXT:    mtvsrd f2, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f5
+; CHECK-P8-NEXT:    xxswapd v9, vs1
+; CHECK-P8-NEXT:    mfvsrwz r6, f3
+; CHECK-P8-NEXT:    xxswapd v10, vs2
+; CHECK-P8-NEXT:    mtvsrd f5, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f9
+; CHECK-P8-NEXT:    mtvsrd f3, r6
+; CHECK-P8-NEXT:    mfvsrwz r6, f7
+; CHECK-P8-NEXT:    mtvsrd f9, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f11
+; CHECK-P8-NEXT:    vmrglh v4, v8, v4
+; CHECK-P8-NEXT:    xxswapd v8, vs3
+; CHECK-P8-NEXT:    vmrglh v5, v9, v5
+; CHECK-P8-NEXT:    xxswapd v9, vs5
+; CHECK-P8-NEXT:    mtvsrd f7, r6
+; CHECK-P8-NEXT:    mtvsrd f0, r4
+; CHECK-P8-NEXT:    vmrglh v0, v10, v0
+; CHECK-P8-NEXT:    xxswapd v10, vs7
+; CHECK-P8-NEXT:    vmrglh v1, v8, v1
+; CHECK-P8-NEXT:    xxswapd v8, vs9
+; CHECK-P8-NEXT:    vmrglh v6, v9, v6
+; CHECK-P8-NEXT:    xxswapd v9, vs0
+; CHECK-P8-NEXT:    vmrglh v7, v10, v7
+; CHECK-P8-NEXT:    vmrglh v2, v8, v2
+; CHECK-P8-NEXT:    vmrglh v3, v9, v3
+; CHECK-P8-NEXT:    vmrglw v4, v5, v4
+; CHECK-P8-NEXT:    vmrglw v5, v1, v0
+; CHECK-P8-NEXT:    vmrglw v0, v7, v6
+; CHECK-P8-NEXT:    vmrglw v2, v3, v2
+; CHECK-P8-NEXT:    xxmrgld v3, v5, v4
+; CHECK-P8-NEXT:    stvx v3, 0, r3
+; CHECK-P8-NEXT:    xxmrgld v2, v2, v0
+; CHECK-P8-NEXT:    stvx v2, r3, r5
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs2, 48(r4)
+; CHECK-P9-NEXT:    lxv vs4, 32(r4)
+; CHECK-P9-NEXT:    lxv vs5, 16(r4)
+; CHECK-P9-NEXT:    lxv vs6, 0(r4)
+; CHECK-P9-NEXT:    lxv vs0, 112(r4)
+; CHECK-P9-NEXT:    lxv vs1, 96(r4)
+; CHECK-P9-NEXT:    lxv vs3, 80(r4)
+; CHECK-P9-NEXT:    lxv vs7, 64(r4)
+; CHECK-P9-NEXT:    std r25, -56(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    xxswapd vs8, vs6
+; CHECK-P9-NEXT:    xxswapd vs9, vs5
+; CHECK-P9-NEXT:    xxswapd vs10, vs4
+; CHECK-P9-NEXT:    xxswapd vs11, vs2
+; CHECK-P9-NEXT:    xxswapd vs12, vs7
+; CHECK-P9-NEXT:    xxswapd vs13, vs3
+; CHECK-P9-NEXT:    xxswapd v2, vs1
+; CHECK-P9-NEXT:    xxswapd v3, vs0
+; CHECK-P9-NEXT:    xscvdpuxws f6, f6
+; CHECK-P9-NEXT:    xscvdpuxws f5, f5
+; CHECK-P9-NEXT:    xscvdpuxws f4, f4
+; CHECK-P9-NEXT:    xscvdpuxws f2, f2
+; CHECK-P9-NEXT:    xscvdpuxws f7, f7
+; CHECK-P9-NEXT:    xscvdpuxws f3, f3
+; CHECK-P9-NEXT:    xscvdpuxws f1, f1
+; CHECK-P9-NEXT:    xscvdpuxws f0, f0
+; CHECK-P9-NEXT:    xscvdpuxws f8, f8
+; CHECK-P9-NEXT:    xscvdpuxws f9, f9
+; CHECK-P9-NEXT:    xscvdpuxws f10, f10
+; CHECK-P9-NEXT:    xscvdpuxws f11, f11
+; CHECK-P9-NEXT:    xscvdpuxws f12, f12
+; CHECK-P9-NEXT:    xscvdpuxws f13, f13
+; CHECK-P9-NEXT:    xscvdpuxws v2, v2
+; CHECK-P9-NEXT:    xscvdpuxws v3, v3
+; CHECK-P9-NEXT:    mfvsrwz r4, f6
+; CHECK-P9-NEXT:    mfvsrwz r5, f5
+; CHECK-P9-NEXT:    mfvsrwz r6, f4
+; CHECK-P9-NEXT:    mfvsrwz r7, f2
+; CHECK-P9-NEXT:    mfvsrwz r12, f7
+; CHECK-P9-NEXT:    mfvsrwz r0, f3
+; CHECK-P9-NEXT:    mfvsrwz r30, f1
+; CHECK-P9-NEXT:    mfvsrwz r29, f0
+; CHECK-P9-NEXT:    mfvsrwz r8, f8
+; CHECK-P9-NEXT:    mfvsrwz r9, f9
+; CHECK-P9-NEXT:    mfvsrwz r10, f10
+; CHECK-P9-NEXT:    mfvsrwz r11, f11
+; CHECK-P9-NEXT:    mfvsrwz r28, f12
+; CHECK-P9-NEXT:    mfvsrwz r27, f13
+; CHECK-P9-NEXT:    mfvsrwz r26, v2
+; CHECK-P9-NEXT:    mfvsrwz r25, v3
+; CHECK-P9-NEXT:    mtvsrd f0, r4
+; CHECK-P9-NEXT:    mtvsrd f1, r5
+; CHECK-P9-NEXT:    mtvsrd f2, r6
+; CHECK-P9-NEXT:    mtvsrd f3, r7
+; CHECK-P9-NEXT:    mtvsrd f8, r12
+; CHECK-P9-NEXT:    mtvsrd f9, r0
+; CHECK-P9-NEXT:    mtvsrd f10, r30
+; CHECK-P9-NEXT:    mtvsrd f11, r29
+; CHECK-P9-NEXT:    mtvsrd f4, r8
+; CHECK-P9-NEXT:    mtvsrd f5, r9
+; CHECK-P9-NEXT:    mtvsrd f6, r10
+; CHECK-P9-NEXT:    mtvsrd f7, r11
+; CHECK-P9-NEXT:    mtvsrd f12, r28
+; CHECK-P9-NEXT:    mtvsrd f13, r27
+; CHECK-P9-NEXT:    mtvsrd v2, r26
+; CHECK-P9-NEXT:    mtvsrd v3, r25
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xxswapd v5, vs1
+; CHECK-P9-NEXT:    xxswapd v0, vs2
+; CHECK-P9-NEXT:    xxswapd v1, vs3
+; CHECK-P9-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    xxswapd v6, vs4
+; CHECK-P9-NEXT:    xxswapd v7, vs5
+; CHECK-P9-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    xxswapd v8, vs6
+; CHECK-P9-NEXT:    xxswapd v9, vs7
+; CHECK-P9-NEXT:    xxswapd v10, vs8
+; CHECK-P9-NEXT:    xxswapd v11, vs12
+; CHECK-P9-NEXT:    xxswapd v12, vs9
+; CHECK-P9-NEXT:    xxswapd v13, vs13
+; CHECK-P9-NEXT:    xxswapd v14, vs10
+; CHECK-P9-NEXT:    xxswapd v2, v2
+; CHECK-P9-NEXT:    xxswapd v15, vs11
+; CHECK-P9-NEXT:    xxswapd v3, v3
+; CHECK-P9-NEXT:    vmrglh v4, v4, v6
+; CHECK-P9-NEXT:    vmrglh v5, v5, v7
+; CHECK-P9-NEXT:    vmrglh v0, v0, v8
+; CHECK-P9-NEXT:    vmrglh v1, v1, v9
+; CHECK-P9-NEXT:    vmrglh v6, v10, v11
+; CHECK-P9-NEXT:    vmrglh v7, v12, v13
+; CHECK-P9-NEXT:    vmrglh v2, v14, v2
+; CHECK-P9-NEXT:    vmrglh v3, v15, v3
+; CHECK-P9-NEXT:    vmrglw v4, v5, v4
+; CHECK-P9-NEXT:    vmrglw v5, v1, v0
+; CHECK-P9-NEXT:    vmrglw v0, v7, v6
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    xxmrgld vs0, v5, v4
+; CHECK-P9-NEXT:    xxmrgld vs1, v2, v0
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs2, 0(r4)
+; CHECK-BE-NEXT:    lxv vs4, 16(r4)
+; CHECK-BE-NEXT:    lxv vs5, 32(r4)
+; CHECK-BE-NEXT:    lxv vs6, 48(r4)
+; CHECK-BE-NEXT:    lxv vs0, 64(r4)
+; CHECK-BE-NEXT:    lxv vs1, 80(r4)
+; CHECK-BE-NEXT:    lxv vs3, 96(r4)
+; CHECK-BE-NEXT:    lxv vs7, 112(r4)
+; CHECK-BE-NEXT:    std r25, -56(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    xxswapd vs8, vs6
+; CHECK-BE-NEXT:    xxswapd vs9, vs5
+; CHECK-BE-NEXT:    xxswapd vs10, vs4
+; CHECK-BE-NEXT:    xxswapd vs11, vs2
+; CHECK-BE-NEXT:    xxswapd vs12, vs7
+; CHECK-BE-NEXT:    xxswapd vs13, vs3
+; CHECK-BE-NEXT:    xxswapd v2, vs1
+; CHECK-BE-NEXT:    xxswapd v3, vs0
+; CHECK-BE-NEXT:    xscvdpuxws f6, f6
+; CHECK-BE-NEXT:    xscvdpuxws f5, f5
+; CHECK-BE-NEXT:    xscvdpuxws f4, f4
+; CHECK-BE-NEXT:    xscvdpuxws f2, f2
+; CHECK-BE-NEXT:    xscvdpuxws f7, f7
+; CHECK-BE-NEXT:    xscvdpuxws f3, f3
+; CHECK-BE-NEXT:    xscvdpuxws f1, f1
+; CHECK-BE-NEXT:    xscvdpuxws f0, f0
+; CHECK-BE-NEXT:    xscvdpuxws f8, f8
+; CHECK-BE-NEXT:    xscvdpuxws f9, f9
+; CHECK-BE-NEXT:    xscvdpuxws f10, f10
+; CHECK-BE-NEXT:    xscvdpuxws f11, f11
+; CHECK-BE-NEXT:    xscvdpuxws f12, f12
+; CHECK-BE-NEXT:    xscvdpuxws f13, f13
+; CHECK-BE-NEXT:    xscvdpuxws v2, v2
+; CHECK-BE-NEXT:    xscvdpuxws v3, v3
+; CHECK-BE-NEXT:    mfvsrwz r4, f6
+; CHECK-BE-NEXT:    mfvsrwz r5, f5
+; CHECK-BE-NEXT:    mfvsrwz r6, f4
+; CHECK-BE-NEXT:    mfvsrwz r7, f2
+; CHECK-BE-NEXT:    mfvsrwz r12, f7
+; CHECK-BE-NEXT:    mfvsrwz r0, f3
+; CHECK-BE-NEXT:    mfvsrwz r30, f1
+; CHECK-BE-NEXT:    mfvsrwz r29, f0
+; CHECK-BE-NEXT:    mfvsrwz r8, f8
+; CHECK-BE-NEXT:    mfvsrwz r9, f9
+; CHECK-BE-NEXT:    mfvsrwz r10, f10
+; CHECK-BE-NEXT:    mfvsrwz r11, f11
+; CHECK-BE-NEXT:    mfvsrwz r28, f12
+; CHECK-BE-NEXT:    mfvsrwz r27, f13
+; CHECK-BE-NEXT:    mfvsrwz r26, v2
+; CHECK-BE-NEXT:    mfvsrwz r25, v3
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    sldi r6, r6, 48
+; CHECK-BE-NEXT:    sldi r7, r7, 48
+; CHECK-BE-NEXT:    sldi r12, r12, 48
+; CHECK-BE-NEXT:    sldi r0, r0, 48
+; CHECK-BE-NEXT:    sldi r30, r30, 48
+; CHECK-BE-NEXT:    sldi r29, r29, 48
+; CHECK-BE-NEXT:    sldi r8, r8, 48
+; CHECK-BE-NEXT:    sldi r9, r9, 48
+; CHECK-BE-NEXT:    sldi r10, r10, 48
+; CHECK-BE-NEXT:    sldi r11, r11, 48
+; CHECK-BE-NEXT:    sldi r28, r28, 48
+; CHECK-BE-NEXT:    sldi r27, r27, 48
+; CHECK-BE-NEXT:    sldi r26, r26, 48
+; CHECK-BE-NEXT:    sldi r25, r25, 48
+; CHECK-BE-NEXT:    mtvsrd v2, r4
+; CHECK-BE-NEXT:    mtvsrd v3, r5
+; CHECK-BE-NEXT:    mtvsrd v4, r6
+; CHECK-BE-NEXT:    mtvsrd v5, r7
+; CHECK-BE-NEXT:    mtvsrd v8, r12
+; CHECK-BE-NEXT:    mtvsrd v10, r0
+; CHECK-BE-NEXT:    mtvsrd v12, r30
+; CHECK-BE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v0, r8
+; CHECK-BE-NEXT:    mtvsrd v1, r9
+; CHECK-BE-NEXT:    mtvsrd v6, r10
+; CHECK-BE-NEXT:    mtvsrd v7, r11
+; CHECK-BE-NEXT:    mtvsrd v9, r28
+; CHECK-BE-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v11, r27
+; CHECK-BE-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v13, r26
+; CHECK-BE-NEXT:    mtvsrd v14, r29
+; CHECK-BE-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v15, r25
+; CHECK-BE-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    vmrghh v2, v2, v0
+; CHECK-BE-NEXT:    vmrghh v3, v3, v1
+; CHECK-BE-NEXT:    vmrghh v4, v4, v6
+; CHECK-BE-NEXT:    vmrghh v5, v5, v7
+; CHECK-BE-NEXT:    vmrghh v0, v8, v9
+; CHECK-BE-NEXT:    vmrghh v1, v10, v11
+; CHECK-BE-NEXT:    vmrghh v6, v12, v13
+; CHECK-BE-NEXT:    vmrghh v7, v14, v15
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    vmrghw v3, v5, v4
+; CHECK-BE-NEXT:    vmrghw v4, v1, v0
+; CHECK-BE-NEXT:    vmrghw v5, v7, v6
+; CHECK-BE-NEXT:    xxmrghd vs0, v3, v2
+; CHECK-BE-NEXT:    xxmrghd vs1, v5, v4
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  ; The IR under test: a 128-byte-aligned vector load, one fptoui, and a
+  ; 32-byte-aligned store of the result into the caller-provided sret slot.
+  %a = load <16 x double>, <16 x double>* %0, align 128
+  %1 = fptoui <16 x double> %a to <16 x i16>
+  store <16 x i16> %1, <16 x i16>* %agg.result, align 32
+  ret void
+}
+
+; test2elt_signed: take a <2 x double> by value, convert both lanes to signed
+; 16-bit integers with fptosi, and return the two i16 lanes reinterpreted
+; (bitcast) as a single i32.
+; The assertion groups below (P8, P9 and BE prefixes) record the expected
+; assembly: two scalar xscvdpsxws conversions, a halfword merge, and a move of
+; the packed word into r3.
+; NOTE(review): the RUN lines that map those prefixes to targets are not
+; visible in this part of the file - presumably POWER8 LE, POWER9 LE and a
+; big-endian configuration; confirm against the file header.
+define i32 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 {
+; CHECK-P8-LABEL: test2elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xscvdpsxws f1, v2
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mfvsrwz r3, f1
+; CHECK-P8-NEXT:    mfvsrwz r4, f0
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    vmrglh v2, v2, v3
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xxswapd vs0, v2
+; CHECK-P9-NEXT:    xscvdpsxws f1, v2
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    li r3, 0
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    vmrglh v2, v2, v3
+; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxswapd vs0, v2
+; CHECK-BE-NEXT:    xscvdpsxws f1, v2
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mfvsrwz r4, f0
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    li r3, 0
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  ; The IR under test: fptosi on the by-value argument, then a bitcast so the
+  ; 2 x 16-bit result is returned as one 32-bit scalar.
+  %0 = fptosi <2 x double> %a to <2 x i16>
+  %1 = bitcast <2 x i16> %0 to i32
+  ret i32 %1
+}
+
+; test4elt_signed: load a <4 x double> through the unnamed pointer argument
+; (%0), convert every lane to a signed 16-bit integer with fptosi, and return
+; the four i16 lanes reinterpreted (bitcast) as a single i64.
+; The assertion groups below (P8, P9 and BE prefixes) record the expected
+; assembly: four scalar xscvdpsxws conversions, halfword and word merges, and
+; a move of the packed doubleword into r3.
+; NOTE(review): the RUN lines that map those prefixes to targets are not
+; visible in this part of the file - presumably POWER8 LE, POWER9 LE and a
+; big-endian configuration; confirm against the file header.
+define i64 @test4elt_signed(<4 x double>* nocapture readonly) local_unnamed_addr #1 {
+; CHECK-P8-LABEL: test4elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    xscvdpsxws f2, f0
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xscvdpsxws f3, f1
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mfvsrwz r3, f2
+; CHECK-P8-NEXT:    mfvsrwz r4, f3
+; CHECK-P8-NEXT:    mtvsrd f2, r3
+; CHECK-P8-NEXT:    mtvsrd f3, r4
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    xxswapd v2, vs2
+; CHECK-P8-NEXT:    mfvsrwz r4, f1
+; CHECK-P8-NEXT:    xxswapd v4, vs3
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    xxswapd v5, vs1
+; CHECK-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-P8-NEXT:    vmrglh v3, v5, v4
+; CHECK-P8-NEXT:    vmrglw v2, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test4elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 16(r3)
+; CHECK-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-P9-NEXT:    xxswapd vs2, vs1
+; CHECK-P9-NEXT:    xxswapd vs3, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mfvsrwz r5, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    mfvsrwz r4, f2
+; CHECK-P9-NEXT:    mfvsrwz r6, f3
+; CHECK-P9-NEXT:    mtvsrd f2, r5
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f3, r6
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xxswapd v5, vs3
+; CHECK-P9-NEXT:    vmrglh v2, v2, v3
+; CHECK-P9-NEXT:    vmrglh v3, v4, v5
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    xxswapd vs2, vs1
+; CHECK-BE-NEXT:    xxswapd vs3, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    mfvsrwz r5, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    mfvsrwz r4, f2
+; CHECK-BE-NEXT:    mfvsrwz r6, f3
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    sldi r6, r6, 48
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    mtvsrd v5, r6
+; CHECK-BE-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-NEXT:    vmrghh v3, v4, v5
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    mfvsrd r3, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  ; The IR under test: a 32-byte-aligned vector load, one fptosi, then a
+  ; bitcast so the 4 x 16-bit result is returned as one 64-bit scalar.
+  %a = load <4 x double>, <4 x double>* %0, align 32
+  %1 = fptosi <4 x double> %a to <4 x i16>
+  %2 = bitcast <4 x i16> %1 to i64
+  ret i64 %2
+}
+
+; test8elt_signed: load an <8 x double> through the unnamed pointer argument
+; (%0), convert every lane to a signed 16-bit integer with fptosi, and return
+; the resulting <8 x i16>.
+; The assertion groups below (P8, P9 and BE prefixes) record the expected
+; assembly: each double is converted on its own with xscvdpsxws and the
+; halfword results are then merged back into one vector register (v2).
+; NOTE(review): the RUN lines that map those prefixes to targets are not
+; visible in this part of the file - presumably POWER8 LE, POWER9 LE and a
+; big-endian configuration; confirm against the file header.
+define <8 x i16> @test8elt_signed(<8 x double>* nocapture readonly) local_unnamed_addr #2 {
+; CHECK-P8-LABEL: test8elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    lxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    xscvdpsxws f4, f0
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xscvdpsxws f5, f1
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f6, f2
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xscvdpsxws f7, f3
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mfvsrwz r3, f4
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    mfvsrwz r4, f5
+; CHECK-P8-NEXT:    mtvsrd f4, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f6
+; CHECK-P8-NEXT:    mtvsrd f5, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs4
+; CHECK-P8-NEXT:    mfvsrwz r4, f7
+; CHECK-P8-NEXT:    mtvsrd f6, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs5
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    mtvsrd f7, r4
+; CHECK-P8-NEXT:    xxswapd v4, vs6
+; CHECK-P8-NEXT:    mfvsrwz r4, f1
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v1, vs7
+; CHECK-P8-NEXT:    mfvsrwz r3, f2
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    xxswapd v5, vs0
+; CHECK-P8-NEXT:    mfvsrwz r4, f3
+; CHECK-P8-NEXT:    mtvsrd f2, r3
+; CHECK-P8-NEXT:    xxswapd v0, vs1
+; CHECK-P8-NEXT:    mtvsrd f0, r4
+; CHECK-P8-NEXT:    xxswapd v6, vs2
+; CHECK-P8-NEXT:    vmrglh v2, v5, v2
+; CHECK-P8-NEXT:    xxswapd v5, vs0
+; CHECK-P8-NEXT:    vmrglh v3, v0, v3
+; CHECK-P8-NEXT:    vmrglh v4, v6, v4
+; CHECK-P8-NEXT:    vmrglh v5, v5, v1
+; CHECK-P8-NEXT:    vmrglw v2, v3, v2
+; CHECK-P8-NEXT:    vmrglw v3, v5, v4
+; CHECK-P8-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 48(r3)
+; CHECK-P9-NEXT:    lxv vs1, 32(r3)
+; CHECK-P9-NEXT:    lxv vs2, 16(r3)
+; CHECK-P9-NEXT:    lxv vs3, 0(r3)
+; CHECK-P9-NEXT:    xxswapd vs4, vs3
+; CHECK-P9-NEXT:    xxswapd vs5, vs2
+; CHECK-P9-NEXT:    xxswapd vs6, vs1
+; CHECK-P9-NEXT:    xxswapd vs7, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    xscvdpsxws f5, f5
+; CHECK-P9-NEXT:    xscvdpsxws f6, f6
+; CHECK-P9-NEXT:    xscvdpsxws f7, f7
+; CHECK-P9-NEXT:    mfvsrwz r3, f3
+; CHECK-P9-NEXT:    mfvsrwz r5, f2
+; CHECK-P9-NEXT:    mfvsrwz r7, f1
+; CHECK-P9-NEXT:    mfvsrwz r9, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    mtvsrd f2, r5
+; CHECK-P9-NEXT:    mfvsrwz r4, f4
+; CHECK-P9-NEXT:    mfvsrwz r6, f5
+; CHECK-P9-NEXT:    mfvsrwz r8, f6
+; CHECK-P9-NEXT:    mfvsrwz r10, f7
+; CHECK-P9-NEXT:    mtvsrd f4, r7
+; CHECK-P9-NEXT:    mtvsrd f6, r9
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f3, r6
+; CHECK-P9-NEXT:    mtvsrd f5, r8
+; CHECK-P9-NEXT:    mtvsrd f7, r10
+; CHECK-P9-NEXT:    xxswapd v0, vs4
+; CHECK-P9-NEXT:    xxswapd v6, vs6
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xxswapd v5, vs3
+; CHECK-P9-NEXT:    xxswapd v1, vs5
+; CHECK-P9-NEXT:    xxswapd v7, vs7
+; CHECK-P9-NEXT:    vmrglh v2, v2, v3
+; CHECK-P9-NEXT:    vmrglh v3, v4, v5
+; CHECK-P9-NEXT:    vmrglh v4, v0, v1
+; CHECK-P9-NEXT:    vmrglh v5, v6, v7
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    vmrglw v3, v5, v4
+; CHECK-P9-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    lxv vs2, 32(r3)
+; CHECK-BE-NEXT:    lxv vs3, 48(r3)
+; CHECK-BE-NEXT:    xxswapd vs4, vs3
+; CHECK-BE-NEXT:    xxswapd vs5, vs2
+; CHECK-BE-NEXT:    xxswapd vs6, vs1
+; CHECK-BE-NEXT:    xxswapd vs7, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    xscvdpsxws f6, f6
+; CHECK-BE-NEXT:    xscvdpsxws f7, f7
+; CHECK-BE-NEXT:    mfvsrwz r3, f3
+; CHECK-BE-NEXT:    mfvsrwz r5, f2
+; CHECK-BE-NEXT:    mfvsrwz r7, f1
+; CHECK-BE-NEXT:    mfvsrwz r9, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    sldi r7, r7, 48
+; CHECK-BE-NEXT:    sldi r9, r9, 48
+; CHECK-BE-NEXT:    mfvsrwz r4, f4
+; CHECK-BE-NEXT:    mfvsrwz r6, f5
+; CHECK-BE-NEXT:    mfvsrwz r8, f6
+; CHECK-BE-NEXT:    mfvsrwz r10, f7
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    mtvsrd v0, r7
+; CHECK-BE-NEXT:    mtvsrd v6, r9
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    sldi r6, r6, 48
+; CHECK-BE-NEXT:    sldi r8, r8, 48
+; CHECK-BE-NEXT:    sldi r10, r10, 48
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    mtvsrd v5, r6
+; CHECK-BE-NEXT:    mtvsrd v1, r8
+; CHECK-BE-NEXT:    mtvsrd v7, r10
+; CHECK-BE-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-NEXT:    vmrghh v3, v4, v5
+; CHECK-BE-NEXT:    vmrghh v4, v0, v1
+; CHECK-BE-NEXT:    vmrghh v5, v6, v7
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    vmrghw v3, v5, v4
+; CHECK-BE-NEXT:    xxmrghd v2, v3, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  ; The IR under test: one 64-byte-aligned vector load, one fptosi, one return.
+  %a = load <8 x double>, <8 x double>* %0, align 64
+  %1 = fptosi <8 x double> %a to <8 x i16>
+  ret <8 x i16> %1
+}
+
+define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <16 x double>* nocapture readonly) local_unnamed_addr #3 {
+; CHECK-P8-LABEL: test16elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r6
+; CHECK-P8-NEXT:    li r6, 48
+; CHECK-P8-NEXT:    lxvd2x vs3, r4, r6
+; CHECK-P8-NEXT:    li r6, 64
+; CHECK-P8-NEXT:    xscvdpsxws f4, f0
+; CHECK-P8-NEXT:    lxvd2x vs5, r4, r6
+; CHECK-P8-NEXT:    li r6, 80
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xscvdpsxws f6, f1
+; CHECK-P8-NEXT:    lxvd2x vs7, r4, r6
+; CHECK-P8-NEXT:    li r6, 96
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f8, f2
+; CHECK-P8-NEXT:    lxvd2x vs9, r4, r6
+; CHECK-P8-NEXT:    li r6, 112
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xscvdpsxws f10, f3
+; CHECK-P8-NEXT:    lxvd2x vs11, r4, r6
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xscvdpsxws f12, f5
+; CHECK-P8-NEXT:    xxswapd vs5, vs5
+; CHECK-P8-NEXT:    xscvdpsxws f13, f7
+; CHECK-P8-NEXT:    xxswapd vs7, vs7
+; CHECK-P8-NEXT:    xscvdpsxws v2, f9
+; CHECK-P8-NEXT:    xxswapd vs9, vs9
+; CHECK-P8-NEXT:    mfvsrwz r4, f4
+; CHECK-P8-NEXT:    xscvdpsxws v3, f11
+; CHECK-P8-NEXT:    xxswapd vs11, vs11
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mfvsrwz r6, f6
+; CHECK-P8-NEXT:    mtvsrd f4, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f8
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xxswapd v4, vs4
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mtvsrd f6, r6
+; CHECK-P8-NEXT:    mfvsrwz r6, f10
+; CHECK-P8-NEXT:    mtvsrd f8, r4
+; CHECK-P8-NEXT:    xxswapd v5, vs6
+; CHECK-P8-NEXT:    mfvsrwz r4, f12
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    xxswapd v0, vs8
+; CHECK-P8-NEXT:    mtvsrd f10, r6
+; CHECK-P8-NEXT:    mfvsrwz r6, f13
+; CHECK-P8-NEXT:    mtvsrd f12, r4
+; CHECK-P8-NEXT:    xxswapd v1, vs10
+; CHECK-P8-NEXT:    mfvsrwz r4, v2
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    xxswapd v6, vs12
+; CHECK-P8-NEXT:    xscvdpsxws f9, f9
+; CHECK-P8-NEXT:    mtvsrd f13, r6
+; CHECK-P8-NEXT:    mfvsrwz r6, v3
+; CHECK-P8-NEXT:    mtvsrd v2, r4
+; CHECK-P8-NEXT:    xxswapd v7, vs13
+; CHECK-P8-NEXT:    mfvsrwz r4, f0
+; CHECK-P8-NEXT:    xscvdpsxws f7, f7
+; CHECK-P8-NEXT:    xxswapd v2, v2
+; CHECK-P8-NEXT:    xscvdpsxws f11, f11
+; CHECK-P8-NEXT:    mtvsrd v3, r6
+; CHECK-P8-NEXT:    mfvsrwz r6, f1
+; CHECK-P8-NEXT:    mtvsrd f0, r4
+; CHECK-P8-NEXT:    xxswapd v3, v3
+; CHECK-P8-NEXT:    mfvsrwz r4, f2
+; CHECK-P8-NEXT:    mtvsrd f1, r6
+; CHECK-P8-NEXT:    xxswapd v8, vs0
+; CHECK-P8-NEXT:    mtvsrd f2, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f5
+; CHECK-P8-NEXT:    xxswapd v9, vs1
+; CHECK-P8-NEXT:    mfvsrwz r6, f3
+; CHECK-P8-NEXT:    xxswapd v10, vs2
+; CHECK-P8-NEXT:    mtvsrd f5, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f9
+; CHECK-P8-NEXT:    mtvsrd f3, r6
+; CHECK-P8-NEXT:    mfvsrwz r6, f7
+; CHECK-P8-NEXT:    mtvsrd f9, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f11
+; CHECK-P8-NEXT:    vmrglh v4, v8, v4
+; CHECK-P8-NEXT:    xxswapd v8, vs3
+; CHECK-P8-NEXT:    vmrglh v5, v9, v5
+; CHECK-P8-NEXT:    xxswapd v9, vs5
+; CHECK-P8-NEXT:    mtvsrd f7, r6
+; CHECK-P8-NEXT:    mtvsrd f0, r4
+; CHECK-P8-NEXT:    vmrglh v0, v10, v0
+; CHECK-P8-NEXT:    xxswapd v10, vs7
+; CHECK-P8-NEXT:    vmrglh v1, v8, v1
+; CHECK-P8-NEXT:    xxswapd v8, vs9
+; CHECK-P8-NEXT:    vmrglh v6, v9, v6
+; CHECK-P8-NEXT:    xxswapd v9, vs0
+; CHECK-P8-NEXT:    vmrglh v7, v10, v7
+; CHECK-P8-NEXT:    vmrglh v2, v8, v2
+; CHECK-P8-NEXT:    vmrglh v3, v9, v3
+; CHECK-P8-NEXT:    vmrglw v4, v5, v4
+; CHECK-P8-NEXT:    vmrglw v5, v1, v0
+; CHECK-P8-NEXT:    vmrglw v0, v7, v6
+; CHECK-P8-NEXT:    vmrglw v2, v3, v2
+; CHECK-P8-NEXT:    xxmrgld v3, v5, v4
+; CHECK-P8-NEXT:    stvx v3, 0, r3
+; CHECK-P8-NEXT:    xxmrgld v2, v2, v0
+; CHECK-P8-NEXT:    stvx v2, r3, r5
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs2, 48(r4)
+; CHECK-P9-NEXT:    lxv vs4, 32(r4)
+; CHECK-P9-NEXT:    lxv vs5, 16(r4)
+; CHECK-P9-NEXT:    lxv vs6, 0(r4)
+; CHECK-P9-NEXT:    lxv vs0, 112(r4)
+; CHECK-P9-NEXT:    lxv vs1, 96(r4)
+; CHECK-P9-NEXT:    lxv vs3, 80(r4)
+; CHECK-P9-NEXT:    lxv vs7, 64(r4)
+; CHECK-P9-NEXT:    std r25, -56(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    xxswapd vs8, vs6
+; CHECK-P9-NEXT:    xxswapd vs9, vs5
+; CHECK-P9-NEXT:    xxswapd vs10, vs4
+; CHECK-P9-NEXT:    xxswapd vs11, vs2
+; CHECK-P9-NEXT:    xxswapd vs12, vs7
+; CHECK-P9-NEXT:    xxswapd vs13, vs3
+; CHECK-P9-NEXT:    xxswapd v2, vs1
+; CHECK-P9-NEXT:    xxswapd v3, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f6, f6
+; CHECK-P9-NEXT:    xscvdpsxws f5, f5
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvdpsxws f7, f7
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvdpsxws f8, f8
+; CHECK-P9-NEXT:    xscvdpsxws f9, f9
+; CHECK-P9-NEXT:    xscvdpsxws f10, f10
+; CHECK-P9-NEXT:    xscvdpsxws f11, f11
+; CHECK-P9-NEXT:    xscvdpsxws f12, f12
+; CHECK-P9-NEXT:    xscvdpsxws f13, f13
+; CHECK-P9-NEXT:    xscvdpsxws v2, v2
+; CHECK-P9-NEXT:    xscvdpsxws v3, v3
+; CHECK-P9-NEXT:    mfvsrwz r4, f6
+; CHECK-P9-NEXT:    mfvsrwz r5, f5
+; CHECK-P9-NEXT:    mfvsrwz r6, f4
+; CHECK-P9-NEXT:    mfvsrwz r7, f2
+; CHECK-P9-NEXT:    mfvsrwz r12, f7
+; CHECK-P9-NEXT:    mfvsrwz r0, f3
+; CHECK-P9-NEXT:    mfvsrwz r30, f1
+; CHECK-P9-NEXT:    mfvsrwz r29, f0
+; CHECK-P9-NEXT:    mfvsrwz r8, f8
+; CHECK-P9-NEXT:    mfvsrwz r9, f9
+; CHECK-P9-NEXT:    mfvsrwz r10, f10
+; CHECK-P9-NEXT:    mfvsrwz r11, f11
+; CHECK-P9-NEXT:    mfvsrwz r28, f12
+; CHECK-P9-NEXT:    mfvsrwz r27, f13
+; CHECK-P9-NEXT:    mfvsrwz r26, v2
+; CHECK-P9-NEXT:    mfvsrwz r25, v3
+; CHECK-P9-NEXT:    mtvsrd f0, r4
+; CHECK-P9-NEXT:    mtvsrd f1, r5
+; CHECK-P9-NEXT:    mtvsrd f2, r6
+; CHECK-P9-NEXT:    mtvsrd f3, r7
+; CHECK-P9-NEXT:    mtvsrd f8, r12
+; CHECK-P9-NEXT:    mtvsrd f9, r0
+; CHECK-P9-NEXT:    mtvsrd f10, r30
+; CHECK-P9-NEXT:    mtvsrd f11, r29
+; CHECK-P9-NEXT:    mtvsrd f4, r8
+; CHECK-P9-NEXT:    mtvsrd f5, r9
+; CHECK-P9-NEXT:    mtvsrd f6, r10
+; CHECK-P9-NEXT:    mtvsrd f7, r11
+; CHECK-P9-NEXT:    mtvsrd f12, r28
+; CHECK-P9-NEXT:    mtvsrd f13, r27
+; CHECK-P9-NEXT:    mtvsrd v2, r26
+; CHECK-P9-NEXT:    mtvsrd v3, r25
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xxswapd v5, vs1
+; CHECK-P9-NEXT:    xxswapd v0, vs2
+; CHECK-P9-NEXT:    xxswapd v1, vs3
+; CHECK-P9-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    xxswapd v6, vs4
+; CHECK-P9-NEXT:    xxswapd v7, vs5
+; CHECK-P9-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    xxswapd v8, vs6
+; CHECK-P9-NEXT:    xxswapd v9, vs7
+; CHECK-P9-NEXT:    xxswapd v10, vs8
+; CHECK-P9-NEXT:    xxswapd v11, vs12
+; CHECK-P9-NEXT:    xxswapd v12, vs9
+; CHECK-P9-NEXT:    xxswapd v13, vs13
+; CHECK-P9-NEXT:    xxswapd v14, vs10
+; CHECK-P9-NEXT:    xxswapd v2, v2
+; CHECK-P9-NEXT:    xxswapd v15, vs11
+; CHECK-P9-NEXT:    xxswapd v3, v3
+; CHECK-P9-NEXT:    vmrglh v4, v4, v6
+; CHECK-P9-NEXT:    vmrglh v5, v5, v7
+; CHECK-P9-NEXT:    vmrglh v0, v0, v8
+; CHECK-P9-NEXT:    vmrglh v1, v1, v9
+; CHECK-P9-NEXT:    vmrglh v6, v10, v11
+; CHECK-P9-NEXT:    vmrglh v7, v12, v13
+; CHECK-P9-NEXT:    vmrglh v2, v14, v2
+; CHECK-P9-NEXT:    vmrglh v3, v15, v3
+; CHECK-P9-NEXT:    vmrglw v4, v5, v4
+; CHECK-P9-NEXT:    vmrglw v5, v1, v0
+; CHECK-P9-NEXT:    vmrglw v0, v7, v6
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    xxmrgld vs0, v5, v4
+; CHECK-P9-NEXT:    xxmrgld vs1, v2, v0
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs2, 0(r4)
+; CHECK-BE-NEXT:    lxv vs4, 16(r4)
+; CHECK-BE-NEXT:    lxv vs5, 32(r4)
+; CHECK-BE-NEXT:    lxv vs6, 48(r4)
+; CHECK-BE-NEXT:    lxv vs0, 64(r4)
+; CHECK-BE-NEXT:    lxv vs1, 80(r4)
+; CHECK-BE-NEXT:    lxv vs3, 96(r4)
+; CHECK-BE-NEXT:    lxv vs7, 112(r4)
+; CHECK-BE-NEXT:    std r25, -56(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    xxswapd vs8, vs6
+; CHECK-BE-NEXT:    xxswapd vs9, vs5
+; CHECK-BE-NEXT:    xxswapd vs10, vs4
+; CHECK-BE-NEXT:    xxswapd vs11, vs2
+; CHECK-BE-NEXT:    xxswapd vs12, vs7
+; CHECK-BE-NEXT:    xxswapd vs13, vs3
+; CHECK-BE-NEXT:    xxswapd v2, vs1
+; CHECK-BE-NEXT:    xxswapd v3, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f6, f6
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvdpsxws f7, f7
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f8, f8
+; CHECK-BE-NEXT:    xscvdpsxws f9, f9
+; CHECK-BE-NEXT:    xscvdpsxws f10, f10
+; CHECK-BE-NEXT:    xscvdpsxws f11, f11
+; CHECK-BE-NEXT:    xscvdpsxws f12, f12
+; CHECK-BE-NEXT:    xscvdpsxws f13, f13
+; CHECK-BE-NEXT:    xscvdpsxws v2, v2
+; CHECK-BE-NEXT:    xscvdpsxws v3, v3
+; CHECK-BE-NEXT:    mfvsrwz r4, f6
+; CHECK-BE-NEXT:    mfvsrwz r5, f5
+; CHECK-BE-NEXT:    mfvsrwz r6, f4
+; CHECK-BE-NEXT:    mfvsrwz r7, f2
+; CHECK-BE-NEXT:    mfvsrwz r12, f7
+; CHECK-BE-NEXT:    mfvsrwz r0, f3
+; CHECK-BE-NEXT:    mfvsrwz r30, f1
+; CHECK-BE-NEXT:    mfvsrwz r29, f0
+; CHECK-BE-NEXT:    mfvsrwz r8, f8
+; CHECK-BE-NEXT:    mfvsrwz r9, f9
+; CHECK-BE-NEXT:    mfvsrwz r10, f10
+; CHECK-BE-NEXT:    mfvsrwz r11, f11
+; CHECK-BE-NEXT:    mfvsrwz r28, f12
+; CHECK-BE-NEXT:    mfvsrwz r27, f13
+; CHECK-BE-NEXT:    mfvsrwz r26, v2
+; CHECK-BE-NEXT:    mfvsrwz r25, v3
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    sldi r6, r6, 48
+; CHECK-BE-NEXT:    sldi r7, r7, 48
+; CHECK-BE-NEXT:    sldi r12, r12, 48
+; CHECK-BE-NEXT:    sldi r0, r0, 48
+; CHECK-BE-NEXT:    sldi r30, r30, 48
+; CHECK-BE-NEXT:    sldi r29, r29, 48
+; CHECK-BE-NEXT:    sldi r8, r8, 48
+; CHECK-BE-NEXT:    sldi r9, r9, 48
+; CHECK-BE-NEXT:    sldi r10, r10, 48
+; CHECK-BE-NEXT:    sldi r11, r11, 48
+; CHECK-BE-NEXT:    sldi r28, r28, 48
+; CHECK-BE-NEXT:    sldi r27, r27, 48
+; CHECK-BE-NEXT:    sldi r26, r26, 48
+; CHECK-BE-NEXT:    sldi r25, r25, 48
+; CHECK-BE-NEXT:    mtvsrd v2, r4
+; CHECK-BE-NEXT:    mtvsrd v3, r5
+; CHECK-BE-NEXT:    mtvsrd v4, r6
+; CHECK-BE-NEXT:    mtvsrd v5, r7
+; CHECK-BE-NEXT:    mtvsrd v8, r12
+; CHECK-BE-NEXT:    mtvsrd v10, r0
+; CHECK-BE-NEXT:    mtvsrd v12, r30
+; CHECK-BE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v0, r8
+; CHECK-BE-NEXT:    mtvsrd v1, r9
+; CHECK-BE-NEXT:    mtvsrd v6, r10
+; CHECK-BE-NEXT:    mtvsrd v7, r11
+; CHECK-BE-NEXT:    mtvsrd v9, r28
+; CHECK-BE-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v11, r27
+; CHECK-BE-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v13, r26
+; CHECK-BE-NEXT:    mtvsrd v14, r29
+; CHECK-BE-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v15, r25
+; CHECK-BE-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    vmrghh v2, v2, v0
+; CHECK-BE-NEXT:    vmrghh v3, v3, v1
+; CHECK-BE-NEXT:    vmrghh v4, v4, v6
+; CHECK-BE-NEXT:    vmrghh v5, v5, v7
+; CHECK-BE-NEXT:    vmrghh v0, v8, v9
+; CHECK-BE-NEXT:    vmrghh v1, v10, v11
+; CHECK-BE-NEXT:    vmrghh v6, v12, v13
+; CHECK-BE-NEXT:    vmrghh v7, v14, v15
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    vmrghw v3, v5, v4
+; CHECK-BE-NEXT:    vmrghw v4, v1, v0
+; CHECK-BE-NEXT:    vmrghw v5, v7, v6
+; CHECK-BE-NEXT:    xxmrghd vs0, v3, v2
+; CHECK-BE-NEXT:    xxmrghd vs1, v5, v4
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <16 x double>, <16 x double>* %0, align 128
+  %1 = fptosi <16 x double> %a to <16 x i16>
+  store <16 x i16> %1, <16 x i16>* %agg.result, align 32
+  ret void
+}

Added: llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll?rev=347090&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll Fri Nov 16 12:24:10 2018
@@ -0,0 +1,598 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P8
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P9
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE
+
+define i64 @test2elt(<2 x double> %a) local_unnamed_addr #0 { ; unsigned 2 x double -> 2 x i32 conversion, returned as an i64
+; CHECK-P8-LABEL: test2elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xscvdpuxws f1, v2
+; CHECK-P8-NEXT:    xscvdpuxws f0, f0
+; CHECK-P8-NEXT:    mfvsrwz r3, f1
+; CHECK-P8-NEXT:    mfvsrwz r4, f0
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    vmrglw v2, v2, v3
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xxswapd vs0, v2
+; CHECK-P9-NEXT:    xscvdpuxws f1, v2
+; CHECK-P9-NEXT:    xscvdpuxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrws v2, r3
+; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    mtvsrws v3, r4
+; CHECK-P9-NEXT:    vmrglw v2, v2, v3
+; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxswapd vs0, v2
+; CHECK-BE-NEXT:    xscvdpuxws f1, v2
+; CHECK-BE-NEXT:    xscvdpuxws f0, f0
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    mtvsrws v2, r3
+; CHECK-BE-NEXT:    mfvsrwz r4, f0
+; CHECK-BE-NEXT:    mtvsrws v3, r4
+; CHECK-BE-NEXT:    vmrghw v2, v2, v3
+; CHECK-BE-NEXT:    mfvsrd r3, v2
+; CHECK-BE-NEXT:    blr
+entry: ; the assertions above expect one scalar xscvdpuxws per lane on all three RUN configurations
+  %0 = fptoui <2 x double> %a to <2 x i32> ; the conversion under test (unsigned)
+  %1 = bitcast <2 x i32> %0 to i64 ; reinterpret the 64-bit vector result as a scalar so it can be returned as i64
+  ret i64 %1
+}
+
+define <4 x i32> @test4elt(<4 x double>* nocapture readonly) local_unnamed_addr #1 { ; unsigned 4 x double -> 4 x i32 conversion; input read through the unnamed pointer argument (%0)
+; CHECK-P8-LABEL: test4elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxmrgld vs2, vs0, vs1
+; CHECK-P8-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-P8-NEXT:    xvcvdpuxws v2, vs2
+; CHECK-P8-NEXT:    xvcvdpuxws v3, vs0
+; CHECK-P8-NEXT:    vmrgew v2, v3, v2
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test4elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 0(r3)
+; CHECK-P9-NEXT:    lxv vs1, 16(r3)
+; CHECK-P9-NEXT:    xxmrgld vs2, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xvcvdpuxws v2, vs2
+; CHECK-P9-NEXT:    xvcvdpuxws v3, vs0
+; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 16(r3)
+; CHECK-BE-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-NEXT:    xxmrgld vs2, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xvcvdpuxws v2, vs2
+; CHECK-BE-NEXT:    xvcvdpuxws v3, vs0
+; CHECK-BE-NEXT:    vmrgew v2, v3, v2
+; CHECK-BE-NEXT:    blr
+entry: ; the assertions above expect two vector xvcvdpuxws conversions combined by one vmrgew
+  %a = load <4 x double>, <4 x double>* %0, align 32 ; fetch all four doubles from the pointer argument
+  %1 = fptoui <4 x double> %a to <4 x i32> ; the conversion under test (unsigned)
+  ret <4 x i32> %1
+}
+
+define void @test8elt(<8 x i32>* noalias nocapture sret %agg.result, <8 x double>* nocapture readonly) local_unnamed_addr #2 { ; unsigned 8 x double -> 8 x i32 conversion; input via unnamed pointer (%0), result written to %agg.result
+; CHECK-P8-LABEL: test8elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    li r6, 48
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r5
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xxmrgld vs4, vs1, vs0
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxmrgld vs1, vs2, vs3
+; CHECK-P8-NEXT:    xxmrghd vs2, vs2, vs3
+; CHECK-P8-NEXT:    xvcvdpuxws v2, vs4
+; CHECK-P8-NEXT:    xvcvdpuxws v3, vs0
+; CHECK-P8-NEXT:    xvcvdpuxws v4, vs1
+; CHECK-P8-NEXT:    xvcvdpuxws v5, vs2
+; CHECK-P8-NEXT:    vmrgew v2, v3, v2
+; CHECK-P8-NEXT:    vmrgew v3, v5, v4
+; CHECK-P8-NEXT:    stvx v2, r3, r5
+; CHECK-P8-NEXT:    stvx v3, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 32(r4)
+; CHECK-P9-NEXT:    lxv vs1, 48(r4)
+; CHECK-P9-NEXT:    lxv vs2, 0(r4)
+; CHECK-P9-NEXT:    lxv vs3, 16(r4)
+; CHECK-P9-NEXT:    xxmrgld vs4, vs3, vs2
+; CHECK-P9-NEXT:    xxmrghd vs2, vs3, vs2
+; CHECK-P9-NEXT:    xxmrgld vs3, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xvcvdpuxws v2, vs4
+; CHECK-P9-NEXT:    xvcvdpuxws v3, vs2
+; CHECK-P9-NEXT:    xvcvdpuxws v4, vs3
+; CHECK-P9-NEXT:    xvcvdpuxws v5, vs0
+; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    vmrgew v3, v5, v4
+; CHECK-P9-NEXT:    stxv v3, 16(r3)
+; CHECK-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 48(r4)
+; CHECK-BE-NEXT:    lxv vs1, 32(r4)
+; CHECK-BE-NEXT:    lxv vs2, 16(r4)
+; CHECK-BE-NEXT:    lxv vs3, 0(r4)
+; CHECK-BE-NEXT:    xxmrgld vs4, vs3, vs2
+; CHECK-BE-NEXT:    xxmrghd vs2, vs3, vs2
+; CHECK-BE-NEXT:    xxmrgld vs3, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xvcvdpuxws v2, vs4
+; CHECK-BE-NEXT:    xvcvdpuxws v3, vs2
+; CHECK-BE-NEXT:    xvcvdpuxws v4, vs3
+; CHECK-BE-NEXT:    xvcvdpuxws v5, vs0
+; CHECK-BE-NEXT:    vmrgew v2, v3, v2
+; CHECK-BE-NEXT:    vmrgew v3, v5, v4
+; CHECK-BE-NEXT:    stxv v3, 16(r3)
+; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry: ; the assertions above expect four vector xvcvdpuxws conversions, two vmrgew merges and two 16-byte stores
+  %a = load <8 x double>, <8 x double>* %0, align 64 ; fetch all eight doubles from the pointer argument
+  %1 = fptoui <8 x double> %a to <8 x i32> ; the conversion under test (unsigned)
+  store <8 x i32> %1, <8 x i32>* %agg.result, align 32 ; write the converted vector to the sret result slot
+  ret void
+}
+
+define void @test16elt(<16 x i32>* noalias nocapture sret %agg.result, <16 x double>* nocapture readonly) local_unnamed_addr #2 { ; unsigned 16 x double -> 16 x i32 conversion; input via unnamed pointer (%0), result written to %agg.result
+; CHECK-P8-LABEL: test16elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    li r6, 48
+; CHECK-P8-NEXT:    li r8, 64
+; CHECK-P8-NEXT:    li r7, 16
+; CHECK-P8-NEXT:    li r9, 80
+; CHECK-P8-NEXT:    lxvd2x vs7, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs3, r4, r8
+; CHECK-P8-NEXT:    li r8, 96
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT:    lxvd2x vs5, r4, r8
+; CHECK-P8-NEXT:    li r8, 112
+; CHECK-P8-NEXT:    lxvd2x vs4, r4, r9
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    lxvd2x vs6, r4, r8
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xxswapd vs4, vs4
+; CHECK-P8-NEXT:    xxswapd vs5, vs5
+; CHECK-P8-NEXT:    xxmrgld vs8, vs1, vs0
+; CHECK-P8-NEXT:    xxswapd vs6, vs6
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxswapd vs1, vs7
+; CHECK-P8-NEXT:    xxmrgld vs7, vs4, vs3
+; CHECK-P8-NEXT:    xxmrghd vs3, vs4, vs3
+; CHECK-P8-NEXT:    xxmrgld vs4, vs6, vs5
+; CHECK-P8-NEXT:    xvcvdpuxws v2, vs8
+; CHECK-P8-NEXT:    xvcvdpuxws v3, vs0
+; CHECK-P8-NEXT:    xxmrghd vs0, vs6, vs5
+; CHECK-P8-NEXT:    xxmrgld vs5, vs2, vs1
+; CHECK-P8-NEXT:    xxmrghd vs1, vs2, vs1
+; CHECK-P8-NEXT:    xvcvdpuxws v4, vs7
+; CHECK-P8-NEXT:    xvcvdpuxws v5, vs3
+; CHECK-P8-NEXT:    xvcvdpuxws v0, vs4
+; CHECK-P8-NEXT:    xvcvdpuxws v1, vs0
+; CHECK-P8-NEXT:    xvcvdpuxws v6, vs5
+; CHECK-P8-NEXT:    xvcvdpuxws v7, vs1
+; CHECK-P8-NEXT:    vmrgew v2, v3, v2
+; CHECK-P8-NEXT:    vmrgew v3, v5, v4
+; CHECK-P8-NEXT:    vmrgew v4, v1, v0
+; CHECK-P8-NEXT:    vmrgew v5, v7, v6
+; CHECK-P8-NEXT:    stvx v2, r3, r7
+; CHECK-P8-NEXT:    stvx v3, r3, r5
+; CHECK-P8-NEXT:    stvx v4, r3, r6
+; CHECK-P8-NEXT:    stvx v5, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 32(r4)
+; CHECK-P9-NEXT:    lxv vs1, 48(r4)
+; CHECK-P9-NEXT:    lxv vs2, 0(r4)
+; CHECK-P9-NEXT:    lxv vs3, 16(r4)
+; CHECK-P9-NEXT:    lxv vs4, 96(r4)
+; CHECK-P9-NEXT:    lxv vs5, 112(r4)
+; CHECK-P9-NEXT:    lxv vs6, 64(r4)
+; CHECK-P9-NEXT:    lxv vs7, 80(r4)
+; CHECK-P9-NEXT:    xxmrgld vs8, vs3, vs2
+; CHECK-P9-NEXT:    xxmrghd vs2, vs3, vs2
+; CHECK-P9-NEXT:    xxmrgld vs3, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xxmrgld vs1, vs7, vs6
+; CHECK-P9-NEXT:    xxmrghd vs6, vs7, vs6
+; CHECK-P9-NEXT:    xxmrgld vs7, vs5, vs4
+; CHECK-P9-NEXT:    xxmrghd vs4, vs5, vs4
+; CHECK-P9-NEXT:    xvcvdpuxws v2, vs8
+; CHECK-P9-NEXT:    xvcvdpuxws v3, vs2
+; CHECK-P9-NEXT:    xvcvdpuxws v4, vs3
+; CHECK-P9-NEXT:    xvcvdpuxws v5, vs0
+; CHECK-P9-NEXT:    xvcvdpuxws v0, vs1
+; CHECK-P9-NEXT:    xvcvdpuxws v1, vs6
+; CHECK-P9-NEXT:    xvcvdpuxws v6, vs7
+; CHECK-P9-NEXT:    xvcvdpuxws v7, vs4
+; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    vmrgew v3, v5, v4
+; CHECK-P9-NEXT:    vmrgew v4, v1, v0
+; CHECK-P9-NEXT:    vmrgew v5, v7, v6
+; CHECK-P9-NEXT:    stxv v3, 16(r3)
+; CHECK-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-P9-NEXT:    stxv v5, 48(r3)
+; CHECK-P9-NEXT:    stxv v4, 32(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 48(r4)
+; CHECK-BE-NEXT:    lxv vs1, 32(r4)
+; CHECK-BE-NEXT:    lxv vs2, 16(r4)
+; CHECK-BE-NEXT:    lxv vs3, 0(r4)
+; CHECK-BE-NEXT:    lxv vs4, 112(r4)
+; CHECK-BE-NEXT:    lxv vs5, 96(r4)
+; CHECK-BE-NEXT:    lxv vs6, 80(r4)
+; CHECK-BE-NEXT:    lxv vs7, 64(r4)
+; CHECK-BE-NEXT:    xxmrgld vs8, vs3, vs2
+; CHECK-BE-NEXT:    xxmrghd vs2, vs3, vs2
+; CHECK-BE-NEXT:    xxmrgld vs3, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xxmrgld vs1, vs7, vs6
+; CHECK-BE-NEXT:    xxmrghd vs6, vs7, vs6
+; CHECK-BE-NEXT:    xxmrgld vs7, vs5, vs4
+; CHECK-BE-NEXT:    xxmrghd vs4, vs5, vs4
+; CHECK-BE-NEXT:    xvcvdpuxws v2, vs8
+; CHECK-BE-NEXT:    xvcvdpuxws v3, vs2
+; CHECK-BE-NEXT:    xvcvdpuxws v4, vs3
+; CHECK-BE-NEXT:    xvcvdpuxws v5, vs0
+; CHECK-BE-NEXT:    xvcvdpuxws v0, vs1
+; CHECK-BE-NEXT:    xvcvdpuxws v1, vs6
+; CHECK-BE-NEXT:    xvcvdpuxws v6, vs7
+; CHECK-BE-NEXT:    xvcvdpuxws v7, vs4
+; CHECK-BE-NEXT:    vmrgew v2, v3, v2
+; CHECK-BE-NEXT:    vmrgew v3, v5, v4
+; CHECK-BE-NEXT:    vmrgew v4, v1, v0
+; CHECK-BE-NEXT:    vmrgew v5, v7, v6
+; CHECK-BE-NEXT:    stxv v3, 16(r3)
+; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    stxv v5, 48(r3)
+; CHECK-BE-NEXT:    stxv v4, 32(r3)
+; CHECK-BE-NEXT:    blr
+entry: ; the assertions above expect eight vector xvcvdpuxws conversions, four vmrgew merges and four 16-byte stores
+  %a = load <16 x double>, <16 x double>* %0, align 128 ; fetch all sixteen doubles from the pointer argument
+  %1 = fptoui <16 x double> %a to <16 x i32> ; the conversion under test (unsigned)
+  store <16 x i32> %1, <16 x i32>* %agg.result, align 64 ; write the converted vector to the sret result slot
+  ret void
+}
+
+define i64 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 { ; signed 2 x double -> 2 x i32 conversion, returned as an i64
+; CHECK-P8-LABEL: test2elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xscvdpsxws f1, v2
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mfvsrwz r3, f1
+; CHECK-P8-NEXT:    mfvsrwz r4, f0
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    vmrglw v2, v2, v3
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xxswapd vs0, v2
+; CHECK-P9-NEXT:    xscvdpsxws f1, v2
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrws v2, r3
+; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    mtvsrws v3, r4
+; CHECK-P9-NEXT:    vmrglw v2, v2, v3
+; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxswapd vs0, v2
+; CHECK-BE-NEXT:    xscvdpsxws f1, v2
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    mtvsrws v2, r3
+; CHECK-BE-NEXT:    mfvsrwz r4, f0
+; CHECK-BE-NEXT:    mtvsrws v3, r4
+; CHECK-BE-NEXT:    vmrghw v2, v2, v3
+; CHECK-BE-NEXT:    mfvsrd r3, v2
+; CHECK-BE-NEXT:    blr
+entry: ; the assertions above expect one scalar xscvdpsxws per lane on all three RUN configurations
+  %0 = fptosi <2 x double> %a to <2 x i32> ; the conversion under test (signed)
+  %1 = bitcast <2 x i32> %0 to i64 ; reinterpret the 64-bit vector result as a scalar so it can be returned as i64
+  ret i64 %1
+}
+
+define <4 x i32> @test4elt_signed(<4 x double>* nocapture readonly) local_unnamed_addr #1 { ; signed 4 x double -> 4 x i32 conversion; input read through the unnamed pointer argument (%0)
+; CHECK-P8-LABEL: test4elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxmrgld vs2, vs0, vs1
+; CHECK-P8-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-P8-NEXT:    xvcvdpsxws v2, vs2
+; CHECK-P8-NEXT:    xvcvdpsxws v3, vs0
+; CHECK-P8-NEXT:    vmrgew v2, v3, v2
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test4elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 0(r3)
+; CHECK-P9-NEXT:    lxv vs1, 16(r3)
+; CHECK-P9-NEXT:    xxmrgld vs2, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xvcvdpsxws v2, vs2
+; CHECK-P9-NEXT:    xvcvdpsxws v3, vs0
+; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 16(r3)
+; CHECK-BE-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-NEXT:    xxmrgld vs2, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xvcvdpsxws v2, vs2
+; CHECK-BE-NEXT:    xvcvdpsxws v3, vs0
+; CHECK-BE-NEXT:    vmrgew v2, v3, v2
+; CHECK-BE-NEXT:    blr
+entry: ; the assertions above expect two vector xvcvdpsxws conversions combined by one vmrgew
+  %a = load <4 x double>, <4 x double>* %0, align 32 ; fetch all four doubles from the pointer argument
+  %1 = fptosi <4 x double> %a to <4 x i32> ; the conversion under test (signed)
+  ret <4 x i32> %1
+}
+
+define void @test8elt_signed(<8 x i32>* noalias nocapture sret %agg.result, <8 x double>* nocapture readonly) local_unnamed_addr #2 { ; signed 8 x double -> 8 x i32 conversion; input via unnamed pointer (%0), result written to %agg.result
+; CHECK-P8-LABEL: test8elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    li r6, 48
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r5
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xxmrgld vs4, vs1, vs0
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxmrgld vs1, vs2, vs3
+; CHECK-P8-NEXT:    xxmrghd vs2, vs2, vs3
+; CHECK-P8-NEXT:    xvcvdpsxws v2, vs4
+; CHECK-P8-NEXT:    xvcvdpsxws v3, vs0
+; CHECK-P8-NEXT:    xvcvdpsxws v4, vs1
+; CHECK-P8-NEXT:    xvcvdpsxws v5, vs2
+; CHECK-P8-NEXT:    vmrgew v2, v3, v2
+; CHECK-P8-NEXT:    vmrgew v3, v5, v4
+; CHECK-P8-NEXT:    stvx v2, r3, r5
+; CHECK-P8-NEXT:    stvx v3, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 32(r4)
+; CHECK-P9-NEXT:    lxv vs1, 48(r4)
+; CHECK-P9-NEXT:    lxv vs2, 0(r4)
+; CHECK-P9-NEXT:    lxv vs3, 16(r4)
+; CHECK-P9-NEXT:    xxmrgld vs4, vs3, vs2
+; CHECK-P9-NEXT:    xxmrghd vs2, vs3, vs2
+; CHECK-P9-NEXT:    xxmrgld vs3, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xvcvdpsxws v2, vs4
+; CHECK-P9-NEXT:    xvcvdpsxws v3, vs2
+; CHECK-P9-NEXT:    xvcvdpsxws v4, vs3
+; CHECK-P9-NEXT:    xvcvdpsxws v5, vs0
+; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    vmrgew v3, v5, v4
+; CHECK-P9-NEXT:    stxv v3, 16(r3)
+; CHECK-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 48(r4)
+; CHECK-BE-NEXT:    lxv vs1, 32(r4)
+; CHECK-BE-NEXT:    lxv vs2, 16(r4)
+; CHECK-BE-NEXT:    lxv vs3, 0(r4)
+; CHECK-BE-NEXT:    xxmrgld vs4, vs3, vs2
+; CHECK-BE-NEXT:    xxmrghd vs2, vs3, vs2
+; CHECK-BE-NEXT:    xxmrgld vs3, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xvcvdpsxws v2, vs4
+; CHECK-BE-NEXT:    xvcvdpsxws v3, vs2
+; CHECK-BE-NEXT:    xvcvdpsxws v4, vs3
+; CHECK-BE-NEXT:    xvcvdpsxws v5, vs0
+; CHECK-BE-NEXT:    vmrgew v2, v3, v2
+; CHECK-BE-NEXT:    vmrgew v3, v5, v4
+; CHECK-BE-NEXT:    stxv v3, 16(r3)
+; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry: ; the assertions above expect four vector xvcvdpsxws conversions, two vmrgew merges and two 16-byte stores
+  %a = load <8 x double>, <8 x double>* %0, align 64 ; fetch all eight doubles from the pointer argument
+  %1 = fptosi <8 x double> %a to <8 x i32> ; the conversion under test (signed)
+  store <8 x i32> %1, <8 x i32>* %agg.result, align 32 ; write the converted vector to the sret result slot
+  ret void
+}
+
+define void @test16elt_signed(<16 x i32>* noalias nocapture sret %agg.result, <16 x double>* nocapture readonly) local_unnamed_addr #2 { ; signed 16 x double -> 16 x i32 conversion; input via unnamed pointer (%0), result written to %agg.result
+; CHECK-P8-LABEL: test16elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    li r6, 48
+; CHECK-P8-NEXT:    li r8, 64
+; CHECK-P8-NEXT:    li r7, 16
+; CHECK-P8-NEXT:    li r9, 80
+; CHECK-P8-NEXT:    lxvd2x vs7, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs3, r4, r8
+; CHECK-P8-NEXT:    li r8, 96
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT:    lxvd2x vs5, r4, r8
+; CHECK-P8-NEXT:    li r8, 112
+; CHECK-P8-NEXT:    lxvd2x vs4, r4, r9
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    lxvd2x vs6, r4, r8
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xxswapd vs4, vs4
+; CHECK-P8-NEXT:    xxswapd vs5, vs5
+; CHECK-P8-NEXT:    xxmrgld vs8, vs1, vs0
+; CHECK-P8-NEXT:    xxswapd vs6, vs6
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxswapd vs1, vs7
+; CHECK-P8-NEXT:    xxmrgld vs7, vs4, vs3
+; CHECK-P8-NEXT:    xxmrghd vs3, vs4, vs3
+; CHECK-P8-NEXT:    xxmrgld vs4, vs6, vs5
+; CHECK-P8-NEXT:    xvcvdpsxws v2, vs8
+; CHECK-P8-NEXT:    xvcvdpsxws v3, vs0
+; CHECK-P8-NEXT:    xxmrghd vs0, vs6, vs5
+; CHECK-P8-NEXT:    xxmrgld vs5, vs2, vs1
+; CHECK-P8-NEXT:    xxmrghd vs1, vs2, vs1
+; CHECK-P8-NEXT:    xvcvdpsxws v4, vs7
+; CHECK-P8-NEXT:    xvcvdpsxws v5, vs3
+; CHECK-P8-NEXT:    xvcvdpsxws v0, vs4
+; CHECK-P8-NEXT:    xvcvdpsxws v1, vs0
+; CHECK-P8-NEXT:    xvcvdpsxws v6, vs5
+; CHECK-P8-NEXT:    xvcvdpsxws v7, vs1
+; CHECK-P8-NEXT:    vmrgew v2, v3, v2
+; CHECK-P8-NEXT:    vmrgew v3, v5, v4
+; CHECK-P8-NEXT:    vmrgew v4, v1, v0
+; CHECK-P8-NEXT:    vmrgew v5, v7, v6
+; CHECK-P8-NEXT:    stvx v2, r3, r7
+; CHECK-P8-NEXT:    stvx v3, r3, r5
+; CHECK-P8-NEXT:    stvx v4, r3, r6
+; CHECK-P8-NEXT:    stvx v5, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 32(r4)
+; CHECK-P9-NEXT:    lxv vs1, 48(r4)
+; CHECK-P9-NEXT:    lxv vs2, 0(r4)
+; CHECK-P9-NEXT:    lxv vs3, 16(r4)
+; CHECK-P9-NEXT:    lxv vs4, 96(r4)
+; CHECK-P9-NEXT:    lxv vs5, 112(r4)
+; CHECK-P9-NEXT:    lxv vs6, 64(r4)
+; CHECK-P9-NEXT:    lxv vs7, 80(r4)
+; CHECK-P9-NEXT:    xxmrgld vs8, vs3, vs2
+; CHECK-P9-NEXT:    xxmrghd vs2, vs3, vs2
+; CHECK-P9-NEXT:    xxmrgld vs3, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xxmrgld vs1, vs7, vs6
+; CHECK-P9-NEXT:    xxmrghd vs6, vs7, vs6
+; CHECK-P9-NEXT:    xxmrgld vs7, vs5, vs4
+; CHECK-P9-NEXT:    xxmrghd vs4, vs5, vs4
+; CHECK-P9-NEXT:    xvcvdpsxws v2, vs8
+; CHECK-P9-NEXT:    xvcvdpsxws v3, vs2
+; CHECK-P9-NEXT:    xvcvdpsxws v4, vs3
+; CHECK-P9-NEXT:    xvcvdpsxws v5, vs0
+; CHECK-P9-NEXT:    xvcvdpsxws v0, vs1
+; CHECK-P9-NEXT:    xvcvdpsxws v1, vs6
+; CHECK-P9-NEXT:    xvcvdpsxws v6, vs7
+; CHECK-P9-NEXT:    xvcvdpsxws v7, vs4
+; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    vmrgew v3, v5, v4
+; CHECK-P9-NEXT:    vmrgew v4, v1, v0
+; CHECK-P9-NEXT:    vmrgew v5, v7, v6
+; CHECK-P9-NEXT:    stxv v3, 16(r3)
+; CHECK-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-P9-NEXT:    stxv v5, 48(r3)
+; CHECK-P9-NEXT:    stxv v4, 32(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 48(r4)
+; CHECK-BE-NEXT:    lxv vs1, 32(r4)
+; CHECK-BE-NEXT:    lxv vs2, 16(r4)
+; CHECK-BE-NEXT:    lxv vs3, 0(r4)
+; CHECK-BE-NEXT:    lxv vs4, 112(r4)
+; CHECK-BE-NEXT:    lxv vs5, 96(r4)
+; CHECK-BE-NEXT:    lxv vs6, 80(r4)
+; CHECK-BE-NEXT:    lxv vs7, 64(r4)
+; CHECK-BE-NEXT:    xxmrgld vs8, vs3, vs2
+; CHECK-BE-NEXT:    xxmrghd vs2, vs3, vs2
+; CHECK-BE-NEXT:    xxmrgld vs3, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xxmrgld vs1, vs7, vs6
+; CHECK-BE-NEXT:    xxmrghd vs6, vs7, vs6
+; CHECK-BE-NEXT:    xxmrgld vs7, vs5, vs4
+; CHECK-BE-NEXT:    xxmrghd vs4, vs5, vs4
+; CHECK-BE-NEXT:    xvcvdpsxws v2, vs8
+; CHECK-BE-NEXT:    xvcvdpsxws v3, vs2
+; CHECK-BE-NEXT:    xvcvdpsxws v4, vs3
+; CHECK-BE-NEXT:    xvcvdpsxws v5, vs0
+; CHECK-BE-NEXT:    xvcvdpsxws v0, vs1
+; CHECK-BE-NEXT:    xvcvdpsxws v1, vs6
+; CHECK-BE-NEXT:    xvcvdpsxws v6, vs7
+; CHECK-BE-NEXT:    xvcvdpsxws v7, vs4
+; CHECK-BE-NEXT:    vmrgew v2, v3, v2
+; CHECK-BE-NEXT:    vmrgew v3, v5, v4
+; CHECK-BE-NEXT:    vmrgew v4, v1, v0
+; CHECK-BE-NEXT:    vmrgew v5, v7, v6
+; CHECK-BE-NEXT:    stxv v3, 16(r3)
+; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    stxv v5, 48(r3)
+; CHECK-BE-NEXT:    stxv v4, 32(r3)
+; CHECK-BE-NEXT:    blr
+entry: ; the assertions above expect eight vector xvcvdpsxws conversions, four vmrgew merges and four 16-byte stores
+  %a = load <16 x double>, <16 x double>* %0, align 128 ; fetch all sixteen doubles from the pointer argument
+  %1 = fptosi <16 x double> %a to <16 x i32> ; the conversion under test (signed)
+  store <16 x i32> %1, <16 x i32>* %agg.result, align 64 ; write the converted vector to the sret result slot
+  ret void
+}

Added: llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll?rev=347090&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll Fri Nov 16 12:24:10 2018
@@ -0,0 +1,1316 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P8
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P9
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE
+
+define i16 @test2elt(<2 x double> %a) local_unnamed_addr #0 { ; fptoui <2 x double> to <2 x i8>, returned packed in one i16; the powerpc64le / pwr8 expectations come first
+; CHECK-P8-LABEL: test2elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xscvdpsxws f1, v2
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mfvsrwz r3, f1
+; CHECK-P8-NEXT:    mfvsrwz r4, f0
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    vmrglb v2, v2, v3
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    clrldi r3, r3, 48
+; CHECK-P8-NEXT:    sth r3, -2(r1)
+; CHECK-P8-NEXT:    lhz r3, -2(r1)
+; CHECK-P8-NEXT:    blr
+; Expected output for the powerpc64le / pwr9 run follows.
+; CHECK-P9-LABEL: test2elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xxswapd vs0, v2
+; CHECK-P9-NEXT:    xscvdpsxws f1, v2
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    addi r3, r1, -2
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    vmrglb v2, v2, v3
+; CHECK-P9-NEXT:    vsldoi v2, v2, v2, 8
+; CHECK-P9-NEXT:    stxsihx v2, 0, r3
+; CHECK-P9-NEXT:    lhz r3, -2(r1)
+; CHECK-P9-NEXT:    blr
+; Expected output for the powerpc64 / pwr9 run follows.
+; CHECK-BE-LABEL: test2elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxswapd vs0, v2
+; CHECK-BE-NEXT:    xscvdpsxws f1, v2
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mfvsrwz r4, f0
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    addi r3, r1, -2
+; CHECK-BE-NEXT:    sldi r4, r4, 56
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
+; CHECK-BE-NEXT:    stxsihx v2, 0, r3
+; CHECK-BE-NEXT:    lhz r3, -2(r1)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = fptoui <2 x double> %a to <2 x i8> ; unsigned conversion; all three runs above nevertheless expect xscvdpsxws, the same mnemonic expected for test2elt_signed
+  %1 = bitcast <2 x i8> %0 to i16 ; reinterpret the two result bytes as a single i16 return value
+  ret i16 %1
+}
+
+define i32 @test4elt(<4 x double>* nocapture readonly) local_unnamed_addr #1 { ; load <4 x double>, fptoui to <4 x i8>, return packed in one i32; the powerpc64le / pwr8 expectations come first
+; CHECK-P8-LABEL: test4elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    xscvdpsxws f2, f0
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xscvdpsxws f3, f1
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mfvsrwz r3, f2
+; CHECK-P8-NEXT:    mfvsrwz r4, f3
+; CHECK-P8-NEXT:    mtvsrd f2, r3
+; CHECK-P8-NEXT:    mtvsrd f3, r4
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    xxswapd v2, vs2
+; CHECK-P8-NEXT:    mfvsrwz r4, f1
+; CHECK-P8-NEXT:    xxswapd v4, vs3
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    xxswapd v5, vs1
+; CHECK-P8-NEXT:    vmrglb v2, v3, v2
+; CHECK-P8-NEXT:    vmrglb v3, v5, v4
+; CHECK-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    blr
+; Expected output for the powerpc64le / pwr9 run follows.
+; CHECK-P9-LABEL: test4elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 16(r3)
+; CHECK-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-P9-NEXT:    xxswapd vs2, vs1
+; CHECK-P9-NEXT:    xxswapd vs3, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mfvsrwz r5, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    li r3, 0
+; CHECK-P9-NEXT:    mfvsrwz r4, f2
+; CHECK-P9-NEXT:    mfvsrwz r6, f3
+; CHECK-P9-NEXT:    mtvsrd f2, r5
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f3, r6
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xxswapd v5, vs3
+; CHECK-P9-NEXT:    vmrglb v2, v2, v3
+; CHECK-P9-NEXT:    vmrglb v3, v4, v5
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
+; CHECK-P9-NEXT:    blr
+; Expected output for the powerpc64 / pwr9 run follows.
+; CHECK-BE-LABEL: test4elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    xxswapd vs2, vs1
+; CHECK-BE-NEXT:    xxswapd vs3, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    mfvsrwz r5, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    sldi r5, r5, 56
+; CHECK-BE-NEXT:    mfvsrwz r4, f2
+; CHECK-BE-NEXT:    mfvsrwz r6, f3
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    li r3, 0
+; CHECK-BE-NEXT:    sldi r4, r4, 56
+; CHECK-BE-NEXT:    sldi r6, r6, 56
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    mtvsrd v5, r6
+; CHECK-BE-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-NEXT:    vmrghb v3, v4, v5
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <4 x double>, <4 x double>* %0, align 32 ; %0 is the unnamed pointer argument
+  %1 = fptoui <4 x double> %a to <4 x i8> ; unsigned conversion; all three runs above nevertheless expect xscvdpsxws, the same mnemonic expected for test4elt_signed
+  %2 = bitcast <4 x i8> %1 to i32 ; reinterpret the four result bytes as a single i32 return value
+  ret i32 %2
+}
+
+define i64 @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr #1 { ; load <8 x double>, fptoui to <8 x i8>, return packed in one i64; the powerpc64le / pwr8 expectations come first
+; CHECK-P8-LABEL: test8elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    lxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    xscvdpsxws f4, f0
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xscvdpsxws f5, f1
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f6, f2
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xscvdpsxws f7, f3
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mfvsrwz r3, f4
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    mfvsrwz r4, f5
+; CHECK-P8-NEXT:    mtvsrd f4, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f6
+; CHECK-P8-NEXT:    mtvsrd f5, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs4
+; CHECK-P8-NEXT:    mfvsrwz r4, f7
+; CHECK-P8-NEXT:    mtvsrd f6, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs5
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    mtvsrd f7, r4
+; CHECK-P8-NEXT:    xxswapd v4, vs6
+; CHECK-P8-NEXT:    mfvsrwz r4, f1
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v1, vs7
+; CHECK-P8-NEXT:    mfvsrwz r3, f2
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    xxswapd v5, vs0
+; CHECK-P8-NEXT:    mfvsrwz r4, f3
+; CHECK-P8-NEXT:    mtvsrd f2, r3
+; CHECK-P8-NEXT:    xxswapd v0, vs1
+; CHECK-P8-NEXT:    mtvsrd f0, r4
+; CHECK-P8-NEXT:    xxswapd v6, vs2
+; CHECK-P8-NEXT:    vmrglb v2, v5, v2
+; CHECK-P8-NEXT:    xxswapd v5, vs0
+; CHECK-P8-NEXT:    vmrglb v3, v0, v3
+; CHECK-P8-NEXT:    vmrglb v4, v6, v4
+; CHECK-P8-NEXT:    vmrglb v5, v5, v1
+; CHECK-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-P8-NEXT:    vmrglh v3, v5, v4
+; CHECK-P8-NEXT:    vmrglw v2, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    blr
+; Expected output for the powerpc64le / pwr9 run follows.
+; CHECK-P9-LABEL: test8elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 48(r3)
+; CHECK-P9-NEXT:    lxv vs1, 32(r3)
+; CHECK-P9-NEXT:    lxv vs2, 16(r3)
+; CHECK-P9-NEXT:    lxv vs3, 0(r3)
+; CHECK-P9-NEXT:    xxswapd vs4, vs3
+; CHECK-P9-NEXT:    xxswapd vs5, vs2
+; CHECK-P9-NEXT:    xxswapd vs6, vs1
+; CHECK-P9-NEXT:    xxswapd vs7, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    xscvdpsxws f5, f5
+; CHECK-P9-NEXT:    xscvdpsxws f6, f6
+; CHECK-P9-NEXT:    xscvdpsxws f7, f7
+; CHECK-P9-NEXT:    mfvsrwz r3, f3
+; CHECK-P9-NEXT:    mfvsrwz r5, f2
+; CHECK-P9-NEXT:    mfvsrwz r7, f1
+; CHECK-P9-NEXT:    mfvsrwz r9, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    mtvsrd f2, r5
+; CHECK-P9-NEXT:    mfvsrwz r4, f4
+; CHECK-P9-NEXT:    mfvsrwz r6, f5
+; CHECK-P9-NEXT:    mfvsrwz r8, f6
+; CHECK-P9-NEXT:    mfvsrwz r10, f7
+; CHECK-P9-NEXT:    mtvsrd f4, r7
+; CHECK-P9-NEXT:    mtvsrd f6, r9
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f3, r6
+; CHECK-P9-NEXT:    mtvsrd f5, r8
+; CHECK-P9-NEXT:    mtvsrd f7, r10
+; CHECK-P9-NEXT:    xxswapd v0, vs4
+; CHECK-P9-NEXT:    xxswapd v6, vs6
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xxswapd v5, vs3
+; CHECK-P9-NEXT:    xxswapd v1, vs5
+; CHECK-P9-NEXT:    xxswapd v7, vs7
+; CHECK-P9-NEXT:    vmrglb v2, v2, v3
+; CHECK-P9-NEXT:    vmrglb v3, v4, v5
+; CHECK-P9-NEXT:    vmrglb v4, v0, v1
+; CHECK-P9-NEXT:    vmrglb v5, v6, v7
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    vmrglh v3, v5, v4
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    blr
+; Expected output for the powerpc64 / pwr9 run follows.
+; CHECK-BE-LABEL: test8elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    lxv vs2, 32(r3)
+; CHECK-BE-NEXT:    lxv vs3, 48(r3)
+; CHECK-BE-NEXT:    xxswapd vs4, vs3
+; CHECK-BE-NEXT:    xxswapd vs5, vs2
+; CHECK-BE-NEXT:    xxswapd vs6, vs1
+; CHECK-BE-NEXT:    xxswapd vs7, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    xscvdpsxws f6, f6
+; CHECK-BE-NEXT:    xscvdpsxws f7, f7
+; CHECK-BE-NEXT:    mfvsrwz r3, f3
+; CHECK-BE-NEXT:    mfvsrwz r5, f2
+; CHECK-BE-NEXT:    mfvsrwz r7, f1
+; CHECK-BE-NEXT:    mfvsrwz r9, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    sldi r5, r5, 56
+; CHECK-BE-NEXT:    sldi r7, r7, 56
+; CHECK-BE-NEXT:    sldi r9, r9, 56
+; CHECK-BE-NEXT:    mfvsrwz r4, f4
+; CHECK-BE-NEXT:    mfvsrwz r6, f5
+; CHECK-BE-NEXT:    mfvsrwz r8, f6
+; CHECK-BE-NEXT:    mfvsrwz r10, f7
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    mtvsrd v0, r7
+; CHECK-BE-NEXT:    mtvsrd v6, r9
+; CHECK-BE-NEXT:    sldi r4, r4, 56
+; CHECK-BE-NEXT:    sldi r6, r6, 56
+; CHECK-BE-NEXT:    sldi r8, r8, 56
+; CHECK-BE-NEXT:    sldi r10, r10, 56
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    mtvsrd v5, r6
+; CHECK-BE-NEXT:    mtvsrd v1, r8
+; CHECK-BE-NEXT:    mtvsrd v7, r10
+; CHECK-BE-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-NEXT:    vmrghb v3, v4, v5
+; CHECK-BE-NEXT:    vmrghb v4, v0, v1
+; CHECK-BE-NEXT:    vmrghb v5, v6, v7
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    vmrghh v3, v5, v4
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    mfvsrd r3, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <8 x double>, <8 x double>* %0, align 64 ; %0 is the unnamed pointer argument
+  %1 = fptoui <8 x double> %a to <8 x i8> ; unsigned conversion; all three runs above nevertheless expect xscvdpsxws rather than xscvdpuxws
+  %2 = bitcast <8 x i8> %1 to i64 ; reinterpret the eight result bytes as a single i64 return value
+  ret i64 %2
+}
+
+define <16 x i8> @test16elt(<16 x double>* nocapture readonly) local_unnamed_addr #2 { ; load <16 x double>, fptoui to <16 x i8>, returned directly as a vector; the powerpc64le / pwr8 expectations come first
+; CHECK-P8-LABEL: test16elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    lxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    li r4, 64
+; CHECK-P8-NEXT:    xscvdpuxws f4, f0
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    lxvd2x vs5, r3, r4
+; CHECK-P8-NEXT:    li r4, 80
+; CHECK-P8-NEXT:    xscvdpuxws f6, f1
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    lxvd2x vs7, r3, r4
+; CHECK-P8-NEXT:    li r4, 96
+; CHECK-P8-NEXT:    xscvdpuxws f8, f2
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    lxvd2x vs9, r3, r4
+; CHECK-P8-NEXT:    li r4, 112
+; CHECK-P8-NEXT:    xscvdpuxws f10, f3
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    lxvd2x vs11, r3, r4
+; CHECK-P8-NEXT:    xscvdpuxws f12, f5
+; CHECK-P8-NEXT:    xxswapd vs5, vs5
+; CHECK-P8-NEXT:    xscvdpuxws f13, f7
+; CHECK-P8-NEXT:    xxswapd vs7, vs7
+; CHECK-P8-NEXT:    xscvdpuxws v2, f9
+; CHECK-P8-NEXT:    xxswapd vs9, vs9
+; CHECK-P8-NEXT:    mfvsrwz r3, f4
+; CHECK-P8-NEXT:    xscvdpuxws v3, f11
+; CHECK-P8-NEXT:    xxswapd vs11, vs11
+; CHECK-P8-NEXT:    mfvsrwz r4, f6
+; CHECK-P8-NEXT:    xscvdpuxws f0, f0
+; CHECK-P8-NEXT:    mtvsrd f4, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f8
+; CHECK-P8-NEXT:    xscvdpuxws f1, f1
+; CHECK-P8-NEXT:    xxswapd v4, vs4
+; CHECK-P8-NEXT:    mtvsrd f6, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f10
+; CHECK-P8-NEXT:    xscvdpuxws f2, f2
+; CHECK-P8-NEXT:    xxswapd v5, vs6
+; CHECK-P8-NEXT:    mtvsrd f8, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f12
+; CHECK-P8-NEXT:    xscvdpuxws f3, f3
+; CHECK-P8-NEXT:    xxswapd v0, vs8
+; CHECK-P8-NEXT:    mtvsrd f10, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f13
+; CHECK-P8-NEXT:    xscvdpuxws f5, f5
+; CHECK-P8-NEXT:    xxswapd v1, vs10
+; CHECK-P8-NEXT:    mtvsrd f12, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, v2
+; CHECK-P8-NEXT:    xscvdpuxws f7, f7
+; CHECK-P8-NEXT:    xxswapd v6, vs12
+; CHECK-P8-NEXT:    mtvsrd f13, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, v3
+; CHECK-P8-NEXT:    mtvsrd v2, r3
+; CHECK-P8-NEXT:    xxswapd v7, vs13
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    xscvdpuxws f9, f9
+; CHECK-P8-NEXT:    xxswapd v2, v2
+; CHECK-P8-NEXT:    xscvdpuxws f11, f11
+; CHECK-P8-NEXT:    mtvsrd v3, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f1
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v3, v3
+; CHECK-P8-NEXT:    mfvsrwz r3, f2
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    xxswapd v8, vs0
+; CHECK-P8-NEXT:    mfvsrwz r4, f3
+; CHECK-P8-NEXT:    mtvsrd f2, r3
+; CHECK-P8-NEXT:    xxswapd v9, vs1
+; CHECK-P8-NEXT:    mfvsrwz r3, f5
+; CHECK-P8-NEXT:    mtvsrd f3, r4
+; CHECK-P8-NEXT:    xxswapd v10, vs2
+; CHECK-P8-NEXT:    mfvsrwz r4, f7
+; CHECK-P8-NEXT:    mtvsrd f5, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f9
+; CHECK-P8-NEXT:    mtvsrd f7, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f11
+; CHECK-P8-NEXT:    vmrglb v4, v8, v4
+; CHECK-P8-NEXT:    xxswapd v8, vs3
+; CHECK-P8-NEXT:    vmrglb v5, v9, v5
+; CHECK-P8-NEXT:    xxswapd v9, vs5
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    vmrglb v0, v10, v0
+; CHECK-P8-NEXT:    xxswapd v10, vs7
+; CHECK-P8-NEXT:    vmrglb v1, v8, v1
+; CHECK-P8-NEXT:    xxswapd v8, vs0
+; CHECK-P8-NEXT:    vmrglb v6, v9, v6
+; CHECK-P8-NEXT:    xxswapd v9, vs1
+; CHECK-P8-NEXT:    vmrglb v7, v10, v7
+; CHECK-P8-NEXT:    vmrglb v2, v8, v2
+; CHECK-P8-NEXT:    vmrglb v3, v9, v3
+; CHECK-P8-NEXT:    vmrglh v4, v5, v4
+; CHECK-P8-NEXT:    vmrglh v5, v1, v0
+; CHECK-P8-NEXT:    vmrglh v0, v7, v6
+; CHECK-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-P8-NEXT:    vmrglw v3, v5, v4
+; CHECK-P8-NEXT:    vmrglw v2, v2, v0
+; CHECK-P8-NEXT:    xxmrgld v2, v2, v3
+; CHECK-P8-NEXT:    blr
+; Expected output for the powerpc64le / pwr9 run follows; it includes spills and reloads of r26-r30.
+; CHECK-P9-LABEL: test16elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs2, 48(r3)
+; CHECK-P9-NEXT:    lxv vs3, 32(r3)
+; CHECK-P9-NEXT:    lxv vs4, 16(r3)
+; CHECK-P9-NEXT:    lxv vs5, 0(r3)
+; CHECK-P9-NEXT:    lxv vs0, 112(r3)
+; CHECK-P9-NEXT:    lxv vs1, 96(r3)
+; CHECK-P9-NEXT:    lxv vs6, 80(r3)
+; CHECK-P9-NEXT:    lxv vs7, 64(r3)
+; CHECK-P9-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    xxswapd vs8, vs5
+; CHECK-P9-NEXT:    xxswapd vs9, vs4
+; CHECK-P9-NEXT:    xxswapd vs10, vs3
+; CHECK-P9-NEXT:    xxswapd vs11, vs2
+; CHECK-P9-NEXT:    xxswapd vs12, vs7
+; CHECK-P9-NEXT:    xxswapd vs13, vs6
+; CHECK-P9-NEXT:    xxswapd v2, vs1
+; CHECK-P9-NEXT:    xxswapd v3, vs0
+; CHECK-P9-NEXT:    xscvdpuxws f5, f5
+; CHECK-P9-NEXT:    xscvdpuxws f4, f4
+; CHECK-P9-NEXT:    xscvdpuxws f3, f3
+; CHECK-P9-NEXT:    xscvdpuxws f2, f2
+; CHECK-P9-NEXT:    xscvdpuxws f7, f7
+; CHECK-P9-NEXT:    xscvdpuxws f6, f6
+; CHECK-P9-NEXT:    xscvdpuxws f1, f1
+; CHECK-P9-NEXT:    xscvdpuxws f0, f0
+; CHECK-P9-NEXT:    xscvdpuxws f8, f8
+; CHECK-P9-NEXT:    xscvdpuxws f9, f9
+; CHECK-P9-NEXT:    xscvdpuxws f10, f10
+; CHECK-P9-NEXT:    xscvdpuxws f11, f11
+; CHECK-P9-NEXT:    xscvdpuxws f12, f12
+; CHECK-P9-NEXT:    xscvdpuxws f13, f13
+; CHECK-P9-NEXT:    xscvdpuxws v2, v2
+; CHECK-P9-NEXT:    xscvdpuxws v3, v3
+; CHECK-P9-NEXT:    mfvsrwz r3, f5
+; CHECK-P9-NEXT:    mfvsrwz r4, f4
+; CHECK-P9-NEXT:    mfvsrwz r5, f3
+; CHECK-P9-NEXT:    mfvsrwz r6, f2
+; CHECK-P9-NEXT:    mfvsrwz r11, f7
+; CHECK-P9-NEXT:    mfvsrwz r12, f6
+; CHECK-P9-NEXT:    mfvsrwz r0, f1
+; CHECK-P9-NEXT:    mfvsrwz r30, f0
+; CHECK-P9-NEXT:    mfvsrwz r7, f8
+; CHECK-P9-NEXT:    mfvsrwz r8, f9
+; CHECK-P9-NEXT:    mfvsrwz r9, f10
+; CHECK-P9-NEXT:    mfvsrwz r10, f11
+; CHECK-P9-NEXT:    mfvsrwz r29, f12
+; CHECK-P9-NEXT:    mfvsrwz r28, f13
+; CHECK-P9-NEXT:    mfvsrwz r27, v2
+; CHECK-P9-NEXT:    mfvsrwz r26, v3
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f2, r5
+; CHECK-P9-NEXT:    mtvsrd f3, r6
+; CHECK-P9-NEXT:    mtvsrd f8, r11
+; CHECK-P9-NEXT:    mtvsrd f9, r12
+; CHECK-P9-NEXT:    mtvsrd f10, r0
+; CHECK-P9-NEXT:    mtvsrd f11, r30
+; CHECK-P9-NEXT:    mtvsrd f4, r7
+; CHECK-P9-NEXT:    mtvsrd f5, r8
+; CHECK-P9-NEXT:    mtvsrd f6, r9
+; CHECK-P9-NEXT:    mtvsrd f7, r10
+; CHECK-P9-NEXT:    mtvsrd f12, r29
+; CHECK-P9-NEXT:    mtvsrd f13, r28
+; CHECK-P9-NEXT:    mtvsrd v2, r27
+; CHECK-P9-NEXT:    mtvsrd v3, r26
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xxswapd v5, vs1
+; CHECK-P9-NEXT:    xxswapd v0, vs2
+; CHECK-P9-NEXT:    xxswapd v1, vs3
+; CHECK-P9-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    xxswapd v6, vs4
+; CHECK-P9-NEXT:    xxswapd v7, vs5
+; CHECK-P9-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    xxswapd v8, vs6
+; CHECK-P9-NEXT:    xxswapd v9, vs7
+; CHECK-P9-NEXT:    xxswapd v10, vs8
+; CHECK-P9-NEXT:    xxswapd v11, vs12
+; CHECK-P9-NEXT:    xxswapd v12, vs9
+; CHECK-P9-NEXT:    xxswapd v13, vs13
+; CHECK-P9-NEXT:    xxswapd v14, vs10
+; CHECK-P9-NEXT:    xxswapd v2, v2
+; CHECK-P9-NEXT:    xxswapd v15, vs11
+; CHECK-P9-NEXT:    xxswapd v3, v3
+; CHECK-P9-NEXT:    vmrglb v4, v4, v6
+; CHECK-P9-NEXT:    vmrglb v5, v5, v7
+; CHECK-P9-NEXT:    vmrglb v0, v0, v8
+; CHECK-P9-NEXT:    vmrglb v1, v1, v9
+; CHECK-P9-NEXT:    vmrglb v6, v10, v11
+; CHECK-P9-NEXT:    vmrglb v7, v12, v13
+; CHECK-P9-NEXT:    vmrglb v2, v14, v2
+; CHECK-P9-NEXT:    vmrglb v3, v15, v3
+; CHECK-P9-NEXT:    vmrglh v4, v5, v4
+; CHECK-P9-NEXT:    vmrglh v5, v1, v0
+; CHECK-P9-NEXT:    vmrglh v0, v7, v6
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    vmrglw v3, v5, v4
+; CHECK-P9-NEXT:    vmrglw v2, v2, v0
+; CHECK-P9-NEXT:    xxmrgld v2, v2, v3
+; CHECK-P9-NEXT:    blr
+; Expected output for the powerpc64 / pwr9 run follows; it also includes spills and reloads of r26-r30.
+; CHECK-BE-LABEL: test16elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs2, 64(r3)
+; CHECK-BE-NEXT:    lxv vs3, 80(r3)
+; CHECK-BE-NEXT:    lxv vs4, 96(r3)
+; CHECK-BE-NEXT:    lxv vs5, 112(r3)
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    lxv vs6, 32(r3)
+; CHECK-BE-NEXT:    lxv vs7, 48(r3)
+; CHECK-BE-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    xxswapd vs8, vs5
+; CHECK-BE-NEXT:    xxswapd vs9, vs4
+; CHECK-BE-NEXT:    xxswapd vs10, vs3
+; CHECK-BE-NEXT:    xxswapd vs11, vs2
+; CHECK-BE-NEXT:    xxswapd vs12, vs7
+; CHECK-BE-NEXT:    xxswapd vs13, vs6
+; CHECK-BE-NEXT:    xxswapd v2, vs1
+; CHECK-BE-NEXT:    xxswapd v3, vs0
+; CHECK-BE-NEXT:    xscvdpuxws f5, f5
+; CHECK-BE-NEXT:    xscvdpuxws f4, f4
+; CHECK-BE-NEXT:    xscvdpuxws f3, f3
+; CHECK-BE-NEXT:    xscvdpuxws f2, f2
+; CHECK-BE-NEXT:    xscvdpuxws f7, f7
+; CHECK-BE-NEXT:    xscvdpuxws f6, f6
+; CHECK-BE-NEXT:    xscvdpuxws f1, f1
+; CHECK-BE-NEXT:    xscvdpuxws f0, f0
+; CHECK-BE-NEXT:    xscvdpuxws f8, f8
+; CHECK-BE-NEXT:    xscvdpuxws f9, f9
+; CHECK-BE-NEXT:    xscvdpuxws f10, f10
+; CHECK-BE-NEXT:    xscvdpuxws f11, f11
+; CHECK-BE-NEXT:    xscvdpuxws f12, f12
+; CHECK-BE-NEXT:    xscvdpuxws f13, f13
+; CHECK-BE-NEXT:    xscvdpuxws v2, v2
+; CHECK-BE-NEXT:    xscvdpuxws v3, v3
+; CHECK-BE-NEXT:    mfvsrwz r3, f5
+; CHECK-BE-NEXT:    mfvsrwz r4, f4
+; CHECK-BE-NEXT:    mfvsrwz r5, f3
+; CHECK-BE-NEXT:    mfvsrwz r6, f2
+; CHECK-BE-NEXT:    mfvsrwz r11, f7
+; CHECK-BE-NEXT:    mfvsrwz r12, f6
+; CHECK-BE-NEXT:    mfvsrwz r0, f1
+; CHECK-BE-NEXT:    mfvsrwz r30, f0
+; CHECK-BE-NEXT:    mfvsrwz r7, f8
+; CHECK-BE-NEXT:    mfvsrwz r8, f9
+; CHECK-BE-NEXT:    mfvsrwz r9, f10
+; CHECK-BE-NEXT:    mfvsrwz r10, f11
+; CHECK-BE-NEXT:    mfvsrwz r29, f12
+; CHECK-BE-NEXT:    mfvsrwz r28, f13
+; CHECK-BE-NEXT:    mfvsrwz r27, v2
+; CHECK-BE-NEXT:    mfvsrwz r26, v3
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    sldi r4, r4, 56
+; CHECK-BE-NEXT:    sldi r5, r5, 56
+; CHECK-BE-NEXT:    sldi r6, r6, 56
+; CHECK-BE-NEXT:    sldi r11, r11, 56
+; CHECK-BE-NEXT:    sldi r12, r12, 56
+; CHECK-BE-NEXT:    sldi r0, r0, 56
+; CHECK-BE-NEXT:    sldi r30, r30, 56
+; CHECK-BE-NEXT:    sldi r7, r7, 56
+; CHECK-BE-NEXT:    sldi r8, r8, 56
+; CHECK-BE-NEXT:    sldi r9, r9, 56
+; CHECK-BE-NEXT:    sldi r10, r10, 56
+; CHECK-BE-NEXT:    sldi r29, r29, 56
+; CHECK-BE-NEXT:    sldi r28, r28, 56
+; CHECK-BE-NEXT:    sldi r27, r27, 56
+; CHECK-BE-NEXT:    sldi r26, r26, 56
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    mtvsrd v5, r6
+; CHECK-BE-NEXT:    mtvsrd v8, r11
+; CHECK-BE-NEXT:    mtvsrd v10, r12
+; CHECK-BE-NEXT:    mtvsrd v12, r0
+; CHECK-BE-NEXT:    mtvsrd v14, r30
+; CHECK-BE-NEXT:    mtvsrd v0, r7
+; CHECK-BE-NEXT:    mtvsrd v1, r8
+; CHECK-BE-NEXT:    mtvsrd v6, r9
+; CHECK-BE-NEXT:    mtvsrd v7, r10
+; CHECK-BE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v9, r29
+; CHECK-BE-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v11, r28
+; CHECK-BE-NEXT:    mtvsrd v13, r27
+; CHECK-BE-NEXT:    mtvsrd v15, r26
+; CHECK-BE-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    vmrghb v2, v2, v0
+; CHECK-BE-NEXT:    vmrghb v3, v3, v1
+; CHECK-BE-NEXT:    vmrghb v4, v4, v6
+; CHECK-BE-NEXT:    vmrghb v5, v5, v7
+; CHECK-BE-NEXT:    vmrghb v0, v8, v9
+; CHECK-BE-NEXT:    vmrghb v1, v10, v11
+; CHECK-BE-NEXT:    vmrghb v6, v12, v13
+; CHECK-BE-NEXT:    vmrghb v7, v14, v15
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    vmrghh v3, v5, v4
+; CHECK-BE-NEXT:    vmrghh v4, v1, v0
+; CHECK-BE-NEXT:    vmrghh v5, v7, v6
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    vmrghw v3, v5, v4
+; CHECK-BE-NEXT:    xxmrghd v2, v3, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <16 x double>, <16 x double>* %0, align 128 ; %0 is the unnamed pointer argument
+  %1 = fptoui <16 x double> %a to <16 x i8> ; unsigned conversion; all three runs above expect xscvdpuxws here, whereas the 2-, 4- and 8-element fptoui tests in this file expect xscvdpsxws
+  ret <16 x i8> %1
+}
+
+define i16 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 { ; fptosi <2 x double> to <2 x i8>, returned packed in one i16; the powerpc64le / pwr8 expectations come first
+; CHECK-P8-LABEL: test2elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xscvdpsxws f1, v2
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mfvsrwz r3, f1
+; CHECK-P8-NEXT:    mfvsrwz r4, f0
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    vmrglb v2, v2, v3
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    clrldi r3, r3, 48
+; CHECK-P8-NEXT:    sth r3, -2(r1)
+; CHECK-P8-NEXT:    lhz r3, -2(r1)
+; CHECK-P8-NEXT:    blr
+; Expected output for the powerpc64le / pwr9 run follows.
+; CHECK-P9-LABEL: test2elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xxswapd vs0, v2
+; CHECK-P9-NEXT:    xscvdpsxws f1, v2
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    addi r3, r1, -2
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    vmrglb v2, v2, v3
+; CHECK-P9-NEXT:    vsldoi v2, v2, v2, 8
+; CHECK-P9-NEXT:    stxsihx v2, 0, r3
+; CHECK-P9-NEXT:    lhz r3, -2(r1)
+; CHECK-P9-NEXT:    blr
+; Expected output for the powerpc64 / pwr9 run follows.
+; CHECK-BE-LABEL: test2elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxswapd vs0, v2
+; CHECK-BE-NEXT:    xscvdpsxws f1, v2
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mfvsrwz r4, f0
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    addi r3, r1, -2
+; CHECK-BE-NEXT:    sldi r4, r4, 56
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
+; CHECK-BE-NEXT:    stxsihx v2, 0, r3
+; CHECK-BE-NEXT:    lhz r3, -2(r1)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = fptosi <2 x double> %a to <2 x i8> ; signed conversion; all three runs above expect xscvdpsxws, the same mnemonic expected for the unsigned test2elt
+  %1 = bitcast <2 x i8> %0 to i16 ; reinterpret the two result bytes as a single i16 return value
+  ret i16 %1
+}
+
+define i32 @test4elt_signed(<4 x double>* nocapture readonly) local_unnamed_addr #1 { ; load <4 x double>, fptosi to <4 x i8>, return packed in one i32; the powerpc64le / pwr8 expectations come first
+; CHECK-P8-LABEL: test4elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    xscvdpsxws f2, f0
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xscvdpsxws f3, f1
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mfvsrwz r3, f2
+; CHECK-P8-NEXT:    mfvsrwz r4, f3
+; CHECK-P8-NEXT:    mtvsrd f2, r3
+; CHECK-P8-NEXT:    mtvsrd f3, r4
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    xxswapd v2, vs2
+; CHECK-P8-NEXT:    mfvsrwz r4, f1
+; CHECK-P8-NEXT:    xxswapd v4, vs3
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    xxswapd v5, vs1
+; CHECK-P8-NEXT:    vmrglb v2, v3, v2
+; CHECK-P8-NEXT:    vmrglb v3, v5, v4
+; CHECK-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    blr
+; Expected output for the powerpc64le / pwr9 run follows.
+; CHECK-P9-LABEL: test4elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 16(r3)
+; CHECK-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-P9-NEXT:    xxswapd vs2, vs1
+; CHECK-P9-NEXT:    xxswapd vs3, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mfvsrwz r5, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    li r3, 0
+; CHECK-P9-NEXT:    mfvsrwz r4, f2
+; CHECK-P9-NEXT:    mfvsrwz r6, f3
+; CHECK-P9-NEXT:    mtvsrd f2, r5
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f3, r6
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xxswapd v5, vs3
+; CHECK-P9-NEXT:    vmrglb v2, v2, v3
+; CHECK-P9-NEXT:    vmrglb v3, v4, v5
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
+; CHECK-P9-NEXT:    blr
+; Expected output for the powerpc64 / pwr9 run follows.
+; CHECK-BE-LABEL: test4elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    xxswapd vs2, vs1
+; CHECK-BE-NEXT:    xxswapd vs3, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    mfvsrwz r5, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    sldi r5, r5, 56
+; CHECK-BE-NEXT:    mfvsrwz r4, f2
+; CHECK-BE-NEXT:    mfvsrwz r6, f3
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    li r3, 0
+; CHECK-BE-NEXT:    sldi r4, r4, 56
+; CHECK-BE-NEXT:    sldi r6, r6, 56
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    mtvsrd v5, r6
+; CHECK-BE-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-NEXT:    vmrghb v3, v4, v5
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <4 x double>, <4 x double>* %0, align 32 ; %0 is the unnamed pointer argument
+  %1 = fptosi <4 x double> %a to <4 x i8> ; signed conversion; all three runs above expect xscvdpsxws, the same mnemonic expected for the unsigned test4elt
+  %2 = bitcast <4 x i8> %1 to i32 ; reinterpret the four result bytes as a single i32 return value
+  ret i32 %2
+}
+
+define i64 @test8elt_signed(<8 x double>* nocapture readonly) local_unnamed_addr #1 { ; Signed <8 x double> -> <8 x i8> conversion returned packed in an i64; the expected code under each check prefix converts every element with its own scalar xscvdpsxws.
+; CHECK-P8-LABEL: test8elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    lxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    xscvdpsxws f4, f0
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xscvdpsxws f5, f1
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f6, f2
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xscvdpsxws f7, f3
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mfvsrwz r3, f4
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    mfvsrwz r4, f5
+; CHECK-P8-NEXT:    mtvsrd f4, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f6
+; CHECK-P8-NEXT:    mtvsrd f5, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs4
+; CHECK-P8-NEXT:    mfvsrwz r4, f7
+; CHECK-P8-NEXT:    mtvsrd f6, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs5
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    mtvsrd f7, r4
+; CHECK-P8-NEXT:    xxswapd v4, vs6
+; CHECK-P8-NEXT:    mfvsrwz r4, f1
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v1, vs7
+; CHECK-P8-NEXT:    mfvsrwz r3, f2
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    xxswapd v5, vs0
+; CHECK-P8-NEXT:    mfvsrwz r4, f3
+; CHECK-P8-NEXT:    mtvsrd f2, r3
+; CHECK-P8-NEXT:    xxswapd v0, vs1
+; CHECK-P8-NEXT:    mtvsrd f0, r4
+; CHECK-P8-NEXT:    xxswapd v6, vs2
+; CHECK-P8-NEXT:    vmrglb v2, v5, v2
+; CHECK-P8-NEXT:    xxswapd v5, vs0
+; CHECK-P8-NEXT:    vmrglb v3, v0, v3
+; CHECK-P8-NEXT:    vmrglb v4, v6, v4
+; CHECK-P8-NEXT:    vmrglb v5, v5, v1
+; CHECK-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-P8-NEXT:    vmrglh v3, v5, v4
+; CHECK-P8-NEXT:    vmrglw v2, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 48(r3)
+; CHECK-P9-NEXT:    lxv vs1, 32(r3)
+; CHECK-P9-NEXT:    lxv vs2, 16(r3)
+; CHECK-P9-NEXT:    lxv vs3, 0(r3)
+; CHECK-P9-NEXT:    xxswapd vs4, vs3
+; CHECK-P9-NEXT:    xxswapd vs5, vs2
+; CHECK-P9-NEXT:    xxswapd vs6, vs1
+; CHECK-P9-NEXT:    xxswapd vs7, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    xscvdpsxws f5, f5
+; CHECK-P9-NEXT:    xscvdpsxws f6, f6
+; CHECK-P9-NEXT:    xscvdpsxws f7, f7
+; CHECK-P9-NEXT:    mfvsrwz r3, f3
+; CHECK-P9-NEXT:    mfvsrwz r5, f2
+; CHECK-P9-NEXT:    mfvsrwz r7, f1
+; CHECK-P9-NEXT:    mfvsrwz r9, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    mtvsrd f2, r5
+; CHECK-P9-NEXT:    mfvsrwz r4, f4
+; CHECK-P9-NEXT:    mfvsrwz r6, f5
+; CHECK-P9-NEXT:    mfvsrwz r8, f6
+; CHECK-P9-NEXT:    mfvsrwz r10, f7
+; CHECK-P9-NEXT:    mtvsrd f4, r7
+; CHECK-P9-NEXT:    mtvsrd f6, r9
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f3, r6
+; CHECK-P9-NEXT:    mtvsrd f5, r8
+; CHECK-P9-NEXT:    mtvsrd f7, r10
+; CHECK-P9-NEXT:    xxswapd v0, vs4
+; CHECK-P9-NEXT:    xxswapd v6, vs6
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xxswapd v5, vs3
+; CHECK-P9-NEXT:    xxswapd v1, vs5
+; CHECK-P9-NEXT:    xxswapd v7, vs7
+; CHECK-P9-NEXT:    vmrglb v2, v2, v3
+; CHECK-P9-NEXT:    vmrglb v3, v4, v5
+; CHECK-P9-NEXT:    vmrglb v4, v0, v1
+; CHECK-P9-NEXT:    vmrglb v5, v6, v7
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    vmrglh v3, v5, v4
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    lxv vs2, 32(r3)
+; CHECK-BE-NEXT:    lxv vs3, 48(r3)
+; CHECK-BE-NEXT:    xxswapd vs4, vs3
+; CHECK-BE-NEXT:    xxswapd vs5, vs2
+; CHECK-BE-NEXT:    xxswapd vs6, vs1
+; CHECK-BE-NEXT:    xxswapd vs7, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    xscvdpsxws f6, f6
+; CHECK-BE-NEXT:    xscvdpsxws f7, f7
+; CHECK-BE-NEXT:    mfvsrwz r3, f3
+; CHECK-BE-NEXT:    mfvsrwz r5, f2
+; CHECK-BE-NEXT:    mfvsrwz r7, f1
+; CHECK-BE-NEXT:    mfvsrwz r9, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    sldi r5, r5, 56
+; CHECK-BE-NEXT:    sldi r7, r7, 56
+; CHECK-BE-NEXT:    sldi r9, r9, 56
+; CHECK-BE-NEXT:    mfvsrwz r4, f4
+; CHECK-BE-NEXT:    mfvsrwz r6, f5
+; CHECK-BE-NEXT:    mfvsrwz r8, f6
+; CHECK-BE-NEXT:    mfvsrwz r10, f7
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    mtvsrd v0, r7
+; CHECK-BE-NEXT:    mtvsrd v6, r9
+; CHECK-BE-NEXT:    sldi r4, r4, 56
+; CHECK-BE-NEXT:    sldi r6, r6, 56
+; CHECK-BE-NEXT:    sldi r8, r8, 56
+; CHECK-BE-NEXT:    sldi r10, r10, 56
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    mtvsrd v5, r6
+; CHECK-BE-NEXT:    mtvsrd v1, r8
+; CHECK-BE-NEXT:    mtvsrd v7, r10
+; CHECK-BE-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-NEXT:    vmrghb v3, v4, v5
+; CHECK-BE-NEXT:    vmrghb v4, v0, v1
+; CHECK-BE-NEXT:    vmrghb v5, v6, v7
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    vmrghh v3, v5, v4
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    mfvsrd r3, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <8 x double>, <8 x double>* %0, align 64 ; %0 is the unnamed pointer parameter
+  %1 = fptosi <8 x double> %a to <8 x i8> ; conversion under test: f64 lanes to signed i8 lanes
+  %2 = bitcast <8 x i8> %1 to i64 ; reinterpret the eight i8 lanes as the i64 return value
+  ret i64 %2
+}
+
+define <16 x i8> @test16elt_signed(<16 x double>* nocapture readonly) local_unnamed_addr #2 { ; Signed <16 x double> -> <16 x i8> conversion returned as a vector; the expected code under each check prefix converts every element with its own scalar xscvdpsxws.
+; CHECK-P8-LABEL: test16elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    lxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    li r4, 64
+; CHECK-P8-NEXT:    xscvdpsxws f4, f0
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    lxvd2x vs5, r3, r4
+; CHECK-P8-NEXT:    li r4, 80
+; CHECK-P8-NEXT:    xscvdpsxws f6, f1
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    lxvd2x vs7, r3, r4
+; CHECK-P8-NEXT:    li r4, 96
+; CHECK-P8-NEXT:    xscvdpsxws f8, f2
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    lxvd2x vs9, r3, r4
+; CHECK-P8-NEXT:    li r4, 112
+; CHECK-P8-NEXT:    xscvdpsxws f10, f3
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    lxvd2x vs11, r3, r4
+; CHECK-P8-NEXT:    xscvdpsxws f12, f5
+; CHECK-P8-NEXT:    xxswapd vs5, vs5
+; CHECK-P8-NEXT:    xscvdpsxws f13, f7
+; CHECK-P8-NEXT:    xxswapd vs7, vs7
+; CHECK-P8-NEXT:    xscvdpsxws v2, f9
+; CHECK-P8-NEXT:    xxswapd vs9, vs9
+; CHECK-P8-NEXT:    mfvsrwz r3, f4
+; CHECK-P8-NEXT:    xscvdpsxws v3, f11
+; CHECK-P8-NEXT:    xxswapd vs11, vs11
+; CHECK-P8-NEXT:    mfvsrwz r4, f6
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mtvsrd f4, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f8
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xxswapd v4, vs4
+; CHECK-P8-NEXT:    mtvsrd f6, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f10
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xxswapd v5, vs6
+; CHECK-P8-NEXT:    mtvsrd f8, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f12
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    xxswapd v0, vs8
+; CHECK-P8-NEXT:    mtvsrd f10, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f13
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    xxswapd v1, vs10
+; CHECK-P8-NEXT:    mtvsrd f12, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, v2
+; CHECK-P8-NEXT:    xscvdpsxws f7, f7
+; CHECK-P8-NEXT:    xxswapd v6, vs12
+; CHECK-P8-NEXT:    mtvsrd f13, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, v3
+; CHECK-P8-NEXT:    mtvsrd v2, r3
+; CHECK-P8-NEXT:    xxswapd v7, vs13
+; CHECK-P8-NEXT:    mfvsrwz r3, f0
+; CHECK-P8-NEXT:    xscvdpsxws f9, f9
+; CHECK-P8-NEXT:    xxswapd v2, v2
+; CHECK-P8-NEXT:    xscvdpsxws f11, f11
+; CHECK-P8-NEXT:    mtvsrd v3, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f1
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v3, v3
+; CHECK-P8-NEXT:    mfvsrwz r3, f2
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    xxswapd v8, vs0
+; CHECK-P8-NEXT:    mfvsrwz r4, f3
+; CHECK-P8-NEXT:    mtvsrd f2, r3
+; CHECK-P8-NEXT:    xxswapd v9, vs1
+; CHECK-P8-NEXT:    mfvsrwz r3, f5
+; CHECK-P8-NEXT:    mtvsrd f3, r4
+; CHECK-P8-NEXT:    xxswapd v10, vs2
+; CHECK-P8-NEXT:    mfvsrwz r4, f7
+; CHECK-P8-NEXT:    mtvsrd f5, r3
+; CHECK-P8-NEXT:    mfvsrwz r3, f9
+; CHECK-P8-NEXT:    mtvsrd f7, r4
+; CHECK-P8-NEXT:    mfvsrwz r4, f11
+; CHECK-P8-NEXT:    vmrglb v4, v8, v4
+; CHECK-P8-NEXT:    xxswapd v8, vs3
+; CHECK-P8-NEXT:    vmrglb v5, v9, v5
+; CHECK-P8-NEXT:    xxswapd v9, vs5
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mtvsrd f1, r4
+; CHECK-P8-NEXT:    vmrglb v0, v10, v0
+; CHECK-P8-NEXT:    xxswapd v10, vs7
+; CHECK-P8-NEXT:    vmrglb v1, v8, v1
+; CHECK-P8-NEXT:    xxswapd v8, vs0
+; CHECK-P8-NEXT:    vmrglb v6, v9, v6
+; CHECK-P8-NEXT:    xxswapd v9, vs1
+; CHECK-P8-NEXT:    vmrglb v7, v10, v7
+; CHECK-P8-NEXT:    vmrglb v2, v8, v2
+; CHECK-P8-NEXT:    vmrglb v3, v9, v3
+; CHECK-P8-NEXT:    vmrglh v4, v5, v4
+; CHECK-P8-NEXT:    vmrglh v5, v1, v0
+; CHECK-P8-NEXT:    vmrglh v0, v7, v6
+; CHECK-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-P8-NEXT:    vmrglw v3, v5, v4
+; CHECK-P8-NEXT:    vmrglw v2, v2, v0
+; CHECK-P8-NEXT:    xxmrgld v2, v2, v3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs2, 48(r3)
+; CHECK-P9-NEXT:    lxv vs3, 32(r3)
+; CHECK-P9-NEXT:    lxv vs4, 16(r3)
+; CHECK-P9-NEXT:    lxv vs5, 0(r3)
+; CHECK-P9-NEXT:    lxv vs0, 112(r3)
+; CHECK-P9-NEXT:    lxv vs1, 96(r3)
+; CHECK-P9-NEXT:    lxv vs6, 80(r3)
+; CHECK-P9-NEXT:    lxv vs7, 64(r3)
+; CHECK-P9-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    xxswapd vs8, vs5
+; CHECK-P9-NEXT:    xxswapd vs9, vs4
+; CHECK-P9-NEXT:    xxswapd vs10, vs3
+; CHECK-P9-NEXT:    xxswapd vs11, vs2
+; CHECK-P9-NEXT:    xxswapd vs12, vs7
+; CHECK-P9-NEXT:    xxswapd vs13, vs6
+; CHECK-P9-NEXT:    xxswapd v2, vs1
+; CHECK-P9-NEXT:    xxswapd v3, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f5, f5
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvdpsxws f7, f7
+; CHECK-P9-NEXT:    xscvdpsxws f6, f6
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvdpsxws f8, f8
+; CHECK-P9-NEXT:    xscvdpsxws f9, f9
+; CHECK-P9-NEXT:    xscvdpsxws f10, f10
+; CHECK-P9-NEXT:    xscvdpsxws f11, f11
+; CHECK-P9-NEXT:    xscvdpsxws f12, f12
+; CHECK-P9-NEXT:    xscvdpsxws f13, f13
+; CHECK-P9-NEXT:    xscvdpsxws v2, v2
+; CHECK-P9-NEXT:    xscvdpsxws v3, v3
+; CHECK-P9-NEXT:    mfvsrwz r3, f5
+; CHECK-P9-NEXT:    mfvsrwz r4, f4
+; CHECK-P9-NEXT:    mfvsrwz r5, f3
+; CHECK-P9-NEXT:    mfvsrwz r6, f2
+; CHECK-P9-NEXT:    mfvsrwz r11, f7
+; CHECK-P9-NEXT:    mfvsrwz r12, f6
+; CHECK-P9-NEXT:    mfvsrwz r0, f1
+; CHECK-P9-NEXT:    mfvsrwz r30, f0
+; CHECK-P9-NEXT:    mfvsrwz r7, f8
+; CHECK-P9-NEXT:    mfvsrwz r8, f9
+; CHECK-P9-NEXT:    mfvsrwz r9, f10
+; CHECK-P9-NEXT:    mfvsrwz r10, f11
+; CHECK-P9-NEXT:    mfvsrwz r29, f12
+; CHECK-P9-NEXT:    mfvsrwz r28, f13
+; CHECK-P9-NEXT:    mfvsrwz r27, v2
+; CHECK-P9-NEXT:    mfvsrwz r26, v3
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f2, r5
+; CHECK-P9-NEXT:    mtvsrd f3, r6
+; CHECK-P9-NEXT:    mtvsrd f8, r11
+; CHECK-P9-NEXT:    mtvsrd f9, r12
+; CHECK-P9-NEXT:    mtvsrd f10, r0
+; CHECK-P9-NEXT:    mtvsrd f11, r30
+; CHECK-P9-NEXT:    mtvsrd f4, r7
+; CHECK-P9-NEXT:    mtvsrd f5, r8
+; CHECK-P9-NEXT:    mtvsrd f6, r9
+; CHECK-P9-NEXT:    mtvsrd f7, r10
+; CHECK-P9-NEXT:    mtvsrd f12, r29
+; CHECK-P9-NEXT:    mtvsrd f13, r28
+; CHECK-P9-NEXT:    mtvsrd v2, r27
+; CHECK-P9-NEXT:    mtvsrd v3, r26
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xxswapd v5, vs1
+; CHECK-P9-NEXT:    xxswapd v0, vs2
+; CHECK-P9-NEXT:    xxswapd v1, vs3
+; CHECK-P9-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    xxswapd v6, vs4
+; CHECK-P9-NEXT:    xxswapd v7, vs5
+; CHECK-P9-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    xxswapd v8, vs6
+; CHECK-P9-NEXT:    xxswapd v9, vs7
+; CHECK-P9-NEXT:    xxswapd v10, vs8
+; CHECK-P9-NEXT:    xxswapd v11, vs12
+; CHECK-P9-NEXT:    xxswapd v12, vs9
+; CHECK-P9-NEXT:    xxswapd v13, vs13
+; CHECK-P9-NEXT:    xxswapd v14, vs10
+; CHECK-P9-NEXT:    xxswapd v2, v2
+; CHECK-P9-NEXT:    xxswapd v15, vs11
+; CHECK-P9-NEXT:    xxswapd v3, v3
+; CHECK-P9-NEXT:    vmrglb v4, v4, v6
+; CHECK-P9-NEXT:    vmrglb v5, v5, v7
+; CHECK-P9-NEXT:    vmrglb v0, v0, v8
+; CHECK-P9-NEXT:    vmrglb v1, v1, v9
+; CHECK-P9-NEXT:    vmrglb v6, v10, v11
+; CHECK-P9-NEXT:    vmrglb v7, v12, v13
+; CHECK-P9-NEXT:    vmrglb v2, v14, v2
+; CHECK-P9-NEXT:    vmrglb v3, v15, v3
+; CHECK-P9-NEXT:    vmrglh v4, v5, v4
+; CHECK-P9-NEXT:    vmrglh v5, v1, v0
+; CHECK-P9-NEXT:    vmrglh v0, v7, v6
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    vmrglw v3, v5, v4
+; CHECK-P9-NEXT:    vmrglw v2, v2, v0
+; CHECK-P9-NEXT:    xxmrgld v2, v2, v3
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs2, 64(r3)
+; CHECK-BE-NEXT:    lxv vs3, 80(r3)
+; CHECK-BE-NEXT:    lxv vs4, 96(r3)
+; CHECK-BE-NEXT:    lxv vs5, 112(r3)
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    lxv vs6, 32(r3)
+; CHECK-BE-NEXT:    lxv vs7, 48(r3)
+; CHECK-BE-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    xxswapd vs8, vs5
+; CHECK-BE-NEXT:    xxswapd vs9, vs4
+; CHECK-BE-NEXT:    xxswapd vs10, vs3
+; CHECK-BE-NEXT:    xxswapd vs11, vs2
+; CHECK-BE-NEXT:    xxswapd vs12, vs7
+; CHECK-BE-NEXT:    xxswapd vs13, vs6
+; CHECK-BE-NEXT:    xxswapd v2, vs1
+; CHECK-BE-NEXT:    xxswapd v3, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvdpsxws f7, f7
+; CHECK-BE-NEXT:    xscvdpsxws f6, f6
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f8, f8
+; CHECK-BE-NEXT:    xscvdpsxws f9, f9
+; CHECK-BE-NEXT:    xscvdpsxws f10, f10
+; CHECK-BE-NEXT:    xscvdpsxws f11, f11
+; CHECK-BE-NEXT:    xscvdpsxws f12, f12
+; CHECK-BE-NEXT:    xscvdpsxws f13, f13
+; CHECK-BE-NEXT:    xscvdpsxws v2, v2
+; CHECK-BE-NEXT:    xscvdpsxws v3, v3
+; CHECK-BE-NEXT:    mfvsrwz r3, f5
+; CHECK-BE-NEXT:    mfvsrwz r4, f4
+; CHECK-BE-NEXT:    mfvsrwz r5, f3
+; CHECK-BE-NEXT:    mfvsrwz r6, f2
+; CHECK-BE-NEXT:    mfvsrwz r11, f7
+; CHECK-BE-NEXT:    mfvsrwz r12, f6
+; CHECK-BE-NEXT:    mfvsrwz r0, f1
+; CHECK-BE-NEXT:    mfvsrwz r30, f0
+; CHECK-BE-NEXT:    mfvsrwz r7, f8
+; CHECK-BE-NEXT:    mfvsrwz r8, f9
+; CHECK-BE-NEXT:    mfvsrwz r9, f10
+; CHECK-BE-NEXT:    mfvsrwz r10, f11
+; CHECK-BE-NEXT:    mfvsrwz r29, f12
+; CHECK-BE-NEXT:    mfvsrwz r28, f13
+; CHECK-BE-NEXT:    mfvsrwz r27, v2
+; CHECK-BE-NEXT:    mfvsrwz r26, v3
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    sldi r4, r4, 56
+; CHECK-BE-NEXT:    sldi r5, r5, 56
+; CHECK-BE-NEXT:    sldi r6, r6, 56
+; CHECK-BE-NEXT:    sldi r11, r11, 56
+; CHECK-BE-NEXT:    sldi r12, r12, 56
+; CHECK-BE-NEXT:    sldi r0, r0, 56
+; CHECK-BE-NEXT:    sldi r30, r30, 56
+; CHECK-BE-NEXT:    sldi r7, r7, 56
+; CHECK-BE-NEXT:    sldi r8, r8, 56
+; CHECK-BE-NEXT:    sldi r9, r9, 56
+; CHECK-BE-NEXT:    sldi r10, r10, 56
+; CHECK-BE-NEXT:    sldi r29, r29, 56
+; CHECK-BE-NEXT:    sldi r28, r28, 56
+; CHECK-BE-NEXT:    sldi r27, r27, 56
+; CHECK-BE-NEXT:    sldi r26, r26, 56
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    mtvsrd v5, r6
+; CHECK-BE-NEXT:    mtvsrd v8, r11
+; CHECK-BE-NEXT:    mtvsrd v10, r12
+; CHECK-BE-NEXT:    mtvsrd v12, r0
+; CHECK-BE-NEXT:    mtvsrd v14, r30
+; CHECK-BE-NEXT:    mtvsrd v0, r7
+; CHECK-BE-NEXT:    mtvsrd v1, r8
+; CHECK-BE-NEXT:    mtvsrd v6, r9
+; CHECK-BE-NEXT:    mtvsrd v7, r10
+; CHECK-BE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v9, r29
+; CHECK-BE-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrd v11, r28
+; CHECK-BE-NEXT:    mtvsrd v13, r27
+; CHECK-BE-NEXT:    mtvsrd v15, r26
+; CHECK-BE-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    vmrghb v2, v2, v0
+; CHECK-BE-NEXT:    vmrghb v3, v3, v1
+; CHECK-BE-NEXT:    vmrghb v4, v4, v6
+; CHECK-BE-NEXT:    vmrghb v5, v5, v7
+; CHECK-BE-NEXT:    vmrghb v0, v8, v9
+; CHECK-BE-NEXT:    vmrghb v1, v10, v11
+; CHECK-BE-NEXT:    vmrghb v6, v12, v13
+; CHECK-BE-NEXT:    vmrghb v7, v14, v15
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    vmrghh v3, v5, v4
+; CHECK-BE-NEXT:    vmrghh v4, v1, v0
+; CHECK-BE-NEXT:    vmrghh v5, v7, v6
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    vmrghw v3, v5, v4
+; CHECK-BE-NEXT:    xxmrghd v2, v3, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <16 x double>, <16 x double>* %0, align 128 ; %0 is the unnamed pointer parameter
+  %1 = fptosi <16 x double> %a to <16 x i8> ; conversion under test: f64 lanes to signed i8 lanes
+  ret <16 x i8> %1
+}

Added: llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll?rev=347090&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll Fri Nov 16 12:24:10 2018
@@ -0,0 +1,304 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P8
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P9
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE
+
+define i64 @test2elt(i64 %a.coerce) local_unnamed_addr #0 { ; Unsigned <2 x float> -> <2 x i32> conversion on a vector passed and returned as an i64; each expected sequence contains a single xvcvspuxws.
+; CHECK-P8-LABEL: test2elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xvcvspuxws vs0, v2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xvcvspuxws vs0, v2
+; CHECK-P9-NEXT:    mfvsrld r3, vs0
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrd f0, r3
+; CHECK-BE-NEXT:    xvcvspuxws vs0, vs0
+; CHECK-BE-NEXT:    mfvsrd r3, f0
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i64 %a.coerce to <2 x float> ; view the i64 argument as two float lanes
+  %1 = fptoui <2 x float> %0 to <2 x i32> ; conversion under test: f32 lanes to unsigned i32 lanes
+  %2 = bitcast <2 x i32> %1 to i64 ; repack both i32 lanes into the i64 return value
+  ret i64 %2
+}
+
+define <4 x i32> @test4elt(<4 x float> %a) local_unnamed_addr #1 { ; Unsigned <4 x float> -> <4 x i32> conversion; the expected code for every check prefix is one xvcvspuxws followed by blr.
+; CHECK-P8-LABEL: test4elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xvcvspuxws v2, v2
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test4elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xvcvspuxws v2, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xvcvspuxws v2, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = fptoui <4 x float> %a to <4 x i32> ; conversion under test: f32 lanes to unsigned i32 lanes
+  ret <4 x i32> %0
+}
+
+define void @test8elt(<8 x i32>* noalias nocapture sret %agg.result, <8 x float>* nocapture readonly) local_unnamed_addr #2 { ; Unsigned <8 x float> -> <8 x i32> conversion, memory to memory; each expected sequence uses two xvcvspuxws.
+; CHECK-P8-LABEL: test8elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lvx v2, r4, r5
+; CHECK-P8-NEXT:    xvcvspuxws v3, v3
+; CHECK-P8-NEXT:    xvcvspuxws v2, v2
+; CHECK-P8-NEXT:    stvx v3, 0, r3
+; CHECK-P8-NEXT:    stvx v2, r3, r5
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 16(r4)
+; CHECK-P9-NEXT:    lxv vs1, 0(r4)
+; CHECK-P9-NEXT:    xvcvspuxws vs1, vs1
+; CHECK-P9-NEXT:    xvcvspuxws vs0, vs0
+; CHECK-P9-NEXT:    stxv vs0, 16(r3)
+; CHECK-P9-NEXT:    stxv vs1, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 16(r4)
+; CHECK-BE-NEXT:    lxv vs1, 0(r4)
+; CHECK-BE-NEXT:    xvcvspuxws vs1, vs1
+; CHECK-BE-NEXT:    xvcvspuxws vs0, vs0
+; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    stxv vs1, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <8 x float>, <8 x float>* %0, align 32 ; %0 is the unnamed second (input pointer) parameter
+  %1 = fptoui <8 x float> %a to <8 x i32> ; conversion under test: f32 lanes to unsigned i32 lanes
+  store <8 x i32> %1, <8 x i32>* %agg.result, align 32 ; write the result through the sret pointer
+  ret void
+}
+
+define void @test16elt(<16 x i32>* noalias nocapture sret %agg.result, <16 x float>* nocapture readonly) local_unnamed_addr #2 { ; Unsigned <16 x float> -> <16 x i32> conversion, memory to memory; each expected sequence uses four xvcvspuxws.
+; CHECK-P8-LABEL: test16elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    li r7, 48
+; CHECK-P8-NEXT:    lvx v5, 0, r4
+; CHECK-P8-NEXT:    lvx v2, r4, r5
+; CHECK-P8-NEXT:    lvx v3, r4, r6
+; CHECK-P8-NEXT:    lvx v4, r4, r7
+; CHECK-P8-NEXT:    xvcvspuxws v5, v5
+; CHECK-P8-NEXT:    xvcvspuxws v2, v2
+; CHECK-P8-NEXT:    xvcvspuxws v3, v3
+; CHECK-P8-NEXT:    xvcvspuxws v4, v4
+; CHECK-P8-NEXT:    stvx v5, 0, r3
+; CHECK-P8-NEXT:    stvx v2, r3, r5
+; CHECK-P8-NEXT:    stvx v3, r3, r6
+; CHECK-P8-NEXT:    stvx v4, r3, r7
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 48(r4)
+; CHECK-P9-NEXT:    lxv vs1, 32(r4)
+; CHECK-P9-NEXT:    lxv vs2, 16(r4)
+; CHECK-P9-NEXT:    lxv vs3, 0(r4)
+; CHECK-P9-NEXT:    xvcvspuxws vs3, vs3
+; CHECK-P9-NEXT:    xvcvspuxws vs2, vs2
+; CHECK-P9-NEXT:    xvcvspuxws vs1, vs1
+; CHECK-P9-NEXT:    xvcvspuxws vs0, vs0
+; CHECK-P9-NEXT:    stxv vs0, 48(r3)
+; CHECK-P9-NEXT:    stxv vs1, 32(r3)
+; CHECK-P9-NEXT:    stxv vs2, 16(r3)
+; CHECK-P9-NEXT:    stxv vs3, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 48(r4)
+; CHECK-BE-NEXT:    lxv vs1, 32(r4)
+; CHECK-BE-NEXT:    lxv vs2, 16(r4)
+; CHECK-BE-NEXT:    lxv vs3, 0(r4)
+; CHECK-BE-NEXT:    xvcvspuxws vs3, vs3
+; CHECK-BE-NEXT:    xvcvspuxws vs2, vs2
+; CHECK-BE-NEXT:    xvcvspuxws vs1, vs1
+; CHECK-BE-NEXT:    xvcvspuxws vs0, vs0
+; CHECK-BE-NEXT:    stxv vs0, 48(r3)
+; CHECK-BE-NEXT:    stxv vs1, 32(r3)
+; CHECK-BE-NEXT:    stxv vs2, 16(r3)
+; CHECK-BE-NEXT:    stxv vs3, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <16 x float>, <16 x float>* %0, align 64 ; %0 is the unnamed second (input pointer) parameter
+  %1 = fptoui <16 x float> %a to <16 x i32> ; conversion under test: f32 lanes to unsigned i32 lanes
+  store <16 x i32> %1, <16 x i32>* %agg.result, align 64 ; write the result through the sret pointer
+  ret void
+}
+
+define i64 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 { ; Signed <2 x float> -> <2 x i32> conversion on a vector passed and returned as an i64; each expected sequence contains a single xvcvspsxws.
+; CHECK-P8-LABEL: test2elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xvcvspsxws vs0, v2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xvcvspsxws vs0, v2
+; CHECK-P9-NEXT:    mfvsrld r3, vs0
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrd f0, r3
+; CHECK-BE-NEXT:    xvcvspsxws vs0, vs0
+; CHECK-BE-NEXT:    mfvsrd r3, f0
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i64 %a.coerce to <2 x float> ; view the i64 argument as two float lanes
+  %1 = fptosi <2 x float> %0 to <2 x i32> ; conversion under test: f32 lanes to signed i32 lanes
+  %2 = bitcast <2 x i32> %1 to i64 ; repack both i32 lanes into the i64 return value
+  ret i64 %2
+}
+
+define <4 x i32> @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 { ; Signed <4 x float> -> <4 x i32> conversion; the expected code for every check prefix is one xvcvspsxws followed by blr.
+; CHECK-P8-LABEL: test4elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xvcvspsxws v2, v2
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test4elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xvcvspsxws v2, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xvcvspsxws v2, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = fptosi <4 x float> %a to <4 x i32> ; conversion under test: f32 lanes to signed i32 lanes
+  ret <4 x i32> %0
+}
+
+define void @test8elt_signed(<8 x i32>* noalias nocapture sret %agg.result, <8 x float>* nocapture readonly) local_unnamed_addr #2 { ; Signed <8 x float> -> <8 x i32> conversion, memory to memory; each expected sequence uses two xvcvspsxws.
+; CHECK-P8-LABEL: test8elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lvx v2, r4, r5
+; CHECK-P8-NEXT:    xvcvspsxws v3, v3
+; CHECK-P8-NEXT:    xvcvspsxws v2, v2
+; CHECK-P8-NEXT:    stvx v3, 0, r3
+; CHECK-P8-NEXT:    stvx v2, r3, r5
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 16(r4)
+; CHECK-P9-NEXT:    lxv vs1, 0(r4)
+; CHECK-P9-NEXT:    xvcvspsxws vs1, vs1
+; CHECK-P9-NEXT:    xvcvspsxws vs0, vs0
+; CHECK-P9-NEXT:    stxv vs0, 16(r3)
+; CHECK-P9-NEXT:    stxv vs1, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 16(r4)
+; CHECK-BE-NEXT:    lxv vs1, 0(r4)
+; CHECK-BE-NEXT:    xvcvspsxws vs1, vs1
+; CHECK-BE-NEXT:    xvcvspsxws vs0, vs0
+; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    stxv vs1, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <8 x float>, <8 x float>* %0, align 32 ; %0 is the unnamed second (input pointer) parameter
+  %1 = fptosi <8 x float> %a to <8 x i32> ; conversion under test: f32 lanes to signed i32 lanes
+  store <8 x i32> %1, <8 x i32>* %agg.result, align 32 ; write the result through the sret pointer
+  ret void
+}
+
+define void @test16elt_signed(<16 x i32>* noalias nocapture sret %agg.result, <16 x float>* nocapture readonly) local_unnamed_addr #2 { ; Signed <16 x float> -> <16 x i32> conversion, memory to memory; each expected sequence uses four xvcvspsxws.
+; CHECK-P8-LABEL: test16elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    li r7, 48
+; CHECK-P8-NEXT:    lvx v5, 0, r4
+; CHECK-P8-NEXT:    lvx v2, r4, r5
+; CHECK-P8-NEXT:    lvx v3, r4, r6
+; CHECK-P8-NEXT:    lvx v4, r4, r7
+; CHECK-P8-NEXT:    xvcvspsxws v5, v5
+; CHECK-P8-NEXT:    xvcvspsxws v2, v2
+; CHECK-P8-NEXT:    xvcvspsxws v3, v3
+; CHECK-P8-NEXT:    xvcvspsxws v4, v4
+; CHECK-P8-NEXT:    stvx v5, 0, r3
+; CHECK-P8-NEXT:    stvx v2, r3, r5
+; CHECK-P8-NEXT:    stvx v3, r3, r6
+; CHECK-P8-NEXT:    stvx v4, r3, r7
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 48(r4)
+; CHECK-P9-NEXT:    lxv vs1, 32(r4)
+; CHECK-P9-NEXT:    lxv vs2, 16(r4)
+; CHECK-P9-NEXT:    lxv vs3, 0(r4)
+; CHECK-P9-NEXT:    xvcvspsxws vs3, vs3
+; CHECK-P9-NEXT:    xvcvspsxws vs2, vs2
+; CHECK-P9-NEXT:    xvcvspsxws vs1, vs1
+; CHECK-P9-NEXT:    xvcvspsxws vs0, vs0
+; CHECK-P9-NEXT:    stxv vs0, 48(r3)
+; CHECK-P9-NEXT:    stxv vs1, 32(r3)
+; CHECK-P9-NEXT:    stxv vs2, 16(r3)
+; CHECK-P9-NEXT:    stxv vs3, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 48(r4)
+; CHECK-BE-NEXT:    lxv vs1, 32(r4)
+; CHECK-BE-NEXT:    lxv vs2, 16(r4)
+; CHECK-BE-NEXT:    lxv vs3, 0(r4)
+; CHECK-BE-NEXT:    xvcvspsxws vs3, vs3
+; CHECK-BE-NEXT:    xvcvspsxws vs2, vs2
+; CHECK-BE-NEXT:    xvcvspsxws vs1, vs1
+; CHECK-BE-NEXT:    xvcvspsxws vs0, vs0
+; CHECK-BE-NEXT:    stxv vs0, 48(r3)
+; CHECK-BE-NEXT:    stxv vs1, 32(r3)
+; CHECK-BE-NEXT:    stxv vs2, 16(r3)
+; CHECK-BE-NEXT:    stxv vs3, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <16 x float>, <16 x float>* %0, align 64 ; %0 is the unnamed second (input pointer) parameter
+  %1 = fptosi <16 x float> %a to <16 x i32> ; conversion under test: f32 lanes to signed i32 lanes
+  store <16 x i32> %1, <16 x i32>* %agg.result, align 64 ; write the result through the sret pointer
+  ret void
+}

Added: llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp_to_i_8byte_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp_to_i_8byte_elts.ll?rev=347090&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp_to_i_8byte_elts.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp_to_i_8byte_elts.ll Fri Nov 16 12:24:10 2018
@@ -0,0 +1,438 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P8
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P9
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE
+
+define <2 x i64> @test2elt(<2 x double> %a) local_unnamed_addr #0 { ; unsigned <2 x double> -> <2 x i64>; operand and result are passed directly as vectors
+; CHECK-P8-LABEL: test2elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xvcvdpuxds v2, v2
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xvcvdpuxds v2, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xvcvdpuxds v2, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = fptoui <2 x double> %a to <2 x i64>  ; expected: a single in-place xvcvdpuxds on v2 for pwr8 LE, pwr9 LE and pwr9 BE
+  ret <2 x i64> %0
+}
+
+define void @test4elt(<4 x i64>* noalias nocapture sret %agg.result, <4 x double>* nocapture readonly) local_unnamed_addr #1 { ; unsigned <4 x double> -> <4 x i64>; input is read via the unnamed pointer (%0), result is written via %agg.result
+; CHECK-P8-LABEL: test4elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    xvcvdpuxds vs1, vs1
+; CHECK-P8-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test4elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 16(r4)
+; CHECK-P9-NEXT:    lxv vs1, 0(r4)
+; CHECK-P9-NEXT:    xvcvdpuxds vs1, vs1
+; CHECK-P9-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-P9-NEXT:    stxv vs0, 16(r3)
+; CHECK-P9-NEXT:    stxv vs1, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 16(r4)
+; CHECK-BE-NEXT:    lxv vs1, 0(r4)
+; CHECK-BE-NEXT:    xvcvdpuxds vs1, vs1
+; CHECK-BE-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    stxv vs1, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <4 x double>, <4 x double>* %0, align 32  ; expected: two vector loads, at offsets 0 and 16 from r4
+  %1 = fptoui <4 x double> %a to <4 x i64>  ; expected: one xvcvdpuxds per loaded register on all three RUN configurations
+  store <4 x i64> %1, <4 x i64>* %agg.result, align 32  ; expected: two vector stores to r3 (stxvd2x on pwr8, stxv on pwr9)
+  ret void
+}
+
+define void @test8elt(<8 x i64>* noalias nocapture sret %agg.result, <8 x double>* nocapture readonly) local_unnamed_addr #1 { ; unsigned <8 x double> -> <8 x i64>; input is read via the unnamed pointer (%0), result is written via %agg.result
+; CHECK-P8-LABEL: test8elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    li r7, 48
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT:    xvcvdpuxds vs3, vs3
+; CHECK-P8-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-P8-NEXT:    xvcvdpuxds vs1, vs1
+; CHECK-P8-NEXT:    xvcvdpuxds vs2, vs2
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 48(r4)
+; CHECK-P9-NEXT:    lxv vs1, 32(r4)
+; CHECK-P9-NEXT:    lxv vs2, 16(r4)
+; CHECK-P9-NEXT:    lxv vs3, 0(r4)
+; CHECK-P9-NEXT:    xvcvdpuxds vs3, vs3
+; CHECK-P9-NEXT:    xvcvdpuxds vs2, vs2
+; CHECK-P9-NEXT:    xvcvdpuxds vs1, vs1
+; CHECK-P9-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-P9-NEXT:    stxv vs0, 48(r3)
+; CHECK-P9-NEXT:    stxv vs1, 32(r3)
+; CHECK-P9-NEXT:    stxv vs2, 16(r3)
+; CHECK-P9-NEXT:    stxv vs3, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 48(r4)
+; CHECK-BE-NEXT:    lxv vs1, 32(r4)
+; CHECK-BE-NEXT:    lxv vs2, 16(r4)
+; CHECK-BE-NEXT:    lxv vs3, 0(r4)
+; CHECK-BE-NEXT:    xvcvdpuxds vs3, vs3
+; CHECK-BE-NEXT:    xvcvdpuxds vs2, vs2
+; CHECK-BE-NEXT:    xvcvdpuxds vs1, vs1
+; CHECK-BE-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-BE-NEXT:    stxv vs0, 48(r3)
+; CHECK-BE-NEXT:    stxv vs1, 32(r3)
+; CHECK-BE-NEXT:    stxv vs2, 16(r3)
+; CHECK-BE-NEXT:    stxv vs3, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <8 x double>, <8 x double>* %0, align 64  ; expected: four vector loads, at offsets 0, 16, 32 and 48 from r4
+  %1 = fptoui <8 x double> %a to <8 x i64>  ; expected: one xvcvdpuxds per loaded register on all three RUN configurations
+  store <8 x i64> %1, <8 x i64>* %agg.result, align 64  ; expected: four vector stores to r3 (stxvd2x on pwr8, stxv on pwr9)
+  ret void
+}
+
+define void @test16elt(<16 x i64>* noalias nocapture sret %agg.result, <16 x double>* nocapture readonly) local_unnamed_addr #1 { ; unsigned <16 x double> -> <16 x i64>; input is read via the unnamed pointer (%0), result is written via %agg.result
+; CHECK-P8-LABEL: test16elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    li r7, 64
+; CHECK-P8-NEXT:    li r8, 96
+; CHECK-P8-NEXT:    li r9, 112
+; CHECK-P8-NEXT:    li r10, 80
+; CHECK-P8-NEXT:    li r11, 48
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT:    lxvd2x vs3, r4, r8
+; CHECK-P8-NEXT:    lxvd2x vs4, r4, r9
+; CHECK-P8-NEXT:    lxvd2x vs5, r4, r10
+; CHECK-P8-NEXT:    lxvd2x vs6, r4, r11
+; CHECK-P8-NEXT:    lxvd2x vs7, 0, r4
+; CHECK-P8-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-P8-NEXT:    xvcvdpuxds vs1, vs1
+; CHECK-P8-NEXT:    xvcvdpuxds vs2, vs2
+; CHECK-P8-NEXT:    xvcvdpuxds vs3, vs3
+; CHECK-P8-NEXT:    xvcvdpuxds vs4, vs4
+; CHECK-P8-NEXT:    xvcvdpuxds vs5, vs5
+; CHECK-P8-NEXT:    xvcvdpuxds vs6, vs6
+; CHECK-P8-NEXT:    xvcvdpuxds vs7, vs7
+; CHECK-P8-NEXT:    stxvd2x vs4, r3, r9
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r8
+; CHECK-P8-NEXT:    stxvd2x vs5, r3, r10
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs6, r3, r11
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs7, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 48(r4)
+; CHECK-P9-NEXT:    lxv vs1, 32(r4)
+; CHECK-P9-NEXT:    lxv vs2, 16(r4)
+; CHECK-P9-NEXT:    lxv vs3, 0(r4)
+; CHECK-P9-NEXT:    lxv vs4, 112(r4)
+; CHECK-P9-NEXT:    lxv vs5, 96(r4)
+; CHECK-P9-NEXT:    lxv vs6, 80(r4)
+; CHECK-P9-NEXT:    lxv vs7, 64(r4)
+; CHECK-P9-NEXT:    xvcvdpuxds vs3, vs3
+; CHECK-P9-NEXT:    xvcvdpuxds vs2, vs2
+; CHECK-P9-NEXT:    xvcvdpuxds vs1, vs1
+; CHECK-P9-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-P9-NEXT:    xvcvdpuxds vs7, vs7
+; CHECK-P9-NEXT:    xvcvdpuxds vs6, vs6
+; CHECK-P9-NEXT:    xvcvdpuxds vs5, vs5
+; CHECK-P9-NEXT:    xvcvdpuxds vs4, vs4
+; CHECK-P9-NEXT:    stxv vs0, 48(r3)
+; CHECK-P9-NEXT:    stxv vs1, 32(r3)
+; CHECK-P9-NEXT:    stxv vs2, 16(r3)
+; CHECK-P9-NEXT:    stxv vs3, 0(r3)
+; CHECK-P9-NEXT:    stxv vs4, 112(r3)
+; CHECK-P9-NEXT:    stxv vs5, 96(r3)
+; CHECK-P9-NEXT:    stxv vs6, 80(r3)
+; CHECK-P9-NEXT:    stxv vs7, 64(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 48(r4)
+; CHECK-BE-NEXT:    lxv vs1, 32(r4)
+; CHECK-BE-NEXT:    lxv vs2, 16(r4)
+; CHECK-BE-NEXT:    lxv vs3, 0(r4)
+; CHECK-BE-NEXT:    lxv vs4, 112(r4)
+; CHECK-BE-NEXT:    lxv vs5, 96(r4)
+; CHECK-BE-NEXT:    lxv vs6, 80(r4)
+; CHECK-BE-NEXT:    lxv vs7, 64(r4)
+; CHECK-BE-NEXT:    xvcvdpuxds vs3, vs3
+; CHECK-BE-NEXT:    xvcvdpuxds vs2, vs2
+; CHECK-BE-NEXT:    xvcvdpuxds vs1, vs1
+; CHECK-BE-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-BE-NEXT:    xvcvdpuxds vs7, vs7
+; CHECK-BE-NEXT:    xvcvdpuxds vs6, vs6
+; CHECK-BE-NEXT:    xvcvdpuxds vs5, vs5
+; CHECK-BE-NEXT:    xvcvdpuxds vs4, vs4
+; CHECK-BE-NEXT:    stxv vs0, 48(r3)
+; CHECK-BE-NEXT:    stxv vs1, 32(r3)
+; CHECK-BE-NEXT:    stxv vs2, 16(r3)
+; CHECK-BE-NEXT:    stxv vs3, 0(r3)
+; CHECK-BE-NEXT:    stxv vs4, 112(r3)
+; CHECK-BE-NEXT:    stxv vs5, 96(r3)
+; CHECK-BE-NEXT:    stxv vs6, 80(r3)
+; CHECK-BE-NEXT:    stxv vs7, 64(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <16 x double>, <16 x double>* %0, align 128  ; expected: eight vector loads covering offsets 0..112 from r4; pwr8 first puts seven offsets in r5-r11 with li, pwr9 uses immediate offsets in lxv
+  %1 = fptoui <16 x double> %a to <16 x i64>  ; expected: one xvcvdpuxds per loaded register on all three RUN configurations
+  store <16 x i64> %1, <16 x i64>* %agg.result, align 128  ; expected: eight vector stores to r3 (stxvd2x on pwr8, stxv on pwr9)
+  ret void
+}
+
+define <2 x i64> @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 { ; signed <2 x double> -> <2 x i64>; operand and result are passed directly as vectors
+; CHECK-P8-LABEL: test2elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xvcvdpsxds v2, v2
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xvcvdpsxds v2, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xvcvdpsxds v2, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = fptosi <2 x double> %a to <2 x i64>  ; expected: a single in-place xvcvdpsxds on v2 for pwr8 LE, pwr9 LE and pwr9 BE
+  ret <2 x i64> %0
+}
+
+define void @test4elt_signed(<4 x i64>* noalias nocapture sret %agg.result, <4 x double>* nocapture readonly) local_unnamed_addr #1 { ; signed <4 x double> -> <4 x i64>; input is read via the unnamed pointer (%0), result is written via %agg.result
+; CHECK-P8-LABEL: test4elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    xvcvdpsxds vs1, vs1
+; CHECK-P8-NEXT:    xvcvdpsxds vs0, vs0
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test4elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 16(r4)
+; CHECK-P9-NEXT:    lxv vs1, 0(r4)
+; CHECK-P9-NEXT:    xvcvdpsxds vs1, vs1
+; CHECK-P9-NEXT:    xvcvdpsxds vs0, vs0
+; CHECK-P9-NEXT:    stxv vs0, 16(r3)
+; CHECK-P9-NEXT:    stxv vs1, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 16(r4)
+; CHECK-BE-NEXT:    lxv vs1, 0(r4)
+; CHECK-BE-NEXT:    xvcvdpsxds vs1, vs1
+; CHECK-BE-NEXT:    xvcvdpsxds vs0, vs0
+; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    stxv vs1, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <4 x double>, <4 x double>* %0, align 32  ; expected: two vector loads, at offsets 0 and 16 from r4
+  %1 = fptosi <4 x double> %a to <4 x i64>  ; expected: one xvcvdpsxds per loaded register on all three RUN configurations
+  store <4 x i64> %1, <4 x i64>* %agg.result, align 32  ; expected: two vector stores to r3 (stxvd2x on pwr8, stxv on pwr9)
+  ret void
+}
+
+define void @test8elt_signed(<8 x i64>* noalias nocapture sret %agg.result, <8 x double>* nocapture readonly) local_unnamed_addr #1 { ; signed <8 x double> -> <8 x i64>; input is read via the unnamed pointer (%0), result is written via %agg.result
+; CHECK-P8-LABEL: test8elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    li r7, 48
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT:    xvcvdpsxds vs3, vs3
+; CHECK-P8-NEXT:    xvcvdpsxds vs0, vs0
+; CHECK-P8-NEXT:    xvcvdpsxds vs1, vs1
+; CHECK-P8-NEXT:    xvcvdpsxds vs2, vs2
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 48(r4)
+; CHECK-P9-NEXT:    lxv vs1, 32(r4)
+; CHECK-P9-NEXT:    lxv vs2, 16(r4)
+; CHECK-P9-NEXT:    lxv vs3, 0(r4)
+; CHECK-P9-NEXT:    xvcvdpsxds vs3, vs3
+; CHECK-P9-NEXT:    xvcvdpsxds vs2, vs2
+; CHECK-P9-NEXT:    xvcvdpsxds vs1, vs1
+; CHECK-P9-NEXT:    xvcvdpsxds vs0, vs0
+; CHECK-P9-NEXT:    stxv vs0, 48(r3)
+; CHECK-P9-NEXT:    stxv vs1, 32(r3)
+; CHECK-P9-NEXT:    stxv vs2, 16(r3)
+; CHECK-P9-NEXT:    stxv vs3, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 48(r4)
+; CHECK-BE-NEXT:    lxv vs1, 32(r4)
+; CHECK-BE-NEXT:    lxv vs2, 16(r4)
+; CHECK-BE-NEXT:    lxv vs3, 0(r4)
+; CHECK-BE-NEXT:    xvcvdpsxds vs3, vs3
+; CHECK-BE-NEXT:    xvcvdpsxds vs2, vs2
+; CHECK-BE-NEXT:    xvcvdpsxds vs1, vs1
+; CHECK-BE-NEXT:    xvcvdpsxds vs0, vs0
+; CHECK-BE-NEXT:    stxv vs0, 48(r3)
+; CHECK-BE-NEXT:    stxv vs1, 32(r3)
+; CHECK-BE-NEXT:    stxv vs2, 16(r3)
+; CHECK-BE-NEXT:    stxv vs3, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <8 x double>, <8 x double>* %0, align 64  ; expected: four vector loads, at offsets 0, 16, 32 and 48 from r4
+  %1 = fptosi <8 x double> %a to <8 x i64>  ; expected: one xvcvdpsxds per loaded register on all three RUN configurations
+  store <8 x i64> %1, <8 x i64>* %agg.result, align 64  ; expected: four vector stores to r3 (stxvd2x on pwr8, stxv on pwr9)
+  ret void
+}
+
+define void @test16elt_signed(<16 x i64>* noalias nocapture sret %agg.result, <16 x double>* nocapture readonly) local_unnamed_addr #1 { ; signed <16 x double> -> <16 x i64>; input is read via the unnamed pointer (%0), result is written via %agg.result
+; CHECK-P8-LABEL: test16elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    li r7, 64
+; CHECK-P8-NEXT:    li r8, 96
+; CHECK-P8-NEXT:    li r9, 112
+; CHECK-P8-NEXT:    li r10, 80
+; CHECK-P8-NEXT:    li r11, 48
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT:    lxvd2x vs3, r4, r8
+; CHECK-P8-NEXT:    lxvd2x vs4, r4, r9
+; CHECK-P8-NEXT:    lxvd2x vs5, r4, r10
+; CHECK-P8-NEXT:    lxvd2x vs6, r4, r11
+; CHECK-P8-NEXT:    lxvd2x vs7, 0, r4
+; CHECK-P8-NEXT:    xvcvdpsxds vs0, vs0
+; CHECK-P8-NEXT:    xvcvdpsxds vs1, vs1
+; CHECK-P8-NEXT:    xvcvdpsxds vs2, vs2
+; CHECK-P8-NEXT:    xvcvdpsxds vs3, vs3
+; CHECK-P8-NEXT:    xvcvdpsxds vs4, vs4
+; CHECK-P8-NEXT:    xvcvdpsxds vs5, vs5
+; CHECK-P8-NEXT:    xvcvdpsxds vs6, vs6
+; CHECK-P8-NEXT:    xvcvdpsxds vs7, vs7
+; CHECK-P8-NEXT:    stxvd2x vs4, r3, r9
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r8
+; CHECK-P8-NEXT:    stxvd2x vs5, r3, r10
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs6, r3, r11
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs7, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 48(r4)
+; CHECK-P9-NEXT:    lxv vs1, 32(r4)
+; CHECK-P9-NEXT:    lxv vs2, 16(r4)
+; CHECK-P9-NEXT:    lxv vs3, 0(r4)
+; CHECK-P9-NEXT:    lxv vs4, 112(r4)
+; CHECK-P9-NEXT:    lxv vs5, 96(r4)
+; CHECK-P9-NEXT:    lxv vs6, 80(r4)
+; CHECK-P9-NEXT:    lxv vs7, 64(r4)
+; CHECK-P9-NEXT:    xvcvdpsxds vs3, vs3
+; CHECK-P9-NEXT:    xvcvdpsxds vs2, vs2
+; CHECK-P9-NEXT:    xvcvdpsxds vs1, vs1
+; CHECK-P9-NEXT:    xvcvdpsxds vs0, vs0
+; CHECK-P9-NEXT:    xvcvdpsxds vs7, vs7
+; CHECK-P9-NEXT:    xvcvdpsxds vs6, vs6
+; CHECK-P9-NEXT:    xvcvdpsxds vs5, vs5
+; CHECK-P9-NEXT:    xvcvdpsxds vs4, vs4
+; CHECK-P9-NEXT:    stxv vs0, 48(r3)
+; CHECK-P9-NEXT:    stxv vs1, 32(r3)
+; CHECK-P9-NEXT:    stxv vs2, 16(r3)
+; CHECK-P9-NEXT:    stxv vs3, 0(r3)
+; CHECK-P9-NEXT:    stxv vs4, 112(r3)
+; CHECK-P9-NEXT:    stxv vs5, 96(r3)
+; CHECK-P9-NEXT:    stxv vs6, 80(r3)
+; CHECK-P9-NEXT:    stxv vs7, 64(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 48(r4)
+; CHECK-BE-NEXT:    lxv vs1, 32(r4)
+; CHECK-BE-NEXT:    lxv vs2, 16(r4)
+; CHECK-BE-NEXT:    lxv vs3, 0(r4)
+; CHECK-BE-NEXT:    lxv vs4, 112(r4)
+; CHECK-BE-NEXT:    lxv vs5, 96(r4)
+; CHECK-BE-NEXT:    lxv vs6, 80(r4)
+; CHECK-BE-NEXT:    lxv vs7, 64(r4)
+; CHECK-BE-NEXT:    xvcvdpsxds vs3, vs3
+; CHECK-BE-NEXT:    xvcvdpsxds vs2, vs2
+; CHECK-BE-NEXT:    xvcvdpsxds vs1, vs1
+; CHECK-BE-NEXT:    xvcvdpsxds vs0, vs0
+; CHECK-BE-NEXT:    xvcvdpsxds vs7, vs7
+; CHECK-BE-NEXT:    xvcvdpsxds vs6, vs6
+; CHECK-BE-NEXT:    xvcvdpsxds vs5, vs5
+; CHECK-BE-NEXT:    xvcvdpsxds vs4, vs4
+; CHECK-BE-NEXT:    stxv vs0, 48(r3)
+; CHECK-BE-NEXT:    stxv vs1, 32(r3)
+; CHECK-BE-NEXT:    stxv vs2, 16(r3)
+; CHECK-BE-NEXT:    stxv vs3, 0(r3)
+; CHECK-BE-NEXT:    stxv vs4, 112(r3)
+; CHECK-BE-NEXT:    stxv vs5, 96(r3)
+; CHECK-BE-NEXT:    stxv vs6, 80(r3)
+; CHECK-BE-NEXT:    stxv vs7, 64(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <16 x double>, <16 x double>* %0, align 128  ; expected: eight vector loads covering offsets 0..112 from r4; pwr8 first puts seven offsets in r5-r11 with li, pwr9 uses immediate offsets in lxv
+  %1 = fptosi <16 x double> %a to <16 x i64>  ; expected: one xvcvdpsxds per loaded register on all three RUN configurations
+  store <16 x i64> %1, <16 x i64>* %agg.result, align 128  ; expected: eight vector stores to r3 (stxvd2x on pwr8, stxv on pwr9)
+  ret void
+}

Added: llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll?rev=347090&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll Fri Nov 16 12:24:10 2018
@@ -0,0 +1,1366 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P8
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P9
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE
+
+define i64 @test2elt(i32 %a.coerce) local_unnamed_addr #0 { ; unsigned <2 x i16> -> <2 x float>; the input vector arrives packed in an i32 and the result is returned packed in an i64
+; CHECK-P8-LABEL: test2elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    clrldi r4, r3, 48
+; CHECK-P8-NEXT:    rldicl r3, r3, 48, 48
+; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 16, 31
+; CHECK-P8-NEXT:    rlwinm r3, r3, 0, 16, 31
+; CHECK-P8-NEXT:    mtvsrwz f0, r4
+; CHECK-P8-NEXT:    mtvsrwz f1, r3
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
+; CHECK-P8-NEXT:    xscvuxdsp f1, f1
+; CHECK-P8-NEXT:    xscvdpspn vs0, f0
+; CHECK-P8-NEXT:    xscvdpspn vs1, f1
+; CHECK-P8-NEXT:    xxsldwi v2, vs0, vs0, 1
+; CHECK-P8-NEXT:    xxsldwi v3, vs1, vs1, 1
+; CHECK-P8-NEXT:    vmrglw v2, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrws v2, r3
+; CHECK-P9-NEXT:    li r3, 0
+; CHECK-P9-NEXT:    li r4, 2
+; CHECK-P9-NEXT:    vextuhrx r3, r3, v2
+; CHECK-P9-NEXT:    vextuhrx r4, r4, v2
+; CHECK-P9-NEXT:    rlwinm r3, r3, 0, 16, 31
+; CHECK-P9-NEXT:    rlwinm r4, r4, 0, 16, 31
+; CHECK-P9-NEXT:    mtvsrwz f0, r3
+; CHECK-P9-NEXT:    mtvsrwz f1, r4
+; CHECK-P9-NEXT:    xscvuxdsp f0, f0
+; CHECK-P9-NEXT:    xscvuxdsp f1, f1
+; CHECK-P9-NEXT:    xscvdpspn vs0, f0
+; CHECK-P9-NEXT:    xscvdpspn vs1, f1
+; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 1
+; CHECK-P9-NEXT:    xxsldwi v3, vs1, vs1, 1
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrws v2, r3
+; CHECK-BE-NEXT:    li r3, 2
+; CHECK-BE-NEXT:    li r4, 0
+; CHECK-BE-NEXT:    vextuhlx r3, r3, v2
+; CHECK-BE-NEXT:    vextuhlx r4, r4, v2
+; CHECK-BE-NEXT:    rlwinm r3, r3, 0, 16, 31
+; CHECK-BE-NEXT:    rlwinm r4, r4, 0, 16, 31
+; CHECK-BE-NEXT:    mtvsrwz f0, r3
+; CHECK-BE-NEXT:    mtvsrwz f1, r4
+; CHECK-BE-NEXT:    xscvuxdsp f0, f0
+; CHECK-BE-NEXT:    xscvuxdsp f1, f1
+; CHECK-BE-NEXT:    xscvdpspn v2, f0
+; CHECK-BE-NEXT:    xscvdpspn v3, f1
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    mfvsrd r3, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i32 %a.coerce to <2 x i16>  ; reinterpret the 32-bit integer argument as two i16 lanes
+  %1 = uitofp <2 x i16> %0 to <2 x float>  ; expected code is scalarized: each halfword is extracted (clrldi/rldicl on pwr8, vextuhrx/vextuhlx on pwr9), masked with rlwinm, moved with mtvsrwz and converted with its own xscvuxdsp
+  %2 = bitcast <2 x float> %1 to i64  ; reinterpret the two floats as the 64-bit integer return value
+  ret i64 %2
+}
+
+define <4 x float> @test4elt(i64 %a.coerce) local_unnamed_addr #1 { ; unsigned <4 x i16> -> <4 x float>; the input vector arrives packed in an i64, the result is returned as a vector
+; CHECK-P8-LABEL: test4elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    clrldi r4, r3, 48
+; CHECK-P8-NEXT:    rldicl r5, r3, 32, 48
+; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 16, 31
+; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 16, 31
+; CHECK-P8-NEXT:    mtvsrwz f0, r4
+; CHECK-P8-NEXT:    rldicl r4, r3, 48, 48
+; CHECK-P8-NEXT:    rldicl r3, r3, 16, 48
+; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 16, 31
+; CHECK-P8-NEXT:    rlwinm r3, r3, 0, 16, 31
+; CHECK-P8-NEXT:    mtvsrwz f1, r5
+; CHECK-P8-NEXT:    mtvsrwz f2, r4
+; CHECK-P8-NEXT:    mtvsrwz f3, r3
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
+; CHECK-P8-NEXT:    xscvuxdsp f1, f1
+; CHECK-P8-NEXT:    xscvuxdsp f2, f2
+; CHECK-P8-NEXT:    xscvuxdsp f3, f3
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P8-NEXT:    xvcvdpsp v3, vs1
+; CHECK-P8-NEXT:    vmrgew v2, v3, v2
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test4elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    li r3, 0
+; CHECK-P9-NEXT:    li r4, 4
+; CHECK-P9-NEXT:    li r5, 2
+; CHECK-P9-NEXT:    li r6, 6
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    vextuhrx r3, r3, v2
+; CHECK-P9-NEXT:    vextuhrx r4, r4, v2
+; CHECK-P9-NEXT:    vextuhrx r5, r5, v2
+; CHECK-P9-NEXT:    vextuhrx r6, r6, v2
+; CHECK-P9-NEXT:    rlwinm r3, r3, 0, 16, 31
+; CHECK-P9-NEXT:    rlwinm r4, r4, 0, 16, 31
+; CHECK-P9-NEXT:    rlwinm r5, r5, 0, 16, 31
+; CHECK-P9-NEXT:    rlwinm r6, r6, 0, 16, 31
+; CHECK-P9-NEXT:    mtvsrwz f0, r3
+; CHECK-P9-NEXT:    mtvsrwz f1, r4
+; CHECK-P9-NEXT:    mtvsrwz f2, r5
+; CHECK-P9-NEXT:    mtvsrwz f3, r6
+; CHECK-P9-NEXT:    xscvuxdsp f0, f0
+; CHECK-P9-NEXT:    xscvuxdsp f1, f1
+; CHECK-P9-NEXT:    xscvuxdsp f2, f2
+; CHECK-P9-NEXT:    xscvuxdsp f3, f3
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P9-NEXT:    xvcvdpsp v3, vs1
+; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    li r4, 6
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    li r3, 2
+; CHECK-BE-NEXT:    li r5, 4
+; CHECK-BE-NEXT:    li r6, 0
+; CHECK-BE-NEXT:    vextuhlx r4, r4, v2
+; CHECK-BE-NEXT:    vextuhlx r3, r3, v2
+; CHECK-BE-NEXT:    vextuhlx r5, r5, v2
+; CHECK-BE-NEXT:    vextuhlx r6, r6, v2
+; CHECK-BE-NEXT:    rlwinm r4, r4, 0, 16, 31
+; CHECK-BE-NEXT:    rlwinm r3, r3, 0, 16, 31
+; CHECK-BE-NEXT:    rlwinm r5, r5, 0, 16, 31
+; CHECK-BE-NEXT:    rlwinm r6, r6, 0, 16, 31
+; CHECK-BE-NEXT:    mtvsrwz f0, r4
+; CHECK-BE-NEXT:    mtvsrwz f1, r3
+; CHECK-BE-NEXT:    mtvsrwz f2, r5
+; CHECK-BE-NEXT:    mtvsrwz f3, r6
+; CHECK-BE-NEXT:    xscvuxdsp f0, f0
+; CHECK-BE-NEXT:    xscvuxdsp f1, f1
+; CHECK-BE-NEXT:    xscvuxdsp f2, f2
+; CHECK-BE-NEXT:    xscvuxdsp f3, f3
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
+; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
+; CHECK-BE-NEXT:    vmrgew v2, v3, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i64 %a.coerce to <4 x i16>  ; reinterpret the 64-bit integer argument as four i16 lanes
+  %1 = uitofp <4 x i16> %0 to <4 x float>  ; expected code is scalarized: four halfword extracts, four xscvuxdsp conversions, then the result vector is rebuilt with xxmrghd, xvcvdpsp and vmrgew
+  ret <4 x float> %1
+}
+
+define void @test8elt(<8 x float>* noalias nocapture sret %agg.result, <8 x i16> %a) local_unnamed_addr #2 { ; unsigned <8 x i16> -> <8 x float>; input is passed directly as a vector, result is written via %agg.result
+; CHECK-P8-LABEL: test8elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mfvsrd r5, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    clrldi r6, r5, 48
+; CHECK-P8-NEXT:    rldicl r7, r5, 32, 48
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 16, 31
+; CHECK-P8-NEXT:    mfvsrd r8, f0
+; CHECK-P8-NEXT:    rlwinm r7, r7, 0, 16, 31
+; CHECK-P8-NEXT:    mtvsrwz f1, r6
+; CHECK-P8-NEXT:    rldicl r6, r5, 48, 48
+; CHECK-P8-NEXT:    rldicl r5, r5, 16, 48
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 16, 31
+; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 16, 31
+; CHECK-P8-NEXT:    mtvsrwz f0, r7
+; CHECK-P8-NEXT:    mtvsrwz f2, r6
+; CHECK-P8-NEXT:    clrldi r6, r8, 48
+; CHECK-P8-NEXT:    mtvsrwz f3, r5
+; CHECK-P8-NEXT:    rldicl r5, r8, 32, 48
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 16, 31
+; CHECK-P8-NEXT:    mtvsrwz f4, r6
+; CHECK-P8-NEXT:    rldicl r6, r8, 48, 48
+; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 16, 31
+; CHECK-P8-NEXT:    mtvsrwz f5, r5
+; CHECK-P8-NEXT:    rlwinm r5, r6, 0, 16, 31
+; CHECK-P8-NEXT:    mtvsrwz f6, r5
+; CHECK-P8-NEXT:    rldicl r5, r8, 16, 48
+; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 16, 31
+; CHECK-P8-NEXT:    xscvuxdsp f1, f1
+; CHECK-P8-NEXT:    mtvsrwz f7, r5
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
+; CHECK-P8-NEXT:    xscvuxdsp f2, f2
+; CHECK-P8-NEXT:    xscvuxdsp f4, f4
+; CHECK-P8-NEXT:    xscvuxdsp f5, f5
+; CHECK-P8-NEXT:    xscvuxdsp f6, f6
+; CHECK-P8-NEXT:    xscvuxdsp f7, f7
+; CHECK-P8-NEXT:    xscvuxdsp f3, f3
+; CHECK-P8-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-P8-NEXT:    xxmrghd vs1, vs5, vs4
+; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P8-NEXT:    xxmrghd vs4, vs7, vs6
+; CHECK-P8-NEXT:    xxmrghd vs2, vs3, vs2
+; CHECK-P8-NEXT:    xvcvdpsp v3, vs1
+; CHECK-P8-NEXT:    xvcvdpsp v4, vs4
+; CHECK-P8-NEXT:    xvcvdpsp v5, vs2
+; CHECK-P8-NEXT:    vmrgew v3, v4, v3
+; CHECK-P8-NEXT:    vmrgew v2, v5, v2
+; CHECK-P8-NEXT:    stvx v3, 0, r3
+; CHECK-P8-NEXT:    stvx v2, r3, r4
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    li r4, 8
+; CHECK-P9-NEXT:    li r5, 12
+; CHECK-P9-NEXT:    li r6, 10
+; CHECK-P9-NEXT:    li r7, 14
+; CHECK-P9-NEXT:    li r8, 0
+; CHECK-P9-NEXT:    li r9, 4
+; CHECK-P9-NEXT:    li r10, 2
+; CHECK-P9-NEXT:    li r11, 6
+; CHECK-P9-NEXT:    vextuhrx r4, r4, v2
+; CHECK-P9-NEXT:    vextuhrx r5, r5, v2
+; CHECK-P9-NEXT:    vextuhrx r6, r6, v2
+; CHECK-P9-NEXT:    vextuhrx r7, r7, v2
+; CHECK-P9-NEXT:    vextuhrx r8, r8, v2
+; CHECK-P9-NEXT:    vextuhrx r9, r9, v2
+; CHECK-P9-NEXT:    vextuhrx r10, r10, v2
+; CHECK-P9-NEXT:    vextuhrx r11, r11, v2
+; CHECK-P9-NEXT:    rlwinm r4, r4, 0, 16, 31
+; CHECK-P9-NEXT:    rlwinm r5, r5, 0, 16, 31
+; CHECK-P9-NEXT:    rlwinm r6, r6, 0, 16, 31
+; CHECK-P9-NEXT:    rlwinm r7, r7, 0, 16, 31
+; CHECK-P9-NEXT:    rlwinm r8, r8, 0, 16, 31
+; CHECK-P9-NEXT:    rlwinm r9, r9, 0, 16, 31
+; CHECK-P9-NEXT:    rlwinm r10, r10, 0, 16, 31
+; CHECK-P9-NEXT:    rlwinm r11, r11, 0, 16, 31
+; CHECK-P9-NEXT:    mtvsrwz f0, r4
+; CHECK-P9-NEXT:    mtvsrwz f1, r5
+; CHECK-P9-NEXT:    mtvsrwz f2, r6
+; CHECK-P9-NEXT:    mtvsrwz f3, r7
+; CHECK-P9-NEXT:    mtvsrwz f4, r8
+; CHECK-P9-NEXT:    mtvsrwz f5, r9
+; CHECK-P9-NEXT:    mtvsrwz f6, r10
+; CHECK-P9-NEXT:    mtvsrwz f7, r11
+; CHECK-P9-NEXT:    xscvuxdsp f0, f0
+; CHECK-P9-NEXT:    xscvuxdsp f1, f1
+; CHECK-P9-NEXT:    xscvuxdsp f2, f2
+; CHECK-P9-NEXT:    xscvuxdsp f3, f3
+; CHECK-P9-NEXT:    xscvuxdsp f4, f4
+; CHECK-P9-NEXT:    xscvuxdsp f5, f5
+; CHECK-P9-NEXT:    xscvuxdsp f6, f6
+; CHECK-P9-NEXT:    xscvuxdsp f7, f7
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-P9-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P9-NEXT:    xvcvdpsp v3, vs1
+; CHECK-P9-NEXT:    xvcvdpsp v4, vs2
+; CHECK-P9-NEXT:    xvcvdpsp v5, vs3
+; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    vmrgew v3, v5, v4
+; CHECK-P9-NEXT:    stxv v3, 0(r3)
+; CHECK-P9-NEXT:    stxv v2, 16(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    li r4, 12
+; CHECK-BE-NEXT:    li r5, 8
+; CHECK-BE-NEXT:    li r6, 10
+; CHECK-BE-NEXT:    li r7, 14
+; CHECK-BE-NEXT:    li r8, 6
+; CHECK-BE-NEXT:    li r9, 2
+; CHECK-BE-NEXT:    li r10, 4
+; CHECK-BE-NEXT:    li r11, 0
+; CHECK-BE-NEXT:    vextuhlx r4, r4, v2
+; CHECK-BE-NEXT:    vextuhlx r5, r5, v2
+; CHECK-BE-NEXT:    vextuhlx r6, r6, v2
+; CHECK-BE-NEXT:    vextuhlx r7, r7, v2
+; CHECK-BE-NEXT:    vextuhlx r8, r8, v2
+; CHECK-BE-NEXT:    vextuhlx r9, r9, v2
+; CHECK-BE-NEXT:    vextuhlx r10, r10, v2
+; CHECK-BE-NEXT:    vextuhlx r11, r11, v2
+; CHECK-BE-NEXT:    rlwinm r4, r4, 0, 16, 31
+; CHECK-BE-NEXT:    rlwinm r5, r5, 0, 16, 31
+; CHECK-BE-NEXT:    rlwinm r6, r6, 0, 16, 31
+; CHECK-BE-NEXT:    rlwinm r7, r7, 0, 16, 31
+; CHECK-BE-NEXT:    rlwinm r8, r8, 0, 16, 31
+; CHECK-BE-NEXT:    rlwinm r9, r9, 0, 16, 31
+; CHECK-BE-NEXT:    rlwinm r10, r10, 0, 16, 31
+; CHECK-BE-NEXT:    rlwinm r11, r11, 0, 16, 31
+; CHECK-BE-NEXT:    mtvsrwz f0, r4
+; CHECK-BE-NEXT:    mtvsrwz f1, r5
+; CHECK-BE-NEXT:    mtvsrwz f2, r6
+; CHECK-BE-NEXT:    mtvsrwz f3, r7
+; CHECK-BE-NEXT:    mtvsrwz f4, r8
+; CHECK-BE-NEXT:    mtvsrwz f5, r9
+; CHECK-BE-NEXT:    mtvsrwz f6, r10
+; CHECK-BE-NEXT:    mtvsrwz f7, r11
+; CHECK-BE-NEXT:    xscvuxdsp f0, f0
+; CHECK-BE-NEXT:    xscvuxdsp f1, f1
+; CHECK-BE-NEXT:    xscvuxdsp f2, f2
+; CHECK-BE-NEXT:    xscvuxdsp f3, f3
+; CHECK-BE-NEXT:    xscvuxdsp f4, f4
+; CHECK-BE-NEXT:    xscvuxdsp f5, f5
+; CHECK-BE-NEXT:    xscvuxdsp f6, f6
+; CHECK-BE-NEXT:    xscvuxdsp f7, f7
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs1, vs2, vs3
+; CHECK-BE-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
+; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
+; CHECK-BE-NEXT:    xvcvdpsp v4, vs2
+; CHECK-BE-NEXT:    xvcvdpsp v5, vs3
+; CHECK-BE-NEXT:    vmrgew v2, v2, v3
+; CHECK-BE-NEXT:    vmrgew v3, v5, v4
+; CHECK-BE-NEXT:    stxv v3, 0(r3)
+; CHECK-BE-NEXT:    stxv v2, 16(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = uitofp <8 x i16> %a to <8 x float>  ; expected code is scalarized: eight halfword extracts from v2, eight xscvuxdsp conversions, then two result vectors are rebuilt with xxmrghd, xvcvdpsp and vmrgew
+  store <8 x float> %0, <8 x float>* %agg.result, align 32  ; expected: two vector stores to r3 at offsets 0 and 16 (stvx on pwr8, stxv on pwr9)
+  ret void
+}
+
+define void @test16elt(<16 x float>* noalias nocapture sret %agg.result, <16 x i16>* nocapture readonly) local_unnamed_addr #3 {
+; CHECK-P8-LABEL: test16elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lvx v2, r4, r5
+; CHECK-P8-NEXT:    mfvsrd r7, v3
+; CHECK-P8-NEXT:    xxswapd vs8, v3
+; CHECK-P8-NEXT:    mfvsrd r6, v2
+; CHECK-P8-NEXT:    xxswapd vs2, v2
+; CHECK-P8-NEXT:    clrldi r4, r6, 48
+; CHECK-P8-NEXT:    rldicl r8, r6, 32, 48
+; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 16, 31
+; CHECK-P8-NEXT:    rlwinm r8, r8, 0, 16, 31
+; CHECK-P8-NEXT:    mtvsrwz f0, r4
+; CHECK-P8-NEXT:    rldicl r4, r6, 48, 48
+; CHECK-P8-NEXT:    rldicl r6, r6, 16, 48
+; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 16, 31
+; CHECK-P8-NEXT:    mtvsrwz f1, r8
+; CHECK-P8-NEXT:    clrldi r8, r7, 48
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 16, 31
+; CHECK-P8-NEXT:    mtvsrwz f3, r4
+; CHECK-P8-NEXT:    rlwinm r4, r8, 0, 16, 31
+; CHECK-P8-NEXT:    mtvsrwz f4, r6
+; CHECK-P8-NEXT:    rldicl r6, r7, 32, 48
+; CHECK-P8-NEXT:    mtvsrwz f5, r4
+; CHECK-P8-NEXT:    rldicl r4, r7, 48, 48
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 16, 31
+; CHECK-P8-NEXT:    mfvsrd r8, f2
+; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 16, 31
+; CHECK-P8-NEXT:    mtvsrwz f2, r6
+; CHECK-P8-NEXT:    rldicl r6, r7, 16, 48
+; CHECK-P8-NEXT:    mtvsrwz f6, r4
+; CHECK-P8-NEXT:    clrldi r4, r8, 48
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 16, 31
+; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 16, 31
+; CHECK-P8-NEXT:    mtvsrwz f7, r6
+; CHECK-P8-NEXT:    rldicl r6, r8, 32, 48
+; CHECK-P8-NEXT:    mtvsrwz f9, r4
+; CHECK-P8-NEXT:    rldicl r4, r8, 48, 48
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 16, 31
+; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 16, 31
+; CHECK-P8-NEXT:    mtvsrwz f10, r6
+; CHECK-P8-NEXT:    rldicl r6, r8, 16, 48
+; CHECK-P8-NEXT:    mtvsrwz f11, r4
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 16, 31
+; CHECK-P8-NEXT:    mfvsrd r4, f8
+; CHECK-P8-NEXT:    mtvsrwz f8, r6
+; CHECK-P8-NEXT:    clrldi r6, r4, 48
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 16, 31
+; CHECK-P8-NEXT:    xscvuxdsp f1, f1
+; CHECK-P8-NEXT:    xscvuxdsp f3, f3
+; CHECK-P8-NEXT:    xscvuxdsp f4, f4
+; CHECK-P8-NEXT:    mtvsrwz f12, r6
+; CHECK-P8-NEXT:    rldicl r6, r4, 32, 48
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 16, 31
+; CHECK-P8-NEXT:    xscvuxdsp f5, f5
+; CHECK-P8-NEXT:    mtvsrwz f13, r6
+; CHECK-P8-NEXT:    rldicl r6, r4, 48, 48
+; CHECK-P8-NEXT:    rldicl r4, r4, 16, 48
+; CHECK-P8-NEXT:    xscvuxdsp f2, f2
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 16, 31
+; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 16, 31
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    mtvsrwz v2, r6
+; CHECK-P8-NEXT:    mtvsrwz v3, r4
+; CHECK-P8-NEXT:    xxmrghd vs3, vs4, vs3
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    xscvuxdsp f6, f6
+; CHECK-P8-NEXT:    xscvuxdsp f7, f7
+; CHECK-P8-NEXT:    xscvuxdsp f9, f9
+; CHECK-P8-NEXT:    xscvuxdsp f10, f10
+; CHECK-P8-NEXT:    xxmrghd vs2, vs2, vs5
+; CHECK-P8-NEXT:    xscvuxdsp f11, f11
+; CHECK-P8-NEXT:    xscvuxdsp f8, f8
+; CHECK-P8-NEXT:    xscvuxdsp f12, f12
+; CHECK-P8-NEXT:    xscvuxdsp f13, f13
+; CHECK-P8-NEXT:    xxmrghd vs5, vs7, vs6
+; CHECK-P8-NEXT:    xscvuxdsp f1, v2
+; CHECK-P8-NEXT:    xscvuxdsp f4, v3
+; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P8-NEXT:    xxmrghd vs0, vs10, vs9
+; CHECK-P8-NEXT:    xvcvdpsp v3, vs3
+; CHECK-P8-NEXT:    xxmrghd vs3, vs8, vs11
+; CHECK-P8-NEXT:    xvcvdpsp v4, vs2
+; CHECK-P8-NEXT:    xxmrghd vs2, vs13, vs12
+; CHECK-P8-NEXT:    xvcvdpsp v5, vs5
+; CHECK-P8-NEXT:    xvcvdpsp v0, vs0
+; CHECK-P8-NEXT:    xxmrghd vs1, vs4, vs1
+; CHECK-P8-NEXT:    xvcvdpsp v1, vs3
+; CHECK-P8-NEXT:    xvcvdpsp v6, vs2
+; CHECK-P8-NEXT:    vmrgew v2, v3, v2
+; CHECK-P8-NEXT:    xvcvdpsp v7, vs1
+; CHECK-P8-NEXT:    vmrgew v3, v5, v4
+; CHECK-P8-NEXT:    vmrgew v4, v1, v0
+; CHECK-P8-NEXT:    stvx v2, r3, r4
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    vmrgew v5, v7, v6
+; CHECK-P8-NEXT:    stvx v3, r3, r5
+; CHECK-P8-NEXT:    stvx v4, r3, r4
+; CHECK-P8-NEXT:    stvx v5, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv v3, 0(r4)
+; CHECK-P9-NEXT:    lxv v2, 16(r4)
+; CHECK-P9-NEXT:    li r4, 0
+; CHECK-P9-NEXT:    li r5, 4
+; CHECK-P9-NEXT:    li r6, 2
+; CHECK-P9-NEXT:    li r7, 6
+; CHECK-P9-NEXT:    li r8, 8
+; CHECK-P9-NEXT:    std r25, -72(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    li r9, 12
+; CHECK-P9-NEXT:    li r10, 10
+; CHECK-P9-NEXT:    li r11, 14
+; CHECK-P9-NEXT:    std r26, -64(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r27, -56(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r28, -48(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r29, -40(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r30, -32(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    vextuhrx r12, r4, v3
+; CHECK-P9-NEXT:    vextuhrx r0, r5, v3
+; CHECK-P9-NEXT:    vextuhrx r30, r6, v3
+; CHECK-P9-NEXT:    vextuhrx r29, r7, v3
+; CHECK-P9-NEXT:    vextuhrx r28, r8, v3
+; CHECK-P9-NEXT:    vextuhrx r27, r9, v3
+; CHECK-P9-NEXT:    vextuhrx r26, r10, v3
+; CHECK-P9-NEXT:    vextuhrx r25, r11, v3
+; CHECK-P9-NEXT:    vextuhrx r4, r4, v2
+; CHECK-P9-NEXT:    vextuhrx r5, r5, v2
+; CHECK-P9-NEXT:    vextuhrx r6, r6, v2
+; CHECK-P9-NEXT:    vextuhrx r7, r7, v2
+; CHECK-P9-NEXT:    vextuhrx r8, r8, v2
+; CHECK-P9-NEXT:    vextuhrx r9, r9, v2
+; CHECK-P9-NEXT:    vextuhrx r10, r10, v2
+; CHECK-P9-NEXT:    vextuhrx r11, r11, v2
+; CHECK-P9-NEXT:    rlwinm r12, r12, 0, 16, 31
+; CHECK-P9-NEXT:    rlwinm r0, r0, 0, 16, 31
+; CHECK-P9-NEXT:    rlwinm r30, r30, 0, 16, 31
+; CHECK-P9-NEXT:    rlwinm r29, r29, 0, 16, 31
+; CHECK-P9-NEXT:    rlwinm r28, r28, 0, 16, 31
+; CHECK-P9-NEXT:    rlwinm r27, r27, 0, 16, 31
+; CHECK-P9-NEXT:    rlwinm r26, r26, 0, 16, 31
+; CHECK-P9-NEXT:    rlwinm r25, r25, 0, 16, 31
+; CHECK-P9-NEXT:    rlwinm r4, r4, 0, 16, 31
+; CHECK-P9-NEXT:    rlwinm r5, r5, 0, 16, 31
+; CHECK-P9-NEXT:    rlwinm r6, r6, 0, 16, 31
+; CHECK-P9-NEXT:    rlwinm r7, r7, 0, 16, 31
+; CHECK-P9-NEXT:    rlwinm r8, r8, 0, 16, 31
+; CHECK-P9-NEXT:    rlwinm r9, r9, 0, 16, 31
+; CHECK-P9-NEXT:    rlwinm r10, r10, 0, 16, 31
+; CHECK-P9-NEXT:    rlwinm r11, r11, 0, 16, 31
+; CHECK-P9-NEXT:    mtvsrwz f0, r12
+; CHECK-P9-NEXT:    mtvsrwz f1, r0
+; CHECK-P9-NEXT:    mtvsrwz f2, r30
+; CHECK-P9-NEXT:    ld r30, -32(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwz f3, r29
+; CHECK-P9-NEXT:    ld r29, -40(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwz f4, r28
+; CHECK-P9-NEXT:    ld r28, -48(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwz f5, r27
+; CHECK-P9-NEXT:    ld r27, -56(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwz f6, r26
+; CHECK-P9-NEXT:    ld r26, -64(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwz f7, r25
+; CHECK-P9-NEXT:    ld r25, -72(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwz f8, r4
+; CHECK-P9-NEXT:    mtvsrwz f9, r5
+; CHECK-P9-NEXT:    mtvsrwz f10, r6
+; CHECK-P9-NEXT:    mtvsrwz f11, r7
+; CHECK-P9-NEXT:    mtvsrwz f12, r8
+; CHECK-P9-NEXT:    mtvsrwz f13, r9
+; CHECK-P9-NEXT:    mtvsrwz v2, r10
+; CHECK-P9-NEXT:    mtvsrwz v3, r11
+; CHECK-P9-NEXT:    xscvuxdsp f0, f0
+; CHECK-P9-NEXT:    xscvuxdsp f1, f1
+; CHECK-P9-NEXT:    xscvuxdsp f2, f2
+; CHECK-P9-NEXT:    xscvuxdsp f3, f3
+; CHECK-P9-NEXT:    xscvuxdsp f4, f4
+; CHECK-P9-NEXT:    xscvuxdsp f5, f5
+; CHECK-P9-NEXT:    xscvuxdsp f6, f6
+; CHECK-P9-NEXT:    xscvuxdsp f7, f7
+; CHECK-P9-NEXT:    xscvuxdsp f8, f8
+; CHECK-P9-NEXT:    xscvuxdsp f9, f9
+; CHECK-P9-NEXT:    xscvuxdsp f10, f10
+; CHECK-P9-NEXT:    xscvuxdsp f11, f11
+; CHECK-P9-NEXT:    xscvuxdsp f12, f12
+; CHECK-P9-NEXT:    xscvuxdsp f13, f13
+; CHECK-P9-NEXT:    xscvuxdsp f31, v2
+; CHECK-P9-NEXT:    xscvuxdsp f30, v3
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-P9-NEXT:    xxmrghd vs4, vs9, vs8
+; CHECK-P9-NEXT:    xxmrghd vs5, vs11, vs10
+; CHECK-P9-NEXT:    xxmrghd vs6, vs13, vs12
+; CHECK-P9-NEXT:    xxmrghd vs7, vs30, vs31
+; CHECK-P9-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P9-NEXT:    xvcvdpsp v3, vs1
+; CHECK-P9-NEXT:    xvcvdpsp v4, vs2
+; CHECK-P9-NEXT:    xvcvdpsp v5, vs3
+; CHECK-P9-NEXT:    xvcvdpsp v0, vs4
+; CHECK-P9-NEXT:    xvcvdpsp v1, vs5
+; CHECK-P9-NEXT:    xvcvdpsp v6, vs6
+; CHECK-P9-NEXT:    xvcvdpsp v7, vs7
+; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    vmrgew v3, v5, v4
+; CHECK-P9-NEXT:    vmrgew v4, v1, v0
+; CHECK-P9-NEXT:    vmrgew v5, v7, v6
+; CHECK-P9-NEXT:    stxv v3, 16(r3)
+; CHECK-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-P9-NEXT:    stxv v5, 48(r3)
+; CHECK-P9-NEXT:    stxv v4, 32(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv v3, 0(r4)
+; CHECK-BE-NEXT:    lxv v2, 16(r4)
+; CHECK-BE-NEXT:    li r4, 6
+; CHECK-BE-NEXT:    li r5, 2
+; CHECK-BE-NEXT:    li r6, 4
+; CHECK-BE-NEXT:    li r7, 0
+; CHECK-BE-NEXT:    li r8, 14
+; CHECK-BE-NEXT:    std r25, -72(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    li r9, 10
+; CHECK-BE-NEXT:    li r10, 12
+; CHECK-BE-NEXT:    li r11, 8
+; CHECK-BE-NEXT:    std r26, -64(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r27, -56(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r28, -48(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r29, -40(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r30, -32(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    vextuhlx r12, r4, v3
+; CHECK-BE-NEXT:    vextuhlx r0, r5, v3
+; CHECK-BE-NEXT:    vextuhlx r30, r6, v3
+; CHECK-BE-NEXT:    vextuhlx r29, r7, v3
+; CHECK-BE-NEXT:    vextuhlx r28, r8, v3
+; CHECK-BE-NEXT:    vextuhlx r27, r9, v3
+; CHECK-BE-NEXT:    vextuhlx r26, r10, v3
+; CHECK-BE-NEXT:    vextuhlx r25, r11, v3
+; CHECK-BE-NEXT:    vextuhlx r4, r4, v2
+; CHECK-BE-NEXT:    vextuhlx r5, r5, v2
+; CHECK-BE-NEXT:    vextuhlx r6, r6, v2
+; CHECK-BE-NEXT:    vextuhlx r7, r7, v2
+; CHECK-BE-NEXT:    vextuhlx r8, r8, v2
+; CHECK-BE-NEXT:    vextuhlx r9, r9, v2
+; CHECK-BE-NEXT:    vextuhlx r10, r10, v2
+; CHECK-BE-NEXT:    vextuhlx r11, r11, v2
+; CHECK-BE-NEXT:    rlwinm r12, r12, 0, 16, 31
+; CHECK-BE-NEXT:    rlwinm r0, r0, 0, 16, 31
+; CHECK-BE-NEXT:    rlwinm r30, r30, 0, 16, 31
+; CHECK-BE-NEXT:    rlwinm r29, r29, 0, 16, 31
+; CHECK-BE-NEXT:    rlwinm r28, r28, 0, 16, 31
+; CHECK-BE-NEXT:    rlwinm r27, r27, 0, 16, 31
+; CHECK-BE-NEXT:    rlwinm r26, r26, 0, 16, 31
+; CHECK-BE-NEXT:    rlwinm r25, r25, 0, 16, 31
+; CHECK-BE-NEXT:    rlwinm r4, r4, 0, 16, 31
+; CHECK-BE-NEXT:    rlwinm r5, r5, 0, 16, 31
+; CHECK-BE-NEXT:    rlwinm r6, r6, 0, 16, 31
+; CHECK-BE-NEXT:    rlwinm r7, r7, 0, 16, 31
+; CHECK-BE-NEXT:    rlwinm r8, r8, 0, 16, 31
+; CHECK-BE-NEXT:    rlwinm r9, r9, 0, 16, 31
+; CHECK-BE-NEXT:    rlwinm r10, r10, 0, 16, 31
+; CHECK-BE-NEXT:    rlwinm r11, r11, 0, 16, 31
+; CHECK-BE-NEXT:    mtvsrwz f0, r12
+; CHECK-BE-NEXT:    mtvsrwz f1, r0
+; CHECK-BE-NEXT:    mtvsrwz f2, r30
+; CHECK-BE-NEXT:    ld r30, -32(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwz f3, r29
+; CHECK-BE-NEXT:    ld r29, -40(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwz f4, r28
+; CHECK-BE-NEXT:    ld r28, -48(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwz f5, r27
+; CHECK-BE-NEXT:    ld r27, -56(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwz f6, r26
+; CHECK-BE-NEXT:    ld r26, -64(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwz f7, r25
+; CHECK-BE-NEXT:    ld r25, -72(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwz f8, r4
+; CHECK-BE-NEXT:    mtvsrwz f9, r5
+; CHECK-BE-NEXT:    mtvsrwz f10, r6
+; CHECK-BE-NEXT:    mtvsrwz f11, r7
+; CHECK-BE-NEXT:    mtvsrwz f12, r8
+; CHECK-BE-NEXT:    mtvsrwz f13, r9
+; CHECK-BE-NEXT:    mtvsrwz v2, r10
+; CHECK-BE-NEXT:    mtvsrwz v3, r11
+; CHECK-BE-NEXT:    xscvuxdsp f0, f0
+; CHECK-BE-NEXT:    xscvuxdsp f1, f1
+; CHECK-BE-NEXT:    xscvuxdsp f2, f2
+; CHECK-BE-NEXT:    xscvuxdsp f3, f3
+; CHECK-BE-NEXT:    xscvuxdsp f4, f4
+; CHECK-BE-NEXT:    xscvuxdsp f5, f5
+; CHECK-BE-NEXT:    xscvuxdsp f6, f6
+; CHECK-BE-NEXT:    xscvuxdsp f7, f7
+; CHECK-BE-NEXT:    xscvuxdsp f8, f8
+; CHECK-BE-NEXT:    xscvuxdsp f9, f9
+; CHECK-BE-NEXT:    xscvuxdsp f10, f10
+; CHECK-BE-NEXT:    xscvuxdsp f11, f11
+; CHECK-BE-NEXT:    xscvuxdsp f12, f12
+; CHECK-BE-NEXT:    xscvuxdsp f13, f13
+; CHECK-BE-NEXT:    xscvuxdsp f31, v2
+; CHECK-BE-NEXT:    xscvuxdsp f30, v3
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-BE-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-BE-NEXT:    xxmrghd vs4, vs9, vs8
+; CHECK-BE-NEXT:    xxmrghd vs5, vs11, vs10
+; CHECK-BE-NEXT:    xxmrghd vs6, vs13, vs12
+; CHECK-BE-NEXT:    xxmrghd vs7, vs30, vs31
+; CHECK-BE-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
+; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
+; CHECK-BE-NEXT:    xvcvdpsp v4, vs2
+; CHECK-BE-NEXT:    xvcvdpsp v5, vs3
+; CHECK-BE-NEXT:    xvcvdpsp v0, vs4
+; CHECK-BE-NEXT:    xvcvdpsp v1, vs5
+; CHECK-BE-NEXT:    xvcvdpsp v6, vs6
+; CHECK-BE-NEXT:    xvcvdpsp v7, vs7
+; CHECK-BE-NEXT:    vmrgew v2, v3, v2
+; CHECK-BE-NEXT:    vmrgew v3, v5, v4
+; CHECK-BE-NEXT:    vmrgew v4, v1, v0
+; CHECK-BE-NEXT:    vmrgew v5, v7, v6
+; CHECK-BE-NEXT:    stxv v3, 16(r3)
+; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    stxv v5, 48(r3)
+; CHECK-BE-NEXT:    stxv v4, 32(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <16 x i16>, <16 x i16>* %0, align 32
+  %1 = uitofp <16 x i16> %a to <16 x float>
+  store <16 x float> %1, <16 x float>* %agg.result, align 64
+  ret void
+}
+
+define i64 @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 { ; signed <2 x i16> -> <2 x float> conversion; both vectors travel as bitcast integers (i32 in, i64 out)
+; CHECK-P8-LABEL: test2elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    clrldi r4, r3, 48
+; CHECK-P8-NEXT:    rldicl r3, r3, 48, 48
+; CHECK-P8-NEXT:    extsh r4, r4
+; CHECK-P8-NEXT:    extsh r3, r3
+; CHECK-P8-NEXT:    mtvsrwa f0, r4
+; CHECK-P8-NEXT:    mtvsrwa f1, r3
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
+; CHECK-P8-NEXT:    xscvsxdsp f1, f1
+; CHECK-P8-NEXT:    xscvdpspn vs0, f0
+; CHECK-P8-NEXT:    xscvdpspn vs1, f1
+; CHECK-P8-NEXT:    xxsldwi v2, vs0, vs0, 1
+; CHECK-P8-NEXT:    xxsldwi v3, vs1, vs1, 1
+; CHECK-P8-NEXT:    vmrglw v2, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    blr
+;
+; P9 group: the two halfwords are extracted with vextuhrx (byte offsets 0 and 2) instead of the
+; CHECK-P9-LABEL: test2elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrws v2, r3
+; CHECK-P9-NEXT:    li r3, 0
+; CHECK-P9-NEXT:    li r4, 2
+; CHECK-P9-NEXT:    vextuhrx r3, r3, v2
+; CHECK-P9-NEXT:    vextuhrx r4, r4, v2
+; CHECK-P9-NEXT:    extsh r3, r3
+; CHECK-P9-NEXT:    extsh r4, r4
+; CHECK-P9-NEXT:    mtvsrwa f0, r3
+; CHECK-P9-NEXT:    mtvsrwa f1, r4
+; CHECK-P9-NEXT:    xscvsxdsp f0, f0
+; CHECK-P9-NEXT:    xscvsxdsp f1, f1
+; CHECK-P9-NEXT:    xscvdpspn vs0, f0
+; CHECK-P9-NEXT:    xscvdpspn vs1, f1
+; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 1
+; CHECK-P9-NEXT:    xxsldwi v3, vs1, vs1, 1
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    blr
+;
+; BE group: vextuhlx with the byte offsets in the opposite order (2 then 0); no xxsldwi, merge is vmrghw
+; CHECK-BE-LABEL: test2elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrws v2, r3
+; CHECK-BE-NEXT:    li r3, 2
+; CHECK-BE-NEXT:    li r4, 0
+; CHECK-BE-NEXT:    vextuhlx r3, r3, v2
+; CHECK-BE-NEXT:    vextuhlx r4, r4, v2
+; CHECK-BE-NEXT:    extsh r3, r3
+; CHECK-BE-NEXT:    extsh r4, r4
+; CHECK-BE-NEXT:    mtvsrwa f0, r3
+; CHECK-BE-NEXT:    mtvsrwa f1, r4
+; CHECK-BE-NEXT:    xscvsxdsp f0, f0
+; CHECK-BE-NEXT:    xscvsxdsp f1, f1
+; CHECK-BE-NEXT:    xscvdpspn v2, f0
+; CHECK-BE-NEXT:    xscvdpspn v3, f1
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    mfvsrd r3, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i32 %a.coerce to <2 x i16> ; reinterpret the 32-bit argument as two i16 lanes
+  %1 = sitofp <2 x i16> %0 to <2 x float> ; conversion under test; every expected sequence handles it lane by lane (extsh, mtvsrwa, xscvsxdsp)
+  %2 = bitcast <2 x float> %1 to i64 ; pack the two floats into the 64-bit return value
+  ret i64 %2
+}
+
+define <4 x float> @test4elt_signed(i64 %a.coerce) local_unnamed_addr #1 { ; signed <4 x i16> -> <4 x float> conversion; the source vector arrives bitcast into an i64
+; CHECK-P8-LABEL: test4elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    clrldi r4, r3, 48
+; CHECK-P8-NEXT:    rldicl r5, r3, 32, 48
+; CHECK-P8-NEXT:    extsh r4, r4
+; CHECK-P8-NEXT:    extsh r5, r5
+; CHECK-P8-NEXT:    mtvsrwa f0, r4
+; CHECK-P8-NEXT:    rldicl r4, r3, 48, 48
+; CHECK-P8-NEXT:    rldicl r3, r3, 16, 48
+; CHECK-P8-NEXT:    extsh r4, r4
+; CHECK-P8-NEXT:    extsh r3, r3
+; CHECK-P8-NEXT:    mtvsrwa f1, r5
+; CHECK-P8-NEXT:    mtvsrwa f2, r4
+; CHECK-P8-NEXT:    mtvsrwa f3, r3
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
+; CHECK-P8-NEXT:    xscvsxdsp f1, f1
+; CHECK-P8-NEXT:    xscvsxdsp f2, f2
+; CHECK-P8-NEXT:    xscvsxdsp f3, f3
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P8-NEXT:    xvcvdpsp v3, vs1
+; CHECK-P8-NEXT:    vmrgew v2, v3, v2
+; CHECK-P8-NEXT:    blr
+;
+; P9 group: argument is placed in v2 via mtvsrd + xxswapd, then vextuhrx reads byte offsets 0, 4, 2, 6
+; CHECK-P9-LABEL: test4elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    li r3, 0
+; CHECK-P9-NEXT:    li r4, 4
+; CHECK-P9-NEXT:    li r5, 2
+; CHECK-P9-NEXT:    li r6, 6
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    vextuhrx r3, r3, v2
+; CHECK-P9-NEXT:    vextuhrx r4, r4, v2
+; CHECK-P9-NEXT:    vextuhrx r5, r5, v2
+; CHECK-P9-NEXT:    vextuhrx r6, r6, v2
+; CHECK-P9-NEXT:    extsh r3, r3
+; CHECK-P9-NEXT:    extsh r4, r4
+; CHECK-P9-NEXT:    extsh r5, r5
+; CHECK-P9-NEXT:    extsh r6, r6
+; CHECK-P9-NEXT:    mtvsrwa f0, r3
+; CHECK-P9-NEXT:    mtvsrwa f1, r4
+; CHECK-P9-NEXT:    mtvsrwa f2, r5
+; CHECK-P9-NEXT:    mtvsrwa f3, r6
+; CHECK-P9-NEXT:    xscvsxdsp f0, f0
+; CHECK-P9-NEXT:    xscvsxdsp f1, f1
+; CHECK-P9-NEXT:    xscvsxdsp f2, f2
+; CHECK-P9-NEXT:    xscvsxdsp f3, f3
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P9-NEXT:    xvcvdpsp v3, vs1
+; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    blr
+;
+; BE group: mtvsrd writes v2 directly (no xxswapd) and vextuhlx reads byte offsets 6, 2, 4, 0
+; CHECK-BE-LABEL: test4elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    li r4, 6
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    li r3, 2
+; CHECK-BE-NEXT:    li r5, 4
+; CHECK-BE-NEXT:    li r6, 0
+; CHECK-BE-NEXT:    vextuhlx r4, r4, v2
+; CHECK-BE-NEXT:    vextuhlx r3, r3, v2
+; CHECK-BE-NEXT:    vextuhlx r5, r5, v2
+; CHECK-BE-NEXT:    vextuhlx r6, r6, v2
+; CHECK-BE-NEXT:    extsh r4, r4
+; CHECK-BE-NEXT:    extsh r3, r3
+; CHECK-BE-NEXT:    extsh r5, r5
+; CHECK-BE-NEXT:    extsh r6, r6
+; CHECK-BE-NEXT:    mtvsrwa f0, r4
+; CHECK-BE-NEXT:    mtvsrwa f1, r3
+; CHECK-BE-NEXT:    mtvsrwa f2, r5
+; CHECK-BE-NEXT:    mtvsrwa f3, r6
+; CHECK-BE-NEXT:    xscvsxdsp f0, f0
+; CHECK-BE-NEXT:    xscvsxdsp f1, f1
+; CHECK-BE-NEXT:    xscvsxdsp f2, f2
+; CHECK-BE-NEXT:    xscvsxdsp f3, f3
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
+; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
+; CHECK-BE-NEXT:    vmrgew v2, v3, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i64 %a.coerce to <4 x i16> ; reinterpret the 64-bit argument as four i16 lanes
+  %1 = sitofp <4 x i16> %0 to <4 x float> ; conversion under test; expected code converts each lane with xscvsxdsp and rebuilds the vector with xxmrghd / xvcvdpsp / vmrgew
+  ret <4 x float> %1
+}
+
+define void @test8elt_signed(<8 x float>* noalias nocapture sret %agg.result, <8 x i16> %a) local_unnamed_addr #2 { ; signed <8 x i16> -> <8 x float> conversion; the 32-byte result is written through the sret pointer
+; CHECK-P8-LABEL: test8elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mfvsrd r5, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    clrldi r6, r5, 48
+; CHECK-P8-NEXT:    rldicl r7, r5, 32, 48
+; CHECK-P8-NEXT:    extsh r6, r6
+; CHECK-P8-NEXT:    mfvsrd r8, f0
+; CHECK-P8-NEXT:    extsh r7, r7
+; CHECK-P8-NEXT:    mtvsrwa f1, r6
+; CHECK-P8-NEXT:    rldicl r6, r5, 48, 48
+; CHECK-P8-NEXT:    rldicl r5, r5, 16, 48
+; CHECK-P8-NEXT:    extsh r6, r6
+; CHECK-P8-NEXT:    extsh r5, r5
+; CHECK-P8-NEXT:    mtvsrwa f0, r7
+; CHECK-P8-NEXT:    mtvsrwa f2, r6
+; CHECK-P8-NEXT:    clrldi r6, r8, 48
+; CHECK-P8-NEXT:    mtvsrwa f3, r5
+; CHECK-P8-NEXT:    rldicl r5, r8, 32, 48
+; CHECK-P8-NEXT:    extsh r6, r6
+; CHECK-P8-NEXT:    mtvsrwa f4, r6
+; CHECK-P8-NEXT:    rldicl r6, r8, 48, 48
+; CHECK-P8-NEXT:    extsh r5, r5
+; CHECK-P8-NEXT:    mtvsrwa f5, r5
+; CHECK-P8-NEXT:    extsh r5, r6
+; CHECK-P8-NEXT:    mtvsrwa f6, r5
+; CHECK-P8-NEXT:    rldicl r5, r8, 16, 48
+; CHECK-P8-NEXT:    extsh r5, r5
+; CHECK-P8-NEXT:    xscvsxdsp f1, f1
+; CHECK-P8-NEXT:    mtvsrwa f7, r5
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
+; CHECK-P8-NEXT:    xscvsxdsp f2, f2
+; CHECK-P8-NEXT:    xscvsxdsp f4, f4
+; CHECK-P8-NEXT:    xscvsxdsp f5, f5
+; CHECK-P8-NEXT:    xscvsxdsp f6, f6
+; CHECK-P8-NEXT:    xscvsxdsp f7, f7
+; CHECK-P8-NEXT:    xscvsxdsp f3, f3
+; CHECK-P8-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-P8-NEXT:    xxmrghd vs1, vs5, vs4
+; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P8-NEXT:    xxmrghd vs4, vs7, vs6
+; CHECK-P8-NEXT:    xxmrghd vs2, vs3, vs2
+; CHECK-P8-NEXT:    xvcvdpsp v3, vs1
+; CHECK-P8-NEXT:    xvcvdpsp v4, vs4
+; CHECK-P8-NEXT:    xvcvdpsp v5, vs2
+; CHECK-P8-NEXT:    vmrgew v3, v4, v3
+; CHECK-P8-NEXT:    vmrgew v2, v5, v2
+; CHECK-P8-NEXT:    stvx v3, 0, r3
+; CHECK-P8-NEXT:    stvx v2, r3, r4
+; CHECK-P8-NEXT:    blr
+;
+; P9 group: eight vextuhrx extractions from v2 (byte offsets 8, 12, 10, 14, 0, 4, 2, 6), result stored with two stxv at 0(r3) and 16(r3)
+; CHECK-P9-LABEL: test8elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    li r4, 8
+; CHECK-P9-NEXT:    li r5, 12
+; CHECK-P9-NEXT:    li r6, 10
+; CHECK-P9-NEXT:    li r7, 14
+; CHECK-P9-NEXT:    li r8, 0
+; CHECK-P9-NEXT:    li r9, 4
+; CHECK-P9-NEXT:    li r10, 2
+; CHECK-P9-NEXT:    li r11, 6
+; CHECK-P9-NEXT:    vextuhrx r4, r4, v2
+; CHECK-P9-NEXT:    vextuhrx r5, r5, v2
+; CHECK-P9-NEXT:    vextuhrx r6, r6, v2
+; CHECK-P9-NEXT:    vextuhrx r7, r7, v2
+; CHECK-P9-NEXT:    vextuhrx r8, r8, v2
+; CHECK-P9-NEXT:    vextuhrx r9, r9, v2
+; CHECK-P9-NEXT:    vextuhrx r10, r10, v2
+; CHECK-P9-NEXT:    vextuhrx r11, r11, v2
+; CHECK-P9-NEXT:    extsh r4, r4
+; CHECK-P9-NEXT:    extsh r5, r5
+; CHECK-P9-NEXT:    extsh r6, r6
+; CHECK-P9-NEXT:    extsh r7, r7
+; CHECK-P9-NEXT:    extsh r8, r8
+; CHECK-P9-NEXT:    extsh r9, r9
+; CHECK-P9-NEXT:    extsh r10, r10
+; CHECK-P9-NEXT:    extsh r11, r11
+; CHECK-P9-NEXT:    mtvsrwa f0, r4
+; CHECK-P9-NEXT:    mtvsrwa f1, r5
+; CHECK-P9-NEXT:    mtvsrwa f2, r6
+; CHECK-P9-NEXT:    mtvsrwa f3, r7
+; CHECK-P9-NEXT:    mtvsrwa f4, r8
+; CHECK-P9-NEXT:    mtvsrwa f5, r9
+; CHECK-P9-NEXT:    mtvsrwa f6, r10
+; CHECK-P9-NEXT:    mtvsrwa f7, r11
+; CHECK-P9-NEXT:    xscvsxdsp f0, f0
+; CHECK-P9-NEXT:    xscvsxdsp f1, f1
+; CHECK-P9-NEXT:    xscvsxdsp f2, f2
+; CHECK-P9-NEXT:    xscvsxdsp f3, f3
+; CHECK-P9-NEXT:    xscvsxdsp f4, f4
+; CHECK-P9-NEXT:    xscvsxdsp f5, f5
+; CHECK-P9-NEXT:    xscvsxdsp f6, f6
+; CHECK-P9-NEXT:    xscvsxdsp f7, f7
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-P9-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P9-NEXT:    xvcvdpsp v3, vs1
+; CHECK-P9-NEXT:    xvcvdpsp v4, vs2
+; CHECK-P9-NEXT:    xvcvdpsp v5, vs3
+; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    vmrgew v3, v5, v4
+; CHECK-P9-NEXT:    stxv v3, 0(r3)
+; CHECK-P9-NEXT:    stxv v2, 16(r3)
+; CHECK-P9-NEXT:    blr
+;
+; BE group: vextuhlx extractions; the second xxmrghd (vs2, vs3) and the first vmrgew (v2, v3) take their sources in the opposite order from the P9 group
+; CHECK-BE-LABEL: test8elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    li r4, 12
+; CHECK-BE-NEXT:    li r5, 8
+; CHECK-BE-NEXT:    li r6, 10
+; CHECK-BE-NEXT:    li r7, 14
+; CHECK-BE-NEXT:    li r8, 6
+; CHECK-BE-NEXT:    li r9, 2
+; CHECK-BE-NEXT:    li r10, 4
+; CHECK-BE-NEXT:    li r11, 0
+; CHECK-BE-NEXT:    vextuhlx r4, r4, v2
+; CHECK-BE-NEXT:    vextuhlx r5, r5, v2
+; CHECK-BE-NEXT:    vextuhlx r6, r6, v2
+; CHECK-BE-NEXT:    vextuhlx r7, r7, v2
+; CHECK-BE-NEXT:    vextuhlx r8, r8, v2
+; CHECK-BE-NEXT:    vextuhlx r9, r9, v2
+; CHECK-BE-NEXT:    vextuhlx r10, r10, v2
+; CHECK-BE-NEXT:    vextuhlx r11, r11, v2
+; CHECK-BE-NEXT:    extsh r4, r4
+; CHECK-BE-NEXT:    extsh r5, r5
+; CHECK-BE-NEXT:    extsh r6, r6
+; CHECK-BE-NEXT:    extsh r7, r7
+; CHECK-BE-NEXT:    extsh r8, r8
+; CHECK-BE-NEXT:    extsh r9, r9
+; CHECK-BE-NEXT:    extsh r10, r10
+; CHECK-BE-NEXT:    extsh r11, r11
+; CHECK-BE-NEXT:    mtvsrwa f0, r4
+; CHECK-BE-NEXT:    mtvsrwa f1, r5
+; CHECK-BE-NEXT:    mtvsrwa f2, r6
+; CHECK-BE-NEXT:    mtvsrwa f3, r7
+; CHECK-BE-NEXT:    mtvsrwa f4, r8
+; CHECK-BE-NEXT:    mtvsrwa f5, r9
+; CHECK-BE-NEXT:    mtvsrwa f6, r10
+; CHECK-BE-NEXT:    mtvsrwa f7, r11
+; CHECK-BE-NEXT:    xscvsxdsp f0, f0
+; CHECK-BE-NEXT:    xscvsxdsp f1, f1
+; CHECK-BE-NEXT:    xscvsxdsp f2, f2
+; CHECK-BE-NEXT:    xscvsxdsp f3, f3
+; CHECK-BE-NEXT:    xscvsxdsp f4, f4
+; CHECK-BE-NEXT:    xscvsxdsp f5, f5
+; CHECK-BE-NEXT:    xscvsxdsp f6, f6
+; CHECK-BE-NEXT:    xscvsxdsp f7, f7
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs1, vs2, vs3
+; CHECK-BE-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
+; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
+; CHECK-BE-NEXT:    xvcvdpsp v4, vs2
+; CHECK-BE-NEXT:    xvcvdpsp v5, vs3
+; CHECK-BE-NEXT:    vmrgew v2, v2, v3
+; CHECK-BE-NEXT:    vmrgew v3, v5, v4
+; CHECK-BE-NEXT:    stxv v3, 0(r3)
+; CHECK-BE-NEXT:    stxv v2, 16(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = sitofp <8 x i16> %a to <8 x float> ; conversion under test; every expected sequence sign-extends each lane with extsh and converts it with xscvsxdsp
+  store <8 x float> %0, <8 x float>* %agg.result, align 32 ; write the converted vector to the sret slot (two 16-byte stores in each expected sequence)
+  ret void
+}
+
+define void @test16elt_signed(<16 x float>* noalias nocapture sret %agg.result, <16 x i16>* nocapture readonly) local_unnamed_addr #3 {
+; CHECK-P8-LABEL: test16elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lvx v2, r4, r5
+; CHECK-P8-NEXT:    mfvsrd r7, v3
+; CHECK-P8-NEXT:    xxswapd vs8, v3
+; CHECK-P8-NEXT:    mfvsrd r6, v2
+; CHECK-P8-NEXT:    xxswapd vs2, v2
+; CHECK-P8-NEXT:    clrldi r4, r6, 48
+; CHECK-P8-NEXT:    rldicl r8, r6, 32, 48
+; CHECK-P8-NEXT:    extsh r4, r4
+; CHECK-P8-NEXT:    extsh r8, r8
+; CHECK-P8-NEXT:    mtvsrwa f0, r4
+; CHECK-P8-NEXT:    rldicl r4, r6, 48, 48
+; CHECK-P8-NEXT:    rldicl r6, r6, 16, 48
+; CHECK-P8-NEXT:    extsh r4, r4
+; CHECK-P8-NEXT:    mtvsrwa f1, r8
+; CHECK-P8-NEXT:    clrldi r8, r7, 48
+; CHECK-P8-NEXT:    extsh r6, r6
+; CHECK-P8-NEXT:    mtvsrwa f3, r4
+; CHECK-P8-NEXT:    extsh r4, r8
+; CHECK-P8-NEXT:    mtvsrwa f4, r6
+; CHECK-P8-NEXT:    rldicl r6, r7, 32, 48
+; CHECK-P8-NEXT:    mtvsrwa f5, r4
+; CHECK-P8-NEXT:    rldicl r4, r7, 48, 48
+; CHECK-P8-NEXT:    extsh r6, r6
+; CHECK-P8-NEXT:    mfvsrd r8, f2
+; CHECK-P8-NEXT:    extsh r4, r4
+; CHECK-P8-NEXT:    mtvsrwa f2, r6
+; CHECK-P8-NEXT:    rldicl r6, r7, 16, 48
+; CHECK-P8-NEXT:    mtvsrwa f6, r4
+; CHECK-P8-NEXT:    clrldi r4, r8, 48
+; CHECK-P8-NEXT:    extsh r6, r6
+; CHECK-P8-NEXT:    extsh r4, r4
+; CHECK-P8-NEXT:    mtvsrwa f7, r6
+; CHECK-P8-NEXT:    rldicl r6, r8, 32, 48
+; CHECK-P8-NEXT:    mtvsrwa f9, r4
+; CHECK-P8-NEXT:    rldicl r4, r8, 48, 48
+; CHECK-P8-NEXT:    extsh r6, r6
+; CHECK-P8-NEXT:    extsh r4, r4
+; CHECK-P8-NEXT:    mtvsrwa f10, r6
+; CHECK-P8-NEXT:    rldicl r6, r8, 16, 48
+; CHECK-P8-NEXT:    mtvsrwa f11, r4
+; CHECK-P8-NEXT:    extsh r6, r6
+; CHECK-P8-NEXT:    mfvsrd r4, f8
+; CHECK-P8-NEXT:    mtvsrwa f8, r6
+; CHECK-P8-NEXT:    clrldi r6, r4, 48
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
+; CHECK-P8-NEXT:    extsh r6, r6
+; CHECK-P8-NEXT:    xscvsxdsp f1, f1
+; CHECK-P8-NEXT:    xscvsxdsp f3, f3
+; CHECK-P8-NEXT:    xscvsxdsp f4, f4
+; CHECK-P8-NEXT:    mtvsrwa f12, r6
+; CHECK-P8-NEXT:    rldicl r6, r4, 32, 48
+; CHECK-P8-NEXT:    extsh r6, r6
+; CHECK-P8-NEXT:    xscvsxdsp f5, f5
+; CHECK-P8-NEXT:    mtvsrwa f13, r6
+; CHECK-P8-NEXT:    rldicl r6, r4, 48, 48
+; CHECK-P8-NEXT:    rldicl r4, r4, 16, 48
+; CHECK-P8-NEXT:    xscvsxdsp f2, f2
+; CHECK-P8-NEXT:    extsh r6, r6
+; CHECK-P8-NEXT:    extsh r4, r4
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    mtvsrwa v2, r6
+; CHECK-P8-NEXT:    mtvsrwa v3, r4
+; CHECK-P8-NEXT:    xxmrghd vs3, vs4, vs3
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    xscvsxdsp f6, f6
+; CHECK-P8-NEXT:    xscvsxdsp f7, f7
+; CHECK-P8-NEXT:    xscvsxdsp f9, f9
+; CHECK-P8-NEXT:    xscvsxdsp f10, f10
+; CHECK-P8-NEXT:    xxmrghd vs2, vs2, vs5
+; CHECK-P8-NEXT:    xscvsxdsp f11, f11
+; CHECK-P8-NEXT:    xscvsxdsp f8, f8
+; CHECK-P8-NEXT:    xscvsxdsp f12, f12
+; CHECK-P8-NEXT:    xscvsxdsp f13, f13
+; CHECK-P8-NEXT:    xxmrghd vs5, vs7, vs6
+; CHECK-P8-NEXT:    xscvsxdsp f1, v2
+; CHECK-P8-NEXT:    xscvsxdsp f4, v3
+; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P8-NEXT:    xxmrghd vs0, vs10, vs9
+; CHECK-P8-NEXT:    xvcvdpsp v3, vs3
+; CHECK-P8-NEXT:    xxmrghd vs3, vs8, vs11
+; CHECK-P8-NEXT:    xvcvdpsp v4, vs2
+; CHECK-P8-NEXT:    xxmrghd vs2, vs13, vs12
+; CHECK-P8-NEXT:    xvcvdpsp v5, vs5
+; CHECK-P8-NEXT:    xvcvdpsp v0, vs0
+; CHECK-P8-NEXT:    xxmrghd vs1, vs4, vs1
+; CHECK-P8-NEXT:    xvcvdpsp v1, vs3
+; CHECK-P8-NEXT:    xvcvdpsp v6, vs2
+; CHECK-P8-NEXT:    vmrgew v2, v3, v2
+; CHECK-P8-NEXT:    xvcvdpsp v7, vs1
+; CHECK-P8-NEXT:    vmrgew v3, v5, v4
+; CHECK-P8-NEXT:    vmrgew v4, v1, v0
+; CHECK-P8-NEXT:    stvx v2, r3, r4
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    vmrgew v5, v7, v6
+; CHECK-P8-NEXT:    stvx v3, r3, r5
+; CHECK-P8-NEXT:    stvx v4, r3, r4
+; CHECK-P8-NEXT:    stvx v5, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv v3, 0(r4)
+; CHECK-P9-NEXT:    lxv v2, 16(r4)
+; CHECK-P9-NEXT:    li r4, 0
+; CHECK-P9-NEXT:    li r5, 4
+; CHECK-P9-NEXT:    li r6, 2
+; CHECK-P9-NEXT:    li r7, 6
+; CHECK-P9-NEXT:    li r8, 8
+; CHECK-P9-NEXT:    std r25, -72(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    li r9, 12
+; CHECK-P9-NEXT:    li r10, 10
+; CHECK-P9-NEXT:    li r11, 14
+; CHECK-P9-NEXT:    std r26, -64(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r27, -56(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r28, -48(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r29, -40(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r30, -32(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    vextuhrx r12, r4, v3
+; CHECK-P9-NEXT:    vextuhrx r0, r5, v3
+; CHECK-P9-NEXT:    vextuhrx r30, r6, v3
+; CHECK-P9-NEXT:    vextuhrx r29, r7, v3
+; CHECK-P9-NEXT:    vextuhrx r28, r8, v3
+; CHECK-P9-NEXT:    vextuhrx r27, r9, v3
+; CHECK-P9-NEXT:    vextuhrx r26, r10, v3
+; CHECK-P9-NEXT:    vextuhrx r25, r11, v3
+; CHECK-P9-NEXT:    vextuhrx r4, r4, v2
+; CHECK-P9-NEXT:    vextuhrx r5, r5, v2
+; CHECK-P9-NEXT:    vextuhrx r6, r6, v2
+; CHECK-P9-NEXT:    vextuhrx r7, r7, v2
+; CHECK-P9-NEXT:    vextuhrx r8, r8, v2
+; CHECK-P9-NEXT:    vextuhrx r9, r9, v2
+; CHECK-P9-NEXT:    vextuhrx r10, r10, v2
+; CHECK-P9-NEXT:    vextuhrx r11, r11, v2
+; CHECK-P9-NEXT:    extsh r12, r12
+; CHECK-P9-NEXT:    extsh r0, r0
+; CHECK-P9-NEXT:    extsh r30, r30
+; CHECK-P9-NEXT:    extsh r29, r29
+; CHECK-P9-NEXT:    extsh r28, r28
+; CHECK-P9-NEXT:    extsh r27, r27
+; CHECK-P9-NEXT:    extsh r26, r26
+; CHECK-P9-NEXT:    extsh r25, r25
+; CHECK-P9-NEXT:    extsh r4, r4
+; CHECK-P9-NEXT:    extsh r5, r5
+; CHECK-P9-NEXT:    extsh r6, r6
+; CHECK-P9-NEXT:    extsh r7, r7
+; CHECK-P9-NEXT:    extsh r8, r8
+; CHECK-P9-NEXT:    extsh r9, r9
+; CHECK-P9-NEXT:    extsh r10, r10
+; CHECK-P9-NEXT:    extsh r11, r11
+; CHECK-P9-NEXT:    mtvsrwa f0, r12
+; CHECK-P9-NEXT:    mtvsrwa f1, r0
+; CHECK-P9-NEXT:    mtvsrwa f2, r30
+; CHECK-P9-NEXT:    ld r30, -32(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwa f3, r29
+; CHECK-P9-NEXT:    ld r29, -40(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwa f4, r28
+; CHECK-P9-NEXT:    ld r28, -48(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwa f5, r27
+; CHECK-P9-NEXT:    ld r27, -56(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwa f6, r26
+; CHECK-P9-NEXT:    ld r26, -64(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwa f7, r25
+; CHECK-P9-NEXT:    ld r25, -72(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwa f8, r4
+; CHECK-P9-NEXT:    mtvsrwa f9, r5
+; CHECK-P9-NEXT:    mtvsrwa f10, r6
+; CHECK-P9-NEXT:    mtvsrwa f11, r7
+; CHECK-P9-NEXT:    mtvsrwa f12, r8
+; CHECK-P9-NEXT:    mtvsrwa f13, r9
+; CHECK-P9-NEXT:    mtvsrwa v2, r10
+; CHECK-P9-NEXT:    mtvsrwa v3, r11
+; CHECK-P9-NEXT:    xscvsxdsp f0, f0
+; CHECK-P9-NEXT:    xscvsxdsp f1, f1
+; CHECK-P9-NEXT:    xscvsxdsp f2, f2
+; CHECK-P9-NEXT:    xscvsxdsp f3, f3
+; CHECK-P9-NEXT:    xscvsxdsp f4, f4
+; CHECK-P9-NEXT:    xscvsxdsp f5, f5
+; CHECK-P9-NEXT:    xscvsxdsp f6, f6
+; CHECK-P9-NEXT:    xscvsxdsp f7, f7
+; CHECK-P9-NEXT:    xscvsxdsp f8, f8
+; CHECK-P9-NEXT:    xscvsxdsp f9, f9
+; CHECK-P9-NEXT:    xscvsxdsp f10, f10
+; CHECK-P9-NEXT:    xscvsxdsp f11, f11
+; CHECK-P9-NEXT:    xscvsxdsp f12, f12
+; CHECK-P9-NEXT:    xscvsxdsp f13, f13
+; CHECK-P9-NEXT:    xscvsxdsp f31, v2
+; CHECK-P9-NEXT:    xscvsxdsp f30, v3
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-P9-NEXT:    xxmrghd vs4, vs9, vs8
+; CHECK-P9-NEXT:    xxmrghd vs5, vs11, vs10
+; CHECK-P9-NEXT:    xxmrghd vs6, vs13, vs12
+; CHECK-P9-NEXT:    xxmrghd vs7, vs30, vs31
+; CHECK-P9-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P9-NEXT:    xvcvdpsp v3, vs1
+; CHECK-P9-NEXT:    xvcvdpsp v4, vs2
+; CHECK-P9-NEXT:    xvcvdpsp v5, vs3
+; CHECK-P9-NEXT:    xvcvdpsp v0, vs4
+; CHECK-P9-NEXT:    xvcvdpsp v1, vs5
+; CHECK-P9-NEXT:    xvcvdpsp v6, vs6
+; CHECK-P9-NEXT:    xvcvdpsp v7, vs7
+; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    vmrgew v3, v5, v4
+; CHECK-P9-NEXT:    vmrgew v4, v1, v0
+; CHECK-P9-NEXT:    vmrgew v5, v7, v6
+; CHECK-P9-NEXT:    stxv v3, 16(r3)
+; CHECK-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-P9-NEXT:    stxv v5, 48(r3)
+; CHECK-P9-NEXT:    stxv v4, 32(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv v3, 0(r4)
+; CHECK-BE-NEXT:    lxv v2, 16(r4)
+; CHECK-BE-NEXT:    li r4, 6
+; CHECK-BE-NEXT:    li r5, 2
+; CHECK-BE-NEXT:    li r6, 4
+; CHECK-BE-NEXT:    li r7, 0
+; CHECK-BE-NEXT:    li r8, 14
+; CHECK-BE-NEXT:    std r25, -72(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    li r9, 10
+; CHECK-BE-NEXT:    li r10, 12
+; CHECK-BE-NEXT:    li r11, 8
+; CHECK-BE-NEXT:    std r26, -64(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r27, -56(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r28, -48(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r29, -40(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r30, -32(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    vextuhlx r12, r4, v3
+; CHECK-BE-NEXT:    vextuhlx r0, r5, v3
+; CHECK-BE-NEXT:    vextuhlx r30, r6, v3
+; CHECK-BE-NEXT:    vextuhlx r29, r7, v3
+; CHECK-BE-NEXT:    vextuhlx r28, r8, v3
+; CHECK-BE-NEXT:    vextuhlx r27, r9, v3
+; CHECK-BE-NEXT:    vextuhlx r26, r10, v3
+; CHECK-BE-NEXT:    vextuhlx r25, r11, v3
+; CHECK-BE-NEXT:    vextuhlx r4, r4, v2
+; CHECK-BE-NEXT:    vextuhlx r5, r5, v2
+; CHECK-BE-NEXT:    vextuhlx r6, r6, v2
+; CHECK-BE-NEXT:    vextuhlx r7, r7, v2
+; CHECK-BE-NEXT:    vextuhlx r8, r8, v2
+; CHECK-BE-NEXT:    vextuhlx r9, r9, v2
+; CHECK-BE-NEXT:    vextuhlx r10, r10, v2
+; CHECK-BE-NEXT:    vextuhlx r11, r11, v2
+; CHECK-BE-NEXT:    extsh r12, r12
+; CHECK-BE-NEXT:    extsh r0, r0
+; CHECK-BE-NEXT:    extsh r30, r30
+; CHECK-BE-NEXT:    extsh r29, r29
+; CHECK-BE-NEXT:    extsh r28, r28
+; CHECK-BE-NEXT:    extsh r27, r27
+; CHECK-BE-NEXT:    extsh r26, r26
+; CHECK-BE-NEXT:    extsh r25, r25
+; CHECK-BE-NEXT:    extsh r4, r4
+; CHECK-BE-NEXT:    extsh r5, r5
+; CHECK-BE-NEXT:    extsh r6, r6
+; CHECK-BE-NEXT:    extsh r7, r7
+; CHECK-BE-NEXT:    extsh r8, r8
+; CHECK-BE-NEXT:    extsh r9, r9
+; CHECK-BE-NEXT:    extsh r10, r10
+; CHECK-BE-NEXT:    extsh r11, r11
+; CHECK-BE-NEXT:    mtvsrwa f0, r12
+; CHECK-BE-NEXT:    mtvsrwa f1, r0
+; CHECK-BE-NEXT:    mtvsrwa f2, r30
+; CHECK-BE-NEXT:    ld r30, -32(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwa f3, r29
+; CHECK-BE-NEXT:    ld r29, -40(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwa f4, r28
+; CHECK-BE-NEXT:    ld r28, -48(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwa f5, r27
+; CHECK-BE-NEXT:    ld r27, -56(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwa f6, r26
+; CHECK-BE-NEXT:    ld r26, -64(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwa f7, r25
+; CHECK-BE-NEXT:    ld r25, -72(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwa f8, r4
+; CHECK-BE-NEXT:    mtvsrwa f9, r5
+; CHECK-BE-NEXT:    mtvsrwa f10, r6
+; CHECK-BE-NEXT:    mtvsrwa f11, r7
+; CHECK-BE-NEXT:    mtvsrwa f12, r8
+; CHECK-BE-NEXT:    mtvsrwa f13, r9
+; CHECK-BE-NEXT:    mtvsrwa v2, r10
+; CHECK-BE-NEXT:    mtvsrwa v3, r11
+; CHECK-BE-NEXT:    xscvsxdsp f0, f0
+; CHECK-BE-NEXT:    xscvsxdsp f1, f1
+; CHECK-BE-NEXT:    xscvsxdsp f2, f2
+; CHECK-BE-NEXT:    xscvsxdsp f3, f3
+; CHECK-BE-NEXT:    xscvsxdsp f4, f4
+; CHECK-BE-NEXT:    xscvsxdsp f5, f5
+; CHECK-BE-NEXT:    xscvsxdsp f6, f6
+; CHECK-BE-NEXT:    xscvsxdsp f7, f7
+; CHECK-BE-NEXT:    xscvsxdsp f8, f8
+; CHECK-BE-NEXT:    xscvsxdsp f9, f9
+; CHECK-BE-NEXT:    xscvsxdsp f10, f10
+; CHECK-BE-NEXT:    xscvsxdsp f11, f11
+; CHECK-BE-NEXT:    xscvsxdsp f12, f12
+; CHECK-BE-NEXT:    xscvsxdsp f13, f13
+; CHECK-BE-NEXT:    xscvsxdsp f31, v2
+; CHECK-BE-NEXT:    xscvsxdsp f30, v3
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-BE-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-BE-NEXT:    xxmrghd vs4, vs9, vs8
+; CHECK-BE-NEXT:    xxmrghd vs5, vs11, vs10
+; CHECK-BE-NEXT:    xxmrghd vs6, vs13, vs12
+; CHECK-BE-NEXT:    xxmrghd vs7, vs30, vs31
+; CHECK-BE-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
+; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
+; CHECK-BE-NEXT:    xvcvdpsp v4, vs2
+; CHECK-BE-NEXT:    xvcvdpsp v5, vs3
+; CHECK-BE-NEXT:    xvcvdpsp v0, vs4
+; CHECK-BE-NEXT:    xvcvdpsp v1, vs5
+; CHECK-BE-NEXT:    xvcvdpsp v6, vs6
+; CHECK-BE-NEXT:    xvcvdpsp v7, vs7
+; CHECK-BE-NEXT:    vmrgew v2, v3, v2
+; CHECK-BE-NEXT:    vmrgew v3, v5, v4
+; CHECK-BE-NEXT:    vmrgew v4, v1, v0
+; CHECK-BE-NEXT:    vmrgew v5, v7, v6
+; CHECK-BE-NEXT:    stxv v3, 16(r3)
+; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    stxv v5, 48(r3)
+; CHECK-BE-NEXT:    stxv v4, 32(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <16 x i16>, <16 x i16>* %0, align 32
+  %1 = sitofp <16 x i16> %a to <16 x float>
+  store <16 x float> %1, <16 x float>* %agg.result, align 64
+  ret void
+}

Added: llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll?rev=347090&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll Fri Nov 16 12:24:10 2018
@@ -0,0 +1,828 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P8
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P9
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE
+
+define <2 x double> @test2elt(i32 %a.coerce) local_unnamed_addr #0 {
+; CHECK-P8-LABEL: test2elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI0_0@toc@ha
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    addi r3, r4, .LCPI0_0@toc@l
+; CHECK-P8-NEXT:    xxlxor v4, v4, v4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lvx v3, 0, r3
+; CHECK-P8-NEXT:    vperm v2, v4, v2, v3
+; CHECK-P8-NEXT:    xvcvuxddp v2, v2
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI0_0@toc@ha
+; CHECK-P9-NEXT:    mtvsrws v3, r3
+; CHECK-P9-NEXT:    xxlxor v4, v4, v4
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI0_0@toc@l
+; CHECK-P9-NEXT:    lxvx v2, 0, r4
+; CHECK-P9-NEXT:    vperm v2, v4, v3, v2
+; CHECK-P9-NEXT:    xvcvuxddp v2, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI0_0@toc@ha
+; CHECK-BE-NEXT:    mtvsrws v3, r3
+; CHECK-BE-NEXT:    xxlxor v4, v4, v4
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI0_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r4
+; CHECK-BE-NEXT:    vperm v2, v3, v4, v2
+; CHECK-BE-NEXT:    xvcvuxddp v2, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i32 %a.coerce to <2 x i16> ; view the coerced i32 argument as two i16 lanes
+  %1 = uitofp <2 x i16> %0 to <2 x double> ; unsigned i16 -> double, lane by lane
+  ret <2 x double> %1
+}
+
+define void @test4elt(<4 x double>* noalias nocapture sret %agg.result, i64 %a.coerce) local_unnamed_addr #1 {
+; CHECK-P8-LABEL: test4elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI1_0@toc@ha
+; CHECK-P8-NEXT:    mtvsrd f0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI1_1@toc@ha
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI1_0@toc@l
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI1_1@toc@l
+; CHECK-P8-NEXT:    xxlxor v4, v4, v4
+; CHECK-P8-NEXT:    lvx v2, 0, r5
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    lvx v5, 0, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    vperm v2, v4, v3, v2
+; CHECK-P8-NEXT:    vperm v3, v4, v3, v5
+; CHECK-P8-NEXT:    xvcvuxddp vs0, v2
+; CHECK-P8-NEXT:    xvcvuxddp vs1, v3
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test4elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    addis r5, r2, .LCPI1_0@toc@ha
+; CHECK-P9-NEXT:    addis r6, r2, .LCPI1_1@toc@ha
+; CHECK-P9-NEXT:    mtvsrd f0, r4
+; CHECK-P9-NEXT:    xxlxor v5, v5, v5
+; CHECK-P9-NEXT:    addi r5, r5, .LCPI1_0@toc@l
+; CHECK-P9-NEXT:    addi r6, r6, .LCPI1_1@toc@l
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    lxvx v2, 0, r5
+; CHECK-P9-NEXT:    lxvx v3, 0, r6
+; CHECK-P9-NEXT:    vperm v2, v5, v4, v2
+; CHECK-P9-NEXT:    vperm v3, v5, v4, v3
+; CHECK-P9-NEXT:    xvcvuxddp vs0, v2
+; CHECK-P9-NEXT:    xvcvuxddp vs1, v3
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    addis r5, r2, .LCPI1_0@toc@ha
+; CHECK-BE-NEXT:    addis r6, r2, .LCPI1_1@toc@ha
+; CHECK-BE-NEXT:    mtvsrd v4, r4
+; CHECK-BE-NEXT:    xxlxor v5, v5, v5
+; CHECK-BE-NEXT:    addi r5, r5, .LCPI1_0@toc@l
+; CHECK-BE-NEXT:    addi r6, r6, .LCPI1_1@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r5
+; CHECK-BE-NEXT:    lxvx v3, 0, r6
+; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
+; CHECK-BE-NEXT:    vperm v3, v5, v4, v3
+; CHECK-BE-NEXT:    xvcvuxddp vs0, v2
+; CHECK-BE-NEXT:    xvcvuxddp vs1, v3
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i64 %a.coerce to <4 x i16> ; view the coerced i64 argument as four i16 lanes
+  %1 = uitofp <4 x i16> %0 to <4 x double> ; unsigned i16 -> double, lane by lane
+  store <4 x double> %1, <4 x double>* %agg.result, align 32 ; result is returned through the sret pointer
+  ret void
+}
+
+define void @test8elt(<8 x double>* noalias nocapture sret %agg.result, <8 x i16> %a) local_unnamed_addr #2 {
+; CHECK-P8-LABEL: test8elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI2_2@toc@ha
+; CHECK-P8-NEXT:    xxlxor v4, v4, v4
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI2_0@toc@l
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_2@toc@l
+; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI2_3@toc@ha
+; CHECK-P8-NEXT:    lvx v5, 0, r5
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI2_1@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI2_3@toc@l
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_1@toc@l
+; CHECK-P8-NEXT:    lvx v0, 0, r4
+; CHECK-P8-NEXT:    lvx v1, 0, r5
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    vperm v3, v4, v2, v3
+; CHECK-P8-NEXT:    vperm v5, v4, v2, v5
+; CHECK-P8-NEXT:    vperm v0, v4, v2, v0
+; CHECK-P8-NEXT:    vperm v2, v4, v2, v1
+; CHECK-P8-NEXT:    xvcvuxddp vs0, v3
+; CHECK-P8-NEXT:    xvcvuxddp vs1, v5
+; CHECK-P8-NEXT:    xvcvuxddp vs2, v0
+; CHECK-P8-NEXT:    xvcvuxddp vs3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
+; CHECK-P9-NEXT:    addis r5, r2, .LCPI2_1@toc@ha
+; CHECK-P9-NEXT:    addis r6, r2, .LCPI2_2@toc@ha
+; CHECK-P9-NEXT:    addis r7, r2, .LCPI2_3@toc@ha
+; CHECK-P9-NEXT:    xxlxor v1, v1, v1
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI2_0@toc@l
+; CHECK-P9-NEXT:    addi r5, r5, .LCPI2_1@toc@l
+; CHECK-P9-NEXT:    addi r6, r6, .LCPI2_2@toc@l
+; CHECK-P9-NEXT:    addi r7, r7, .LCPI2_3@toc@l
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    lxvx v4, 0, r5
+; CHECK-P9-NEXT:    lxvx v5, 0, r6
+; CHECK-P9-NEXT:    lxvx v0, 0, r7
+; CHECK-P9-NEXT:    vperm v3, v1, v2, v3
+; CHECK-P9-NEXT:    vperm v4, v1, v2, v4
+; CHECK-P9-NEXT:    vperm v5, v1, v2, v5
+; CHECK-P9-NEXT:    vperm v2, v1, v2, v0
+; CHECK-P9-NEXT:    xvcvuxddp vs0, v3
+; CHECK-P9-NEXT:    xvcvuxddp vs1, v4
+; CHECK-P9-NEXT:    xvcvuxddp vs2, v5
+; CHECK-P9-NEXT:    xvcvuxddp vs3, v2
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
+; CHECK-BE-NEXT:    addis r5, r2, .LCPI2_1@toc@ha
+; CHECK-BE-NEXT:    addis r6, r2, .LCPI2_2@toc@ha
+; CHECK-BE-NEXT:    addis r7, r2, .LCPI2_3@toc@ha
+; CHECK-BE-NEXT:    xxlxor v1, v1, v1
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI2_0@toc@l
+; CHECK-BE-NEXT:    addi r5, r5, .LCPI2_1@toc@l
+; CHECK-BE-NEXT:    addi r6, r6, .LCPI2_2@toc@l
+; CHECK-BE-NEXT:    addi r7, r7, .LCPI2_3@toc@l
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    lxvx v4, 0, r5
+; CHECK-BE-NEXT:    lxvx v5, 0, r6
+; CHECK-BE-NEXT:    lxvx v0, 0, r7
+; CHECK-BE-NEXT:    vperm v3, v2, v1, v3
+; CHECK-BE-NEXT:    vperm v4, v1, v2, v4
+; CHECK-BE-NEXT:    vperm v5, v1, v2, v5
+; CHECK-BE-NEXT:    vperm v2, v1, v2, v0
+; CHECK-BE-NEXT:    xvcvuxddp vs0, v3
+; CHECK-BE-NEXT:    xvcvuxddp vs1, v4
+; CHECK-BE-NEXT:    xvcvuxddp vs2, v5
+; CHECK-BE-NEXT:    xvcvuxddp vs3, v2
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = uitofp <8 x i16> %a to <8 x double> ; unsigned i16 -> double, lane by lane
+  store <8 x double> %0, <8 x double>* %agg.result, align 64 ; result is returned through the sret pointer
+  ret void
+}
+
+define void @test16elt(<16 x double>* noalias nocapture sret %agg.result, <16 x i16>* nocapture readonly) local_unnamed_addr #3 {
+; CHECK-P8-LABEL: test16elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    addis r6, r2, .LCPI3_2@toc@ha
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_0@toc@ha
+; CHECK-P8-NEXT:    lvx v4, 0, r4
+; CHECK-P8-NEXT:    xxlxor v3, v3, v3
+; CHECK-P8-NEXT:    addi r6, r6, .LCPI3_2@toc@l
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_0@toc@l
+; CHECK-P8-NEXT:    lvx v5, 0, r6
+; CHECK-P8-NEXT:    li r6, 16
+; CHECK-P8-NEXT:    lvx v2, 0, r5
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_1@toc@ha
+; CHECK-P8-NEXT:    lvx v0, r4, r6
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_3@toc@ha
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_1@toc@l
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_3@toc@l
+; CHECK-P8-NEXT:    lvx v1, 0, r5
+; CHECK-P8-NEXT:    li r5, 96
+; CHECK-P8-NEXT:    lvx v8, 0, r4
+; CHECK-P8-NEXT:    vperm v6, v3, v4, v2
+; CHECK-P8-NEXT:    li r4, 112
+; CHECK-P8-NEXT:    vperm v7, v3, v4, v5
+; CHECK-P8-NEXT:    vperm v2, v3, v0, v2
+; CHECK-P8-NEXT:    vperm v9, v3, v0, v1
+; CHECK-P8-NEXT:    vperm v5, v3, v0, v5
+; CHECK-P8-NEXT:    vperm v0, v3, v0, v8
+; CHECK-P8-NEXT:    vperm v1, v3, v4, v1
+; CHECK-P8-NEXT:    vperm v3, v3, v4, v8
+; CHECK-P8-NEXT:    xvcvuxddp vs1, v2
+; CHECK-P8-NEXT:    xvcvuxddp vs4, v9
+; CHECK-P8-NEXT:    xvcvuxddp vs2, v5
+; CHECK-P8-NEXT:    xvcvuxddp vs3, v0
+; CHECK-P8-NEXT:    xvcvuxddp vs0, v7
+; CHECK-P8-NEXT:    xvcvuxddp vs5, v3
+; CHECK-P8-NEXT:    xvcvuxddp vs6, v6
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xvcvuxddp vs7, v1
+; CHECK-P8-NEXT:    xxswapd vs4, vs4
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs5, vs5
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r5
+; CHECK-P8-NEXT:    li r4, 80
+; CHECK-P8-NEXT:    li r5, 64
+; CHECK-P8-NEXT:    xxswapd vs2, vs7
+; CHECK-P8-NEXT:    xxswapd vs3, vs6
+; CHECK-P8-NEXT:    stxvd2x vs4, r3, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    stxvd2x vs5, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    addis r5, r2, .LCPI3_0@toc@ha
+; CHECK-P9-NEXT:    addis r6, r2, .LCPI3_1@toc@ha
+; CHECK-P9-NEXT:    addis r7, r2, .LCPI3_2@toc@ha
+; CHECK-P9-NEXT:    addis r8, r2, .LCPI3_3@toc@ha
+; CHECK-P9-NEXT:    lxv v0, 0(r4)
+; CHECK-P9-NEXT:    lxv v1, 16(r4)
+; CHECK-P9-NEXT:    xxlxor v6, v6, v6
+; CHECK-P9-NEXT:    addi r5, r5, .LCPI3_0@toc@l
+; CHECK-P9-NEXT:    addi r6, r6, .LCPI3_1@toc@l
+; CHECK-P9-NEXT:    addi r7, r7, .LCPI3_2@toc@l
+; CHECK-P9-NEXT:    addi r8, r8, .LCPI3_3@toc@l
+; CHECK-P9-NEXT:    lxvx v2, 0, r5
+; CHECK-P9-NEXT:    lxvx v3, 0, r6
+; CHECK-P9-NEXT:    lxvx v4, 0, r7
+; CHECK-P9-NEXT:    lxvx v5, 0, r8
+; CHECK-P9-NEXT:    vperm v7, v6, v0, v2
+; CHECK-P9-NEXT:    vperm v8, v6, v0, v3
+; CHECK-P9-NEXT:    vperm v9, v6, v0, v4
+; CHECK-P9-NEXT:    vperm v0, v6, v0, v5
+; CHECK-P9-NEXT:    vperm v2, v6, v1, v2
+; CHECK-P9-NEXT:    vperm v3, v6, v1, v3
+; CHECK-P9-NEXT:    vperm v4, v6, v1, v4
+; CHECK-P9-NEXT:    vperm v5, v6, v1, v5
+; CHECK-P9-NEXT:    xvcvuxddp vs0, v7
+; CHECK-P9-NEXT:    xvcvuxddp vs1, v8
+; CHECK-P9-NEXT:    xvcvuxddp vs2, v9
+; CHECK-P9-NEXT:    xvcvuxddp vs3, v0
+; CHECK-P9-NEXT:    xvcvuxddp vs4, v2
+; CHECK-P9-NEXT:    xvcvuxddp vs5, v3
+; CHECK-P9-NEXT:    xvcvuxddp vs6, v4
+; CHECK-P9-NEXT:    xvcvuxddp vs7, v5
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    stxv vs7, 112(r3)
+; CHECK-P9-NEXT:    stxv vs6, 96(r3)
+; CHECK-P9-NEXT:    stxv vs5, 80(r3)
+; CHECK-P9-NEXT:    stxv vs4, 64(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    addis r5, r2, .LCPI3_0@toc@ha
+; CHECK-BE-NEXT:    addis r6, r2, .LCPI3_1@toc@ha
+; CHECK-BE-NEXT:    addis r7, r2, .LCPI3_2@toc@ha
+; CHECK-BE-NEXT:    addis r8, r2, .LCPI3_3@toc@ha
+; CHECK-BE-NEXT:    lxv v0, 0(r4)
+; CHECK-BE-NEXT:    lxv v1, 16(r4)
+; CHECK-BE-NEXT:    xxlxor v6, v6, v6
+; CHECK-BE-NEXT:    addi r5, r5, .LCPI3_0@toc@l
+; CHECK-BE-NEXT:    addi r6, r6, .LCPI3_1@toc@l
+; CHECK-BE-NEXT:    addi r7, r7, .LCPI3_2@toc@l
+; CHECK-BE-NEXT:    addi r8, r8, .LCPI3_3@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r5
+; CHECK-BE-NEXT:    lxvx v3, 0, r6
+; CHECK-BE-NEXT:    lxvx v4, 0, r7
+; CHECK-BE-NEXT:    lxvx v5, 0, r8
+; CHECK-BE-NEXT:    vperm v7, v0, v6, v2
+; CHECK-BE-NEXT:    vperm v8, v6, v0, v3
+; CHECK-BE-NEXT:    vperm v9, v6, v0, v4
+; CHECK-BE-NEXT:    vperm v0, v6, v0, v5
+; CHECK-BE-NEXT:    vperm v2, v1, v6, v2
+; CHECK-BE-NEXT:    vperm v3, v6, v1, v3
+; CHECK-BE-NEXT:    vperm v4, v6, v1, v4
+; CHECK-BE-NEXT:    vperm v5, v6, v1, v5
+; CHECK-BE-NEXT:    xvcvuxddp vs0, v7
+; CHECK-BE-NEXT:    xvcvuxddp vs1, v8
+; CHECK-BE-NEXT:    xvcvuxddp vs2, v9
+; CHECK-BE-NEXT:    xvcvuxddp vs3, v0
+; CHECK-BE-NEXT:    xvcvuxddp vs4, v2
+; CHECK-BE-NEXT:    xvcvuxddp vs5, v3
+; CHECK-BE-NEXT:    xvcvuxddp vs6, v4
+; CHECK-BE-NEXT:    xvcvuxddp vs7, v5
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs7, 112(r3)
+; CHECK-BE-NEXT:    stxv vs6, 96(r3)
+; CHECK-BE-NEXT:    stxv vs5, 80(r3)
+; CHECK-BE-NEXT:    stxv vs4, 64(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <16 x i16>, <16 x i16>* %0, align 32 ; %0 is the unnamed second parameter (input vector pointer)
+  %1 = uitofp <16 x i16> %a to <16 x double> ; unsigned i16 -> double, lane by lane
+  store <16 x double> %1, <16 x double>* %agg.result, align 128 ; result is returned through the sret pointer
+  ret void
+}
+
+define <2 x double> @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 {
+; CHECK-P8-LABEL: test2elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    clrldi r4, r3, 48
+; CHECK-P8-NEXT:    rldicl r3, r3, 48, 48
+; CHECK-P8-NEXT:    extsh r4, r4
+; CHECK-P8-NEXT:    extsh r3, r3
+; CHECK-P8-NEXT:    mtvsrwa f0, r4
+; CHECK-P8-NEXT:    mtvsrwa f1, r3
+; CHECK-P8-NEXT:    xscvsxddp f0, f0
+; CHECK-P8-NEXT:    xscvsxddp f1, f1
+; CHECK-P8-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI4_0@toc@ha
+; CHECK-P9-NEXT:    mtvsrws v3, r3
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI4_0@toc@l
+; CHECK-P9-NEXT:    lxvx v2, 0, r4
+; CHECK-P9-NEXT:    vperm v2, v3, v3, v2
+; CHECK-P9-NEXT:    vextsh2d v2, v2
+; CHECK-P9-NEXT:    xvcvsxddp v2, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI4_0@toc@ha
+; CHECK-BE-NEXT:    mtvsrws v3, r3
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI4_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r4
+; CHECK-BE-NEXT:    vperm v2, v3, v3, v2
+; CHECK-BE-NEXT:    vextsh2d v2, v2
+; CHECK-BE-NEXT:    xvcvsxddp v2, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i32 %a.coerce to <2 x i16> ; view the coerced i32 argument as two i16 lanes
+  %1 = sitofp <2 x i16> %0 to <2 x double> ; signed i16 -> double, lane by lane
+  ret <2 x double> %1
+}
+
+define void @test4elt_signed(<4 x double>* noalias nocapture sret %agg.result, i64 %a.coerce) local_unnamed_addr #1 {
+; CHECK-P8-LABEL: test4elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r4
+; CHECK-P8-NEXT:    mfvsrd r4, f0
+; CHECK-P8-NEXT:    clrldi r5, r4, 48
+; CHECK-P8-NEXT:    rldicl r6, r4, 48, 48
+; CHECK-P8-NEXT:    extsh r5, r5
+; CHECK-P8-NEXT:    extsh r6, r6
+; CHECK-P8-NEXT:    mtvsrwa f0, r5
+; CHECK-P8-NEXT:    rldicl r5, r4, 32, 48
+; CHECK-P8-NEXT:    rldicl r4, r4, 16, 48
+; CHECK-P8-NEXT:    extsh r5, r5
+; CHECK-P8-NEXT:    extsh r4, r4
+; CHECK-P8-NEXT:    mtvsrwa f1, r6
+; CHECK-P8-NEXT:    mtvsrwa f2, r5
+; CHECK-P8-NEXT:    mtvsrwa f3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    xscvsxddp f0, f0
+; CHECK-P8-NEXT:    xscvsxddp f1, f1
+; CHECK-P8-NEXT:    xscvsxddp f2, f2
+; CHECK-P8-NEXT:    xscvsxddp f3, f3
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test4elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    addis r5, r2, .LCPI5_0@toc@ha
+; CHECK-P9-NEXT:    addis r6, r2, .LCPI5_1@toc@ha
+; CHECK-P9-NEXT:    mtvsrd f0, r4
+; CHECK-P9-NEXT:    addi r5, r5, .LCPI5_0@toc@l
+; CHECK-P9-NEXT:    addi r6, r6, .LCPI5_1@toc@l
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    lxvx v2, 0, r5
+; CHECK-P9-NEXT:    lxvx v3, 0, r6
+; CHECK-P9-NEXT:    vperm v2, v4, v4, v2
+; CHECK-P9-NEXT:    vperm v3, v4, v4, v3
+; CHECK-P9-NEXT:    vextsh2d v2, v2
+; CHECK-P9-NEXT:    vextsh2d v3, v3
+; CHECK-P9-NEXT:    xvcvsxddp vs0, v2
+; CHECK-P9-NEXT:    xvcvsxddp vs1, v3
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    addis r5, r2, .LCPI5_0@toc@ha
+; CHECK-BE-NEXT:    addis r6, r2, .LCPI5_1@toc@ha
+; CHECK-BE-NEXT:    mtvsrd v4, r4
+; CHECK-BE-NEXT:    xxlxor v5, v5, v5
+; CHECK-BE-NEXT:    addi r5, r5, .LCPI5_0@toc@l
+; CHECK-BE-NEXT:    addi r6, r6, .LCPI5_1@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r5
+; CHECK-BE-NEXT:    lxvx v3, 0, r6
+; CHECK-BE-NEXT:    vperm v2, v5, v4, v2
+; CHECK-BE-NEXT:    vperm v3, v4, v4, v3
+; CHECK-BE-NEXT:    vextsh2d v2, v2
+; CHECK-BE-NEXT:    vextsh2d v3, v3
+; CHECK-BE-NEXT:    xvcvsxddp vs0, v2
+; CHECK-BE-NEXT:    xvcvsxddp vs1, v3
+; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    stxv vs1, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i64 %a.coerce to <4 x i16> ; view the coerced i64 argument as four i16 lanes
+  %1 = sitofp <4 x i16> %0 to <4 x double> ; signed i16 -> double, lane by lane
+  store <4 x double> %1, <4 x double>* %agg.result, align 32 ; result is returned through the sret pointer
+  ret void
+}
+
+define void @test8elt_signed(<8 x double>* noalias nocapture sret %agg.result, <8 x i16> %a) local_unnamed_addr #2 {
+; CHECK-P8-LABEL: test8elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mfvsrd r4, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    clrldi r5, r4, 48
+; CHECK-P8-NEXT:    rldicl r6, r4, 48, 48
+; CHECK-P8-NEXT:    extsh r5, r5
+; CHECK-P8-NEXT:    mfvsrd r7, f0
+; CHECK-P8-NEXT:    extsh r6, r6
+; CHECK-P8-NEXT:    mtvsrwa f1, r5
+; CHECK-P8-NEXT:    rldicl r5, r4, 32, 48
+; CHECK-P8-NEXT:    rldicl r4, r4, 16, 48
+; CHECK-P8-NEXT:    extsh r5, r5
+; CHECK-P8-NEXT:    extsh r4, r4
+; CHECK-P8-NEXT:    mtvsrwa f0, r6
+; CHECK-P8-NEXT:    mtvsrwa f2, r5
+; CHECK-P8-NEXT:    clrldi r5, r7, 48
+; CHECK-P8-NEXT:    mtvsrwa f3, r4
+; CHECK-P8-NEXT:    extsh r4, r5
+; CHECK-P8-NEXT:    rldicl r5, r7, 16, 48
+; CHECK-P8-NEXT:    mtvsrwa f4, r4
+; CHECK-P8-NEXT:    rldicl r4, r7, 48, 48
+; CHECK-P8-NEXT:    extsh r5, r5
+; CHECK-P8-NEXT:    extsh r4, r4
+; CHECK-P8-NEXT:    mtvsrwa f7, r5
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    mtvsrwa f5, r4
+; CHECK-P8-NEXT:    rldicl r4, r7, 32, 48
+; CHECK-P8-NEXT:    extsh r4, r4
+; CHECK-P8-NEXT:    xscvsxddp f1, f1
+; CHECK-P8-NEXT:    mtvsrwa f6, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    xscvsxddp f0, f0
+; CHECK-P8-NEXT:    xscvsxddp f2, f2
+; CHECK-P8-NEXT:    xscvsxddp f3, f3
+; CHECK-P8-NEXT:    xscvsxddp f4, f4
+; CHECK-P8-NEXT:    xscvsxddp f5, f5
+; CHECK-P8-NEXT:    xscvsxddp f6, f6
+; CHECK-P8-NEXT:    xscvsxddp f7, f7
+; CHECK-P8-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs2, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI6_0@toc@ha
+; CHECK-P9-NEXT:    addis r5, r2, .LCPI6_1@toc@ha
+; CHECK-P9-NEXT:    addis r6, r2, .LCPI6_2@toc@ha
+; CHECK-P9-NEXT:    addis r7, r2, .LCPI6_3@toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI6_0@toc@l
+; CHECK-P9-NEXT:    addi r5, r5, .LCPI6_1@toc@l
+; CHECK-P9-NEXT:    addi r6, r6, .LCPI6_2@toc@l
+; CHECK-P9-NEXT:    addi r7, r7, .LCPI6_3@toc@l
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    lxvx v4, 0, r5
+; CHECK-P9-NEXT:    lxvx v5, 0, r6
+; CHECK-P9-NEXT:    lxvx v0, 0, r7
+; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P9-NEXT:    vperm v4, v2, v2, v4
+; CHECK-P9-NEXT:    vperm v5, v2, v2, v5
+; CHECK-P9-NEXT:    vperm v2, v2, v2, v0
+; CHECK-P9-NEXT:    vextsh2d v3, v3
+; CHECK-P9-NEXT:    vextsh2d v4, v4
+; CHECK-P9-NEXT:    vextsh2d v5, v5
+; CHECK-P9-NEXT:    vextsh2d v2, v2
+; CHECK-P9-NEXT:    xvcvsxddp vs0, v3
+; CHECK-P9-NEXT:    xvcvsxddp vs1, v4
+; CHECK-P9-NEXT:    xvcvsxddp vs2, v5
+; CHECK-P9-NEXT:    xvcvsxddp vs3, v2
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_0@toc@ha
+; CHECK-BE-NEXT:    addis r5, r2, .LCPI6_1@toc@ha
+; CHECK-BE-NEXT:    addis r6, r2, .LCPI6_2@toc@ha
+; CHECK-BE-NEXT:    addis r7, r2, .LCPI6_3@toc@ha
+; CHECK-BE-NEXT:    xxlxor v1, v1, v1
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_0@toc@l
+; CHECK-BE-NEXT:    addi r5, r5, .LCPI6_1@toc@l
+; CHECK-BE-NEXT:    addi r6, r6, .LCPI6_2@toc@l
+; CHECK-BE-NEXT:    addi r7, r7, .LCPI6_3@toc@l
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    lxvx v4, 0, r5
+; CHECK-BE-NEXT:    lxvx v5, 0, r6
+; CHECK-BE-NEXT:    lxvx v0, 0, r7
+; CHECK-BE-NEXT:    vperm v3, v1, v2, v3
+; CHECK-BE-NEXT:    vperm v4, v1, v2, v4
+; CHECK-BE-NEXT:    vperm v5, v2, v2, v5
+; CHECK-BE-NEXT:    vperm v2, v2, v2, v0
+; CHECK-BE-NEXT:    vextsh2d v3, v3
+; CHECK-BE-NEXT:    vextsh2d v4, v4
+; CHECK-BE-NEXT:    vextsh2d v5, v5
+; CHECK-BE-NEXT:    vextsh2d v2, v2
+; CHECK-BE-NEXT:    xvcvsxddp vs0, v3
+; CHECK-BE-NEXT:    xvcvsxddp vs1, v4
+; CHECK-BE-NEXT:    xvcvsxddp vs2, v5
+; CHECK-BE-NEXT:    xvcvsxddp vs3, v2
+; CHECK-BE-NEXT:    stxv vs1, 48(r3)
+; CHECK-BE-NEXT:    stxv vs3, 32(r3)
+; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    stxv vs2, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = sitofp <8 x i16> %a to <8 x double> ; signed i16 -> double, lane by lane
+  store <8 x double> %0, <8 x double>* %agg.result, align 64 ; result is returned through the sret pointer
+  ret void
+}
+
+define void @test16elt_signed(<16 x double>* noalias nocapture sret %agg.result, <16 x i16>* nocapture readonly) local_unnamed_addr #3 {
+; CHECK-P8-LABEL: test16elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-P8-NEXT:    lvx v2, r4, r5
+; CHECK-P8-NEXT:    mfvsrd r7, v3
+; CHECK-P8-NEXT:    xxswapd vs8, v3
+; CHECK-P8-NEXT:    mfvsrd r6, v2
+; CHECK-P8-NEXT:    xxswapd vs2, v2
+; CHECK-P8-NEXT:    clrldi r4, r6, 48
+; CHECK-P8-NEXT:    rldicl r8, r6, 48, 48
+; CHECK-P8-NEXT:    extsh r4, r4
+; CHECK-P8-NEXT:    extsh r8, r8
+; CHECK-P8-NEXT:    mtvsrwa f0, r4
+; CHECK-P8-NEXT:    rldicl r4, r6, 32, 48
+; CHECK-P8-NEXT:    rldicl r6, r6, 16, 48
+; CHECK-P8-NEXT:    mtvsrwa f1, r8
+; CHECK-P8-NEXT:    extsh r4, r4
+; CHECK-P8-NEXT:    clrldi r8, r7, 48
+; CHECK-P8-NEXT:    extsh r6, r6
+; CHECK-P8-NEXT:    mtvsrwa f3, r4
+; CHECK-P8-NEXT:    extsh r4, r8
+; CHECK-P8-NEXT:    mtvsrwa f4, r6
+; CHECK-P8-NEXT:    rldicl r6, r7, 48, 48
+; CHECK-P8-NEXT:    mtvsrwa f5, r4
+; CHECK-P8-NEXT:    rldicl r4, r7, 32, 48
+; CHECK-P8-NEXT:    extsh r6, r6
+; CHECK-P8-NEXT:    mfvsrd r8, f2
+; CHECK-P8-NEXT:    extsh r4, r4
+; CHECK-P8-NEXT:    mtvsrwa f2, r6
+; CHECK-P8-NEXT:    rldicl r6, r7, 16, 48
+; CHECK-P8-NEXT:    mtvsrwa f6, r4
+; CHECK-P8-NEXT:    clrldi r4, r8, 48
+; CHECK-P8-NEXT:    extsh r6, r6
+; CHECK-P8-NEXT:    extsh r4, r4
+; CHECK-P8-NEXT:    mtvsrwa f7, r6
+; CHECK-P8-NEXT:    rldicl r6, r8, 48, 48
+; CHECK-P8-NEXT:    mtvsrwa f9, r4
+; CHECK-P8-NEXT:    rldicl r4, r8, 32, 48
+; CHECK-P8-NEXT:    extsh r6, r6
+; CHECK-P8-NEXT:    extsh r4, r4
+; CHECK-P8-NEXT:    mtvsrwa f10, r6
+; CHECK-P8-NEXT:    rldicl r6, r8, 16, 48
+; CHECK-P8-NEXT:    mtvsrwa f11, r4
+; CHECK-P8-NEXT:    extsh r6, r6
+; CHECK-P8-NEXT:    mfvsrd r4, f8
+; CHECK-P8-NEXT:    mtvsrwa f8, r6
+; CHECK-P8-NEXT:    clrldi r6, r4, 48
+; CHECK-P8-NEXT:    xscvsxddp f3, f3
+; CHECK-P8-NEXT:    extsh r6, r6
+; CHECK-P8-NEXT:    xscvsxddp f4, f4
+; CHECK-P8-NEXT:    mtvsrwa f12, r6
+; CHECK-P8-NEXT:    rldicl r6, r4, 48, 48
+; CHECK-P8-NEXT:    extsh r6, r6
+; CHECK-P8-NEXT:    xscvsxddp f0, f0
+; CHECK-P8-NEXT:    mtvsrwa f13, r6
+; CHECK-P8-NEXT:    rldicl r6, r4, 32, 48
+; CHECK-P8-NEXT:    rldicl r4, r4, 16, 48
+; CHECK-P8-NEXT:    xscvsxddp f1, f1
+; CHECK-P8-NEXT:    extsh r6, r6
+; CHECK-P8-NEXT:    extsh r4, r4
+; CHECK-P8-NEXT:    xscvsxddp f5, f5
+; CHECK-P8-NEXT:    xscvsxddp f2, f2
+; CHECK-P8-NEXT:    xxmrghd vs3, vs4, vs3
+; CHECK-P8-NEXT:    mtvsrwa v2, r6
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    mtvsrwa v3, r4
+; CHECK-P8-NEXT:    li r4, 112
+; CHECK-P8-NEXT:    xscvsxddp f6, f6
+; CHECK-P8-NEXT:    xscvsxddp f7, f7
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    xscvsxddp f9, f9
+; CHECK-P8-NEXT:    xscvsxddp f10, f10
+; CHECK-P8-NEXT:    xxmrghd vs1, vs2, vs5
+; CHECK-P8-NEXT:    xscvsxddp f11, f11
+; CHECK-P8-NEXT:    xxswapd vs2, vs3
+; CHECK-P8-NEXT:    xscvsxddp f8, f8
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xscvsxddp f12, f12
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xscvsxddp f13, f13
+; CHECK-P8-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-P8-NEXT:    xscvsxddp f4, v2
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    li r4, 96
+; CHECK-P8-NEXT:    xscvsxddp f31, v3
+; CHECK-P8-NEXT:    xxmrghd vs5, vs10, vs9
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    xxmrghd vs6, vs8, vs11
+; CHECK-P8-NEXT:    xxmrghd vs7, vs13, vs12
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    li r4, 80
+; CHECK-P8-NEXT:    xxswapd vs0, vs6
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    li r6, 64
+; CHECK-P8-NEXT:    xxmrghd vs2, vs31, vs4
+; CHECK-P8-NEXT:    xxswapd vs4, vs5
+; CHECK-P8-NEXT:    xxswapd vs5, vs7
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    stxvd2x vs4, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs5, 0, r3
+; CHECK-P8-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    addis r5, r2, .LCPI7_0@toc@ha
+; CHECK-P9-NEXT:    addis r6, r2, .LCPI7_1@toc@ha
+; CHECK-P9-NEXT:    addis r7, r2, .LCPI7_2@toc@ha
+; CHECK-P9-NEXT:    addis r8, r2, .LCPI7_3@toc@ha
+; CHECK-P9-NEXT:    lxv v0, 0(r4)
+; CHECK-P9-NEXT:    lxv v1, 16(r4)
+; CHECK-P9-NEXT:    addi r5, r5, .LCPI7_0@toc@l
+; CHECK-P9-NEXT:    addi r6, r6, .LCPI7_1@toc@l
+; CHECK-P9-NEXT:    addi r7, r7, .LCPI7_2@toc@l
+; CHECK-P9-NEXT:    addi r8, r8, .LCPI7_3@toc@l
+; CHECK-P9-NEXT:    lxvx v2, 0, r5
+; CHECK-P9-NEXT:    lxvx v3, 0, r6
+; CHECK-P9-NEXT:    lxvx v4, 0, r7
+; CHECK-P9-NEXT:    lxvx v5, 0, r8
+; CHECK-P9-NEXT:    vperm v6, v0, v0, v2
+; CHECK-P9-NEXT:    vperm v7, v0, v0, v3
+; CHECK-P9-NEXT:    vperm v8, v0, v0, v4
+; CHECK-P9-NEXT:    vperm v0, v0, v0, v5
+; CHECK-P9-NEXT:    vperm v2, v1, v1, v2
+; CHECK-P9-NEXT:    vperm v3, v1, v1, v3
+; CHECK-P9-NEXT:    vperm v4, v1, v1, v4
+; CHECK-P9-NEXT:    vperm v5, v1, v1, v5
+; CHECK-P9-NEXT:    vextsh2d v1, v6
+; CHECK-P9-NEXT:    vextsh2d v6, v7
+; CHECK-P9-NEXT:    vextsh2d v7, v8
+; CHECK-P9-NEXT:    vextsh2d v0, v0
+; CHECK-P9-NEXT:    vextsh2d v2, v2
+; CHECK-P9-NEXT:    vextsh2d v3, v3
+; CHECK-P9-NEXT:    vextsh2d v4, v4
+; CHECK-P9-NEXT:    vextsh2d v5, v5
+; CHECK-P9-NEXT:    xvcvsxddp vs0, v1
+; CHECK-P9-NEXT:    xvcvsxddp vs1, v6
+; CHECK-P9-NEXT:    xvcvsxddp vs2, v7
+; CHECK-P9-NEXT:    xvcvsxddp vs3, v0
+; CHECK-P9-NEXT:    xvcvsxddp vs4, v2
+; CHECK-P9-NEXT:    xvcvsxddp vs5, v3
+; CHECK-P9-NEXT:    xvcvsxddp vs6, v4
+; CHECK-P9-NEXT:    xvcvsxddp vs7, v5
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    stxv vs7, 112(r3)
+; CHECK-P9-NEXT:    stxv vs6, 96(r3)
+; CHECK-P9-NEXT:    stxv vs5, 80(r3)
+; CHECK-P9-NEXT:    stxv vs4, 64(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    addis r5, r2, .LCPI7_0@toc@ha
+; CHECK-BE-NEXT:    addis r6, r2, .LCPI7_1@toc@ha
+; CHECK-BE-NEXT:    addis r7, r2, .LCPI7_2@toc@ha
+; CHECK-BE-NEXT:    addis r8, r2, .LCPI7_3@toc@ha
+; CHECK-BE-NEXT:    lxv v2, 16(r4)
+; CHECK-BE-NEXT:    lxv v3, 0(r4)
+; CHECK-BE-NEXT:    xxlxor v6, v6, v6
+; CHECK-BE-NEXT:    addi r5, r5, .LCPI7_0@toc@l
+; CHECK-BE-NEXT:    addi r6, r6, .LCPI7_1@toc@l
+; CHECK-BE-NEXT:    addi r7, r7, .LCPI7_2@toc@l
+; CHECK-BE-NEXT:    addi r8, r8, .LCPI7_3@toc@l
+; CHECK-BE-NEXT:    lxvx v4, 0, r5
+; CHECK-BE-NEXT:    lxvx v5, 0, r6
+; CHECK-BE-NEXT:    lxvx v0, 0, r7
+; CHECK-BE-NEXT:    lxvx v1, 0, r8
+; CHECK-BE-NEXT:    vperm v7, v6, v3, v4
+; CHECK-BE-NEXT:    vperm v8, v6, v3, v5
+; CHECK-BE-NEXT:    vperm v4, v6, v2, v4
+; CHECK-BE-NEXT:    vperm v5, v6, v2, v5
+; CHECK-BE-NEXT:    vperm v6, v3, v3, v0
+; CHECK-BE-NEXT:    vperm v3, v3, v3, v1
+; CHECK-BE-NEXT:    vperm v0, v2, v2, v0
+; CHECK-BE-NEXT:    vperm v2, v2, v2, v1
+; CHECK-BE-NEXT:    vextsh2d v1, v7
+; CHECK-BE-NEXT:    vextsh2d v7, v8
+; CHECK-BE-NEXT:    vextsh2d v4, v4
+; CHECK-BE-NEXT:    vextsh2d v5, v5
+; CHECK-BE-NEXT:    vextsh2d v6, v6
+; CHECK-BE-NEXT:    vextsh2d v3, v3
+; CHECK-BE-NEXT:    vextsh2d v0, v0
+; CHECK-BE-NEXT:    vextsh2d v2, v2
+; CHECK-BE-NEXT:    xvcvsxddp vs0, v1
+; CHECK-BE-NEXT:    xvcvsxddp vs1, v7
+; CHECK-BE-NEXT:    xvcvsxddp vs2, v4
+; CHECK-BE-NEXT:    xvcvsxddp vs3, v5
+; CHECK-BE-NEXT:    xvcvsxddp vs4, v6
+; CHECK-BE-NEXT:    xvcvsxddp vs5, v3
+; CHECK-BE-NEXT:    xvcvsxddp vs6, v0
+; CHECK-BE-NEXT:    xvcvsxddp vs7, v2
+; CHECK-BE-NEXT:    stxv vs3, 112(r3)
+; CHECK-BE-NEXT:    stxv vs2, 80(r3)
+; CHECK-BE-NEXT:    stxv vs1, 48(r3)
+; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    stxv vs7, 96(r3)
+; CHECK-BE-NEXT:    stxv vs6, 64(r3)
+; CHECK-BE-NEXT:    stxv vs5, 32(r3)
+; CHECK-BE-NEXT:    stxv vs4, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <16 x i16>, <16 x i16>* %0, align 32
+  %1 = sitofp <16 x i16> %a to <16 x double>
+  store <16 x double> %1, <16 x double>* %agg.result, align 128
+  ret void
+}

Added: llvm/trunk/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll?rev=347090&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll Fri Nov 16 12:24:10 2018
@@ -0,0 +1,518 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P8
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P9
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE
+
+define <2 x double> @test2elt(i64 %a.coerce) local_unnamed_addr #0 { ; unsigned <2 x i32> -> <2 x double>; both i32 lanes arrive packed in one i64 argument
+; CHECK-P8-LABEL: test2elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxmrglw v2, v2, v2
+; CHECK-P8-NEXT:    xvcvuxwdp v2, v2
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxmrglw v2, v2, v2
+; CHECK-P9-NEXT:    xvcvuxwdp v2, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrd f0, r3
+; CHECK-BE-NEXT:    xxmrghw v2, vs0, vs0
+; CHECK-BE-NEXT:    xvcvuxwdp v2, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i64 %a.coerce to <2 x i32> ; reinterpret the 64-bit argument as two i32 lanes
+  %1 = uitofp <2 x i32> %0 to <2 x double> ; unsigned integer -> double, lane by lane
+  ret <2 x double> %1 ; result is returned by value
+}
+
+define void @test4elt(<4 x double>* noalias nocapture sret %agg.result, <4 x i32> %a) local_unnamed_addr #1 { ; unsigned <4 x i32> -> <4 x double>; result is written through the sret pointer
+; CHECK-P8-LABEL: test4elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xxmrglw v3, v2, v2
+; CHECK-P8-NEXT:    xxmrghw v2, v2, v2
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    xvcvuxwdp vs0, v3
+; CHECK-P8-NEXT:    xvcvuxwdp vs1, v2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test4elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xxmrglw v3, v2, v2
+; CHECK-P9-NEXT:    xxmrghw v2, v2, v2
+; CHECK-P9-NEXT:    xvcvuxwdp vs0, v3
+; CHECK-P9-NEXT:    xvcvuxwdp vs1, v2
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxmrghw v3, v2, v2
+; CHECK-BE-NEXT:    xxmrglw v2, v2, v2
+; CHECK-BE-NEXT:    xvcvuxwdp vs0, v3
+; CHECK-BE-NEXT:    xvcvuxwdp vs1, v2
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = uitofp <4 x i32> %a to <4 x double> ; unsigned integer -> double, lane by lane
+  store <4 x double> %0, <4 x double>* %agg.result, align 32 ; write all four doubles to the caller-provided result slot
+  ret void
+}
+
+define void @test8elt(<8 x double>* noalias nocapture sret %agg.result, <8 x i32>* nocapture readonly) local_unnamed_addr #2 { ; unsigned <8 x i32> -> <8 x double>; input is read from the unnamed pointer argument (%0)
+; CHECK-P8-LABEL: test8elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    lvx v2, r4, r5
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    xxmrglw v5, v3, v3
+; CHECK-P8-NEXT:    xxmrghw v3, v3, v3
+; CHECK-P8-NEXT:    xxmrglw v4, v2, v2
+; CHECK-P8-NEXT:    xxmrghw v2, v2, v2
+; CHECK-P8-NEXT:    xvcvuxwdp vs2, v5
+; CHECK-P8-NEXT:    xvcvuxwdp vs0, v4
+; CHECK-P8-NEXT:    xvcvuxwdp vs1, v2
+; CHECK-P8-NEXT:    xvcvuxwdp vs3, v3
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs2, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 16(r4)
+; CHECK-P9-NEXT:    lxv vs1, 0(r4)
+; CHECK-P9-NEXT:    xxmrglw v2, vs1, vs1
+; CHECK-P9-NEXT:    xxmrghw v3, vs1, vs1
+; CHECK-P9-NEXT:    xxmrglw v4, vs0, vs0
+; CHECK-P9-NEXT:    xxmrghw v5, vs0, vs0
+; CHECK-P9-NEXT:    xvcvuxwdp vs0, v2
+; CHECK-P9-NEXT:    xvcvuxwdp vs1, v3
+; CHECK-P9-NEXT:    xvcvuxwdp vs2, v4
+; CHECK-P9-NEXT:    xvcvuxwdp vs3, v5
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 16(r4)
+; CHECK-BE-NEXT:    lxv vs1, 0(r4)
+; CHECK-BE-NEXT:    xxmrghw v2, vs1, vs1
+; CHECK-BE-NEXT:    xxmrglw v3, vs1, vs1
+; CHECK-BE-NEXT:    xxmrghw v4, vs0, vs0
+; CHECK-BE-NEXT:    xxmrglw v5, vs0, vs0
+; CHECK-BE-NEXT:    xvcvuxwdp vs0, v2
+; CHECK-BE-NEXT:    xvcvuxwdp vs1, v3
+; CHECK-BE-NEXT:    xvcvuxwdp vs2, v4
+; CHECK-BE-NEXT:    xvcvuxwdp vs3, v5
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <8 x i32>, <8 x i32>* %0, align 32 ; %0 is the unnamed second parameter (source vector pointer)
+  %1 = uitofp <8 x i32> %a to <8 x double> ; unsigned integer -> double, lane by lane
+  store <8 x double> %1, <8 x double>* %agg.result, align 64 ; write all eight doubles to the caller-provided result slot
+  ret void
+}
+
+define void @test16elt(<16 x double>* noalias nocapture sret %agg.result, <16 x i32>* nocapture readonly) local_unnamed_addr #2 { ; unsigned <16 x i32> -> <16 x double>; input is read from the unnamed pointer argument (%0)
+; CHECK-P8-LABEL: test16elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r6, 48
+; CHECK-P8-NEXT:    li r7, 32
+; CHECK-P8-NEXT:    li r8, 64
+; CHECK-P8-NEXT:    lvx v2, r4, r5
+; CHECK-P8-NEXT:    lvx v3, r4, r6
+; CHECK-P8-NEXT:    lvx v0, r4, r7
+; CHECK-P8-NEXT:    xxmrglw v4, v2, v2
+; CHECK-P8-NEXT:    xxmrghw v5, v3, v3
+; CHECK-P8-NEXT:    xxmrghw v2, v2, v2
+; CHECK-P8-NEXT:    xxmrglw v3, v3, v3
+; CHECK-P8-NEXT:    xvcvuxwdp vs0, v4
+; CHECK-P8-NEXT:    lvx v4, 0, r4
+; CHECK-P8-NEXT:    li r4, 112
+; CHECK-P8-NEXT:    xvcvuxwdp vs1, v5
+; CHECK-P8-NEXT:    xxmrghw v5, v0, v0
+; CHECK-P8-NEXT:    xxmrglw v0, v0, v0
+; CHECK-P8-NEXT:    xvcvuxwdp vs2, v2
+; CHECK-P8-NEXT:    xxmrglw v2, v4, v4
+; CHECK-P8-NEXT:    xvcvuxwdp vs3, v3
+; CHECK-P8-NEXT:    xxmrghw v3, v4, v4
+; CHECK-P8-NEXT:    xvcvuxwdp vs4, v5
+; CHECK-P8-NEXT:    xvcvuxwdp vs5, v0
+; CHECK-P8-NEXT:    xvcvuxwdp vs6, v2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xvcvuxwdp vs7, v3
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    li r4, 96
+; CHECK-P8-NEXT:    xxswapd vs4, vs4
+; CHECK-P8-NEXT:    xxswapd vs1, vs5
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs5, vs6
+; CHECK-P8-NEXT:    li r4, 80
+; CHECK-P8-NEXT:    xxswapd vs3, vs7
+; CHECK-P8-NEXT:    stxvd2x vs4, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r8
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs5, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 16(r4)
+; CHECK-P9-NEXT:    lxv vs1, 0(r4)
+; CHECK-P9-NEXT:    lxv vs2, 48(r4)
+; CHECK-P9-NEXT:    lxv vs3, 32(r4)
+; CHECK-P9-NEXT:    xxmrglw v2, vs1, vs1
+; CHECK-P9-NEXT:    xxmrghw v3, vs1, vs1
+; CHECK-P9-NEXT:    xxmrglw v4, vs0, vs0
+; CHECK-P9-NEXT:    xxmrghw v5, vs0, vs0
+; CHECK-P9-NEXT:    xxmrglw v0, vs3, vs3
+; CHECK-P9-NEXT:    xxmrghw v1, vs3, vs3
+; CHECK-P9-NEXT:    xxmrglw v6, vs2, vs2
+; CHECK-P9-NEXT:    xxmrghw v7, vs2, vs2
+; CHECK-P9-NEXT:    xvcvuxwdp vs0, v2
+; CHECK-P9-NEXT:    xvcvuxwdp vs1, v3
+; CHECK-P9-NEXT:    xvcvuxwdp vs2, v4
+; CHECK-P9-NEXT:    xvcvuxwdp vs3, v5
+; CHECK-P9-NEXT:    xvcvuxwdp vs4, v0
+; CHECK-P9-NEXT:    xvcvuxwdp vs5, v1
+; CHECK-P9-NEXT:    xvcvuxwdp vs6, v6
+; CHECK-P9-NEXT:    xvcvuxwdp vs7, v7
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    stxv vs7, 112(r3)
+; CHECK-P9-NEXT:    stxv vs6, 96(r3)
+; CHECK-P9-NEXT:    stxv vs5, 80(r3)
+; CHECK-P9-NEXT:    stxv vs4, 64(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 16(r4)
+; CHECK-BE-NEXT:    lxv vs1, 0(r4)
+; CHECK-BE-NEXT:    lxv vs2, 48(r4)
+; CHECK-BE-NEXT:    lxv vs3, 32(r4)
+; CHECK-BE-NEXT:    xxmrghw v2, vs1, vs1
+; CHECK-BE-NEXT:    xxmrglw v3, vs1, vs1
+; CHECK-BE-NEXT:    xxmrghw v4, vs0, vs0
+; CHECK-BE-NEXT:    xxmrglw v5, vs0, vs0
+; CHECK-BE-NEXT:    xxmrghw v0, vs3, vs3
+; CHECK-BE-NEXT:    xxmrglw v1, vs3, vs3
+; CHECK-BE-NEXT:    xxmrghw v6, vs2, vs2
+; CHECK-BE-NEXT:    xxmrglw v7, vs2, vs2
+; CHECK-BE-NEXT:    xvcvuxwdp vs0, v2
+; CHECK-BE-NEXT:    xvcvuxwdp vs1, v3
+; CHECK-BE-NEXT:    xvcvuxwdp vs2, v4
+; CHECK-BE-NEXT:    xvcvuxwdp vs3, v5
+; CHECK-BE-NEXT:    xvcvuxwdp vs4, v0
+; CHECK-BE-NEXT:    xvcvuxwdp vs5, v1
+; CHECK-BE-NEXT:    xvcvuxwdp vs6, v6
+; CHECK-BE-NEXT:    xvcvuxwdp vs7, v7
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs7, 112(r3)
+; CHECK-BE-NEXT:    stxv vs6, 96(r3)
+; CHECK-BE-NEXT:    stxv vs5, 80(r3)
+; CHECK-BE-NEXT:    stxv vs4, 64(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <16 x i32>, <16 x i32>* %0, align 64 ; %0 is the unnamed second parameter (source vector pointer)
+  %1 = uitofp <16 x i32> %a to <16 x double> ; unsigned integer -> double, lane by lane
+  store <16 x double> %1, <16 x double>* %agg.result, align 128 ; write all sixteen doubles to the caller-provided result slot
+  ret void
+}
+
+define <2 x double> @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 { ; signed <2 x i32> -> <2 x double>; both i32 lanes arrive packed in one i64 argument
+; CHECK-P8-LABEL: test2elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxmrglw v2, v2, v2
+; CHECK-P8-NEXT:    xvcvsxwdp v2, v2
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxmrglw v2, v2, v2
+; CHECK-P9-NEXT:    xvcvsxwdp v2, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrd f0, r3
+; CHECK-BE-NEXT:    xxmrghw v2, vs0, vs0
+; CHECK-BE-NEXT:    xvcvsxwdp v2, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i64 %a.coerce to <2 x i32> ; reinterpret the 64-bit argument as two i32 lanes
+  %1 = sitofp <2 x i32> %0 to <2 x double> ; signed integer -> double, lane by lane
+  ret <2 x double> %1 ; result is returned by value
+}
+
+define void @test4elt_signed(<4 x double>* noalias nocapture sret %agg.result, <4 x i32> %a) local_unnamed_addr #1 { ; signed <4 x i32> -> <4 x double>; result is written through the sret pointer
+; CHECK-P8-LABEL: test4elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xxmrglw v3, v2, v2
+; CHECK-P8-NEXT:    xxmrghw v2, v2, v2
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    xvcvsxwdp vs0, v3
+; CHECK-P8-NEXT:    xvcvsxwdp vs1, v2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test4elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xxmrglw v3, v2, v2
+; CHECK-P9-NEXT:    xxmrghw v2, v2, v2
+; CHECK-P9-NEXT:    xvcvsxwdp vs0, v3
+; CHECK-P9-NEXT:    xvcvsxwdp vs1, v2
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxmrghw v3, v2, v2
+; CHECK-BE-NEXT:    xxmrglw v2, v2, v2
+; CHECK-BE-NEXT:    xvcvsxwdp vs0, v3
+; CHECK-BE-NEXT:    xvcvsxwdp vs1, v2
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = sitofp <4 x i32> %a to <4 x double> ; signed integer -> double, lane by lane
+  store <4 x double> %0, <4 x double>* %agg.result, align 32 ; write all four doubles to the caller-provided result slot
+  ret void
+}
+
+define void @test8elt_signed(<8 x double>* noalias nocapture sret %agg.result, <8 x i32>* nocapture readonly) local_unnamed_addr #2 { ; signed <8 x i32> -> <8 x double>; input is read from the unnamed pointer argument (%0)
+; CHECK-P8-LABEL: test8elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    lvx v2, r4, r5
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    xxmrglw v5, v3, v3
+; CHECK-P8-NEXT:    xxmrghw v3, v3, v3
+; CHECK-P8-NEXT:    xxmrglw v4, v2, v2
+; CHECK-P8-NEXT:    xxmrghw v2, v2, v2
+; CHECK-P8-NEXT:    xvcvsxwdp vs2, v5
+; CHECK-P8-NEXT:    xvcvsxwdp vs0, v4
+; CHECK-P8-NEXT:    xvcvsxwdp vs1, v2
+; CHECK-P8-NEXT:    xvcvsxwdp vs3, v3
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs2, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 16(r4)
+; CHECK-P9-NEXT:    lxv vs1, 0(r4)
+; CHECK-P9-NEXT:    xxmrglw v2, vs1, vs1
+; CHECK-P9-NEXT:    xxmrghw v3, vs1, vs1
+; CHECK-P9-NEXT:    xxmrglw v4, vs0, vs0
+; CHECK-P9-NEXT:    xxmrghw v5, vs0, vs0
+; CHECK-P9-NEXT:    xvcvsxwdp vs0, v2
+; CHECK-P9-NEXT:    xvcvsxwdp vs1, v3
+; CHECK-P9-NEXT:    xvcvsxwdp vs2, v4
+; CHECK-P9-NEXT:    xvcvsxwdp vs3, v5
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 16(r4)
+; CHECK-BE-NEXT:    lxv vs1, 0(r4)
+; CHECK-BE-NEXT:    xxmrghw v2, vs1, vs1
+; CHECK-BE-NEXT:    xxmrglw v3, vs1, vs1
+; CHECK-BE-NEXT:    xxmrghw v4, vs0, vs0
+; CHECK-BE-NEXT:    xxmrglw v5, vs0, vs0
+; CHECK-BE-NEXT:    xvcvsxwdp vs0, v2
+; CHECK-BE-NEXT:    xvcvsxwdp vs1, v3
+; CHECK-BE-NEXT:    xvcvsxwdp vs2, v4
+; CHECK-BE-NEXT:    xvcvsxwdp vs3, v5
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <8 x i32>, <8 x i32>* %0, align 32 ; %0 is the unnamed second parameter (source vector pointer)
+  %1 = sitofp <8 x i32> %a to <8 x double> ; signed integer -> double, lane by lane
+  store <8 x double> %1, <8 x double>* %agg.result, align 64 ; write all eight doubles to the caller-provided result slot
+  ret void
+}
+
+define void @test16elt_signed(<16 x double>* noalias nocapture sret %agg.result, <16 x i32>* nocapture readonly) local_unnamed_addr #2 { ; signed <16 x i32> -> <16 x double>; input is read from the unnamed pointer argument (%0)
+; CHECK-P8-LABEL: test16elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r6, 48
+; CHECK-P8-NEXT:    li r7, 32
+; CHECK-P8-NEXT:    li r8, 64
+; CHECK-P8-NEXT:    lvx v2, r4, r5
+; CHECK-P8-NEXT:    lvx v3, r4, r6
+; CHECK-P8-NEXT:    lvx v0, r4, r7
+; CHECK-P8-NEXT:    xxmrglw v4, v2, v2
+; CHECK-P8-NEXT:    xxmrghw v5, v3, v3
+; CHECK-P8-NEXT:    xxmrghw v2, v2, v2
+; CHECK-P8-NEXT:    xxmrglw v3, v3, v3
+; CHECK-P8-NEXT:    xvcvsxwdp vs0, v4
+; CHECK-P8-NEXT:    lvx v4, 0, r4
+; CHECK-P8-NEXT:    li r4, 112
+; CHECK-P8-NEXT:    xvcvsxwdp vs1, v5
+; CHECK-P8-NEXT:    xxmrghw v5, v0, v0
+; CHECK-P8-NEXT:    xxmrglw v0, v0, v0
+; CHECK-P8-NEXT:    xvcvsxwdp vs2, v2
+; CHECK-P8-NEXT:    xxmrglw v2, v4, v4
+; CHECK-P8-NEXT:    xvcvsxwdp vs3, v3
+; CHECK-P8-NEXT:    xxmrghw v3, v4, v4
+; CHECK-P8-NEXT:    xvcvsxwdp vs4, v5
+; CHECK-P8-NEXT:    xvcvsxwdp vs5, v0
+; CHECK-P8-NEXT:    xvcvsxwdp vs6, v2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xvcvsxwdp vs7, v3
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    li r4, 96
+; CHECK-P8-NEXT:    xxswapd vs4, vs4
+; CHECK-P8-NEXT:    xxswapd vs1, vs5
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs5, vs6
+; CHECK-P8-NEXT:    li r4, 80
+; CHECK-P8-NEXT:    xxswapd vs3, vs7
+; CHECK-P8-NEXT:    stxvd2x vs4, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r8
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs5, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 16(r4)
+; CHECK-P9-NEXT:    lxv vs1, 0(r4)
+; CHECK-P9-NEXT:    lxv vs2, 48(r4)
+; CHECK-P9-NEXT:    lxv vs3, 32(r4)
+; CHECK-P9-NEXT:    xxmrglw v2, vs1, vs1
+; CHECK-P9-NEXT:    xxmrghw v3, vs1, vs1
+; CHECK-P9-NEXT:    xxmrglw v4, vs0, vs0
+; CHECK-P9-NEXT:    xxmrghw v5, vs0, vs0
+; CHECK-P9-NEXT:    xxmrglw v0, vs3, vs3
+; CHECK-P9-NEXT:    xxmrghw v1, vs3, vs3
+; CHECK-P9-NEXT:    xxmrglw v6, vs2, vs2
+; CHECK-P9-NEXT:    xxmrghw v7, vs2, vs2
+; CHECK-P9-NEXT:    xvcvsxwdp vs0, v2
+; CHECK-P9-NEXT:    xvcvsxwdp vs1, v3
+; CHECK-P9-NEXT:    xvcvsxwdp vs2, v4
+; CHECK-P9-NEXT:    xvcvsxwdp vs3, v5
+; CHECK-P9-NEXT:    xvcvsxwdp vs4, v0
+; CHECK-P9-NEXT:    xvcvsxwdp vs5, v1
+; CHECK-P9-NEXT:    xvcvsxwdp vs6, v6
+; CHECK-P9-NEXT:    xvcvsxwdp vs7, v7
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    stxv vs7, 112(r3)
+; CHECK-P9-NEXT:    stxv vs6, 96(r3)
+; CHECK-P9-NEXT:    stxv vs5, 80(r3)
+; CHECK-P9-NEXT:    stxv vs4, 64(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 16(r4)
+; CHECK-BE-NEXT:    lxv vs1, 0(r4)
+; CHECK-BE-NEXT:    lxv vs2, 48(r4)
+; CHECK-BE-NEXT:    lxv vs3, 32(r4)
+; CHECK-BE-NEXT:    xxmrghw v2, vs1, vs1
+; CHECK-BE-NEXT:    xxmrglw v3, vs1, vs1
+; CHECK-BE-NEXT:    xxmrghw v4, vs0, vs0
+; CHECK-BE-NEXT:    xxmrglw v5, vs0, vs0
+; CHECK-BE-NEXT:    xxmrghw v0, vs3, vs3
+; CHECK-BE-NEXT:    xxmrglw v1, vs3, vs3
+; CHECK-BE-NEXT:    xxmrghw v6, vs2, vs2
+; CHECK-BE-NEXT:    xxmrglw v7, vs2, vs2
+; CHECK-BE-NEXT:    xvcvsxwdp vs0, v2
+; CHECK-BE-NEXT:    xvcvsxwdp vs1, v3
+; CHECK-BE-NEXT:    xvcvsxwdp vs2, v4
+; CHECK-BE-NEXT:    xvcvsxwdp vs3, v5
+; CHECK-BE-NEXT:    xvcvsxwdp vs4, v0
+; CHECK-BE-NEXT:    xvcvsxwdp vs5, v1
+; CHECK-BE-NEXT:    xvcvsxwdp vs6, v6
+; CHECK-BE-NEXT:    xvcvsxwdp vs7, v7
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs7, 112(r3)
+; CHECK-BE-NEXT:    stxv vs6, 96(r3)
+; CHECK-BE-NEXT:    stxv vs5, 80(r3)
+; CHECK-BE-NEXT:    stxv vs4, 64(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <16 x i32>, <16 x i32>* %0, align 64 ; %0 is the unnamed second parameter (source vector pointer)
+  %1 = sitofp <16 x i32> %a to <16 x double> ; signed integer -> double, lane by lane
+  store <16 x double> %1, <16 x double>* %agg.result, align 128 ; write all sixteen doubles to the caller-provided result slot
+  ret void
+}

Added: llvm/trunk/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll?rev=347090&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll Fri Nov 16 12:24:10 2018
@@ -0,0 +1,844 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P8
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P9
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE
+
+define i64 @test2elt(<2 x i64> %a) local_unnamed_addr #0 {
+; CHECK-P8-LABEL: test2elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xxlor vs1, v2, v2
+; CHECK-P8-NEXT:    xscvuxdsp f1, f1
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
+; CHECK-P8-NEXT:    xscvdpspn vs1, f1
+; CHECK-P8-NEXT:    xscvdpspn vs0, f0
+; CHECK-P8-NEXT:    xxsldwi v3, vs1, vs1, 1
+; CHECK-P8-NEXT:    xxsldwi v2, vs0, vs0, 1
+; CHECK-P8-NEXT:    vmrglw v2, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xxswapd vs0, v2
+; CHECK-P9-NEXT:    xxlor vs1, v2, v2
+; CHECK-P9-NEXT:    xscvuxdsp f1, f1
+; CHECK-P9-NEXT:    xscvuxdsp f0, f0
+; CHECK-P9-NEXT:    xscvdpspn vs1, f1
+; CHECK-P9-NEXT:    xscvdpspn vs0, f0
+; CHECK-P9-NEXT:    xxsldwi v3, vs1, vs1, 1
+; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 1
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxswapd vs0, v2
+; CHECK-BE-NEXT:    xxlor vs1, v2, v2
+; CHECK-BE-NEXT:    xscvuxdsp f1, f1
+; CHECK-BE-NEXT:    xscvuxdsp f0, f0
+; CHECK-BE-NEXT:    xscvdpspn v2, f1
+; CHECK-BE-NEXT:    xscvdpspn v3, f0
+; CHECK-BE-NEXT:    vmrghw v2, v2, v3
+; CHECK-BE-NEXT:    mfvsrd r3, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = uitofp <2 x i64> %a to <2 x float>
+  %1 = bitcast <2 x float> %0 to i64
+  ret i64 %1
+}
+
+define <4 x float> @test4elt(<4 x i64>* nocapture readonly) local_unnamed_addr #1 {
+; CHECK-P8-LABEL: test4elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs3, vs1
+; CHECK-P8-NEXT:    xscvuxdsp f1, f1
+; CHECK-P8-NEXT:    xxswapd vs2, vs0
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
+; CHECK-P8-NEXT:    xscvuxdsp f3, f3
+; CHECK-P8-NEXT:    xscvuxdsp f2, f2
+; CHECK-P8-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-P8-NEXT:    xxmrghd vs1, vs2, vs3
+; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P8-NEXT:    xvcvdpsp v3, vs1
+; CHECK-P8-NEXT:    vmrgew v2, v3, v2
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test4elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 16(r3)
+; CHECK-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-P9-NEXT:    xxswapd vs2, vs1
+; CHECK-P9-NEXT:    xxswapd vs3, vs0
+; CHECK-P9-NEXT:    xscvuxdsp f1, f1
+; CHECK-P9-NEXT:    xscvuxdsp f0, f0
+; CHECK-P9-NEXT:    xscvuxdsp f2, f2
+; CHECK-P9-NEXT:    xscvuxdsp f3, f3
+; CHECK-P9-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-P9-NEXT:    xxmrghd vs2, vs3, vs2
+; CHECK-P9-NEXT:    xvcvdpsp v3, vs0
+; CHECK-P9-NEXT:    xvcvdpsp v2, vs2
+; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    xxswapd vs2, vs1
+; CHECK-BE-NEXT:    xxswapd vs3, vs0
+; CHECK-BE-NEXT:    xscvuxdsp f1, f1
+; CHECK-BE-NEXT:    xscvuxdsp f0, f0
+; CHECK-BE-NEXT:    xscvuxdsp f2, f2
+; CHECK-BE-NEXT:    xscvuxdsp f3, f3
+; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
+; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
+; CHECK-BE-NEXT:    vmrgew v2, v2, v3
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <4 x i64>, <4 x i64>* %0, align 32
+  %1 = uitofp <4 x i64> %a to <4 x float>
+  ret <4 x float> %1
+}
+
+define void @test8elt(<8 x float>* noalias nocapture sret %agg.result, <8 x i64>* nocapture readonly) local_unnamed_addr #2 {
+; CHECK-P8-LABEL: test8elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    li r6, 48
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r5
+; CHECK-P8-NEXT:    xxswapd vs7, vs3
+; CHECK-P8-NEXT:    xscvuxdsp f3, f3
+; CHECK-P8-NEXT:    xxswapd vs4, vs0
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
+; CHECK-P8-NEXT:    xxswapd vs5, vs1
+; CHECK-P8-NEXT:    xscvuxdsp f1, f1
+; CHECK-P8-NEXT:    xxswapd vs6, vs2
+; CHECK-P8-NEXT:    xscvuxdsp f2, f2
+; CHECK-P8-NEXT:    xscvuxdsp f4, f4
+; CHECK-P8-NEXT:    xscvuxdsp f5, f5
+; CHECK-P8-NEXT:    xscvuxdsp f6, f6
+; CHECK-P8-NEXT:    xscvuxdsp f7, f7
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxmrghd vs1, vs2, vs3
+; CHECK-P8-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P8-NEXT:    xvcvdpsp v3, vs1
+; CHECK-P8-NEXT:    xxmrghd vs0, vs6, vs7
+; CHECK-P8-NEXT:    xvcvdpsp v4, vs2
+; CHECK-P8-NEXT:    xvcvdpsp v5, vs0
+; CHECK-P8-NEXT:    vmrgew v2, v4, v2
+; CHECK-P8-NEXT:    vmrgew v3, v5, v3
+; CHECK-P8-NEXT:    stvx v2, r3, r5
+; CHECK-P8-NEXT:    stvx v3, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 48(r4)
+; CHECK-P9-NEXT:    lxv vs1, 32(r4)
+; CHECK-P9-NEXT:    lxv vs2, 16(r4)
+; CHECK-P9-NEXT:    lxv vs3, 0(r4)
+; CHECK-P9-NEXT:    xxswapd vs4, vs3
+; CHECK-P9-NEXT:    xxswapd vs5, vs2
+; CHECK-P9-NEXT:    xxswapd vs6, vs1
+; CHECK-P9-NEXT:    xxswapd vs7, vs0
+; CHECK-P9-NEXT:    xscvuxdsp f3, f3
+; CHECK-P9-NEXT:    xscvuxdsp f2, f2
+; CHECK-P9-NEXT:    xscvuxdsp f1, f1
+; CHECK-P9-NEXT:    xscvuxdsp f0, f0
+; CHECK-P9-NEXT:    xscvuxdsp f4, f4
+; CHECK-P9-NEXT:    xscvuxdsp f5, f5
+; CHECK-P9-NEXT:    xscvuxdsp f6, f6
+; CHECK-P9-NEXT:    xscvuxdsp f7, f7
+; CHECK-P9-NEXT:    xxmrghd vs2, vs2, vs3
+; CHECK-P9-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-P9-NEXT:    xxmrghd vs4, vs5, vs4
+; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-P9-NEXT:    xvcvdpsp v3, vs2
+; CHECK-P9-NEXT:    xvcvdpsp v5, vs0
+; CHECK-P9-NEXT:    xvcvdpsp v2, vs4
+; CHECK-P9-NEXT:    xvcvdpsp v4, vs3
+; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    vmrgew v3, v5, v4
+; CHECK-P9-NEXT:    stxv v3, 16(r3)
+; CHECK-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 32(r4)
+; CHECK-BE-NEXT:    lxv vs1, 48(r4)
+; CHECK-BE-NEXT:    lxv vs2, 0(r4)
+; CHECK-BE-NEXT:    lxv vs3, 16(r4)
+; CHECK-BE-NEXT:    xxswapd vs4, vs3
+; CHECK-BE-NEXT:    xxswapd vs5, vs2
+; CHECK-BE-NEXT:    xxswapd vs6, vs1
+; CHECK-BE-NEXT:    xxswapd vs7, vs0
+; CHECK-BE-NEXT:    xscvuxdsp f3, f3
+; CHECK-BE-NEXT:    xscvuxdsp f2, f2
+; CHECK-BE-NEXT:    xscvuxdsp f1, f1
+; CHECK-BE-NEXT:    xscvuxdsp f0, f0
+; CHECK-BE-NEXT:    xscvuxdsp f4, f4
+; CHECK-BE-NEXT:    xscvuxdsp f5, f5
+; CHECK-BE-NEXT:    xscvuxdsp f6, f6
+; CHECK-BE-NEXT:    xscvuxdsp f7, f7
+; CHECK-BE-NEXT:    xxmrghd vs2, vs2, vs3
+; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-BE-NEXT:    xxmrghd vs3, vs5, vs4
+; CHECK-BE-NEXT:    xxmrghd vs1, vs7, vs6
+; CHECK-BE-NEXT:    xvcvdpsp v2, vs2
+; CHECK-BE-NEXT:    xvcvdpsp v4, vs0
+; CHECK-BE-NEXT:    xvcvdpsp v3, vs3
+; CHECK-BE-NEXT:    xvcvdpsp v5, vs1
+; CHECK-BE-NEXT:    vmrgew v2, v2, v3
+; CHECK-BE-NEXT:    vmrgew v3, v4, v5
+; CHECK-BE-NEXT:    stxv v3, 16(r3)
+; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <8 x i64>, <8 x i64>* %0, align 64
+  %1 = uitofp <8 x i64> %a to <8 x float>
+  store <8 x float> %1, <8 x float>* %agg.result, align 32
+  ret void
+}
+
+define void @test16elt(<16 x float>* noalias nocapture sret %agg.result, <16 x i64>* nocapture readonly) local_unnamed_addr #2 {
+; CHECK-P8-LABEL: test16elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r7, 64
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    li r6, 48
+; CHECK-P8-NEXT:    lxvd2x vs11, 0, r4
+; CHECK-P8-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
+; CHECK-P8-NEXT:    lxvd2x vs8, r4, r7
+; CHECK-P8-NEXT:    li r7, 80
+; CHECK-P8-NEXT:    lxvd2x vs6, r4, r5
+; CHECK-P8-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-P8-NEXT:    lxvd2x vs7, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT:    li r7, 96
+; CHECK-P8-NEXT:    lxvd2x vs3, r4, r7
+; CHECK-P8-NEXT:    li r7, 112
+; CHECK-P8-NEXT:    xscvuxdsp f30, f11
+; CHECK-P8-NEXT:    xxswapd vs11, vs11
+; CHECK-P8-NEXT:    lxvd2x vs4, r4, r7
+; CHECK-P8-NEXT:    li r7, 16
+; CHECK-P8-NEXT:    xscvuxdsp f0, f6
+; CHECK-P8-NEXT:    xxswapd vs6, vs6
+; CHECK-P8-NEXT:    xscvuxdsp f1, f7
+; CHECK-P8-NEXT:    lxvd2x vs9, r4, r7
+; CHECK-P8-NEXT:    xxswapd vs7, vs7
+; CHECK-P8-NEXT:    xscvuxdsp f5, f8
+; CHECK-P8-NEXT:    xxswapd vs8, vs8
+; CHECK-P8-NEXT:    xscvuxdsp f10, f2
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xscvuxdsp f12, f3
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xscvuxdsp f13, f4
+; CHECK-P8-NEXT:    xxswapd vs4, vs4
+; CHECK-P8-NEXT:    xscvuxdsp f31, f9
+; CHECK-P8-NEXT:    xxswapd vs9, vs9
+; CHECK-P8-NEXT:    xscvuxdsp f6, f6
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    xscvuxdsp f7, f7
+; CHECK-P8-NEXT:    xscvuxdsp f8, f8
+; CHECK-P8-NEXT:    xxmrghd vs5, vs10, vs5
+; CHECK-P8-NEXT:    xscvuxdsp f2, f2
+; CHECK-P8-NEXT:    xscvuxdsp f3, f3
+; CHECK-P8-NEXT:    xxmrghd vs10, vs13, vs12
+; CHECK-P8-NEXT:    xscvuxdsp f4, f4
+; CHECK-P8-NEXT:    xscvuxdsp f1, f9
+; CHECK-P8-NEXT:    xscvuxdsp f9, f11
+; CHECK-P8-NEXT:    xxmrghd vs11, vs31, vs30
+; CHECK-P8-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-P8-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P8-NEXT:    xxmrghd vs0, vs7, vs6
+; CHECK-P8-NEXT:    xxmrghd vs2, vs2, vs8
+; CHECK-P8-NEXT:    xvcvdpsp v3, vs5
+; CHECK-P8-NEXT:    xvcvdpsp v4, vs10
+; CHECK-P8-NEXT:    xxmrghd vs3, vs4, vs3
+; CHECK-P8-NEXT:    xvcvdpsp v5, vs11
+; CHECK-P8-NEXT:    xvcvdpsp v0, vs0
+; CHECK-P8-NEXT:    xxmrghd vs1, vs1, vs9
+; CHECK-P8-NEXT:    xvcvdpsp v1, vs2
+; CHECK-P8-NEXT:    xvcvdpsp v6, vs3
+; CHECK-P8-NEXT:    xvcvdpsp v7, vs1
+; CHECK-P8-NEXT:    vmrgew v2, v0, v2
+; CHECK-P8-NEXT:    vmrgew v3, v1, v3
+; CHECK-P8-NEXT:    vmrgew v4, v6, v4
+; CHECK-P8-NEXT:    vmrgew v5, v7, v5
+; CHECK-P8-NEXT:    stvx v2, r3, r7
+; CHECK-P8-NEXT:    stvx v3, r3, r5
+; CHECK-P8-NEXT:    stvx v4, r3, r6
+; CHECK-P8-NEXT:    stvx v5, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs4, 48(r4)
+; CHECK-P9-NEXT:    lxv vs5, 32(r4)
+; CHECK-P9-NEXT:    lxv vs6, 16(r4)
+; CHECK-P9-NEXT:    lxv vs7, 0(r4)
+; CHECK-P9-NEXT:    lxv vs8, 112(r4)
+; CHECK-P9-NEXT:    lxv vs9, 96(r4)
+; CHECK-P9-NEXT:    lxv vs10, 80(r4)
+; CHECK-P9-NEXT:    lxv vs11, 64(r4)
+; CHECK-P9-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    xxswapd vs0, vs7
+; CHECK-P9-NEXT:    xxswapd vs1, vs6
+; CHECK-P9-NEXT:    xxswapd vs2, vs5
+; CHECK-P9-NEXT:    xxswapd vs3, vs4
+; CHECK-P9-NEXT:    xxswapd vs12, vs11
+; CHECK-P9-NEXT:    xxswapd vs13, vs10
+; CHECK-P9-NEXT:    xxswapd vs31, vs9
+; CHECK-P9-NEXT:    xxswapd vs30, vs8
+; CHECK-P9-NEXT:    xscvuxdsp f7, f7
+; CHECK-P9-NEXT:    xscvuxdsp f6, f6
+; CHECK-P9-NEXT:    xscvuxdsp f5, f5
+; CHECK-P9-NEXT:    xscvuxdsp f4, f4
+; CHECK-P9-NEXT:    xscvuxdsp f11, f11
+; CHECK-P9-NEXT:    xscvuxdsp f10, f10
+; CHECK-P9-NEXT:    xscvuxdsp f9, f9
+; CHECK-P9-NEXT:    xscvuxdsp f8, f8
+; CHECK-P9-NEXT:    xscvuxdsp f0, f0
+; CHECK-P9-NEXT:    xscvuxdsp f1, f1
+; CHECK-P9-NEXT:    xscvuxdsp f2, f2
+; CHECK-P9-NEXT:    xscvuxdsp f3, f3
+; CHECK-P9-NEXT:    xscvuxdsp f12, f12
+; CHECK-P9-NEXT:    xscvuxdsp f13, f13
+; CHECK-P9-NEXT:    xscvuxdsp f31, f31
+; CHECK-P9-NEXT:    xscvuxdsp f30, f30
+; CHECK-P9-NEXT:    xxmrghd vs6, vs6, vs7
+; CHECK-P9-NEXT:    xxmrghd vs4, vs4, vs5
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT:    xxmrghd vs2, vs10, vs11
+; CHECK-P9-NEXT:    xxmrghd vs3, vs8, vs9
+; CHECK-P9-NEXT:    xxmrghd vs5, vs13, vs12
+; CHECK-P9-NEXT:    xxmrghd vs7, vs30, vs31
+; CHECK-P9-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    xvcvdpsp v2, vs6
+; CHECK-P9-NEXT:    xvcvdpsp v3, vs4
+; CHECK-P9-NEXT:    xvcvdpsp v4, vs0
+; CHECK-P9-NEXT:    xvcvdpsp v5, vs1
+; CHECK-P9-NEXT:    xvcvdpsp v0, vs5
+; CHECK-P9-NEXT:    xvcvdpsp v1, vs2
+; CHECK-P9-NEXT:    xvcvdpsp v6, vs7
+; CHECK-P9-NEXT:    xvcvdpsp v7, vs3
+; CHECK-P9-NEXT:    vmrgew v2, v2, v4
+; CHECK-P9-NEXT:    vmrgew v3, v3, v5
+; CHECK-P9-NEXT:    vmrgew v4, v1, v0
+; CHECK-P9-NEXT:    vmrgew v5, v7, v6
+; CHECK-P9-NEXT:    stxv v4, 32(r3)
+; CHECK-P9-NEXT:    stxv v3, 16(r3)
+; CHECK-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-P9-NEXT:    stxv v5, 48(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs2, 32(r4)
+; CHECK-BE-NEXT:    lxv vs3, 48(r4)
+; CHECK-BE-NEXT:    lxv vs4, 0(r4)
+; CHECK-BE-NEXT:    lxv vs5, 16(r4)
+; CHECK-BE-NEXT:    lxv vs6, 96(r4)
+; CHECK-BE-NEXT:    lxv vs7, 112(r4)
+; CHECK-BE-NEXT:    lxv vs8, 64(r4)
+; CHECK-BE-NEXT:    lxv vs9, 80(r4)
+; CHECK-BE-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    xxswapd vs0, vs5
+; CHECK-BE-NEXT:    xxswapd vs1, vs4
+; CHECK-BE-NEXT:    xxswapd vs10, vs3
+; CHECK-BE-NEXT:    xxswapd vs11, vs2
+; CHECK-BE-NEXT:    xxswapd vs12, vs9
+; CHECK-BE-NEXT:    xxswapd vs13, vs8
+; CHECK-BE-NEXT:    xxswapd vs31, vs7
+; CHECK-BE-NEXT:    xxswapd vs30, vs6
+; CHECK-BE-NEXT:    xscvuxdsp f5, f5
+; CHECK-BE-NEXT:    xscvuxdsp f4, f4
+; CHECK-BE-NEXT:    xscvuxdsp f3, f3
+; CHECK-BE-NEXT:    xscvuxdsp f2, f2
+; CHECK-BE-NEXT:    xscvuxdsp f9, f9
+; CHECK-BE-NEXT:    xscvuxdsp f8, f8
+; CHECK-BE-NEXT:    xscvuxdsp f7, f7
+; CHECK-BE-NEXT:    xscvuxdsp f6, f6
+; CHECK-BE-NEXT:    xscvuxdsp f0, f0
+; CHECK-BE-NEXT:    xscvuxdsp f1, f1
+; CHECK-BE-NEXT:    xscvuxdsp f10, f10
+; CHECK-BE-NEXT:    xscvuxdsp f11, f11
+; CHECK-BE-NEXT:    xscvuxdsp f12, f12
+; CHECK-BE-NEXT:    xscvuxdsp f13, f13
+; CHECK-BE-NEXT:    xscvuxdsp f31, f31
+; CHECK-BE-NEXT:    xscvuxdsp f30, f30
+; CHECK-BE-NEXT:    xxmrghd vs4, vs4, vs5
+; CHECK-BE-NEXT:    xxmrghd vs2, vs2, vs3
+; CHECK-BE-NEXT:    xxmrghd vs3, vs8, vs9
+; CHECK-BE-NEXT:    xxmrghd vs5, vs6, vs7
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs1, vs11, vs10
+; CHECK-BE-NEXT:    xxmrghd vs6, vs13, vs12
+; CHECK-BE-NEXT:    xxmrghd vs7, vs30, vs31
+; CHECK-BE-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    xvcvdpsp v2, vs4
+; CHECK-BE-NEXT:    xvcvdpsp v3, vs2
+; CHECK-BE-NEXT:    xvcvdpsp v0, vs3
+; CHECK-BE-NEXT:    xvcvdpsp v6, vs5
+; CHECK-BE-NEXT:    xvcvdpsp v4, vs0
+; CHECK-BE-NEXT:    xvcvdpsp v5, vs1
+; CHECK-BE-NEXT:    xvcvdpsp v1, vs6
+; CHECK-BE-NEXT:    xvcvdpsp v7, vs7
+; CHECK-BE-NEXT:    vmrgew v2, v2, v4
+; CHECK-BE-NEXT:    vmrgew v3, v3, v5
+; CHECK-BE-NEXT:    vmrgew v4, v0, v1
+; CHECK-BE-NEXT:    vmrgew v5, v6, v7
+; CHECK-BE-NEXT:    stxv v5, 48(r3)
+; CHECK-BE-NEXT:    stxv v4, 32(r3)
+; CHECK-BE-NEXT:    stxv v3, 16(r3)
+; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <16 x i64>, <16 x i64>* %0, align 128
+  %1 = uitofp <16 x i64> %a to <16 x float>
+  store <16 x float> %1, <16 x float>* %agg.result, align 64
+  ret void
+}
+
+define i64 @test2elt_signed(<2 x i64> %a) local_unnamed_addr #0 {
+; CHECK-P8-LABEL: test2elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xxlor vs1, v2, v2
+; CHECK-P8-NEXT:    xscvsxdsp f1, f1
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
+; CHECK-P8-NEXT:    xscvdpspn vs1, f1
+; CHECK-P8-NEXT:    xscvdpspn vs0, f0
+; CHECK-P8-NEXT:    xxsldwi v3, vs1, vs1, 1
+; CHECK-P8-NEXT:    xxsldwi v2, vs0, vs0, 1
+; CHECK-P8-NEXT:    vmrglw v2, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xxswapd vs0, v2
+; CHECK-P9-NEXT:    xxlor vs1, v2, v2
+; CHECK-P9-NEXT:    xscvsxdsp f1, f1
+; CHECK-P9-NEXT:    xscvsxdsp f0, f0
+; CHECK-P9-NEXT:    xscvdpspn vs1, f1
+; CHECK-P9-NEXT:    xscvdpspn vs0, f0
+; CHECK-P9-NEXT:    xxsldwi v3, vs1, vs1, 1
+; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 1
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxswapd vs0, v2
+; CHECK-BE-NEXT:    xxlor vs1, v2, v2
+; CHECK-BE-NEXT:    xscvsxdsp f1, f1
+; CHECK-BE-NEXT:    xscvsxdsp f0, f0
+; CHECK-BE-NEXT:    xscvdpspn v2, f1
+; CHECK-BE-NEXT:    xscvdpspn v3, f0
+; CHECK-BE-NEXT:    vmrghw v2, v2, v3
+; CHECK-BE-NEXT:    mfvsrd r3, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = sitofp <2 x i64> %a to <2 x float>
+  %1 = bitcast <2 x float> %0 to i64
+  ret i64 %1
+}
+
+define <4 x float> @test4elt_signed(<4 x i64>* nocapture readonly) local_unnamed_addr #1 {
+; CHECK-P8-LABEL: test4elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs3, vs1
+; CHECK-P8-NEXT:    xscvsxdsp f1, f1
+; CHECK-P8-NEXT:    xxswapd vs2, vs0
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
+; CHECK-P8-NEXT:    xscvsxdsp f3, f3
+; CHECK-P8-NEXT:    xscvsxdsp f2, f2
+; CHECK-P8-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-P8-NEXT:    xxmrghd vs1, vs2, vs3
+; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P8-NEXT:    xvcvdpsp v3, vs1
+; CHECK-P8-NEXT:    vmrgew v2, v3, v2
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test4elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 16(r3)
+; CHECK-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-P9-NEXT:    xxswapd vs2, vs1
+; CHECK-P9-NEXT:    xxswapd vs3, vs0
+; CHECK-P9-NEXT:    xscvsxdsp f1, f1
+; CHECK-P9-NEXT:    xscvsxdsp f0, f0
+; CHECK-P9-NEXT:    xscvsxdsp f2, f2
+; CHECK-P9-NEXT:    xscvsxdsp f3, f3
+; CHECK-P9-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-P9-NEXT:    xxmrghd vs2, vs3, vs2
+; CHECK-P9-NEXT:    xvcvdpsp v3, vs0
+; CHECK-P9-NEXT:    xvcvdpsp v2, vs2
+; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    xxswapd vs2, vs1
+; CHECK-BE-NEXT:    xxswapd vs3, vs0
+; CHECK-BE-NEXT:    xscvsxdsp f1, f1
+; CHECK-BE-NEXT:    xscvsxdsp f0, f0
+; CHECK-BE-NEXT:    xscvsxdsp f2, f2
+; CHECK-BE-NEXT:    xscvsxdsp f3, f3
+; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
+; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
+; CHECK-BE-NEXT:    vmrgew v2, v2, v3
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <4 x i64>, <4 x i64>* %0, align 32
+  %1 = sitofp <4 x i64> %a to <4 x float>
+  ret <4 x float> %1
+}
+
+define void @test8elt_signed(<8 x float>* noalias nocapture sret %agg.result, <8 x i64>* nocapture readonly) local_unnamed_addr #2 {
+; CHECK-P8-LABEL: test8elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    li r6, 48
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r5
+; CHECK-P8-NEXT:    xxswapd vs7, vs3
+; CHECK-P8-NEXT:    xscvsxdsp f3, f3
+; CHECK-P8-NEXT:    xxswapd vs4, vs0
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
+; CHECK-P8-NEXT:    xxswapd vs5, vs1
+; CHECK-P8-NEXT:    xscvsxdsp f1, f1
+; CHECK-P8-NEXT:    xxswapd vs6, vs2
+; CHECK-P8-NEXT:    xscvsxdsp f2, f2
+; CHECK-P8-NEXT:    xscvsxdsp f4, f4
+; CHECK-P8-NEXT:    xscvsxdsp f5, f5
+; CHECK-P8-NEXT:    xscvsxdsp f6, f6
+; CHECK-P8-NEXT:    xscvsxdsp f7, f7
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxmrghd vs1, vs2, vs3
+; CHECK-P8-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P8-NEXT:    xvcvdpsp v3, vs1
+; CHECK-P8-NEXT:    xxmrghd vs0, vs6, vs7
+; CHECK-P8-NEXT:    xvcvdpsp v4, vs2
+; CHECK-P8-NEXT:    xvcvdpsp v5, vs0
+; CHECK-P8-NEXT:    vmrgew v2, v4, v2
+; CHECK-P8-NEXT:    vmrgew v3, v5, v3
+; CHECK-P8-NEXT:    stvx v2, r3, r5
+; CHECK-P8-NEXT:    stvx v3, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 48(r4)
+; CHECK-P9-NEXT:    lxv vs1, 32(r4)
+; CHECK-P9-NEXT:    lxv vs2, 16(r4)
+; CHECK-P9-NEXT:    lxv vs3, 0(r4)
+; CHECK-P9-NEXT:    xxswapd vs4, vs3
+; CHECK-P9-NEXT:    xxswapd vs5, vs2
+; CHECK-P9-NEXT:    xxswapd vs6, vs1
+; CHECK-P9-NEXT:    xxswapd vs7, vs0
+; CHECK-P9-NEXT:    xscvsxdsp f3, f3
+; CHECK-P9-NEXT:    xscvsxdsp f2, f2
+; CHECK-P9-NEXT:    xscvsxdsp f1, f1
+; CHECK-P9-NEXT:    xscvsxdsp f0, f0
+; CHECK-P9-NEXT:    xscvsxdsp f4, f4
+; CHECK-P9-NEXT:    xscvsxdsp f5, f5
+; CHECK-P9-NEXT:    xscvsxdsp f6, f6
+; CHECK-P9-NEXT:    xscvsxdsp f7, f7
+; CHECK-P9-NEXT:    xxmrghd vs2, vs2, vs3
+; CHECK-P9-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-P9-NEXT:    xxmrghd vs4, vs5, vs4
+; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-P9-NEXT:    xvcvdpsp v3, vs2
+; CHECK-P9-NEXT:    xvcvdpsp v5, vs0
+; CHECK-P9-NEXT:    xvcvdpsp v2, vs4
+; CHECK-P9-NEXT:    xvcvdpsp v4, vs3
+; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    vmrgew v3, v5, v4
+; CHECK-P9-NEXT:    stxv v3, 16(r3)
+; CHECK-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 32(r4)
+; CHECK-BE-NEXT:    lxv vs1, 48(r4)
+; CHECK-BE-NEXT:    lxv vs2, 0(r4)
+; CHECK-BE-NEXT:    lxv vs3, 16(r4)
+; CHECK-BE-NEXT:    xxswapd vs4, vs3
+; CHECK-BE-NEXT:    xxswapd vs5, vs2
+; CHECK-BE-NEXT:    xxswapd vs6, vs1
+; CHECK-BE-NEXT:    xxswapd vs7, vs0
+; CHECK-BE-NEXT:    xscvsxdsp f3, f3
+; CHECK-BE-NEXT:    xscvsxdsp f2, f2
+; CHECK-BE-NEXT:    xscvsxdsp f1, f1
+; CHECK-BE-NEXT:    xscvsxdsp f0, f0
+; CHECK-BE-NEXT:    xscvsxdsp f4, f4
+; CHECK-BE-NEXT:    xscvsxdsp f5, f5
+; CHECK-BE-NEXT:    xscvsxdsp f6, f6
+; CHECK-BE-NEXT:    xscvsxdsp f7, f7
+; CHECK-BE-NEXT:    xxmrghd vs2, vs2, vs3
+; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-BE-NEXT:    xxmrghd vs3, vs5, vs4
+; CHECK-BE-NEXT:    xxmrghd vs1, vs7, vs6
+; CHECK-BE-NEXT:    xvcvdpsp v2, vs2
+; CHECK-BE-NEXT:    xvcvdpsp v4, vs0
+; CHECK-BE-NEXT:    xvcvdpsp v3, vs3
+; CHECK-BE-NEXT:    xvcvdpsp v5, vs1
+; CHECK-BE-NEXT:    vmrgew v2, v2, v3
+; CHECK-BE-NEXT:    vmrgew v3, v4, v5
+; CHECK-BE-NEXT:    stxv v3, 16(r3)
+; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <8 x i64>, <8 x i64>* %0, align 64
+  %1 = sitofp <8 x i64> %a to <8 x float>
+  store <8 x float> %1, <8 x float>* %agg.result, align 32
+  ret void
+}
+
+define void @test16elt_signed(<16 x float>* noalias nocapture sret %agg.result, <16 x i64>* nocapture readonly) local_unnamed_addr #2 {
+; CHECK-P8-LABEL: test16elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r7, 64
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    li r6, 48
+; CHECK-P8-NEXT:    lxvd2x vs11, 0, r4
+; CHECK-P8-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
+; CHECK-P8-NEXT:    lxvd2x vs8, r4, r7
+; CHECK-P8-NEXT:    li r7, 80
+; CHECK-P8-NEXT:    lxvd2x vs6, r4, r5
+; CHECK-P8-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-P8-NEXT:    lxvd2x vs7, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT:    li r7, 96
+; CHECK-P8-NEXT:    lxvd2x vs3, r4, r7
+; CHECK-P8-NEXT:    li r7, 112
+; CHECK-P8-NEXT:    xscvsxdsp f30, f11
+; CHECK-P8-NEXT:    xxswapd vs11, vs11
+; CHECK-P8-NEXT:    lxvd2x vs4, r4, r7
+; CHECK-P8-NEXT:    li r7, 16
+; CHECK-P8-NEXT:    xscvsxdsp f0, f6
+; CHECK-P8-NEXT:    xxswapd vs6, vs6
+; CHECK-P8-NEXT:    xscvsxdsp f1, f7
+; CHECK-P8-NEXT:    lxvd2x vs9, r4, r7
+; CHECK-P8-NEXT:    xxswapd vs7, vs7
+; CHECK-P8-NEXT:    xscvsxdsp f5, f8
+; CHECK-P8-NEXT:    xxswapd vs8, vs8
+; CHECK-P8-NEXT:    xscvsxdsp f10, f2
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xscvsxdsp f12, f3
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xscvsxdsp f13, f4
+; CHECK-P8-NEXT:    xxswapd vs4, vs4
+; CHECK-P8-NEXT:    xscvsxdsp f31, f9
+; CHECK-P8-NEXT:    xxswapd vs9, vs9
+; CHECK-P8-NEXT:    xscvsxdsp f6, f6
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    xscvsxdsp f7, f7
+; CHECK-P8-NEXT:    xscvsxdsp f8, f8
+; CHECK-P8-NEXT:    xxmrghd vs5, vs10, vs5
+; CHECK-P8-NEXT:    xscvsxdsp f2, f2
+; CHECK-P8-NEXT:    xscvsxdsp f3, f3
+; CHECK-P8-NEXT:    xxmrghd vs10, vs13, vs12
+; CHECK-P8-NEXT:    xscvsxdsp f4, f4
+; CHECK-P8-NEXT:    xscvsxdsp f1, f9
+; CHECK-P8-NEXT:    xscvsxdsp f9, f11
+; CHECK-P8-NEXT:    xxmrghd vs11, vs31, vs30
+; CHECK-P8-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-P8-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P8-NEXT:    xxmrghd vs0, vs7, vs6
+; CHECK-P8-NEXT:    xxmrghd vs2, vs2, vs8
+; CHECK-P8-NEXT:    xvcvdpsp v3, vs5
+; CHECK-P8-NEXT:    xvcvdpsp v4, vs10
+; CHECK-P8-NEXT:    xxmrghd vs3, vs4, vs3
+; CHECK-P8-NEXT:    xvcvdpsp v5, vs11
+; CHECK-P8-NEXT:    xvcvdpsp v0, vs0
+; CHECK-P8-NEXT:    xxmrghd vs1, vs1, vs9
+; CHECK-P8-NEXT:    xvcvdpsp v1, vs2
+; CHECK-P8-NEXT:    xvcvdpsp v6, vs3
+; CHECK-P8-NEXT:    xvcvdpsp v7, vs1
+; CHECK-P8-NEXT:    vmrgew v2, v0, v2
+; CHECK-P8-NEXT:    vmrgew v3, v1, v3
+; CHECK-P8-NEXT:    vmrgew v4, v6, v4
+; CHECK-P8-NEXT:    vmrgew v5, v7, v5
+; CHECK-P8-NEXT:    stvx v2, r3, r7
+; CHECK-P8-NEXT:    stvx v3, r3, r5
+; CHECK-P8-NEXT:    stvx v4, r3, r6
+; CHECK-P8-NEXT:    stvx v5, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs4, 48(r4)
+; CHECK-P9-NEXT:    lxv vs5, 32(r4)
+; CHECK-P9-NEXT:    lxv vs6, 16(r4)
+; CHECK-P9-NEXT:    lxv vs7, 0(r4)
+; CHECK-P9-NEXT:    lxv vs8, 112(r4)
+; CHECK-P9-NEXT:    lxv vs9, 96(r4)
+; CHECK-P9-NEXT:    lxv vs10, 80(r4)
+; CHECK-P9-NEXT:    lxv vs11, 64(r4)
+; CHECK-P9-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    xxswapd vs0, vs7
+; CHECK-P9-NEXT:    xxswapd vs1, vs6
+; CHECK-P9-NEXT:    xxswapd vs2, vs5
+; CHECK-P9-NEXT:    xxswapd vs3, vs4
+; CHECK-P9-NEXT:    xxswapd vs12, vs11
+; CHECK-P9-NEXT:    xxswapd vs13, vs10
+; CHECK-P9-NEXT:    xxswapd vs31, vs9
+; CHECK-P9-NEXT:    xxswapd vs30, vs8
+; CHECK-P9-NEXT:    xscvsxdsp f7, f7
+; CHECK-P9-NEXT:    xscvsxdsp f6, f6
+; CHECK-P9-NEXT:    xscvsxdsp f5, f5
+; CHECK-P9-NEXT:    xscvsxdsp f4, f4
+; CHECK-P9-NEXT:    xscvsxdsp f11, f11
+; CHECK-P9-NEXT:    xscvsxdsp f10, f10
+; CHECK-P9-NEXT:    xscvsxdsp f9, f9
+; CHECK-P9-NEXT:    xscvsxdsp f8, f8
+; CHECK-P9-NEXT:    xscvsxdsp f0, f0
+; CHECK-P9-NEXT:    xscvsxdsp f1, f1
+; CHECK-P9-NEXT:    xscvsxdsp f2, f2
+; CHECK-P9-NEXT:    xscvsxdsp f3, f3
+; CHECK-P9-NEXT:    xscvsxdsp f12, f12
+; CHECK-P9-NEXT:    xscvsxdsp f13, f13
+; CHECK-P9-NEXT:    xscvsxdsp f31, f31
+; CHECK-P9-NEXT:    xscvsxdsp f30, f30
+; CHECK-P9-NEXT:    xxmrghd vs6, vs6, vs7
+; CHECK-P9-NEXT:    xxmrghd vs4, vs4, vs5
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT:    xxmrghd vs2, vs10, vs11
+; CHECK-P9-NEXT:    xxmrghd vs3, vs8, vs9
+; CHECK-P9-NEXT:    xxmrghd vs5, vs13, vs12
+; CHECK-P9-NEXT:    xxmrghd vs7, vs30, vs31
+; CHECK-P9-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    xvcvdpsp v2, vs6
+; CHECK-P9-NEXT:    xvcvdpsp v3, vs4
+; CHECK-P9-NEXT:    xvcvdpsp v4, vs0
+; CHECK-P9-NEXT:    xvcvdpsp v5, vs1
+; CHECK-P9-NEXT:    xvcvdpsp v0, vs5
+; CHECK-P9-NEXT:    xvcvdpsp v1, vs2
+; CHECK-P9-NEXT:    xvcvdpsp v6, vs7
+; CHECK-P9-NEXT:    xvcvdpsp v7, vs3
+; CHECK-P9-NEXT:    vmrgew v2, v2, v4
+; CHECK-P9-NEXT:    vmrgew v3, v3, v5
+; CHECK-P9-NEXT:    vmrgew v4, v1, v0
+; CHECK-P9-NEXT:    vmrgew v5, v7, v6
+; CHECK-P9-NEXT:    stxv v4, 32(r3)
+; CHECK-P9-NEXT:    stxv v3, 16(r3)
+; CHECK-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-P9-NEXT:    stxv v5, 48(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs2, 32(r4)
+; CHECK-BE-NEXT:    lxv vs3, 48(r4)
+; CHECK-BE-NEXT:    lxv vs4, 0(r4)
+; CHECK-BE-NEXT:    lxv vs5, 16(r4)
+; CHECK-BE-NEXT:    lxv vs6, 96(r4)
+; CHECK-BE-NEXT:    lxv vs7, 112(r4)
+; CHECK-BE-NEXT:    lxv vs8, 64(r4)
+; CHECK-BE-NEXT:    lxv vs9, 80(r4)
+; CHECK-BE-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    xxswapd vs0, vs5
+; CHECK-BE-NEXT:    xxswapd vs1, vs4
+; CHECK-BE-NEXT:    xxswapd vs10, vs3
+; CHECK-BE-NEXT:    xxswapd vs11, vs2
+; CHECK-BE-NEXT:    xxswapd vs12, vs9
+; CHECK-BE-NEXT:    xxswapd vs13, vs8
+; CHECK-BE-NEXT:    xxswapd vs31, vs7
+; CHECK-BE-NEXT:    xxswapd vs30, vs6
+; CHECK-BE-NEXT:    xscvsxdsp f5, f5
+; CHECK-BE-NEXT:    xscvsxdsp f4, f4
+; CHECK-BE-NEXT:    xscvsxdsp f3, f3
+; CHECK-BE-NEXT:    xscvsxdsp f2, f2
+; CHECK-BE-NEXT:    xscvsxdsp f9, f9
+; CHECK-BE-NEXT:    xscvsxdsp f8, f8
+; CHECK-BE-NEXT:    xscvsxdsp f7, f7
+; CHECK-BE-NEXT:    xscvsxdsp f6, f6
+; CHECK-BE-NEXT:    xscvsxdsp f0, f0
+; CHECK-BE-NEXT:    xscvsxdsp f1, f1
+; CHECK-BE-NEXT:    xscvsxdsp f10, f10
+; CHECK-BE-NEXT:    xscvsxdsp f11, f11
+; CHECK-BE-NEXT:    xscvsxdsp f12, f12
+; CHECK-BE-NEXT:    xscvsxdsp f13, f13
+; CHECK-BE-NEXT:    xscvsxdsp f31, f31
+; CHECK-BE-NEXT:    xscvsxdsp f30, f30
+; CHECK-BE-NEXT:    xxmrghd vs4, vs4, vs5
+; CHECK-BE-NEXT:    xxmrghd vs2, vs2, vs3
+; CHECK-BE-NEXT:    xxmrghd vs3, vs8, vs9
+; CHECK-BE-NEXT:    xxmrghd vs5, vs6, vs7
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs1, vs11, vs10
+; CHECK-BE-NEXT:    xxmrghd vs6, vs13, vs12
+; CHECK-BE-NEXT:    xxmrghd vs7, vs30, vs31
+; CHECK-BE-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    xvcvdpsp v2, vs4
+; CHECK-BE-NEXT:    xvcvdpsp v3, vs2
+; CHECK-BE-NEXT:    xvcvdpsp v0, vs3
+; CHECK-BE-NEXT:    xvcvdpsp v6, vs5
+; CHECK-BE-NEXT:    xvcvdpsp v4, vs0
+; CHECK-BE-NEXT:    xvcvdpsp v5, vs1
+; CHECK-BE-NEXT:    xvcvdpsp v1, vs6
+; CHECK-BE-NEXT:    xvcvdpsp v7, vs7
+; CHECK-BE-NEXT:    vmrgew v2, v2, v4
+; CHECK-BE-NEXT:    vmrgew v3, v3, v5
+; CHECK-BE-NEXT:    vmrgew v4, v0, v1
+; CHECK-BE-NEXT:    vmrgew v5, v6, v7
+; CHECK-BE-NEXT:    stxv v5, 48(r3)
+; CHECK-BE-NEXT:    stxv v4, 32(r3)
+; CHECK-BE-NEXT:    stxv v3, 16(r3)
+; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <16 x i64>, <16 x i64>* %0, align 128
+  %1 = sitofp <16 x i64> %a to <16 x float>
+  store <16 x float> %1, <16 x float>* %agg.result, align 64
+  ret void
+}

Added: llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll?rev=347090&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll Fri Nov 16 12:24:10 2018
@@ -0,0 +1,1382 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P8
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P9
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE
+
+define i64 @test2elt(i16 %a.coerce) local_unnamed_addr #0 { ; uitofp <2 x i8> -> <2 x float>; vector arrives packed in an i16 and leaves packed in an i64 (pwr8 little-endian expectations come first)
+; CHECK-P8-LABEL: test2elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    clrldi r4, r3, 56
+; CHECK-P8-NEXT:    rldicl r3, r3, 56, 56
+; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-P8-NEXT:    rlwinm r3, r3, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f0, r4
+; CHECK-P8-NEXT:    mtvsrwz f1, r3
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
+; CHECK-P8-NEXT:    xscvuxdsp f1, f1
+; CHECK-P8-NEXT:    xscvdpspn vs0, f0
+; CHECK-P8-NEXT:    xscvdpspn vs1, f1
+; CHECK-P8-NEXT:    xxsldwi v2, vs0, vs0, 1
+; CHECK-P8-NEXT:    xxsldwi v3, vs1, vs1, 1
+; CHECK-P8-NEXT:    vmrglw v2, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    blr
+; Expected output for the pwr9 little-endian (powerpc64le) run follows.
+; CHECK-P9-LABEL: test2elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrws v2, r3
+; CHECK-P9-NEXT:    li r3, 0
+; CHECK-P9-NEXT:    li r4, 1
+; CHECK-P9-NEXT:    vextubrx r3, r3, v2
+; CHECK-P9-NEXT:    vextubrx r4, r4, v2
+; CHECK-P9-NEXT:    rlwinm r3, r3, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-P9-NEXT:    mtvsrwz f0, r3
+; CHECK-P9-NEXT:    mtvsrwz f1, r4
+; CHECK-P9-NEXT:    xscvuxdsp f0, f0
+; CHECK-P9-NEXT:    xscvuxdsp f1, f1
+; CHECK-P9-NEXT:    xscvdpspn vs0, f0
+; CHECK-P9-NEXT:    xscvdpspn vs1, f1
+; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 1
+; CHECK-P9-NEXT:    xxsldwi v3, vs1, vs1, 1
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    blr
+; Expected output for the pwr9 big-endian (powerpc64) run follows.
+; CHECK-BE-LABEL: test2elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrws v2, r3
+; CHECK-BE-NEXT:    li r3, 1
+; CHECK-BE-NEXT:    li r4, 0
+; CHECK-BE-NEXT:    vextublx r3, r3, v2
+; CHECK-BE-NEXT:    vextublx r4, r4, v2
+; CHECK-BE-NEXT:    rlwinm r3, r3, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-BE-NEXT:    mtvsrwz f0, r3
+; CHECK-BE-NEXT:    mtvsrwz f1, r4
+; CHECK-BE-NEXT:    xscvuxdsp f0, f0
+; CHECK-BE-NEXT:    xscvuxdsp f1, f1
+; CHECK-BE-NEXT:    xscvdpspn v2, f0
+; CHECK-BE-NEXT:    xscvdpspn v3, f1
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    mfvsrd r3, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i16 %a.coerce to <2 x i8> ; reinterpret the 16-bit argument as two bytes
+  %1 = uitofp <2 x i8> %0 to <2 x float> ; unsigned conversion under test; the expected code above converts each byte separately with xscvuxdsp
+  %2 = bitcast <2 x float> %1 to i64 ; repack the two floats into the 64-bit return value
+  ret i64 %2
+}
+
+define <4 x float> @test4elt(i32 %a.coerce) local_unnamed_addr #1 { ; uitofp <4 x i8> -> <4 x float>; input vector arrives packed in an i32 (pwr8 little-endian expectations come first)
+; CHECK-P8-LABEL: test4elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    clrldi r4, r3, 56
+; CHECK-P8-NEXT:    rldicl r5, r3, 48, 56
+; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f0, r4
+; CHECK-P8-NEXT:    rldicl r4, r3, 56, 56
+; CHECK-P8-NEXT:    rldicl r3, r3, 40, 56
+; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-P8-NEXT:    rlwinm r3, r3, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f1, r5
+; CHECK-P8-NEXT:    mtvsrwz f2, r4
+; CHECK-P8-NEXT:    mtvsrwz f3, r3
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
+; CHECK-P8-NEXT:    xscvuxdsp f1, f1
+; CHECK-P8-NEXT:    xscvuxdsp f2, f2
+; CHECK-P8-NEXT:    xscvuxdsp f3, f3
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P8-NEXT:    xvcvdpsp v3, vs1
+; CHECK-P8-NEXT:    vmrgew v2, v3, v2
+; CHECK-P8-NEXT:    blr
+; Expected output for the pwr9 little-endian (powerpc64le) run follows.
+; CHECK-P9-LABEL: test4elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrws v2, r3
+; CHECK-P9-NEXT:    li r3, 0
+; CHECK-P9-NEXT:    li r4, 2
+; CHECK-P9-NEXT:    li r5, 1
+; CHECK-P9-NEXT:    li r6, 3
+; CHECK-P9-NEXT:    vextubrx r3, r3, v2
+; CHECK-P9-NEXT:    vextubrx r4, r4, v2
+; CHECK-P9-NEXT:    vextubrx r5, r5, v2
+; CHECK-P9-NEXT:    vextubrx r6, r6, v2
+; CHECK-P9-NEXT:    rlwinm r3, r3, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-P9-NEXT:    mtvsrwz f0, r3
+; CHECK-P9-NEXT:    mtvsrwz f1, r4
+; CHECK-P9-NEXT:    mtvsrwz f2, r5
+; CHECK-P9-NEXT:    mtvsrwz f3, r6
+; CHECK-P9-NEXT:    xscvuxdsp f0, f0
+; CHECK-P9-NEXT:    xscvuxdsp f1, f1
+; CHECK-P9-NEXT:    xscvuxdsp f2, f2
+; CHECK-P9-NEXT:    xscvuxdsp f3, f3
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P9-NEXT:    xvcvdpsp v3, vs1
+; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    blr
+; Expected output for the pwr9 big-endian (powerpc64) run follows.
+; CHECK-BE-LABEL: test4elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrws v2, r3
+; CHECK-BE-NEXT:    li r3, 3
+; CHECK-BE-NEXT:    li r4, 1
+; CHECK-BE-NEXT:    li r5, 2
+; CHECK-BE-NEXT:    li r6, 0
+; CHECK-BE-NEXT:    vextublx r3, r3, v2
+; CHECK-BE-NEXT:    vextublx r4, r4, v2
+; CHECK-BE-NEXT:    vextublx r5, r5, v2
+; CHECK-BE-NEXT:    vextublx r6, r6, v2
+; CHECK-BE-NEXT:    rlwinm r3, r3, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-BE-NEXT:    mtvsrwz f0, r3
+; CHECK-BE-NEXT:    mtvsrwz f1, r4
+; CHECK-BE-NEXT:    mtvsrwz f2, r5
+; CHECK-BE-NEXT:    mtvsrwz f3, r6
+; CHECK-BE-NEXT:    xscvuxdsp f0, f0
+; CHECK-BE-NEXT:    xscvuxdsp f1, f1
+; CHECK-BE-NEXT:    xscvuxdsp f2, f2
+; CHECK-BE-NEXT:    xscvuxdsp f3, f3
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
+; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
+; CHECK-BE-NEXT:    vmrgew v2, v3, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i32 %a.coerce to <4 x i8> ; reinterpret the 32-bit argument as four bytes
+  %1 = uitofp <4 x i8> %0 to <4 x float> ; unsigned conversion under test; the expected code above converts each byte separately with xscvuxdsp
+  ret <4 x float> %1
+}
+
+define void @test8elt(<8 x float>* noalias nocapture sret %agg.result, i64 %a.coerce) local_unnamed_addr #2 { ; uitofp <8 x i8> -> <8 x float>; input arrives packed in an i64, result is written through the sret pointer (pwr8 little-endian expectations come first)
+; CHECK-P8-LABEL: test8elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r4
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    mfvsrd r4, f0
+; CHECK-P8-NEXT:    clrldi r6, r4, 56
+; CHECK-P8-NEXT:    rldicl r7, r4, 48, 56
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-P8-NEXT:    rlwinm r7, r7, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f0, r6
+; CHECK-P8-NEXT:    rldicl r6, r4, 56, 56
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f1, r7
+; CHECK-P8-NEXT:    rldicl r7, r4, 40, 56
+; CHECK-P8-NEXT:    mtvsrwz f2, r6
+; CHECK-P8-NEXT:    rldicl r6, r4, 32, 56
+; CHECK-P8-NEXT:    rlwinm r7, r7, 0, 24, 31
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f3, r7
+; CHECK-P8-NEXT:    rldicl r7, r4, 16, 56
+; CHECK-P8-NEXT:    mtvsrwz f4, r6
+; CHECK-P8-NEXT:    rldicl r6, r4, 24, 56
+; CHECK-P8-NEXT:    rldicl r4, r4, 8, 56
+; CHECK-P8-NEXT:    rlwinm r7, r7, 0, 24, 31
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f5, r7
+; CHECK-P8-NEXT:    mtvsrwz f6, r6
+; CHECK-P8-NEXT:    mtvsrwz f7, r4
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
+; CHECK-P8-NEXT:    xscvuxdsp f1, f1
+; CHECK-P8-NEXT:    xscvuxdsp f2, f2
+; CHECK-P8-NEXT:    xscvuxdsp f3, f3
+; CHECK-P8-NEXT:    xscvuxdsp f4, f4
+; CHECK-P8-NEXT:    xscvuxdsp f5, f5
+; CHECK-P8-NEXT:    xscvuxdsp f6, f6
+; CHECK-P8-NEXT:    xscvuxdsp f7, f7
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P8-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-P8-NEXT:    xvcvdpsp v3, vs1
+; CHECK-P8-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-P8-NEXT:    xvcvdpsp v4, vs2
+; CHECK-P8-NEXT:    xvcvdpsp v5, vs3
+; CHECK-P8-NEXT:    vmrgew v2, v3, v2
+; CHECK-P8-NEXT:    vmrgew v3, v5, v4
+; CHECK-P8-NEXT:    stvx v2, 0, r3
+; CHECK-P8-NEXT:    stvx v3, r3, r5
+; CHECK-P8-NEXT:    blr
+; Expected output for the pwr9 little-endian (powerpc64le) run follows.
+; CHECK-P9-LABEL: test8elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrd f0, r4
+; CHECK-P9-NEXT:    li r4, 0
+; CHECK-P9-NEXT:    li r5, 2
+; CHECK-P9-NEXT:    li r6, 1
+; CHECK-P9-NEXT:    li r7, 3
+; CHECK-P9-NEXT:    li r8, 4
+; CHECK-P9-NEXT:    li r9, 6
+; CHECK-P9-NEXT:    li r10, 5
+; CHECK-P9-NEXT:    li r11, 7
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    vextubrx r4, r4, v2
+; CHECK-P9-NEXT:    vextubrx r5, r5, v2
+; CHECK-P9-NEXT:    vextubrx r6, r6, v2
+; CHECK-P9-NEXT:    vextubrx r7, r7, v2
+; CHECK-P9-NEXT:    vextubrx r8, r8, v2
+; CHECK-P9-NEXT:    vextubrx r9, r9, v2
+; CHECK-P9-NEXT:    vextubrx r10, r10, v2
+; CHECK-P9-NEXT:    vextubrx r11, r11, v2
+; CHECK-P9-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r7, r7, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r8, r8, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r9, r9, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r10, r10, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r11, r11, 0, 24, 31
+; CHECK-P9-NEXT:    mtvsrwz f0, r4
+; CHECK-P9-NEXT:    mtvsrwz f1, r5
+; CHECK-P9-NEXT:    mtvsrwz f2, r6
+; CHECK-P9-NEXT:    mtvsrwz f3, r7
+; CHECK-P9-NEXT:    mtvsrwz f4, r8
+; CHECK-P9-NEXT:    mtvsrwz f5, r9
+; CHECK-P9-NEXT:    mtvsrwz f6, r10
+; CHECK-P9-NEXT:    mtvsrwz f7, r11
+; CHECK-P9-NEXT:    xscvuxdsp f0, f0
+; CHECK-P9-NEXT:    xscvuxdsp f1, f1
+; CHECK-P9-NEXT:    xscvuxdsp f2, f2
+; CHECK-P9-NEXT:    xscvuxdsp f3, f3
+; CHECK-P9-NEXT:    xscvuxdsp f4, f4
+; CHECK-P9-NEXT:    xscvuxdsp f5, f5
+; CHECK-P9-NEXT:    xscvuxdsp f6, f6
+; CHECK-P9-NEXT:    xscvuxdsp f7, f7
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-P9-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P9-NEXT:    xvcvdpsp v3, vs1
+; CHECK-P9-NEXT:    xvcvdpsp v4, vs2
+; CHECK-P9-NEXT:    xvcvdpsp v5, vs3
+; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    vmrgew v3, v5, v4
+; CHECK-P9-NEXT:    stxv v3, 16(r3)
+; CHECK-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-P9-NEXT:    blr
+; Expected output for the pwr9 big-endian (powerpc64) run follows.
+; CHECK-BE-LABEL: test8elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    li r5, 3
+; CHECK-BE-NEXT:    mtvsrd v2, r4
+; CHECK-BE-NEXT:    li r4, 1
+; CHECK-BE-NEXT:    li r6, 2
+; CHECK-BE-NEXT:    li r7, 0
+; CHECK-BE-NEXT:    li r8, 7
+; CHECK-BE-NEXT:    li r9, 5
+; CHECK-BE-NEXT:    li r10, 6
+; CHECK-BE-NEXT:    li r11, 4
+; CHECK-BE-NEXT:    vextublx r5, r5, v2
+; CHECK-BE-NEXT:    vextublx r4, r4, v2
+; CHECK-BE-NEXT:    vextublx r6, r6, v2
+; CHECK-BE-NEXT:    vextublx r7, r7, v2
+; CHECK-BE-NEXT:    vextublx r8, r8, v2
+; CHECK-BE-NEXT:    vextublx r9, r9, v2
+; CHECK-BE-NEXT:    vextublx r10, r10, v2
+; CHECK-BE-NEXT:    vextublx r11, r11, v2
+; CHECK-BE-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r7, r7, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r8, r8, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r9, r9, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r10, r10, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r11, r11, 0, 24, 31
+; CHECK-BE-NEXT:    mtvsrwz f0, r5
+; CHECK-BE-NEXT:    mtvsrwz f1, r4
+; CHECK-BE-NEXT:    mtvsrwz f2, r6
+; CHECK-BE-NEXT:    mtvsrwz f3, r7
+; CHECK-BE-NEXT:    mtvsrwz f4, r8
+; CHECK-BE-NEXT:    mtvsrwz f5, r9
+; CHECK-BE-NEXT:    mtvsrwz f6, r10
+; CHECK-BE-NEXT:    mtvsrwz f7, r11
+; CHECK-BE-NEXT:    xscvuxdsp f0, f0
+; CHECK-BE-NEXT:    xscvuxdsp f1, f1
+; CHECK-BE-NEXT:    xscvuxdsp f2, f2
+; CHECK-BE-NEXT:    xscvuxdsp f3, f3
+; CHECK-BE-NEXT:    xscvuxdsp f4, f4
+; CHECK-BE-NEXT:    xscvuxdsp f5, f5
+; CHECK-BE-NEXT:    xscvuxdsp f6, f6
+; CHECK-BE-NEXT:    xscvuxdsp f7, f7
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-BE-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
+; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
+; CHECK-BE-NEXT:    xvcvdpsp v4, vs2
+; CHECK-BE-NEXT:    xvcvdpsp v5, vs3
+; CHECK-BE-NEXT:    vmrgew v2, v3, v2
+; CHECK-BE-NEXT:    vmrgew v3, v5, v4
+; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    stxv v3, 16(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i64 %a.coerce to <8 x i8> ; reinterpret the 64-bit argument as eight bytes
+  %1 = uitofp <8 x i8> %0 to <8 x float> ; unsigned conversion under test; the expected code above converts each byte separately with xscvuxdsp
+  store <8 x float> %1, <8 x float>* %agg.result, align 32 ; the 32-byte result is returned through the sret pointer
+  ret void
+}
+
+define void @test16elt(<16 x float>* noalias nocapture sret %agg.result, <16 x i8> %a) local_unnamed_addr #3 { ; uitofp <16 x i8> -> <16 x float>; input is passed as a vector, result is written through the sret pointer (pwr8 little-endian expectations come first)
+; CHECK-P8-LABEL: test16elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mfvsrd r4, v2
+; CHECK-P8-NEXT:    xxswapd vs2, v2
+; CHECK-P8-NEXT:    clrldi r5, r4, 56
+; CHECK-P8-NEXT:    rldicl r6, r4, 48, 56
+; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f0, r5
+; CHECK-P8-NEXT:    rldicl r5, r4, 40, 56
+; CHECK-P8-NEXT:    rldicl r7, r4, 56, 56
+; CHECK-P8-NEXT:    mtvsrwz f1, r6
+; CHECK-P8-NEXT:    rldicl r6, r4, 32, 56
+; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-P8-NEXT:    rlwinm r7, r7, 0, 24, 31
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f4, r5
+; CHECK-P8-NEXT:    rldicl r5, r4, 16, 56
+; CHECK-P8-NEXT:    mtvsrwz f3, r7
+; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f5, r6
+; CHECK-P8-NEXT:    rldicl r6, r4, 24, 56
+; CHECK-P8-NEXT:    rldicl r4, r4, 8, 56
+; CHECK-P8-NEXT:    mfvsrd r7, f2
+; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f2, r5
+; CHECK-P8-NEXT:    rlwinm r5, r6, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f6, r5
+; CHECK-P8-NEXT:    clrldi r5, r7, 56
+; CHECK-P8-NEXT:    mtvsrwz f7, r4
+; CHECK-P8-NEXT:    rldicl r4, r7, 48, 56
+; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f8, r5
+; CHECK-P8-NEXT:    rldicl r5, r7, 56, 56
+; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f9, r4
+; CHECK-P8-NEXT:    rlwinm r4, r5, 0, 24, 31
+; CHECK-P8-NEXT:    rldicl r5, r7, 8, 56
+; CHECK-P8-NEXT:    mtvsrwz f10, r4
+; CHECK-P8-NEXT:    rldicl r4, r7, 40, 56
+; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
+; CHECK-P8-NEXT:    mtvsrwz f11, r4
+; CHECK-P8-NEXT:    rldicl r4, r7, 32, 56
+; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-P8-NEXT:    xscvuxdsp f1, f1
+; CHECK-P8-NEXT:    xscvuxdsp f3, f3
+; CHECK-P8-NEXT:    xscvuxdsp f4, f4
+; CHECK-P8-NEXT:    mtvsrwz f12, r4
+; CHECK-P8-NEXT:    rldicl r4, r7, 16, 56
+; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-P8-NEXT:    xscvuxdsp f5, f5
+; CHECK-P8-NEXT:    mtvsrwz f13, r4
+; CHECK-P8-NEXT:    rldicl r4, r7, 24, 56
+; CHECK-P8-NEXT:    xscvuxdsp f2, f2
+; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    mtvsrwz v3, r5
+; CHECK-P8-NEXT:    xxmrghd vs3, vs4, vs3
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    xscvuxdsp f6, f6
+; CHECK-P8-NEXT:    xscvuxdsp f7, f7
+; CHECK-P8-NEXT:    xscvuxdsp f8, f8
+; CHECK-P8-NEXT:    xscvuxdsp f9, f9
+; CHECK-P8-NEXT:    xxmrghd vs2, vs2, vs5
+; CHECK-P8-NEXT:    xscvuxdsp f10, f10
+; CHECK-P8-NEXT:    xscvuxdsp f11, f11
+; CHECK-P8-NEXT:    xscvuxdsp f12, f12
+; CHECK-P8-NEXT:    xscvuxdsp f13, f13
+; CHECK-P8-NEXT:    xxmrghd vs5, vs7, vs6
+; CHECK-P8-NEXT:    xscvuxdsp f1, v2
+; CHECK-P8-NEXT:    xscvuxdsp f4, v3
+; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P8-NEXT:    xxmrghd vs0, vs9, vs8
+; CHECK-P8-NEXT:    xvcvdpsp v3, vs3
+; CHECK-P8-NEXT:    xxmrghd vs3, vs11, vs10
+; CHECK-P8-NEXT:    xvcvdpsp v4, vs2
+; CHECK-P8-NEXT:    xxmrghd vs2, vs13, vs12
+; CHECK-P8-NEXT:    xvcvdpsp v5, vs5
+; CHECK-P8-NEXT:    xvcvdpsp v0, vs0
+; CHECK-P8-NEXT:    xxmrghd vs1, vs4, vs1
+; CHECK-P8-NEXT:    xvcvdpsp v1, vs3
+; CHECK-P8-NEXT:    xvcvdpsp v6, vs2
+; CHECK-P8-NEXT:    vmrgew v2, v3, v2
+; CHECK-P8-NEXT:    xvcvdpsp v7, vs1
+; CHECK-P8-NEXT:    vmrgew v3, v5, v4
+; CHECK-P8-NEXT:    vmrgew v4, v1, v0
+; CHECK-P8-NEXT:    stvx v2, r3, r5
+; CHECK-P8-NEXT:    vmrgew v5, v7, v6
+; CHECK-P8-NEXT:    stvx v3, r3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    stvx v4, 0, r3
+; CHECK-P8-NEXT:    stvx v5, r3, r4
+; CHECK-P8-NEXT:    blr
+; Expected output for the pwr9 little-endian (powerpc64le) run follows; it spills and reloads r25-r30 and f30/f31.
+; CHECK-P9-LABEL: test16elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    std r25, -72(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r26, -64(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r27, -56(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r28, -48(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r29, -40(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    li r4, 0
+; CHECK-P9-NEXT:    std r30, -32(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    li r5, 2
+; CHECK-P9-NEXT:    li r6, 1
+; CHECK-P9-NEXT:    li r7, 3
+; CHECK-P9-NEXT:    li r8, 4
+; CHECK-P9-NEXT:    li r9, 6
+; CHECK-P9-NEXT:    li r10, 5
+; CHECK-P9-NEXT:    li r11, 7
+; CHECK-P9-NEXT:    li r12, 8
+; CHECK-P9-NEXT:    li r0, 10
+; CHECK-P9-NEXT:    li r30, 9
+; CHECK-P9-NEXT:    li r29, 11
+; CHECK-P9-NEXT:    li r28, 12
+; CHECK-P9-NEXT:    li r27, 14
+; CHECK-P9-NEXT:    li r26, 13
+; CHECK-P9-NEXT:    li r25, 15
+; CHECK-P9-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    vextubrx r4, r4, v2
+; CHECK-P9-NEXT:    vextubrx r5, r5, v2
+; CHECK-P9-NEXT:    vextubrx r6, r6, v2
+; CHECK-P9-NEXT:    vextubrx r7, r7, v2
+; CHECK-P9-NEXT:    vextubrx r8, r8, v2
+; CHECK-P9-NEXT:    vextubrx r9, r9, v2
+; CHECK-P9-NEXT:    vextubrx r10, r10, v2
+; CHECK-P9-NEXT:    vextubrx r11, r11, v2
+; CHECK-P9-NEXT:    vextubrx r12, r12, v2
+; CHECK-P9-NEXT:    vextubrx r0, r0, v2
+; CHECK-P9-NEXT:    vextubrx r30, r30, v2
+; CHECK-P9-NEXT:    vextubrx r29, r29, v2
+; CHECK-P9-NEXT:    vextubrx r28, r28, v2
+; CHECK-P9-NEXT:    vextubrx r27, r27, v2
+; CHECK-P9-NEXT:    vextubrx r26, r26, v2
+; CHECK-P9-NEXT:    vextubrx r25, r25, v2
+; CHECK-P9-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r7, r7, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r8, r8, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r9, r9, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r10, r10, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r11, r11, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r12, r12, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r0, r0, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r30, r30, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r29, r29, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r28, r28, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r27, r27, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r26, r26, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r25, r25, 0, 24, 31
+; CHECK-P9-NEXT:    mtvsrwz f0, r4
+; CHECK-P9-NEXT:    mtvsrwz f1, r5
+; CHECK-P9-NEXT:    mtvsrwz f2, r6
+; CHECK-P9-NEXT:    mtvsrwz f3, r7
+; CHECK-P9-NEXT:    mtvsrwz f4, r8
+; CHECK-P9-NEXT:    mtvsrwz f5, r9
+; CHECK-P9-NEXT:    mtvsrwz f6, r10
+; CHECK-P9-NEXT:    mtvsrwz f7, r11
+; CHECK-P9-NEXT:    mtvsrwz f8, r12
+; CHECK-P9-NEXT:    mtvsrwz f9, r0
+; CHECK-P9-NEXT:    mtvsrwz f10, r30
+; CHECK-P9-NEXT:    ld r30, -32(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwz f11, r29
+; CHECK-P9-NEXT:    ld r29, -40(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwz f12, r28
+; CHECK-P9-NEXT:    ld r28, -48(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwz f13, r27
+; CHECK-P9-NEXT:    ld r27, -56(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwz v2, r26
+; CHECK-P9-NEXT:    ld r26, -64(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwz v3, r25
+; CHECK-P9-NEXT:    ld r25, -72(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    xscvuxdsp f0, f0
+; CHECK-P9-NEXT:    xscvuxdsp f1, f1
+; CHECK-P9-NEXT:    xscvuxdsp f2, f2
+; CHECK-P9-NEXT:    xscvuxdsp f3, f3
+; CHECK-P9-NEXT:    xscvuxdsp f4, f4
+; CHECK-P9-NEXT:    xscvuxdsp f5, f5
+; CHECK-P9-NEXT:    xscvuxdsp f6, f6
+; CHECK-P9-NEXT:    xscvuxdsp f7, f7
+; CHECK-P9-NEXT:    xscvuxdsp f8, f8
+; CHECK-P9-NEXT:    xscvuxdsp f9, f9
+; CHECK-P9-NEXT:    xscvuxdsp f10, f10
+; CHECK-P9-NEXT:    xscvuxdsp f11, f11
+; CHECK-P9-NEXT:    xscvuxdsp f12, f12
+; CHECK-P9-NEXT:    xscvuxdsp f13, f13
+; CHECK-P9-NEXT:    xscvuxdsp f31, v2
+; CHECK-P9-NEXT:    xscvuxdsp f30, v3
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-P9-NEXT:    xxmrghd vs4, vs9, vs8
+; CHECK-P9-NEXT:    xxmrghd vs5, vs11, vs10
+; CHECK-P9-NEXT:    xxmrghd vs6, vs13, vs12
+; CHECK-P9-NEXT:    xxmrghd vs7, vs30, vs31
+; CHECK-P9-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P9-NEXT:    xvcvdpsp v3, vs1
+; CHECK-P9-NEXT:    xvcvdpsp v4, vs2
+; CHECK-P9-NEXT:    xvcvdpsp v5, vs3
+; CHECK-P9-NEXT:    xvcvdpsp v0, vs4
+; CHECK-P9-NEXT:    xvcvdpsp v1, vs5
+; CHECK-P9-NEXT:    xvcvdpsp v6, vs6
+; CHECK-P9-NEXT:    xvcvdpsp v7, vs7
+; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    vmrgew v3, v5, v4
+; CHECK-P9-NEXT:    vmrgew v4, v1, v0
+; CHECK-P9-NEXT:    vmrgew v5, v7, v6
+; CHECK-P9-NEXT:    stxv v3, 16(r3)
+; CHECK-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-P9-NEXT:    stxv v5, 48(r3)
+; CHECK-P9-NEXT:    stxv v4, 32(r3)
+; CHECK-P9-NEXT:    blr
+; Expected output for the pwr9 big-endian (powerpc64) run follows; it spills and reloads r25-r30 and f30/f31.
+; CHECK-BE-LABEL: test16elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    std r25, -72(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r26, -64(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r27, -56(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r28, -48(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r29, -40(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    li r4, 3
+; CHECK-BE-NEXT:    std r30, -32(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    li r5, 1
+; CHECK-BE-NEXT:    li r6, 2
+; CHECK-BE-NEXT:    li r7, 0
+; CHECK-BE-NEXT:    li r8, 7
+; CHECK-BE-NEXT:    li r9, 5
+; CHECK-BE-NEXT:    li r10, 6
+; CHECK-BE-NEXT:    li r11, 4
+; CHECK-BE-NEXT:    li r12, 11
+; CHECK-BE-NEXT:    li r0, 9
+; CHECK-BE-NEXT:    li r30, 10
+; CHECK-BE-NEXT:    li r29, 8
+; CHECK-BE-NEXT:    li r28, 15
+; CHECK-BE-NEXT:    li r27, 13
+; CHECK-BE-NEXT:    li r26, 14
+; CHECK-BE-NEXT:    li r25, 12
+; CHECK-BE-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    vextublx r4, r4, v2
+; CHECK-BE-NEXT:    vextublx r5, r5, v2
+; CHECK-BE-NEXT:    vextublx r6, r6, v2
+; CHECK-BE-NEXT:    vextublx r7, r7, v2
+; CHECK-BE-NEXT:    vextublx r8, r8, v2
+; CHECK-BE-NEXT:    vextublx r9, r9, v2
+; CHECK-BE-NEXT:    vextublx r10, r10, v2
+; CHECK-BE-NEXT:    vextublx r11, r11, v2
+; CHECK-BE-NEXT:    vextublx r12, r12, v2
+; CHECK-BE-NEXT:    vextublx r0, r0, v2
+; CHECK-BE-NEXT:    vextublx r30, r30, v2
+; CHECK-BE-NEXT:    vextublx r29, r29, v2
+; CHECK-BE-NEXT:    vextublx r28, r28, v2
+; CHECK-BE-NEXT:    vextublx r27, r27, v2
+; CHECK-BE-NEXT:    vextublx r26, r26, v2
+; CHECK-BE-NEXT:    vextublx r25, r25, v2
+; CHECK-BE-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r7, r7, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r8, r8, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r9, r9, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r10, r10, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r11, r11, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r12, r12, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r0, r0, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r30, r30, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r29, r29, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r28, r28, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r27, r27, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r26, r26, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r25, r25, 0, 24, 31
+; CHECK-BE-NEXT:    mtvsrwz f0, r4
+; CHECK-BE-NEXT:    mtvsrwz f1, r5
+; CHECK-BE-NEXT:    mtvsrwz f2, r6
+; CHECK-BE-NEXT:    mtvsrwz f3, r7
+; CHECK-BE-NEXT:    mtvsrwz f4, r8
+; CHECK-BE-NEXT:    mtvsrwz f5, r9
+; CHECK-BE-NEXT:    mtvsrwz f6, r10
+; CHECK-BE-NEXT:    mtvsrwz f7, r11
+; CHECK-BE-NEXT:    mtvsrwz f8, r12
+; CHECK-BE-NEXT:    mtvsrwz f9, r0
+; CHECK-BE-NEXT:    mtvsrwz f10, r30
+; CHECK-BE-NEXT:    ld r30, -32(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwz f11, r29
+; CHECK-BE-NEXT:    ld r29, -40(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwz f12, r28
+; CHECK-BE-NEXT:    ld r28, -48(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwz f13, r27
+; CHECK-BE-NEXT:    ld r27, -56(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwz v2, r26
+; CHECK-BE-NEXT:    ld r26, -64(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwz v3, r25
+; CHECK-BE-NEXT:    ld r25, -72(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    xscvuxdsp f0, f0
+; CHECK-BE-NEXT:    xscvuxdsp f1, f1
+; CHECK-BE-NEXT:    xscvuxdsp f2, f2
+; CHECK-BE-NEXT:    xscvuxdsp f3, f3
+; CHECK-BE-NEXT:    xscvuxdsp f4, f4
+; CHECK-BE-NEXT:    xscvuxdsp f5, f5
+; CHECK-BE-NEXT:    xscvuxdsp f6, f6
+; CHECK-BE-NEXT:    xscvuxdsp f7, f7
+; CHECK-BE-NEXT:    xscvuxdsp f8, f8
+; CHECK-BE-NEXT:    xscvuxdsp f9, f9
+; CHECK-BE-NEXT:    xscvuxdsp f10, f10
+; CHECK-BE-NEXT:    xscvuxdsp f11, f11
+; CHECK-BE-NEXT:    xscvuxdsp f12, f12
+; CHECK-BE-NEXT:    xscvuxdsp f13, f13
+; CHECK-BE-NEXT:    xscvuxdsp f31, v2
+; CHECK-BE-NEXT:    xscvuxdsp f30, v3
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-BE-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-BE-NEXT:    xxmrghd vs4, vs9, vs8
+; CHECK-BE-NEXT:    xxmrghd vs5, vs11, vs10
+; CHECK-BE-NEXT:    xxmrghd vs6, vs13, vs12
+; CHECK-BE-NEXT:    xxmrghd vs7, vs30, vs31
+; CHECK-BE-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
+; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
+; CHECK-BE-NEXT:    xvcvdpsp v4, vs2
+; CHECK-BE-NEXT:    xvcvdpsp v5, vs3
+; CHECK-BE-NEXT:    xvcvdpsp v0, vs4
+; CHECK-BE-NEXT:    xvcvdpsp v1, vs5
+; CHECK-BE-NEXT:    xvcvdpsp v6, vs6
+; CHECK-BE-NEXT:    xvcvdpsp v7, vs7
+; CHECK-BE-NEXT:    vmrgew v2, v3, v2
+; CHECK-BE-NEXT:    vmrgew v3, v5, v4
+; CHECK-BE-NEXT:    vmrgew v4, v1, v0
+; CHECK-BE-NEXT:    vmrgew v5, v7, v6
+; CHECK-BE-NEXT:    stxv v3, 16(r3)
+; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    stxv v5, 48(r3)
+; CHECK-BE-NEXT:    stxv v4, 32(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = uitofp <16 x i8> %a to <16 x float> ; unsigned conversion under test; the expected code above converts each byte separately with xscvuxdsp
+  store <16 x float> %0, <16 x float>* %agg.result, align 64 ; the 64-byte result is returned through the sret pointer
+  ret void
+}
+
+define i64 @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 { ; sitofp <2 x i8> -> <2 x float>; vector arrives packed in an i16 and leaves packed in an i64 (pwr8 little-endian expectations come first)
+; CHECK-P8-LABEL: test2elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    clrldi r4, r3, 56
+; CHECK-P8-NEXT:    rldicl r3, r3, 56, 56
+; CHECK-P8-NEXT:    extsb r4, r4
+; CHECK-P8-NEXT:    extsb r3, r3
+; CHECK-P8-NEXT:    mtvsrwa f0, r4
+; CHECK-P8-NEXT:    mtvsrwa f1, r3
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
+; CHECK-P8-NEXT:    xscvsxdsp f1, f1
+; CHECK-P8-NEXT:    xscvdpspn vs0, f0
+; CHECK-P8-NEXT:    xscvdpspn vs1, f1
+; CHECK-P8-NEXT:    xxsldwi v2, vs0, vs0, 1
+; CHECK-P8-NEXT:    xxsldwi v3, vs1, vs1, 1
+; CHECK-P8-NEXT:    vmrglw v2, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    blr
+; Expected output for the pwr9 little-endian (powerpc64le) run follows.
+; CHECK-P9-LABEL: test2elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrws v2, r3
+; CHECK-P9-NEXT:    li r3, 0
+; CHECK-P9-NEXT:    li r4, 1
+; CHECK-P9-NEXT:    vextubrx r3, r3, v2
+; CHECK-P9-NEXT:    vextubrx r4, r4, v2
+; CHECK-P9-NEXT:    extsb r3, r3
+; CHECK-P9-NEXT:    extsb r4, r4
+; CHECK-P9-NEXT:    mtvsrwa f0, r3
+; CHECK-P9-NEXT:    mtvsrwa f1, r4
+; CHECK-P9-NEXT:    xscvsxdsp f0, f0
+; CHECK-P9-NEXT:    xscvsxdsp f1, f1
+; CHECK-P9-NEXT:    xscvdpspn vs0, f0
+; CHECK-P9-NEXT:    xscvdpspn vs1, f1
+; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 1
+; CHECK-P9-NEXT:    xxsldwi v3, vs1, vs1, 1
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    blr
+; Expected output for the pwr9 big-endian (powerpc64) run follows.
+; CHECK-BE-LABEL: test2elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrws v2, r3
+; CHECK-BE-NEXT:    li r3, 1
+; CHECK-BE-NEXT:    li r4, 0
+; CHECK-BE-NEXT:    vextublx r3, r3, v2
+; CHECK-BE-NEXT:    vextublx r4, r4, v2
+; CHECK-BE-NEXT:    extsb r3, r3
+; CHECK-BE-NEXT:    extsb r4, r4
+; CHECK-BE-NEXT:    mtvsrwa f0, r3
+; CHECK-BE-NEXT:    mtvsrwa f1, r4
+; CHECK-BE-NEXT:    xscvsxdsp f0, f0
+; CHECK-BE-NEXT:    xscvsxdsp f1, f1
+; CHECK-BE-NEXT:    xscvdpspn v2, f0
+; CHECK-BE-NEXT:    xscvdpspn v3, f1
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    mfvsrd r3, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i16 %a.coerce to <2 x i8> ; reinterpret the 16-bit argument as two bytes
+  %1 = sitofp <2 x i8> %0 to <2 x float> ; signed conversion under test; the expected code above sign-extends each byte (extsb) and converts it with xscvsxdsp
+  %2 = bitcast <2 x float> %1 to i64 ; repack the two floats into the 64-bit return value
+  ret i64 %2
+}
+
+define <4 x float> @test4elt_signed(i32 %a.coerce) local_unnamed_addr #1 { ; sitofp <4 x i8> -> <4 x float>; input vector arrives packed in an i32 (pwr8 little-endian expectations come first)
+; CHECK-P8-LABEL: test4elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    clrldi r4, r3, 56
+; CHECK-P8-NEXT:    rldicl r5, r3, 48, 56
+; CHECK-P8-NEXT:    extsb r4, r4
+; CHECK-P8-NEXT:    extsb r5, r5
+; CHECK-P8-NEXT:    mtvsrwa f0, r4
+; CHECK-P8-NEXT:    rldicl r4, r3, 56, 56
+; CHECK-P8-NEXT:    rldicl r3, r3, 40, 56
+; CHECK-P8-NEXT:    extsb r4, r4
+; CHECK-P8-NEXT:    extsb r3, r3
+; CHECK-P8-NEXT:    mtvsrwa f1, r5
+; CHECK-P8-NEXT:    mtvsrwa f2, r4
+; CHECK-P8-NEXT:    mtvsrwa f3, r3
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
+; CHECK-P8-NEXT:    xscvsxdsp f1, f1
+; CHECK-P8-NEXT:    xscvsxdsp f2, f2
+; CHECK-P8-NEXT:    xscvsxdsp f3, f3
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P8-NEXT:    xvcvdpsp v3, vs1
+; CHECK-P8-NEXT:    vmrgew v2, v3, v2
+; CHECK-P8-NEXT:    blr
+; Expected output for the pwr9 little-endian (powerpc64le) run follows.
+; CHECK-P9-LABEL: test4elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrws v2, r3
+; CHECK-P9-NEXT:    li r3, 0
+; CHECK-P9-NEXT:    li r4, 2
+; CHECK-P9-NEXT:    li r5, 1
+; CHECK-P9-NEXT:    li r6, 3
+; CHECK-P9-NEXT:    vextubrx r3, r3, v2
+; CHECK-P9-NEXT:    vextubrx r4, r4, v2
+; CHECK-P9-NEXT:    vextubrx r5, r5, v2
+; CHECK-P9-NEXT:    vextubrx r6, r6, v2
+; CHECK-P9-NEXT:    extsb r3, r3
+; CHECK-P9-NEXT:    extsb r4, r4
+; CHECK-P9-NEXT:    extsb r5, r5
+; CHECK-P9-NEXT:    extsb r6, r6
+; CHECK-P9-NEXT:    mtvsrwa f0, r3
+; CHECK-P9-NEXT:    mtvsrwa f1, r4
+; CHECK-P9-NEXT:    mtvsrwa f2, r5
+; CHECK-P9-NEXT:    mtvsrwa f3, r6
+; CHECK-P9-NEXT:    xscvsxdsp f0, f0
+; CHECK-P9-NEXT:    xscvsxdsp f1, f1
+; CHECK-P9-NEXT:    xscvsxdsp f2, f2
+; CHECK-P9-NEXT:    xscvsxdsp f3, f3
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P9-NEXT:    xvcvdpsp v3, vs1
+; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    blr
+; Expected output for the pwr9 big-endian (powerpc64) run follows.
+; CHECK-BE-LABEL: test4elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrws v2, r3
+; CHECK-BE-NEXT:    li r3, 3
+; CHECK-BE-NEXT:    li r4, 1
+; CHECK-BE-NEXT:    li r5, 2
+; CHECK-BE-NEXT:    li r6, 0
+; CHECK-BE-NEXT:    vextublx r3, r3, v2
+; CHECK-BE-NEXT:    vextublx r4, r4, v2
+; CHECK-BE-NEXT:    vextublx r5, r5, v2
+; CHECK-BE-NEXT:    vextublx r6, r6, v2
+; CHECK-BE-NEXT:    extsb r3, r3
+; CHECK-BE-NEXT:    extsb r4, r4
+; CHECK-BE-NEXT:    extsb r5, r5
+; CHECK-BE-NEXT:    extsb r6, r6
+; CHECK-BE-NEXT:    mtvsrwa f0, r3
+; CHECK-BE-NEXT:    mtvsrwa f1, r4
+; CHECK-BE-NEXT:    mtvsrwa f2, r5
+; CHECK-BE-NEXT:    mtvsrwa f3, r6
+; CHECK-BE-NEXT:    xscvsxdsp f0, f0
+; CHECK-BE-NEXT:    xscvsxdsp f1, f1
+; CHECK-BE-NEXT:    xscvsxdsp f2, f2
+; CHECK-BE-NEXT:    xscvsxdsp f3, f3
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
+; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
+; CHECK-BE-NEXT:    vmrgew v2, v3, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i32 %a.coerce to <4 x i8> ; reinterpret the 32-bit argument as four bytes
+  %1 = sitofp <4 x i8> %0 to <4 x float> ; signed conversion under test; the expected code above sign-extends each byte (extsb) and converts it with xscvsxdsp
+  ret <4 x float> %1
+}
+
+define void @test8elt_signed(<8 x float>* noalias nocapture sret %agg.result, i64 %a.coerce) local_unnamed_addr #2 { ; signed <8 x i8> (packed in %a.coerce) to <8 x float>, written to %agg.result
+; CHECK-P8-LABEL: test8elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r4
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    mfvsrd r4, f0
+; CHECK-P8-NEXT:    clrldi r6, r4, 56
+; CHECK-P8-NEXT:    rldicl r7, r4, 48, 56
+; CHECK-P8-NEXT:    extsb r6, r6
+; CHECK-P8-NEXT:    extsb r7, r7
+; CHECK-P8-NEXT:    mtvsrwa f0, r6
+; CHECK-P8-NEXT:    rldicl r6, r4, 56, 56
+; CHECK-P8-NEXT:    extsb r6, r6
+; CHECK-P8-NEXT:    mtvsrwa f1, r7
+; CHECK-P8-NEXT:    rldicl r7, r4, 40, 56
+; CHECK-P8-NEXT:    mtvsrwa f2, r6
+; CHECK-P8-NEXT:    rldicl r6, r4, 32, 56
+; CHECK-P8-NEXT:    extsb r7, r7
+; CHECK-P8-NEXT:    extsb r6, r6
+; CHECK-P8-NEXT:    mtvsrwa f3, r7
+; CHECK-P8-NEXT:    rldicl r7, r4, 16, 56
+; CHECK-P8-NEXT:    mtvsrwa f4, r6
+; CHECK-P8-NEXT:    rldicl r6, r4, 24, 56
+; CHECK-P8-NEXT:    rldicl r4, r4, 8, 56
+; CHECK-P8-NEXT:    extsb r7, r7
+; CHECK-P8-NEXT:    extsb r6, r6
+; CHECK-P8-NEXT:    extsb r4, r4
+; CHECK-P8-NEXT:    mtvsrwa f5, r7
+; CHECK-P8-NEXT:    mtvsrwa f6, r6
+; CHECK-P8-NEXT:    mtvsrwa f7, r4
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
+; CHECK-P8-NEXT:    xscvsxdsp f1, f1
+; CHECK-P8-NEXT:    xscvsxdsp f2, f2
+; CHECK-P8-NEXT:    xscvsxdsp f3, f3
+; CHECK-P8-NEXT:    xscvsxdsp f4, f4
+; CHECK-P8-NEXT:    xscvsxdsp f5, f5
+; CHECK-P8-NEXT:    xscvsxdsp f6, f6
+; CHECK-P8-NEXT:    xscvsxdsp f7, f7
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P8-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-P8-NEXT:    xvcvdpsp v3, vs1
+; CHECK-P8-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-P8-NEXT:    xvcvdpsp v4, vs2
+; CHECK-P8-NEXT:    xvcvdpsp v5, vs3
+; CHECK-P8-NEXT:    vmrgew v2, v3, v2
+; CHECK-P8-NEXT:    vmrgew v3, v5, v4
+; CHECK-P8-NEXT:    stvx v2, 0, r3
+; CHECK-P8-NEXT:    stvx v3, r3, r5
+; CHECK-P8-NEXT:    blr
+; P9 expectations (presumably the pwr9 little-endian run; the RUN lines are outside this excerpt): each byte is pulled out with vextubrx, sign-extended, converted as a scalar, then re-packed.
+; CHECK-P9-LABEL: test8elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrd f0, r4
+; CHECK-P9-NEXT:    li r4, 0
+; CHECK-P9-NEXT:    li r5, 2
+; CHECK-P9-NEXT:    li r6, 1
+; CHECK-P9-NEXT:    li r7, 3
+; CHECK-P9-NEXT:    li r8, 4
+; CHECK-P9-NEXT:    li r9, 6
+; CHECK-P9-NEXT:    li r10, 5
+; CHECK-P9-NEXT:    li r11, 7
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    vextubrx r4, r4, v2
+; CHECK-P9-NEXT:    vextubrx r5, r5, v2
+; CHECK-P9-NEXT:    vextubrx r6, r6, v2
+; CHECK-P9-NEXT:    vextubrx r7, r7, v2
+; CHECK-P9-NEXT:    vextubrx r8, r8, v2
+; CHECK-P9-NEXT:    vextubrx r9, r9, v2
+; CHECK-P9-NEXT:    vextubrx r10, r10, v2
+; CHECK-P9-NEXT:    vextubrx r11, r11, v2
+; CHECK-P9-NEXT:    extsb r4, r4
+; CHECK-P9-NEXT:    extsb r5, r5
+; CHECK-P9-NEXT:    extsb r6, r6
+; CHECK-P9-NEXT:    extsb r7, r7
+; CHECK-P9-NEXT:    extsb r8, r8
+; CHECK-P9-NEXT:    extsb r9, r9
+; CHECK-P9-NEXT:    extsb r10, r10
+; CHECK-P9-NEXT:    extsb r11, r11
+; CHECK-P9-NEXT:    mtvsrwa f0, r4
+; CHECK-P9-NEXT:    mtvsrwa f1, r5
+; CHECK-P9-NEXT:    mtvsrwa f2, r6
+; CHECK-P9-NEXT:    mtvsrwa f3, r7
+; CHECK-P9-NEXT:    mtvsrwa f4, r8
+; CHECK-P9-NEXT:    mtvsrwa f5, r9
+; CHECK-P9-NEXT:    mtvsrwa f6, r10
+; CHECK-P9-NEXT:    mtvsrwa f7, r11
+; CHECK-P9-NEXT:    xscvsxdsp f0, f0
+; CHECK-P9-NEXT:    xscvsxdsp f1, f1
+; CHECK-P9-NEXT:    xscvsxdsp f2, f2
+; CHECK-P9-NEXT:    xscvsxdsp f3, f3
+; CHECK-P9-NEXT:    xscvsxdsp f4, f4
+; CHECK-P9-NEXT:    xscvsxdsp f5, f5
+; CHECK-P9-NEXT:    xscvsxdsp f6, f6
+; CHECK-P9-NEXT:    xscvsxdsp f7, f7
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-P9-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P9-NEXT:    xvcvdpsp v3, vs1
+; CHECK-P9-NEXT:    xvcvdpsp v4, vs2
+; CHECK-P9-NEXT:    xvcvdpsp v5, vs3
+; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    vmrgew v3, v5, v4
+; CHECK-P9-NEXT:    stxv v3, 16(r3)
+; CHECK-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-P9-NEXT:    blr
+; BE expectations (presumably the big-endian run; the RUN lines are outside this excerpt): same scalarized sequence, but bytes are extracted with vextublx and a different index order.
+; CHECK-BE-LABEL: test8elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    li r5, 3
+; CHECK-BE-NEXT:    mtvsrd v2, r4
+; CHECK-BE-NEXT:    li r4, 1
+; CHECK-BE-NEXT:    li r6, 2
+; CHECK-BE-NEXT:    li r7, 0
+; CHECK-BE-NEXT:    li r8, 7
+; CHECK-BE-NEXT:    li r9, 5
+; CHECK-BE-NEXT:    li r10, 6
+; CHECK-BE-NEXT:    li r11, 4
+; CHECK-BE-NEXT:    vextublx r5, r5, v2
+; CHECK-BE-NEXT:    vextublx r4, r4, v2
+; CHECK-BE-NEXT:    vextublx r6, r6, v2
+; CHECK-BE-NEXT:    vextublx r7, r7, v2
+; CHECK-BE-NEXT:    vextublx r8, r8, v2
+; CHECK-BE-NEXT:    vextublx r9, r9, v2
+; CHECK-BE-NEXT:    vextublx r10, r10, v2
+; CHECK-BE-NEXT:    vextublx r11, r11, v2
+; CHECK-BE-NEXT:    extsb r5, r5
+; CHECK-BE-NEXT:    extsb r4, r4
+; CHECK-BE-NEXT:    extsb r6, r6
+; CHECK-BE-NEXT:    extsb r7, r7
+; CHECK-BE-NEXT:    extsb r8, r8
+; CHECK-BE-NEXT:    extsb r9, r9
+; CHECK-BE-NEXT:    extsb r10, r10
+; CHECK-BE-NEXT:    extsb r11, r11
+; CHECK-BE-NEXT:    mtvsrwa f0, r5
+; CHECK-BE-NEXT:    mtvsrwa f1, r4
+; CHECK-BE-NEXT:    mtvsrwa f2, r6
+; CHECK-BE-NEXT:    mtvsrwa f3, r7
+; CHECK-BE-NEXT:    mtvsrwa f4, r8
+; CHECK-BE-NEXT:    mtvsrwa f5, r9
+; CHECK-BE-NEXT:    mtvsrwa f6, r10
+; CHECK-BE-NEXT:    mtvsrwa f7, r11
+; CHECK-BE-NEXT:    xscvsxdsp f0, f0
+; CHECK-BE-NEXT:    xscvsxdsp f1, f1
+; CHECK-BE-NEXT:    xscvsxdsp f2, f2
+; CHECK-BE-NEXT:    xscvsxdsp f3, f3
+; CHECK-BE-NEXT:    xscvsxdsp f4, f4
+; CHECK-BE-NEXT:    xscvsxdsp f5, f5
+; CHECK-BE-NEXT:    xscvsxdsp f6, f6
+; CHECK-BE-NEXT:    xscvsxdsp f7, f7
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-BE-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
+; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
+; CHECK-BE-NEXT:    xvcvdpsp v4, vs2
+; CHECK-BE-NEXT:    xvcvdpsp v5, vs3
+; CHECK-BE-NEXT:    vmrgew v2, v3, v2
+; CHECK-BE-NEXT:    vmrgew v3, v5, v4
+; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    stxv v3, 16(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i64 %a.coerce to <8 x i8> ; reinterpret the 64-bit argument as eight i8 lanes
+  %1 = sitofp <8 x i8> %0 to <8 x float> ; signed conversion: the checks above show extsb followed by xscvsxdsp per lane
+  store <8 x float> %1, <8 x float>* %agg.result, align 32 ; 32-byte result; the checks above show two 16-byte vector stores
+  ret void
+}
+
+define void @test16elt_signed(<16 x float>* noalias nocapture sret %agg.result, <16 x i8> %a) local_unnamed_addr #3 { ; signed <16 x i8> to <16 x float>, written to %agg.result
+; CHECK-P8-LABEL: test16elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mfvsrd r4, v2
+; CHECK-P8-NEXT:    xxswapd vs2, v2
+; CHECK-P8-NEXT:    clrldi r5, r4, 56
+; CHECK-P8-NEXT:    rldicl r6, r4, 48, 56
+; CHECK-P8-NEXT:    extsb r5, r5
+; CHECK-P8-NEXT:    extsb r6, r6
+; CHECK-P8-NEXT:    mtvsrwa f0, r5
+; CHECK-P8-NEXT:    rldicl r5, r4, 40, 56
+; CHECK-P8-NEXT:    rldicl r7, r4, 56, 56
+; CHECK-P8-NEXT:    mtvsrwa f1, r6
+; CHECK-P8-NEXT:    rldicl r6, r4, 32, 56
+; CHECK-P8-NEXT:    extsb r5, r5
+; CHECK-P8-NEXT:    extsb r7, r7
+; CHECK-P8-NEXT:    extsb r6, r6
+; CHECK-P8-NEXT:    mtvsrwa f4, r5
+; CHECK-P8-NEXT:    rldicl r5, r4, 16, 56
+; CHECK-P8-NEXT:    mtvsrwa f3, r7
+; CHECK-P8-NEXT:    extsb r5, r5
+; CHECK-P8-NEXT:    mtvsrwa f5, r6
+; CHECK-P8-NEXT:    rldicl r6, r4, 24, 56
+; CHECK-P8-NEXT:    rldicl r4, r4, 8, 56
+; CHECK-P8-NEXT:    mfvsrd r7, f2
+; CHECK-P8-NEXT:    extsb r4, r4
+; CHECK-P8-NEXT:    mtvsrwa f2, r5
+; CHECK-P8-NEXT:    extsb r5, r6
+; CHECK-P8-NEXT:    mtvsrwa f6, r5
+; CHECK-P8-NEXT:    clrldi r5, r7, 56
+; CHECK-P8-NEXT:    mtvsrwa f7, r4
+; CHECK-P8-NEXT:    rldicl r4, r7, 48, 56
+; CHECK-P8-NEXT:    extsb r5, r5
+; CHECK-P8-NEXT:    mtvsrwa f8, r5
+; CHECK-P8-NEXT:    rldicl r5, r7, 56, 56
+; CHECK-P8-NEXT:    extsb r4, r4
+; CHECK-P8-NEXT:    mtvsrwa f9, r4
+; CHECK-P8-NEXT:    extsb r4, r5
+; CHECK-P8-NEXT:    rldicl r5, r7, 8, 56
+; CHECK-P8-NEXT:    mtvsrwa f10, r4
+; CHECK-P8-NEXT:    rldicl r4, r7, 40, 56
+; CHECK-P8-NEXT:    extsb r5, r5
+; CHECK-P8-NEXT:    extsb r4, r4
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
+; CHECK-P8-NEXT:    mtvsrwa f11, r4
+; CHECK-P8-NEXT:    rldicl r4, r7, 32, 56
+; CHECK-P8-NEXT:    extsb r4, r4
+; CHECK-P8-NEXT:    xscvsxdsp f1, f1
+; CHECK-P8-NEXT:    xscvsxdsp f3, f3
+; CHECK-P8-NEXT:    xscvsxdsp f4, f4
+; CHECK-P8-NEXT:    mtvsrwa f12, r4
+; CHECK-P8-NEXT:    rldicl r4, r7, 16, 56
+; CHECK-P8-NEXT:    extsb r4, r4
+; CHECK-P8-NEXT:    xscvsxdsp f5, f5
+; CHECK-P8-NEXT:    mtvsrwa f13, r4
+; CHECK-P8-NEXT:    rldicl r4, r7, 24, 56
+; CHECK-P8-NEXT:    xscvsxdsp f2, f2
+; CHECK-P8-NEXT:    extsb r4, r4
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    mtvsrwa v2, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    mtvsrwa v3, r5
+; CHECK-P8-NEXT:    xxmrghd vs3, vs4, vs3
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    xscvsxdsp f6, f6
+; CHECK-P8-NEXT:    xscvsxdsp f7, f7
+; CHECK-P8-NEXT:    xscvsxdsp f8, f8
+; CHECK-P8-NEXT:    xscvsxdsp f9, f9
+; CHECK-P8-NEXT:    xxmrghd vs2, vs2, vs5
+; CHECK-P8-NEXT:    xscvsxdsp f10, f10
+; CHECK-P8-NEXT:    xscvsxdsp f11, f11
+; CHECK-P8-NEXT:    xscvsxdsp f12, f12
+; CHECK-P8-NEXT:    xscvsxdsp f13, f13
+; CHECK-P8-NEXT:    xxmrghd vs5, vs7, vs6
+; CHECK-P8-NEXT:    xscvsxdsp f1, v2
+; CHECK-P8-NEXT:    xscvsxdsp f4, v3
+; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P8-NEXT:    xxmrghd vs0, vs9, vs8
+; CHECK-P8-NEXT:    xvcvdpsp v3, vs3
+; CHECK-P8-NEXT:    xxmrghd vs3, vs11, vs10
+; CHECK-P8-NEXT:    xvcvdpsp v4, vs2
+; CHECK-P8-NEXT:    xxmrghd vs2, vs13, vs12
+; CHECK-P8-NEXT:    xvcvdpsp v5, vs5
+; CHECK-P8-NEXT:    xvcvdpsp v0, vs0
+; CHECK-P8-NEXT:    xxmrghd vs1, vs4, vs1
+; CHECK-P8-NEXT:    xvcvdpsp v1, vs3
+; CHECK-P8-NEXT:    xvcvdpsp v6, vs2
+; CHECK-P8-NEXT:    vmrgew v2, v3, v2
+; CHECK-P8-NEXT:    xvcvdpsp v7, vs1
+; CHECK-P8-NEXT:    vmrgew v3, v5, v4
+; CHECK-P8-NEXT:    vmrgew v4, v1, v0
+; CHECK-P8-NEXT:    stvx v2, r3, r5
+; CHECK-P8-NEXT:    vmrgew v5, v7, v6
+; CHECK-P8-NEXT:    stvx v3, r3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    stvx v4, 0, r3
+; CHECK-P8-NEXT:    stvx v5, r3, r4
+; CHECK-P8-NEXT:    blr
+; P9 expectations (presumably the pwr9 little-endian run; the RUN lines are outside this excerpt): sixteen vextubrx extractions, with r25-r30 and f30/f31 spilled and reloaded around the sequence.
+; CHECK-P9-LABEL: test16elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    std r25, -72(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r26, -64(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r27, -56(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r28, -48(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r29, -40(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    li r4, 0
+; CHECK-P9-NEXT:    std r30, -32(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    li r5, 2
+; CHECK-P9-NEXT:    li r6, 1
+; CHECK-P9-NEXT:    li r7, 3
+; CHECK-P9-NEXT:    li r8, 4
+; CHECK-P9-NEXT:    li r9, 6
+; CHECK-P9-NEXT:    li r10, 5
+; CHECK-P9-NEXT:    li r11, 7
+; CHECK-P9-NEXT:    li r12, 8
+; CHECK-P9-NEXT:    li r0, 10
+; CHECK-P9-NEXT:    li r30, 9
+; CHECK-P9-NEXT:    li r29, 11
+; CHECK-P9-NEXT:    li r28, 12
+; CHECK-P9-NEXT:    li r27, 14
+; CHECK-P9-NEXT:    li r26, 13
+; CHECK-P9-NEXT:    li r25, 15
+; CHECK-P9-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    vextubrx r4, r4, v2
+; CHECK-P9-NEXT:    vextubrx r5, r5, v2
+; CHECK-P9-NEXT:    vextubrx r6, r6, v2
+; CHECK-P9-NEXT:    vextubrx r7, r7, v2
+; CHECK-P9-NEXT:    vextubrx r8, r8, v2
+; CHECK-P9-NEXT:    vextubrx r9, r9, v2
+; CHECK-P9-NEXT:    vextubrx r10, r10, v2
+; CHECK-P9-NEXT:    vextubrx r11, r11, v2
+; CHECK-P9-NEXT:    vextubrx r12, r12, v2
+; CHECK-P9-NEXT:    vextubrx r0, r0, v2
+; CHECK-P9-NEXT:    vextubrx r30, r30, v2
+; CHECK-P9-NEXT:    vextubrx r29, r29, v2
+; CHECK-P9-NEXT:    vextubrx r28, r28, v2
+; CHECK-P9-NEXT:    vextubrx r27, r27, v2
+; CHECK-P9-NEXT:    vextubrx r26, r26, v2
+; CHECK-P9-NEXT:    vextubrx r25, r25, v2
+; CHECK-P9-NEXT:    extsb r4, r4
+; CHECK-P9-NEXT:    extsb r5, r5
+; CHECK-P9-NEXT:    extsb r6, r6
+; CHECK-P9-NEXT:    extsb r7, r7
+; CHECK-P9-NEXT:    extsb r8, r8
+; CHECK-P9-NEXT:    extsb r9, r9
+; CHECK-P9-NEXT:    extsb r10, r10
+; CHECK-P9-NEXT:    extsb r11, r11
+; CHECK-P9-NEXT:    extsb r12, r12
+; CHECK-P9-NEXT:    extsb r0, r0
+; CHECK-P9-NEXT:    extsb r30, r30
+; CHECK-P9-NEXT:    extsb r29, r29
+; CHECK-P9-NEXT:    extsb r28, r28
+; CHECK-P9-NEXT:    extsb r27, r27
+; CHECK-P9-NEXT:    extsb r26, r26
+; CHECK-P9-NEXT:    extsb r25, r25
+; CHECK-P9-NEXT:    mtvsrwa f0, r4
+; CHECK-P9-NEXT:    mtvsrwa f1, r5
+; CHECK-P9-NEXT:    mtvsrwa f2, r6
+; CHECK-P9-NEXT:    mtvsrwa f3, r7
+; CHECK-P9-NEXT:    mtvsrwa f4, r8
+; CHECK-P9-NEXT:    mtvsrwa f5, r9
+; CHECK-P9-NEXT:    mtvsrwa f6, r10
+; CHECK-P9-NEXT:    mtvsrwa f7, r11
+; CHECK-P9-NEXT:    mtvsrwa f8, r12
+; CHECK-P9-NEXT:    mtvsrwa f9, r0
+; CHECK-P9-NEXT:    mtvsrwa f10, r30
+; CHECK-P9-NEXT:    ld r30, -32(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwa f11, r29
+; CHECK-P9-NEXT:    ld r29, -40(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwa f12, r28
+; CHECK-P9-NEXT:    ld r28, -48(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwa f13, r27
+; CHECK-P9-NEXT:    ld r27, -56(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwa v2, r26
+; CHECK-P9-NEXT:    ld r26, -64(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwa v3, r25
+; CHECK-P9-NEXT:    ld r25, -72(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    xscvsxdsp f0, f0
+; CHECK-P9-NEXT:    xscvsxdsp f1, f1
+; CHECK-P9-NEXT:    xscvsxdsp f2, f2
+; CHECK-P9-NEXT:    xscvsxdsp f3, f3
+; CHECK-P9-NEXT:    xscvsxdsp f4, f4
+; CHECK-P9-NEXT:    xscvsxdsp f5, f5
+; CHECK-P9-NEXT:    xscvsxdsp f6, f6
+; CHECK-P9-NEXT:    xscvsxdsp f7, f7
+; CHECK-P9-NEXT:    xscvsxdsp f8, f8
+; CHECK-P9-NEXT:    xscvsxdsp f9, f9
+; CHECK-P9-NEXT:    xscvsxdsp f10, f10
+; CHECK-P9-NEXT:    xscvsxdsp f11, f11
+; CHECK-P9-NEXT:    xscvsxdsp f12, f12
+; CHECK-P9-NEXT:    xscvsxdsp f13, f13
+; CHECK-P9-NEXT:    xscvsxdsp f31, v2
+; CHECK-P9-NEXT:    xscvsxdsp f30, v3
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-P9-NEXT:    xxmrghd vs4, vs9, vs8
+; CHECK-P9-NEXT:    xxmrghd vs5, vs11, vs10
+; CHECK-P9-NEXT:    xxmrghd vs6, vs13, vs12
+; CHECK-P9-NEXT:    xxmrghd vs7, vs30, vs31
+; CHECK-P9-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    xvcvdpsp v2, vs0
+; CHECK-P9-NEXT:    xvcvdpsp v3, vs1
+; CHECK-P9-NEXT:    xvcvdpsp v4, vs2
+; CHECK-P9-NEXT:    xvcvdpsp v5, vs3
+; CHECK-P9-NEXT:    xvcvdpsp v0, vs4
+; CHECK-P9-NEXT:    xvcvdpsp v1, vs5
+; CHECK-P9-NEXT:    xvcvdpsp v6, vs6
+; CHECK-P9-NEXT:    xvcvdpsp v7, vs7
+; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    vmrgew v3, v5, v4
+; CHECK-P9-NEXT:    vmrgew v4, v1, v0
+; CHECK-P9-NEXT:    vmrgew v5, v7, v6
+; CHECK-P9-NEXT:    stxv v3, 16(r3)
+; CHECK-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-P9-NEXT:    stxv v5, 48(r3)
+; CHECK-P9-NEXT:    stxv v4, 32(r3)
+; CHECK-P9-NEXT:    blr
+; BE expectations (presumably the big-endian run; the RUN lines are outside this excerpt): same shape as the P9 output, but bytes are extracted with vextublx and a different index order.
+; CHECK-BE-LABEL: test16elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    std r25, -72(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r26, -64(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r27, -56(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r28, -48(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r29, -40(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    li r4, 3
+; CHECK-BE-NEXT:    std r30, -32(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    li r5, 1
+; CHECK-BE-NEXT:    li r6, 2
+; CHECK-BE-NEXT:    li r7, 0
+; CHECK-BE-NEXT:    li r8, 7
+; CHECK-BE-NEXT:    li r9, 5
+; CHECK-BE-NEXT:    li r10, 6
+; CHECK-BE-NEXT:    li r11, 4
+; CHECK-BE-NEXT:    li r12, 11
+; CHECK-BE-NEXT:    li r0, 9
+; CHECK-BE-NEXT:    li r30, 10
+; CHECK-BE-NEXT:    li r29, 8
+; CHECK-BE-NEXT:    li r28, 15
+; CHECK-BE-NEXT:    li r27, 13
+; CHECK-BE-NEXT:    li r26, 14
+; CHECK-BE-NEXT:    li r25, 12
+; CHECK-BE-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    vextublx r4, r4, v2
+; CHECK-BE-NEXT:    vextublx r5, r5, v2
+; CHECK-BE-NEXT:    vextublx r6, r6, v2
+; CHECK-BE-NEXT:    vextublx r7, r7, v2
+; CHECK-BE-NEXT:    vextublx r8, r8, v2
+; CHECK-BE-NEXT:    vextublx r9, r9, v2
+; CHECK-BE-NEXT:    vextublx r10, r10, v2
+; CHECK-BE-NEXT:    vextublx r11, r11, v2
+; CHECK-BE-NEXT:    vextublx r12, r12, v2
+; CHECK-BE-NEXT:    vextublx r0, r0, v2
+; CHECK-BE-NEXT:    vextublx r30, r30, v2
+; CHECK-BE-NEXT:    vextublx r29, r29, v2
+; CHECK-BE-NEXT:    vextublx r28, r28, v2
+; CHECK-BE-NEXT:    vextublx r27, r27, v2
+; CHECK-BE-NEXT:    vextublx r26, r26, v2
+; CHECK-BE-NEXT:    vextublx r25, r25, v2
+; CHECK-BE-NEXT:    extsb r4, r4
+; CHECK-BE-NEXT:    extsb r5, r5
+; CHECK-BE-NEXT:    extsb r6, r6
+; CHECK-BE-NEXT:    extsb r7, r7
+; CHECK-BE-NEXT:    extsb r8, r8
+; CHECK-BE-NEXT:    extsb r9, r9
+; CHECK-BE-NEXT:    extsb r10, r10
+; CHECK-BE-NEXT:    extsb r11, r11
+; CHECK-BE-NEXT:    extsb r12, r12
+; CHECK-BE-NEXT:    extsb r0, r0
+; CHECK-BE-NEXT:    extsb r30, r30
+; CHECK-BE-NEXT:    extsb r29, r29
+; CHECK-BE-NEXT:    extsb r28, r28
+; CHECK-BE-NEXT:    extsb r27, r27
+; CHECK-BE-NEXT:    extsb r26, r26
+; CHECK-BE-NEXT:    extsb r25, r25
+; CHECK-BE-NEXT:    mtvsrwa f0, r4
+; CHECK-BE-NEXT:    mtvsrwa f1, r5
+; CHECK-BE-NEXT:    mtvsrwa f2, r6
+; CHECK-BE-NEXT:    mtvsrwa f3, r7
+; CHECK-BE-NEXT:    mtvsrwa f4, r8
+; CHECK-BE-NEXT:    mtvsrwa f5, r9
+; CHECK-BE-NEXT:    mtvsrwa f6, r10
+; CHECK-BE-NEXT:    mtvsrwa f7, r11
+; CHECK-BE-NEXT:    mtvsrwa f8, r12
+; CHECK-BE-NEXT:    mtvsrwa f9, r0
+; CHECK-BE-NEXT:    mtvsrwa f10, r30
+; CHECK-BE-NEXT:    ld r30, -32(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwa f11, r29
+; CHECK-BE-NEXT:    ld r29, -40(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwa f12, r28
+; CHECK-BE-NEXT:    ld r28, -48(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwa f13, r27
+; CHECK-BE-NEXT:    ld r27, -56(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwa v2, r26
+; CHECK-BE-NEXT:    ld r26, -64(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwa v3, r25
+; CHECK-BE-NEXT:    ld r25, -72(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    xscvsxdsp f0, f0
+; CHECK-BE-NEXT:    xscvsxdsp f1, f1
+; CHECK-BE-NEXT:    xscvsxdsp f2, f2
+; CHECK-BE-NEXT:    xscvsxdsp f3, f3
+; CHECK-BE-NEXT:    xscvsxdsp f4, f4
+; CHECK-BE-NEXT:    xscvsxdsp f5, f5
+; CHECK-BE-NEXT:    xscvsxdsp f6, f6
+; CHECK-BE-NEXT:    xscvsxdsp f7, f7
+; CHECK-BE-NEXT:    xscvsxdsp f8, f8
+; CHECK-BE-NEXT:    xscvsxdsp f9, f9
+; CHECK-BE-NEXT:    xscvsxdsp f10, f10
+; CHECK-BE-NEXT:    xscvsxdsp f11, f11
+; CHECK-BE-NEXT:    xscvsxdsp f12, f12
+; CHECK-BE-NEXT:    xscvsxdsp f13, f13
+; CHECK-BE-NEXT:    xscvsxdsp f31, v2
+; CHECK-BE-NEXT:    xscvsxdsp f30, v3
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-BE-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-BE-NEXT:    xxmrghd vs4, vs9, vs8
+; CHECK-BE-NEXT:    xxmrghd vs5, vs11, vs10
+; CHECK-BE-NEXT:    xxmrghd vs6, vs13, vs12
+; CHECK-BE-NEXT:    xxmrghd vs7, vs30, vs31
+; CHECK-BE-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
+; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
+; CHECK-BE-NEXT:    xvcvdpsp v4, vs2
+; CHECK-BE-NEXT:    xvcvdpsp v5, vs3
+; CHECK-BE-NEXT:    xvcvdpsp v0, vs4
+; CHECK-BE-NEXT:    xvcvdpsp v1, vs5
+; CHECK-BE-NEXT:    xvcvdpsp v6, vs6
+; CHECK-BE-NEXT:    xvcvdpsp v7, vs7
+; CHECK-BE-NEXT:    vmrgew v2, v3, v2
+; CHECK-BE-NEXT:    vmrgew v3, v5, v4
+; CHECK-BE-NEXT:    vmrgew v4, v1, v0
+; CHECK-BE-NEXT:    vmrgew v5, v7, v6
+; CHECK-BE-NEXT:    stxv v3, 16(r3)
+; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    stxv v5, 48(r3)
+; CHECK-BE-NEXT:    stxv v4, 32(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = sitofp <16 x i8> %a to <16 x float> ; signed conversion: the checks above show extsb followed by xscvsxdsp per lane
+  store <16 x float> %0, <16 x float>* %agg.result, align 64 ; 64-byte result; the checks above show four 16-byte vector stores
+  ret void
+}

Added: llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll?rev=347090&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll Fri Nov 16 12:24:10 2018
@@ -0,0 +1,1322 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P8
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P9
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE
+
+define <2 x double> @test2elt(i16 %a.coerce) local_unnamed_addr #0 { ; unsigned <2 x i8> (packed in %a.coerce) to <2 x double>, returned by value
+; CHECK-P8-LABEL: test2elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    clrldi r4, r3, 56
+; CHECK-P8-NEXT:    rldicl r3, r3, 56, 56
+; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-P8-NEXT:    rlwinm r3, r3, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f0, r4
+; CHECK-P8-NEXT:    mtvsrwz f1, r3
+; CHECK-P8-NEXT:    xscvuxddp f0, f0
+; CHECK-P8-NEXT:    xscvuxddp f1, f1
+; CHECK-P8-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-P8-NEXT:    blr
+; P9 expectations (pwr9, powerpc64le per the RUN lines): each byte is extracted with vextubrx and masked to 8 bits with rlwinm before the scalar conversion.
+; CHECK-P9-LABEL: test2elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrws v2, r3
+; CHECK-P9-NEXT:    li r3, 0
+; CHECK-P9-NEXT:    li r4, 1
+; CHECK-P9-NEXT:    vextubrx r3, r3, v2
+; CHECK-P9-NEXT:    vextubrx r4, r4, v2
+; CHECK-P9-NEXT:    rlwinm r3, r3, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-P9-NEXT:    mtvsrwz f0, r3
+; CHECK-P9-NEXT:    mtvsrwz f1, r4
+; CHECK-P9-NEXT:    xscvuxddp f0, f0
+; CHECK-P9-NEXT:    xscvuxddp f1, f1
+; CHECK-P9-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-P9-NEXT:    blr
+; BE expectations (pwr9, powerpc64 per the RUN lines): same sequence using vextublx, with the two byte indices swapped.
+; CHECK-BE-LABEL: test2elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrws v2, r3
+; CHECK-BE-NEXT:    li r3, 1
+; CHECK-BE-NEXT:    li r4, 0
+; CHECK-BE-NEXT:    vextublx r3, r3, v2
+; CHECK-BE-NEXT:    vextublx r4, r4, v2
+; CHECK-BE-NEXT:    rlwinm r3, r3, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-BE-NEXT:    mtvsrwz f0, r3
+; CHECK-BE-NEXT:    mtvsrwz f1, r4
+; CHECK-BE-NEXT:    xscvuxddp f0, f0
+; CHECK-BE-NEXT:    xscvuxddp f1, f1
+; CHECK-BE-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i16 %a.coerce to <2 x i8> ; reinterpret the 16-bit argument as two i8 lanes
+  %1 = uitofp <2 x i8> %0 to <2 x double> ; unsigned conversion: the checks above show mtvsrwz followed by xscvuxddp per lane
+  ret <2 x double> %1
+}
+
+define void @test4elt(<4 x double>* noalias nocapture sret %agg.result, i32 %a.coerce) local_unnamed_addr #1 { ; unsigned <4 x i8> (packed in %a.coerce) to <4 x double>, written to %agg.result
+; CHECK-P8-LABEL: test4elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r4
+; CHECK-P8-NEXT:    mfvsrd r4, f0
+; CHECK-P8-NEXT:    clrldi r5, r4, 56
+; CHECK-P8-NEXT:    rldicl r6, r4, 56, 56
+; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f0, r5
+; CHECK-P8-NEXT:    rldicl r5, r4, 48, 56
+; CHECK-P8-NEXT:    rldicl r4, r4, 40, 56
+; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f1, r6
+; CHECK-P8-NEXT:    mtvsrwz f2, r5
+; CHECK-P8-NEXT:    mtvsrwz f3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    xscvuxddp f0, f0
+; CHECK-P8-NEXT:    xscvuxddp f1, f1
+; CHECK-P8-NEXT:    xscvuxddp f2, f2
+; CHECK-P8-NEXT:    xscvuxddp f3, f3
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    blr
+; P9 expectations (pwr9, powerpc64le per the RUN lines): four vextubrx extractions, each masked to 8 bits with rlwinm, converted as scalars and merged pairwise.
+; CHECK-P9-LABEL: test4elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrws v2, r4
+; CHECK-P9-NEXT:    li r4, 0
+; CHECK-P9-NEXT:    li r5, 1
+; CHECK-P9-NEXT:    li r6, 2
+; CHECK-P9-NEXT:    li r7, 3
+; CHECK-P9-NEXT:    vextubrx r4, r4, v2
+; CHECK-P9-NEXT:    vextubrx r5, r5, v2
+; CHECK-P9-NEXT:    vextubrx r6, r6, v2
+; CHECK-P9-NEXT:    vextubrx r7, r7, v2
+; CHECK-P9-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r7, r7, 0, 24, 31
+; CHECK-P9-NEXT:    mtvsrwz f0, r4
+; CHECK-P9-NEXT:    mtvsrwz f1, r5
+; CHECK-P9-NEXT:    mtvsrwz f2, r6
+; CHECK-P9-NEXT:    mtvsrwz f3, r7
+; CHECK-P9-NEXT:    xscvuxddp f0, f0
+; CHECK-P9-NEXT:    xscvuxddp f1, f1
+; CHECK-P9-NEXT:    xscvuxddp f2, f2
+; CHECK-P9-NEXT:    xscvuxddp f3, f3
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    blr
+; BE expectations (pwr9, powerpc64 per the RUN lines): same sequence using vextublx, with the byte indices swapped within each pair (1, 0, 3, 2).
+; CHECK-BE-LABEL: test4elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrws v2, r4
+; CHECK-BE-NEXT:    li r4, 1
+; CHECK-BE-NEXT:    li r5, 0
+; CHECK-BE-NEXT:    li r6, 3
+; CHECK-BE-NEXT:    li r7, 2
+; CHECK-BE-NEXT:    vextublx r4, r4, v2
+; CHECK-BE-NEXT:    vextublx r5, r5, v2
+; CHECK-BE-NEXT:    vextublx r6, r6, v2
+; CHECK-BE-NEXT:    vextublx r7, r7, v2
+; CHECK-BE-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r7, r7, 0, 24, 31
+; CHECK-BE-NEXT:    mtvsrwz f0, r4
+; CHECK-BE-NEXT:    mtvsrwz f1, r5
+; CHECK-BE-NEXT:    mtvsrwz f2, r6
+; CHECK-BE-NEXT:    mtvsrwz f3, r7
+; CHECK-BE-NEXT:    xscvuxddp f0, f0
+; CHECK-BE-NEXT:    xscvuxddp f1, f1
+; CHECK-BE-NEXT:    xscvuxddp f2, f2
+; CHECK-BE-NEXT:    xscvuxddp f3, f3
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i32 %a.coerce to <4 x i8> ; reinterpret the 32-bit argument as four i8 lanes
+  %1 = uitofp <4 x i8> %0 to <4 x double> ; unsigned conversion: the checks above show mtvsrwz followed by xscvuxddp per lane
+  store <4 x double> %1, <4 x double>* %agg.result, align 32 ; 32-byte result; the checks above show two 16-byte vector stores
+  ret void
+}
+
+define void @test8elt(<8 x double>* noalias nocapture sret %agg.result, i64 %a.coerce) local_unnamed_addr #1 { ; unsigned <8 x i8> (packed in %a.coerce) to <8 x double>, written to %agg.result
+; CHECK-P8-LABEL: test8elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r4
+; CHECK-P8-NEXT:    mfvsrd r4, f0
+; CHECK-P8-NEXT:    clrldi r5, r4, 56
+; CHECK-P8-NEXT:    rldicl r6, r4, 56, 56
+; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f0, r5
+; CHECK-P8-NEXT:    rldicl r5, r4, 48, 56
+; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f1, r6
+; CHECK-P8-NEXT:    rldicl r6, r4, 40, 56
+; CHECK-P8-NEXT:    mtvsrwz f2, r5
+; CHECK-P8-NEXT:    rldicl r5, r4, 32, 56
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f3, r6
+; CHECK-P8-NEXT:    rldicl r6, r4, 24, 56
+; CHECK-P8-NEXT:    mtvsrwz f4, r5
+; CHECK-P8-NEXT:    rldicl r5, r4, 16, 56
+; CHECK-P8-NEXT:    rldicl r4, r4, 8, 56
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f5, r6
+; CHECK-P8-NEXT:    mtvsrwz f6, r5
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    mtvsrwz f7, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    xscvuxddp f4, f4
+; CHECK-P8-NEXT:    xscvuxddp f5, f5
+; CHECK-P8-NEXT:    xscvuxddp f6, f6
+; CHECK-P8-NEXT:    xscvuxddp f7, f7
+; CHECK-P8-NEXT:    xscvuxddp f0, f0
+; CHECK-P8-NEXT:    xscvuxddp f1, f1
+; CHECK-P8-NEXT:    xscvuxddp f2, f2
+; CHECK-P8-NEXT:    xscvuxddp f3, f3
+; CHECK-P8-NEXT:    xxmrghd vs4, vs5, vs4
+; CHECK-P8-NEXT:    xxmrghd vs5, vs7, vs6
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P8-NEXT:    xxswapd vs2, vs5
+; CHECK-P8-NEXT:    xxswapd vs3, vs4
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    blr
+; P9 expectations (pwr9, powerpc64le per the RUN lines): eight vextubrx extractions, each masked to 8 bits with rlwinm, converted as scalars and merged pairwise.
+; CHECK-P9-LABEL: test8elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrd f0, r4
+; CHECK-P9-NEXT:    li r4, 0
+; CHECK-P9-NEXT:    li r5, 1
+; CHECK-P9-NEXT:    li r6, 2
+; CHECK-P9-NEXT:    li r7, 3
+; CHECK-P9-NEXT:    li r8, 4
+; CHECK-P9-NEXT:    li r9, 5
+; CHECK-P9-NEXT:    li r10, 6
+; CHECK-P9-NEXT:    li r11, 7
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    vextubrx r4, r4, v2
+; CHECK-P9-NEXT:    vextubrx r5, r5, v2
+; CHECK-P9-NEXT:    vextubrx r6, r6, v2
+; CHECK-P9-NEXT:    vextubrx r7, r7, v2
+; CHECK-P9-NEXT:    vextubrx r8, r8, v2
+; CHECK-P9-NEXT:    vextubrx r9, r9, v2
+; CHECK-P9-NEXT:    vextubrx r10, r10, v2
+; CHECK-P9-NEXT:    vextubrx r11, r11, v2
+; CHECK-P9-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r7, r7, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r8, r8, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r9, r9, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r10, r10, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r11, r11, 0, 24, 31
+; CHECK-P9-NEXT:    mtvsrwz f0, r4
+; CHECK-P9-NEXT:    mtvsrwz f1, r5
+; CHECK-P9-NEXT:    mtvsrwz f2, r6
+; CHECK-P9-NEXT:    mtvsrwz f3, r7
+; CHECK-P9-NEXT:    mtvsrwz f4, r8
+; CHECK-P9-NEXT:    mtvsrwz f5, r9
+; CHECK-P9-NEXT:    mtvsrwz f6, r10
+; CHECK-P9-NEXT:    mtvsrwz f7, r11
+; CHECK-P9-NEXT:    xscvuxddp f0, f0
+; CHECK-P9-NEXT:    xscvuxddp f1, f1
+; CHECK-P9-NEXT:    xscvuxddp f2, f2
+; CHECK-P9-NEXT:    xscvuxddp f3, f3
+; CHECK-P9-NEXT:    xscvuxddp f4, f4
+; CHECK-P9-NEXT:    xscvuxddp f5, f5
+; CHECK-P9-NEXT:    xscvuxddp f6, f6
+; CHECK-P9-NEXT:    xscvuxddp f7, f7
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    blr
+; BE expectations (pwr9, powerpc64 per the RUN lines): same sequence using vextublx, with the byte indices swapped within each pair (1, 0, 3, 2, 5, 4, 7, 6).
+; CHECK-BE-LABEL: test8elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    li r5, 1
+; CHECK-BE-NEXT:    mtvsrd v2, r4
+; CHECK-BE-NEXT:    li r4, 0
+; CHECK-BE-NEXT:    li r6, 3
+; CHECK-BE-NEXT:    li r7, 2
+; CHECK-BE-NEXT:    li r8, 5
+; CHECK-BE-NEXT:    li r9, 4
+; CHECK-BE-NEXT:    li r10, 7
+; CHECK-BE-NEXT:    li r11, 6
+; CHECK-BE-NEXT:    vextublx r5, r5, v2
+; CHECK-BE-NEXT:    vextublx r4, r4, v2
+; CHECK-BE-NEXT:    vextublx r6, r6, v2
+; CHECK-BE-NEXT:    vextublx r7, r7, v2
+; CHECK-BE-NEXT:    vextublx r8, r8, v2
+; CHECK-BE-NEXT:    vextublx r9, r9, v2
+; CHECK-BE-NEXT:    vextublx r10, r10, v2
+; CHECK-BE-NEXT:    vextublx r11, r11, v2
+; CHECK-BE-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r7, r7, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r8, r8, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r9, r9, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r10, r10, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r11, r11, 0, 24, 31
+; CHECK-BE-NEXT:    mtvsrwz f0, r5
+; CHECK-BE-NEXT:    mtvsrwz f1, r4
+; CHECK-BE-NEXT:    mtvsrwz f2, r6
+; CHECK-BE-NEXT:    mtvsrwz f3, r7
+; CHECK-BE-NEXT:    mtvsrwz f4, r8
+; CHECK-BE-NEXT:    mtvsrwz f5, r9
+; CHECK-BE-NEXT:    mtvsrwz f6, r10
+; CHECK-BE-NEXT:    mtvsrwz f7, r11
+; CHECK-BE-NEXT:    xscvuxddp f0, f0
+; CHECK-BE-NEXT:    xscvuxddp f1, f1
+; CHECK-BE-NEXT:    xscvuxddp f2, f2
+; CHECK-BE-NEXT:    xscvuxddp f3, f3
+; CHECK-BE-NEXT:    xscvuxddp f4, f4
+; CHECK-BE-NEXT:    xscvuxddp f5, f5
+; CHECK-BE-NEXT:    xscvuxddp f6, f6
+; CHECK-BE-NEXT:    xscvuxddp f7, f7
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-BE-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i64 %a.coerce to <8 x i8> ; reinterpret the 64-bit argument as eight i8 lanes
+  %1 = uitofp <8 x i8> %0 to <8 x double> ; unsigned conversion: the checks above show mtvsrwz followed by xscvuxddp per lane
+  store <8 x double> %1, <8 x double>* %agg.result, align 64 ; 64-byte result; the checks above show four 16-byte vector stores
+  ret void
+}
+
+; test16elt: unsigned conversion (uitofp) of a full <16 x i8> vector argument
+; to <16 x double>. The 128-byte result is written through the sret pointer
+; %agg.result rather than returned in registers.
+; In each expected sequence below every byte is isolated in a GPR and masked
+; to its low 8 bits (rlwinm rN, rN, 0, 24, 31), moved to a VSX register with
+; mtvsrwz, converted by the scalar xscvuxddp, and then pairs of doubles are
+; combined with xxmrghd before the vector stores.
+; NOTE(review): the CHECK-P8 / CHECK-P9 / CHECK-BE prefixes presumably map to
+; Power8-LE, Power9-LE and big-endian RUN lines, which are outside this view;
+; confirm against the RUN lines at the top of the test file.
+; NOTE(review): the CHECK lines look tool-generated (update_llc_test_checks.py
+; style); regenerate them instead of editing by hand - confirm.
+define void @test16elt(<16 x double>* noalias nocapture sret %agg.result, <16 x i8> %a) local_unnamed_addr #2 {
+; CHECK-P8-LABEL: test16elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mfvsrd r5, v2
+; CHECK-P8-NEXT:    xxswapd vs2, v2
+; CHECK-P8-NEXT:    li r4, 112
+; CHECK-P8-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-P8-NEXT:    clrldi r6, r5, 56
+; CHECK-P8-NEXT:    rldicl r7, r5, 56, 56
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-P8-NEXT:    rlwinm r7, r7, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f0, r6
+; CHECK-P8-NEXT:    rldicl r6, r5, 40, 56
+; CHECK-P8-NEXT:    rldicl r8, r5, 48, 56
+; CHECK-P8-NEXT:    mtvsrwz f1, r7
+; CHECK-P8-NEXT:    rldicl r7, r5, 32, 56
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-P8-NEXT:    rlwinm r8, r8, 0, 24, 31
+; CHECK-P8-NEXT:    rlwinm r7, r7, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f4, r6
+; CHECK-P8-NEXT:    rldicl r6, r5, 24, 56
+; CHECK-P8-NEXT:    mtvsrwz f3, r8
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f5, r7
+; CHECK-P8-NEXT:    rldicl r7, r5, 16, 56
+; CHECK-P8-NEXT:    rldicl r5, r5, 8, 56
+; CHECK-P8-NEXT:    mfvsrd r8, f2
+; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f2, r6
+; CHECK-P8-NEXT:    rlwinm r6, r7, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f6, r6
+; CHECK-P8-NEXT:    clrldi r6, r8, 56
+; CHECK-P8-NEXT:    mtvsrwz f7, r5
+; CHECK-P8-NEXT:    rldicl r5, r8, 56, 56
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f8, r6
+; CHECK-P8-NEXT:    rldicl r6, r8, 48, 56
+; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f9, r5
+; CHECK-P8-NEXT:    rldicl r5, r8, 40, 56
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f10, r6
+; CHECK-P8-NEXT:    rldicl r6, r8, 32, 56
+; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f11, r5
+; CHECK-P8-NEXT:    rldicl r5, r8, 24, 56
+; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz f12, r6
+; CHECK-P8-NEXT:    rldicl r6, r8, 16, 56
+; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-P8-NEXT:    xscvuxddp f6, f6
+; CHECK-P8-NEXT:    xscvuxddp f7, f7
+; CHECK-P8-NEXT:    mtvsrwz f13, r5
+; CHECK-P8-NEXT:    rlwinm r5, r6, 0, 24, 31
+; CHECK-P8-NEXT:    mtvsrwz v2, r5
+; CHECK-P8-NEXT:    rldicl r5, r8, 8, 56
+; CHECK-P8-NEXT:    xscvuxddp f5, f5
+; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-P8-NEXT:    xscvuxddp f2, f2
+; CHECK-P8-NEXT:    xscvuxddp f0, f0
+; CHECK-P8-NEXT:    xscvuxddp f1, f1
+; CHECK-P8-NEXT:    xxmrghd vs6, vs7, vs6
+; CHECK-P8-NEXT:    mtvsrwz v3, r5
+; CHECK-P8-NEXT:    li r5, 64
+; CHECK-P8-NEXT:    xscvuxddp f3, f3
+; CHECK-P8-NEXT:    xscvuxddp f4, f4
+; CHECK-P8-NEXT:    xscvuxddp f31, v2
+; CHECK-P8-NEXT:    xxmrghd vs2, vs2, vs5
+; CHECK-P8-NEXT:    xscvuxddp f7, v3
+; CHECK-P8-NEXT:    xscvuxddp f8, f8
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    xscvuxddp f9, f9
+; CHECK-P8-NEXT:    xxswapd vs1, vs6
+; CHECK-P8-NEXT:    xscvuxddp f10, f10
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xscvuxddp f12, f12
+; CHECK-P8-NEXT:    xxmrghd vs3, vs4, vs3
+; CHECK-P8-NEXT:    xscvuxddp f13, f13
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    li r4, 96
+; CHECK-P8-NEXT:    xscvuxddp f11, f11
+; CHECK-P8-NEXT:    xxmrghd vs6, vs7, vs31
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxmrghd vs4, vs9, vs8
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    li r4, 80
+; CHECK-P8-NEXT:    xxswapd vs2, vs6
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    xxmrghd vs5, vs13, vs12
+; CHECK-P8-NEXT:    xxswapd vs4, vs4
+; CHECK-P8-NEXT:    xxmrghd vs1, vs11, vs10
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    xxswapd vs5, vs5
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    stxvd2x vs5, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs4, 0, r3
+; CHECK-P8-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    std r25, -72(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r26, -64(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r27, -56(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r28, -48(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r29, -40(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    li r4, 0
+; CHECK-P9-NEXT:    std r30, -32(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    li r5, 1
+; CHECK-P9-NEXT:    li r6, 2
+; CHECK-P9-NEXT:    li r7, 3
+; CHECK-P9-NEXT:    li r8, 4
+; CHECK-P9-NEXT:    li r9, 5
+; CHECK-P9-NEXT:    li r10, 6
+; CHECK-P9-NEXT:    li r11, 7
+; CHECK-P9-NEXT:    li r12, 8
+; CHECK-P9-NEXT:    li r0, 9
+; CHECK-P9-NEXT:    li r30, 10
+; CHECK-P9-NEXT:    li r29, 11
+; CHECK-P9-NEXT:    li r28, 12
+; CHECK-P9-NEXT:    li r27, 13
+; CHECK-P9-NEXT:    li r26, 14
+; CHECK-P9-NEXT:    li r25, 15
+; CHECK-P9-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    vextubrx r4, r4, v2
+; CHECK-P9-NEXT:    vextubrx r5, r5, v2
+; CHECK-P9-NEXT:    vextubrx r6, r6, v2
+; CHECK-P9-NEXT:    vextubrx r7, r7, v2
+; CHECK-P9-NEXT:    vextubrx r8, r8, v2
+; CHECK-P9-NEXT:    vextubrx r9, r9, v2
+; CHECK-P9-NEXT:    vextubrx r10, r10, v2
+; CHECK-P9-NEXT:    vextubrx r11, r11, v2
+; CHECK-P9-NEXT:    vextubrx r12, r12, v2
+; CHECK-P9-NEXT:    vextubrx r0, r0, v2
+; CHECK-P9-NEXT:    vextubrx r30, r30, v2
+; CHECK-P9-NEXT:    vextubrx r29, r29, v2
+; CHECK-P9-NEXT:    vextubrx r28, r28, v2
+; CHECK-P9-NEXT:    vextubrx r27, r27, v2
+; CHECK-P9-NEXT:    vextubrx r26, r26, v2
+; CHECK-P9-NEXT:    vextubrx r25, r25, v2
+; CHECK-P9-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r7, r7, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r8, r8, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r9, r9, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r10, r10, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r11, r11, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r12, r12, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r0, r0, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r30, r30, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r29, r29, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r28, r28, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r27, r27, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r26, r26, 0, 24, 31
+; CHECK-P9-NEXT:    rlwinm r25, r25, 0, 24, 31
+; CHECK-P9-NEXT:    mtvsrwz f0, r4
+; CHECK-P9-NEXT:    mtvsrwz f1, r5
+; CHECK-P9-NEXT:    mtvsrwz f2, r6
+; CHECK-P9-NEXT:    mtvsrwz f3, r7
+; CHECK-P9-NEXT:    mtvsrwz f4, r8
+; CHECK-P9-NEXT:    mtvsrwz f5, r9
+; CHECK-P9-NEXT:    mtvsrwz f6, r10
+; CHECK-P9-NEXT:    mtvsrwz f7, r11
+; CHECK-P9-NEXT:    mtvsrwz f8, r12
+; CHECK-P9-NEXT:    mtvsrwz f9, r0
+; CHECK-P9-NEXT:    mtvsrwz f10, r30
+; CHECK-P9-NEXT:    ld r30, -32(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwz f11, r29
+; CHECK-P9-NEXT:    ld r29, -40(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwz f12, r28
+; CHECK-P9-NEXT:    ld r28, -48(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwz f13, r27
+; CHECK-P9-NEXT:    ld r27, -56(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwz v2, r26
+; CHECK-P9-NEXT:    ld r26, -64(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwz v3, r25
+; CHECK-P9-NEXT:    ld r25, -72(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    xscvuxddp f0, f0
+; CHECK-P9-NEXT:    xscvuxddp f1, f1
+; CHECK-P9-NEXT:    xscvuxddp f2, f2
+; CHECK-P9-NEXT:    xscvuxddp f3, f3
+; CHECK-P9-NEXT:    xscvuxddp f4, f4
+; CHECK-P9-NEXT:    xscvuxddp f5, f5
+; CHECK-P9-NEXT:    xscvuxddp f6, f6
+; CHECK-P9-NEXT:    xscvuxddp f7, f7
+; CHECK-P9-NEXT:    xscvuxddp f8, f8
+; CHECK-P9-NEXT:    xscvuxddp f9, f9
+; CHECK-P9-NEXT:    xscvuxddp f10, f10
+; CHECK-P9-NEXT:    xscvuxddp f11, f11
+; CHECK-P9-NEXT:    xscvuxddp f12, f12
+; CHECK-P9-NEXT:    xscvuxddp f13, f13
+; CHECK-P9-NEXT:    xscvuxddp f31, v2
+; CHECK-P9-NEXT:    xscvuxddp f30, v3
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-P9-NEXT:    xxmrghd vs4, vs9, vs8
+; CHECK-P9-NEXT:    xxmrghd vs5, vs11, vs10
+; CHECK-P9-NEXT:    xxmrghd vs6, vs13, vs12
+; CHECK-P9-NEXT:    xxmrghd vs7, vs30, vs31
+; CHECK-P9-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    stxv vs4, 64(r3)
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs7, 112(r3)
+; CHECK-P9-NEXT:    stxv vs6, 96(r3)
+; CHECK-P9-NEXT:    stxv vs5, 80(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    std r25, -72(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r26, -64(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r27, -56(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r28, -48(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r29, -40(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    li r4, 1
+; CHECK-BE-NEXT:    std r30, -32(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    li r5, 0
+; CHECK-BE-NEXT:    li r6, 3
+; CHECK-BE-NEXT:    li r7, 2
+; CHECK-BE-NEXT:    li r8, 5
+; CHECK-BE-NEXT:    li r9, 4
+; CHECK-BE-NEXT:    li r10, 7
+; CHECK-BE-NEXT:    li r11, 6
+; CHECK-BE-NEXT:    li r12, 9
+; CHECK-BE-NEXT:    li r0, 8
+; CHECK-BE-NEXT:    li r30, 11
+; CHECK-BE-NEXT:    li r29, 10
+; CHECK-BE-NEXT:    li r28, 13
+; CHECK-BE-NEXT:    li r27, 12
+; CHECK-BE-NEXT:    li r26, 15
+; CHECK-BE-NEXT:    li r25, 14
+; CHECK-BE-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    vextublx r4, r4, v2
+; CHECK-BE-NEXT:    vextublx r5, r5, v2
+; CHECK-BE-NEXT:    vextublx r6, r6, v2
+; CHECK-BE-NEXT:    vextublx r7, r7, v2
+; CHECK-BE-NEXT:    vextublx r8, r8, v2
+; CHECK-BE-NEXT:    vextublx r9, r9, v2
+; CHECK-BE-NEXT:    vextublx r10, r10, v2
+; CHECK-BE-NEXT:    vextublx r11, r11, v2
+; CHECK-BE-NEXT:    vextublx r12, r12, v2
+; CHECK-BE-NEXT:    vextublx r0, r0, v2
+; CHECK-BE-NEXT:    vextublx r30, r30, v2
+; CHECK-BE-NEXT:    vextublx r29, r29, v2
+; CHECK-BE-NEXT:    vextublx r28, r28, v2
+; CHECK-BE-NEXT:    vextublx r27, r27, v2
+; CHECK-BE-NEXT:    vextublx r26, r26, v2
+; CHECK-BE-NEXT:    vextublx r25, r25, v2
+; CHECK-BE-NEXT:    rlwinm r4, r4, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r5, r5, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r6, r6, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r7, r7, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r8, r8, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r9, r9, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r10, r10, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r11, r11, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r12, r12, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r0, r0, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r30, r30, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r29, r29, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r28, r28, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r27, r27, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r26, r26, 0, 24, 31
+; CHECK-BE-NEXT:    rlwinm r25, r25, 0, 24, 31
+; CHECK-BE-NEXT:    mtvsrwz f0, r4
+; CHECK-BE-NEXT:    mtvsrwz f1, r5
+; CHECK-BE-NEXT:    mtvsrwz f2, r6
+; CHECK-BE-NEXT:    mtvsrwz f3, r7
+; CHECK-BE-NEXT:    mtvsrwz f4, r8
+; CHECK-BE-NEXT:    mtvsrwz f5, r9
+; CHECK-BE-NEXT:    mtvsrwz f6, r10
+; CHECK-BE-NEXT:    mtvsrwz f7, r11
+; CHECK-BE-NEXT:    mtvsrwz f8, r12
+; CHECK-BE-NEXT:    mtvsrwz f9, r0
+; CHECK-BE-NEXT:    mtvsrwz f10, r30
+; CHECK-BE-NEXT:    ld r30, -32(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwz f11, r29
+; CHECK-BE-NEXT:    ld r29, -40(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwz f12, r28
+; CHECK-BE-NEXT:    ld r28, -48(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwz f13, r27
+; CHECK-BE-NEXT:    ld r27, -56(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwz v2, r26
+; CHECK-BE-NEXT:    ld r26, -64(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwz v3, r25
+; CHECK-BE-NEXT:    ld r25, -72(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    xscvuxddp f0, f0
+; CHECK-BE-NEXT:    xscvuxddp f1, f1
+; CHECK-BE-NEXT:    xscvuxddp f2, f2
+; CHECK-BE-NEXT:    xscvuxddp f3, f3
+; CHECK-BE-NEXT:    xscvuxddp f4, f4
+; CHECK-BE-NEXT:    xscvuxddp f5, f5
+; CHECK-BE-NEXT:    xscvuxddp f6, f6
+; CHECK-BE-NEXT:    xscvuxddp f7, f7
+; CHECK-BE-NEXT:    xscvuxddp f8, f8
+; CHECK-BE-NEXT:    xscvuxddp f9, f9
+; CHECK-BE-NEXT:    xscvuxddp f10, f10
+; CHECK-BE-NEXT:    xscvuxddp f11, f11
+; CHECK-BE-NEXT:    xscvuxddp f12, f12
+; CHECK-BE-NEXT:    xscvuxddp f13, f13
+; CHECK-BE-NEXT:    xscvuxddp f31, v2
+; CHECK-BE-NEXT:    xscvuxddp f30, v3
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-BE-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-BE-NEXT:    xxmrghd vs4, vs9, vs8
+; CHECK-BE-NEXT:    xxmrghd vs5, vs11, vs10
+; CHECK-BE-NEXT:    xxmrghd vs6, vs13, vs12
+; CHECK-BE-NEXT:    xxmrghd vs7, vs30, vs31
+; CHECK-BE-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs4, 64(r3)
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs7, 112(r3)
+; CHECK-BE-NEXT:    stxv vs6, 96(r3)
+; CHECK-BE-NEXT:    stxv vs5, 80(r3)
+; CHECK-BE-NEXT:    blr
+; IR under test: the vector argument is converted directly (no bitcast is
+; needed here) and the result is stored with 128-byte alignment.
+entry:
+  %0 = uitofp <16 x i8> %a to <16 x double>
+  store <16 x double> %0, <16 x double>* %agg.result, align 128
+  ret void
+}
+
+; test2elt_signed: signed conversion (sitofp) of two i8 lanes, passed packed in
+; an i16 argument, to <2 x double>. The result is returned by value (the
+; expected code leaves it in v2).
+; In each expected sequence below both bytes are extracted into GPRs,
+; sign-extended with extsb, moved to VSX registers with mtvsrwa, converted by
+; the scalar xscvsxddp, and merged into one vector with xxmrghd.
+; NOTE(review): the CHECK-P8 / CHECK-P9 / CHECK-BE prefixes presumably map to
+; Power8-LE, Power9-LE and big-endian RUN lines, which are outside this view;
+; confirm against the RUN lines at the top of the test file.
+define <2 x double> @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 {
+; CHECK-P8-LABEL: test2elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    clrldi r4, r3, 56
+; CHECK-P8-NEXT:    rldicl r3, r3, 56, 56
+; CHECK-P8-NEXT:    extsb r4, r4
+; CHECK-P8-NEXT:    extsb r3, r3
+; CHECK-P8-NEXT:    mtvsrwa f0, r4
+; CHECK-P8-NEXT:    mtvsrwa f1, r3
+; CHECK-P8-NEXT:    xscvsxddp f0, f0
+; CHECK-P8-NEXT:    xscvsxddp f1, f1
+; CHECK-P8-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrws v2, r3
+; CHECK-P9-NEXT:    li r3, 0
+; CHECK-P9-NEXT:    li r4, 1
+; CHECK-P9-NEXT:    vextubrx r3, r3, v2
+; CHECK-P9-NEXT:    vextubrx r4, r4, v2
+; CHECK-P9-NEXT:    extsb r3, r3
+; CHECK-P9-NEXT:    extsb r4, r4
+; CHECK-P9-NEXT:    mtvsrwa f0, r3
+; CHECK-P9-NEXT:    mtvsrwa f1, r4
+; CHECK-P9-NEXT:    xscvsxddp f0, f0
+; CHECK-P9-NEXT:    xscvsxddp f1, f1
+; CHECK-P9-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrws v2, r3
+; CHECK-BE-NEXT:    li r3, 1
+; CHECK-BE-NEXT:    li r4, 0
+; CHECK-BE-NEXT:    vextublx r3, r3, v2
+; CHECK-BE-NEXT:    vextublx r4, r4, v2
+; CHECK-BE-NEXT:    extsb r3, r3
+; CHECK-BE-NEXT:    extsb r4, r4
+; CHECK-BE-NEXT:    mtvsrwa f0, r3
+; CHECK-BE-NEXT:    mtvsrwa f1, r4
+; CHECK-BE-NEXT:    xscvsxddp f0, f0
+; CHECK-BE-NEXT:    xscvsxddp f1, f1
+; CHECK-BE-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-BE-NEXT:    blr
+; IR under test: the i16 argument is reinterpreted as <2 x i8> before the
+; signed conversion.
+entry:
+  %0 = bitcast i16 %a.coerce to <2 x i8>
+  %1 = sitofp <2 x i8> %0 to <2 x double>
+  ret <2 x double> %1
+}
+
+; test4elt_signed: signed conversion (sitofp) of four i8 lanes, passed packed
+; in an i32 argument, to <4 x double>. The 32-byte result is written through
+; the sret pointer %agg.result.
+; In each expected sequence below every byte is extracted into a GPR,
+; sign-extended with extsb, moved to a VSX register with mtvsrwa, converted by
+; the scalar xscvsxddp, and pairs of doubles are merged with xxmrghd before
+; the two vector stores.
+; NOTE(review): the CHECK-P8 / CHECK-P9 / CHECK-BE prefixes presumably map to
+; Power8-LE, Power9-LE and big-endian RUN lines, which are outside this view;
+; confirm against the RUN lines at the top of the test file.
+define void @test4elt_signed(<4 x double>* noalias nocapture sret %agg.result, i32 %a.coerce) local_unnamed_addr #1 {
+; CHECK-P8-LABEL: test4elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r4
+; CHECK-P8-NEXT:    mfvsrd r4, f0
+; CHECK-P8-NEXT:    clrldi r5, r4, 56
+; CHECK-P8-NEXT:    rldicl r6, r4, 56, 56
+; CHECK-P8-NEXT:    extsb r5, r5
+; CHECK-P8-NEXT:    extsb r6, r6
+; CHECK-P8-NEXT:    mtvsrwa f0, r5
+; CHECK-P8-NEXT:    rldicl r5, r4, 48, 56
+; CHECK-P8-NEXT:    rldicl r4, r4, 40, 56
+; CHECK-P8-NEXT:    extsb r5, r5
+; CHECK-P8-NEXT:    extsb r4, r4
+; CHECK-P8-NEXT:    mtvsrwa f1, r6
+; CHECK-P8-NEXT:    mtvsrwa f2, r5
+; CHECK-P8-NEXT:    mtvsrwa f3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    xscvsxddp f0, f0
+; CHECK-P8-NEXT:    xscvsxddp f1, f1
+; CHECK-P8-NEXT:    xscvsxddp f2, f2
+; CHECK-P8-NEXT:    xscvsxddp f3, f3
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test4elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrws v2, r4
+; CHECK-P9-NEXT:    li r4, 0
+; CHECK-P9-NEXT:    li r5, 1
+; CHECK-P9-NEXT:    li r6, 2
+; CHECK-P9-NEXT:    li r7, 3
+; CHECK-P9-NEXT:    vextubrx r4, r4, v2
+; CHECK-P9-NEXT:    vextubrx r5, r5, v2
+; CHECK-P9-NEXT:    vextubrx r6, r6, v2
+; CHECK-P9-NEXT:    vextubrx r7, r7, v2
+; CHECK-P9-NEXT:    extsb r4, r4
+; CHECK-P9-NEXT:    extsb r5, r5
+; CHECK-P9-NEXT:    extsb r6, r6
+; CHECK-P9-NEXT:    extsb r7, r7
+; CHECK-P9-NEXT:    mtvsrwa f0, r4
+; CHECK-P9-NEXT:    mtvsrwa f1, r5
+; CHECK-P9-NEXT:    mtvsrwa f2, r6
+; CHECK-P9-NEXT:    mtvsrwa f3, r7
+; CHECK-P9-NEXT:    xscvsxddp f0, f0
+; CHECK-P9-NEXT:    xscvsxddp f1, f1
+; CHECK-P9-NEXT:    xscvsxddp f2, f2
+; CHECK-P9-NEXT:    xscvsxddp f3, f3
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrws v2, r4
+; CHECK-BE-NEXT:    li r4, 1
+; CHECK-BE-NEXT:    li r5, 0
+; CHECK-BE-NEXT:    li r6, 3
+; CHECK-BE-NEXT:    li r7, 2
+; CHECK-BE-NEXT:    vextublx r4, r4, v2
+; CHECK-BE-NEXT:    vextublx r5, r5, v2
+; CHECK-BE-NEXT:    vextublx r6, r6, v2
+; CHECK-BE-NEXT:    vextublx r7, r7, v2
+; CHECK-BE-NEXT:    extsb r4, r4
+; CHECK-BE-NEXT:    extsb r5, r5
+; CHECK-BE-NEXT:    extsb r6, r6
+; CHECK-BE-NEXT:    extsb r7, r7
+; CHECK-BE-NEXT:    mtvsrwa f0, r4
+; CHECK-BE-NEXT:    mtvsrwa f1, r5
+; CHECK-BE-NEXT:    mtvsrwa f2, r6
+; CHECK-BE-NEXT:    mtvsrwa f3, r7
+; CHECK-BE-NEXT:    xscvsxddp f0, f0
+; CHECK-BE-NEXT:    xscvsxddp f1, f1
+; CHECK-BE-NEXT:    xscvsxddp f2, f2
+; CHECK-BE-NEXT:    xscvsxddp f3, f3
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    blr
+; IR under test: the i32 argument is reinterpreted as <4 x i8> before the
+; signed conversion; the result is stored with 32-byte alignment.
+entry:
+  %0 = bitcast i32 %a.coerce to <4 x i8>
+  %1 = sitofp <4 x i8> %0 to <4 x double>
+  store <4 x double> %1, <4 x double>* %agg.result, align 32
+  ret void
+}
+
+; test8elt_signed: signed conversion (sitofp) of eight i8 lanes, passed packed
+; in an i64 argument, to <8 x double>. The 64-byte result is written through
+; the sret pointer %agg.result.
+; In each expected sequence below every byte is extracted into a GPR,
+; sign-extended with extsb, moved to a VSX register with mtvsrwa, converted by
+; the scalar xscvsxddp, and pairs of doubles are merged with xxmrghd before
+; the four vector stores.
+; NOTE(review): the CHECK-P8 / CHECK-P9 / CHECK-BE prefixes presumably map to
+; Power8-LE, Power9-LE and big-endian RUN lines, which are outside this view;
+; confirm against the RUN lines at the top of the test file.
+define void @test8elt_signed(<8 x double>* noalias nocapture sret %agg.result, i64 %a.coerce) local_unnamed_addr #1 {
+; CHECK-P8-LABEL: test8elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r4
+; CHECK-P8-NEXT:    mfvsrd r4, f0
+; CHECK-P8-NEXT:    clrldi r5, r4, 56
+; CHECK-P8-NEXT:    rldicl r6, r4, 56, 56
+; CHECK-P8-NEXT:    extsb r5, r5
+; CHECK-P8-NEXT:    extsb r6, r6
+; CHECK-P8-NEXT:    mtvsrwa f0, r5
+; CHECK-P8-NEXT:    rldicl r5, r4, 48, 56
+; CHECK-P8-NEXT:    extsb r5, r5
+; CHECK-P8-NEXT:    mtvsrwa f1, r6
+; CHECK-P8-NEXT:    rldicl r6, r4, 40, 56
+; CHECK-P8-NEXT:    mtvsrwa f2, r5
+; CHECK-P8-NEXT:    rldicl r5, r4, 32, 56
+; CHECK-P8-NEXT:    extsb r6, r6
+; CHECK-P8-NEXT:    extsb r5, r5
+; CHECK-P8-NEXT:    mtvsrwa f3, r6
+; CHECK-P8-NEXT:    rldicl r6, r4, 24, 56
+; CHECK-P8-NEXT:    mtvsrwa f4, r5
+; CHECK-P8-NEXT:    rldicl r5, r4, 16, 56
+; CHECK-P8-NEXT:    rldicl r4, r4, 8, 56
+; CHECK-P8-NEXT:    extsb r6, r6
+; CHECK-P8-NEXT:    extsb r5, r5
+; CHECK-P8-NEXT:    extsb r4, r4
+; CHECK-P8-NEXT:    mtvsrwa f5, r6
+; CHECK-P8-NEXT:    mtvsrwa f6, r5
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    mtvsrwa f7, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    xscvsxddp f4, f4
+; CHECK-P8-NEXT:    xscvsxddp f5, f5
+; CHECK-P8-NEXT:    xscvsxddp f6, f6
+; CHECK-P8-NEXT:    xscvsxddp f7, f7
+; CHECK-P8-NEXT:    xscvsxddp f0, f0
+; CHECK-P8-NEXT:    xscvsxddp f1, f1
+; CHECK-P8-NEXT:    xscvsxddp f2, f2
+; CHECK-P8-NEXT:    xscvsxddp f3, f3
+; CHECK-P8-NEXT:    xxmrghd vs4, vs5, vs4
+; CHECK-P8-NEXT:    xxmrghd vs5, vs7, vs6
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P8-NEXT:    xxswapd vs2, vs5
+; CHECK-P8-NEXT:    xxswapd vs3, vs4
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrd f0, r4
+; CHECK-P9-NEXT:    li r4, 0
+; CHECK-P9-NEXT:    li r5, 1
+; CHECK-P9-NEXT:    li r6, 2
+; CHECK-P9-NEXT:    li r7, 3
+; CHECK-P9-NEXT:    li r8, 4
+; CHECK-P9-NEXT:    li r9, 5
+; CHECK-P9-NEXT:    li r10, 6
+; CHECK-P9-NEXT:    li r11, 7
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    vextubrx r4, r4, v2
+; CHECK-P9-NEXT:    vextubrx r5, r5, v2
+; CHECK-P9-NEXT:    vextubrx r6, r6, v2
+; CHECK-P9-NEXT:    vextubrx r7, r7, v2
+; CHECK-P9-NEXT:    vextubrx r8, r8, v2
+; CHECK-P9-NEXT:    vextubrx r9, r9, v2
+; CHECK-P9-NEXT:    vextubrx r10, r10, v2
+; CHECK-P9-NEXT:    vextubrx r11, r11, v2
+; CHECK-P9-NEXT:    extsb r4, r4
+; CHECK-P9-NEXT:    extsb r5, r5
+; CHECK-P9-NEXT:    extsb r6, r6
+; CHECK-P9-NEXT:    extsb r7, r7
+; CHECK-P9-NEXT:    extsb r8, r8
+; CHECK-P9-NEXT:    extsb r9, r9
+; CHECK-P9-NEXT:    extsb r10, r10
+; CHECK-P9-NEXT:    extsb r11, r11
+; CHECK-P9-NEXT:    mtvsrwa f0, r4
+; CHECK-P9-NEXT:    mtvsrwa f1, r5
+; CHECK-P9-NEXT:    mtvsrwa f2, r6
+; CHECK-P9-NEXT:    mtvsrwa f3, r7
+; CHECK-P9-NEXT:    mtvsrwa f4, r8
+; CHECK-P9-NEXT:    mtvsrwa f5, r9
+; CHECK-P9-NEXT:    mtvsrwa f6, r10
+; CHECK-P9-NEXT:    mtvsrwa f7, r11
+; CHECK-P9-NEXT:    xscvsxddp f0, f0
+; CHECK-P9-NEXT:    xscvsxddp f1, f1
+; CHECK-P9-NEXT:    xscvsxddp f2, f2
+; CHECK-P9-NEXT:    xscvsxddp f3, f3
+; CHECK-P9-NEXT:    xscvsxddp f4, f4
+; CHECK-P9-NEXT:    xscvsxddp f5, f5
+; CHECK-P9-NEXT:    xscvsxddp f6, f6
+; CHECK-P9-NEXT:    xscvsxddp f7, f7
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    li r5, 1
+; CHECK-BE-NEXT:    mtvsrd v2, r4
+; CHECK-BE-NEXT:    li r4, 0
+; CHECK-BE-NEXT:    li r6, 3
+; CHECK-BE-NEXT:    li r7, 2
+; CHECK-BE-NEXT:    li r8, 5
+; CHECK-BE-NEXT:    li r9, 4
+; CHECK-BE-NEXT:    li r10, 7
+; CHECK-BE-NEXT:    li r11, 6
+; CHECK-BE-NEXT:    vextublx r5, r5, v2
+; CHECK-BE-NEXT:    vextublx r4, r4, v2
+; CHECK-BE-NEXT:    vextublx r6, r6, v2
+; CHECK-BE-NEXT:    vextublx r7, r7, v2
+; CHECK-BE-NEXT:    vextublx r8, r8, v2
+; CHECK-BE-NEXT:    vextublx r9, r9, v2
+; CHECK-BE-NEXT:    vextublx r10, r10, v2
+; CHECK-BE-NEXT:    vextublx r11, r11, v2
+; CHECK-BE-NEXT:    extsb r5, r5
+; CHECK-BE-NEXT:    extsb r4, r4
+; CHECK-BE-NEXT:    extsb r6, r6
+; CHECK-BE-NEXT:    extsb r7, r7
+; CHECK-BE-NEXT:    extsb r8, r8
+; CHECK-BE-NEXT:    extsb r9, r9
+; CHECK-BE-NEXT:    extsb r10, r10
+; CHECK-BE-NEXT:    extsb r11, r11
+; CHECK-BE-NEXT:    mtvsrwa f0, r5
+; CHECK-BE-NEXT:    mtvsrwa f1, r4
+; CHECK-BE-NEXT:    mtvsrwa f2, r6
+; CHECK-BE-NEXT:    mtvsrwa f3, r7
+; CHECK-BE-NEXT:    mtvsrwa f4, r8
+; CHECK-BE-NEXT:    mtvsrwa f5, r9
+; CHECK-BE-NEXT:    mtvsrwa f6, r10
+; CHECK-BE-NEXT:    mtvsrwa f7, r11
+; CHECK-BE-NEXT:    xscvsxddp f0, f0
+; CHECK-BE-NEXT:    xscvsxddp f1, f1
+; CHECK-BE-NEXT:    xscvsxddp f2, f2
+; CHECK-BE-NEXT:    xscvsxddp f3, f3
+; CHECK-BE-NEXT:    xscvsxddp f4, f4
+; CHECK-BE-NEXT:    xscvsxddp f5, f5
+; CHECK-BE-NEXT:    xscvsxddp f6, f6
+; CHECK-BE-NEXT:    xscvsxddp f7, f7
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-BE-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    blr
+; IR under test: the i64 argument is reinterpreted as <8 x i8> before the
+; signed conversion; the result is stored with 64-byte alignment.
+entry:
+  %0 = bitcast i64 %a.coerce to <8 x i8>
+  %1 = sitofp <8 x i8> %0 to <8 x double>
+  store <8 x double> %1, <8 x double>* %agg.result, align 64
+  ret void
+}
+
+define void @test16elt_signed(<16 x double>* noalias nocapture sret %agg.result, <16 x i8> %a) local_unnamed_addr #2 {
+; CHECK-P8-LABEL: test16elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mfvsrd r5, v2
+; CHECK-P8-NEXT:    xxswapd vs2, v2
+; CHECK-P8-NEXT:    li r4, 112
+; CHECK-P8-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-P8-NEXT:    clrldi r6, r5, 56
+; CHECK-P8-NEXT:    rldicl r7, r5, 56, 56
+; CHECK-P8-NEXT:    extsb r6, r6
+; CHECK-P8-NEXT:    extsb r7, r7
+; CHECK-P8-NEXT:    mtvsrwa f0, r6
+; CHECK-P8-NEXT:    rldicl r6, r5, 40, 56
+; CHECK-P8-NEXT:    rldicl r8, r5, 48, 56
+; CHECK-P8-NEXT:    mtvsrwa f1, r7
+; CHECK-P8-NEXT:    rldicl r7, r5, 32, 56
+; CHECK-P8-NEXT:    extsb r6, r6
+; CHECK-P8-NEXT:    extsb r8, r8
+; CHECK-P8-NEXT:    extsb r7, r7
+; CHECK-P8-NEXT:    mtvsrwa f4, r6
+; CHECK-P8-NEXT:    rldicl r6, r5, 24, 56
+; CHECK-P8-NEXT:    mtvsrwa f3, r8
+; CHECK-P8-NEXT:    extsb r6, r6
+; CHECK-P8-NEXT:    mtvsrwa f5, r7
+; CHECK-P8-NEXT:    rldicl r7, r5, 16, 56
+; CHECK-P8-NEXT:    rldicl r5, r5, 8, 56
+; CHECK-P8-NEXT:    mfvsrd r8, f2
+; CHECK-P8-NEXT:    extsb r5, r5
+; CHECK-P8-NEXT:    mtvsrwa f2, r6
+; CHECK-P8-NEXT:    extsb r6, r7
+; CHECK-P8-NEXT:    mtvsrwa f6, r6
+; CHECK-P8-NEXT:    clrldi r6, r8, 56
+; CHECK-P8-NEXT:    mtvsrwa f7, r5
+; CHECK-P8-NEXT:    rldicl r5, r8, 56, 56
+; CHECK-P8-NEXT:    extsb r6, r6
+; CHECK-P8-NEXT:    mtvsrwa f8, r6
+; CHECK-P8-NEXT:    rldicl r6, r8, 48, 56
+; CHECK-P8-NEXT:    extsb r5, r5
+; CHECK-P8-NEXT:    mtvsrwa f9, r5
+; CHECK-P8-NEXT:    rldicl r5, r8, 40, 56
+; CHECK-P8-NEXT:    extsb r6, r6
+; CHECK-P8-NEXT:    mtvsrwa f10, r6
+; CHECK-P8-NEXT:    rldicl r6, r8, 32, 56
+; CHECK-P8-NEXT:    extsb r5, r5
+; CHECK-P8-NEXT:    mtvsrwa f11, r5
+; CHECK-P8-NEXT:    rldicl r5, r8, 24, 56
+; CHECK-P8-NEXT:    extsb r6, r6
+; CHECK-P8-NEXT:    mtvsrwa f12, r6
+; CHECK-P8-NEXT:    rldicl r6, r8, 16, 56
+; CHECK-P8-NEXT:    extsb r5, r5
+; CHECK-P8-NEXT:    xscvsxddp f6, f6
+; CHECK-P8-NEXT:    xscvsxddp f7, f7
+; CHECK-P8-NEXT:    mtvsrwa f13, r5
+; CHECK-P8-NEXT:    extsb r5, r6
+; CHECK-P8-NEXT:    mtvsrwa v2, r5
+; CHECK-P8-NEXT:    rldicl r5, r8, 8, 56
+; CHECK-P8-NEXT:    xscvsxddp f5, f5
+; CHECK-P8-NEXT:    extsb r5, r5
+; CHECK-P8-NEXT:    xscvsxddp f2, f2
+; CHECK-P8-NEXT:    xscvsxddp f0, f0
+; CHECK-P8-NEXT:    xscvsxddp f1, f1
+; CHECK-P8-NEXT:    xxmrghd vs6, vs7, vs6
+; CHECK-P8-NEXT:    mtvsrwa v3, r5
+; CHECK-P8-NEXT:    li r5, 64
+; CHECK-P8-NEXT:    xscvsxddp f3, f3
+; CHECK-P8-NEXT:    xscvsxddp f4, f4
+; CHECK-P8-NEXT:    xscvsxddp f31, v2
+; CHECK-P8-NEXT:    xxmrghd vs2, vs2, vs5
+; CHECK-P8-NEXT:    xscvsxddp f7, v3
+; CHECK-P8-NEXT:    xscvsxddp f8, f8
+; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P8-NEXT:    xscvsxddp f9, f9
+; CHECK-P8-NEXT:    xxswapd vs1, vs6
+; CHECK-P8-NEXT:    xscvsxddp f10, f10
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xscvsxddp f12, f12
+; CHECK-P8-NEXT:    xxmrghd vs3, vs4, vs3
+; CHECK-P8-NEXT:    xscvsxddp f13, f13
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    li r4, 96
+; CHECK-P8-NEXT:    xscvsxddp f11, f11
+; CHECK-P8-NEXT:    xxmrghd vs6, vs7, vs31
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxmrghd vs4, vs9, vs8
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    li r4, 80
+; CHECK-P8-NEXT:    xxswapd vs2, vs6
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    xxmrghd vs5, vs13, vs12
+; CHECK-P8-NEXT:    xxswapd vs4, vs4
+; CHECK-P8-NEXT:    xxmrghd vs1, vs11, vs10
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    xxswapd vs5, vs5
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    stxvd2x vs5, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs4, 0, r3
+; CHECK-P8-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    std r25, -72(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r26, -64(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r27, -56(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r28, -48(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    std r29, -40(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    li r4, 0
+; CHECK-P9-NEXT:    std r30, -32(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    li r5, 1
+; CHECK-P9-NEXT:    li r6, 2
+; CHECK-P9-NEXT:    li r7, 3
+; CHECK-P9-NEXT:    li r8, 4
+; CHECK-P9-NEXT:    li r9, 5
+; CHECK-P9-NEXT:    li r10, 6
+; CHECK-P9-NEXT:    li r11, 7
+; CHECK-P9-NEXT:    li r12, 8
+; CHECK-P9-NEXT:    li r0, 9
+; CHECK-P9-NEXT:    li r30, 10
+; CHECK-P9-NEXT:    li r29, 11
+; CHECK-P9-NEXT:    li r28, 12
+; CHECK-P9-NEXT:    li r27, 13
+; CHECK-P9-NEXT:    li r26, 14
+; CHECK-P9-NEXT:    li r25, 15
+; CHECK-P9-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-P9-NEXT:    vextubrx r4, r4, v2
+; CHECK-P9-NEXT:    vextubrx r5, r5, v2
+; CHECK-P9-NEXT:    vextubrx r6, r6, v2
+; CHECK-P9-NEXT:    vextubrx r7, r7, v2
+; CHECK-P9-NEXT:    vextubrx r8, r8, v2
+; CHECK-P9-NEXT:    vextubrx r9, r9, v2
+; CHECK-P9-NEXT:    vextubrx r10, r10, v2
+; CHECK-P9-NEXT:    vextubrx r11, r11, v2
+; CHECK-P9-NEXT:    vextubrx r12, r12, v2
+; CHECK-P9-NEXT:    vextubrx r0, r0, v2
+; CHECK-P9-NEXT:    vextubrx r30, r30, v2
+; CHECK-P9-NEXT:    vextubrx r29, r29, v2
+; CHECK-P9-NEXT:    vextubrx r28, r28, v2
+; CHECK-P9-NEXT:    vextubrx r27, r27, v2
+; CHECK-P9-NEXT:    vextubrx r26, r26, v2
+; CHECK-P9-NEXT:    vextubrx r25, r25, v2
+; CHECK-P9-NEXT:    extsb r4, r4
+; CHECK-P9-NEXT:    extsb r5, r5
+; CHECK-P9-NEXT:    extsb r6, r6
+; CHECK-P9-NEXT:    extsb r7, r7
+; CHECK-P9-NEXT:    extsb r8, r8
+; CHECK-P9-NEXT:    extsb r9, r9
+; CHECK-P9-NEXT:    extsb r10, r10
+; CHECK-P9-NEXT:    extsb r11, r11
+; CHECK-P9-NEXT:    extsb r12, r12
+; CHECK-P9-NEXT:    extsb r0, r0
+; CHECK-P9-NEXT:    extsb r30, r30
+; CHECK-P9-NEXT:    extsb r29, r29
+; CHECK-P9-NEXT:    extsb r28, r28
+; CHECK-P9-NEXT:    extsb r27, r27
+; CHECK-P9-NEXT:    extsb r26, r26
+; CHECK-P9-NEXT:    extsb r25, r25
+; CHECK-P9-NEXT:    mtvsrwa f0, r4
+; CHECK-P9-NEXT:    mtvsrwa f1, r5
+; CHECK-P9-NEXT:    mtvsrwa f2, r6
+; CHECK-P9-NEXT:    mtvsrwa f3, r7
+; CHECK-P9-NEXT:    mtvsrwa f4, r8
+; CHECK-P9-NEXT:    mtvsrwa f5, r9
+; CHECK-P9-NEXT:    mtvsrwa f6, r10
+; CHECK-P9-NEXT:    mtvsrwa f7, r11
+; CHECK-P9-NEXT:    mtvsrwa f8, r12
+; CHECK-P9-NEXT:    mtvsrwa f9, r0
+; CHECK-P9-NEXT:    mtvsrwa f10, r30
+; CHECK-P9-NEXT:    ld r30, -32(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwa f11, r29
+; CHECK-P9-NEXT:    ld r29, -40(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwa f12, r28
+; CHECK-P9-NEXT:    ld r28, -48(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwa f13, r27
+; CHECK-P9-NEXT:    ld r27, -56(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwa v2, r26
+; CHECK-P9-NEXT:    ld r26, -64(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    mtvsrwa v3, r25
+; CHECK-P9-NEXT:    ld r25, -72(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    xscvsxddp f0, f0
+; CHECK-P9-NEXT:    xscvsxddp f1, f1
+; CHECK-P9-NEXT:    xscvsxddp f2, f2
+; CHECK-P9-NEXT:    xscvsxddp f3, f3
+; CHECK-P9-NEXT:    xscvsxddp f4, f4
+; CHECK-P9-NEXT:    xscvsxddp f5, f5
+; CHECK-P9-NEXT:    xscvsxddp f6, f6
+; CHECK-P9-NEXT:    xscvsxddp f7, f7
+; CHECK-P9-NEXT:    xscvsxddp f8, f8
+; CHECK-P9-NEXT:    xscvsxddp f9, f9
+; CHECK-P9-NEXT:    xscvsxddp f10, f10
+; CHECK-P9-NEXT:    xscvsxddp f11, f11
+; CHECK-P9-NEXT:    xscvsxddp f12, f12
+; CHECK-P9-NEXT:    xscvsxddp f13, f13
+; CHECK-P9-NEXT:    xscvsxddp f31, v2
+; CHECK-P9-NEXT:    xscvsxddp f30, v3
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-P9-NEXT:    xxmrghd vs4, vs9, vs8
+; CHECK-P9-NEXT:    xxmrghd vs5, vs11, vs10
+; CHECK-P9-NEXT:    xxmrghd vs6, vs13, vs12
+; CHECK-P9-NEXT:    xxmrghd vs7, vs30, vs31
+; CHECK-P9-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    stxv vs4, 64(r3)
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs7, 112(r3)
+; CHECK-P9-NEXT:    stxv vs6, 96(r3)
+; CHECK-P9-NEXT:    stxv vs5, 80(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    std r25, -72(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r26, -64(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r27, -56(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r28, -48(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r29, -40(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    li r4, 1
+; CHECK-BE-NEXT:    std r30, -32(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    li r5, 0
+; CHECK-BE-NEXT:    li r6, 3
+; CHECK-BE-NEXT:    li r7, 2
+; CHECK-BE-NEXT:    li r8, 5
+; CHECK-BE-NEXT:    li r9, 4
+; CHECK-BE-NEXT:    li r10, 7
+; CHECK-BE-NEXT:    li r11, 6
+; CHECK-BE-NEXT:    li r12, 9
+; CHECK-BE-NEXT:    li r0, 8
+; CHECK-BE-NEXT:    li r30, 11
+; CHECK-BE-NEXT:    li r29, 10
+; CHECK-BE-NEXT:    li r28, 13
+; CHECK-BE-NEXT:    li r27, 12
+; CHECK-BE-NEXT:    li r26, 15
+; CHECK-BE-NEXT:    li r25, 14
+; CHECK-BE-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    vextublx r4, r4, v2
+; CHECK-BE-NEXT:    vextublx r5, r5, v2
+; CHECK-BE-NEXT:    vextublx r6, r6, v2
+; CHECK-BE-NEXT:    vextublx r7, r7, v2
+; CHECK-BE-NEXT:    vextublx r8, r8, v2
+; CHECK-BE-NEXT:    vextublx r9, r9, v2
+; CHECK-BE-NEXT:    vextublx r10, r10, v2
+; CHECK-BE-NEXT:    vextublx r11, r11, v2
+; CHECK-BE-NEXT:    vextublx r12, r12, v2
+; CHECK-BE-NEXT:    vextublx r0, r0, v2
+; CHECK-BE-NEXT:    vextublx r30, r30, v2
+; CHECK-BE-NEXT:    vextublx r29, r29, v2
+; CHECK-BE-NEXT:    vextublx r28, r28, v2
+; CHECK-BE-NEXT:    vextublx r27, r27, v2
+; CHECK-BE-NEXT:    vextublx r26, r26, v2
+; CHECK-BE-NEXT:    vextublx r25, r25, v2
+; CHECK-BE-NEXT:    extsb r4, r4
+; CHECK-BE-NEXT:    extsb r5, r5
+; CHECK-BE-NEXT:    extsb r6, r6
+; CHECK-BE-NEXT:    extsb r7, r7
+; CHECK-BE-NEXT:    extsb r8, r8
+; CHECK-BE-NEXT:    extsb r9, r9
+; CHECK-BE-NEXT:    extsb r10, r10
+; CHECK-BE-NEXT:    extsb r11, r11
+; CHECK-BE-NEXT:    extsb r12, r12
+; CHECK-BE-NEXT:    extsb r0, r0
+; CHECK-BE-NEXT:    extsb r30, r30
+; CHECK-BE-NEXT:    extsb r29, r29
+; CHECK-BE-NEXT:    extsb r28, r28
+; CHECK-BE-NEXT:    extsb r27, r27
+; CHECK-BE-NEXT:    extsb r26, r26
+; CHECK-BE-NEXT:    extsb r25, r25
+; CHECK-BE-NEXT:    mtvsrwa f0, r4
+; CHECK-BE-NEXT:    mtvsrwa f1, r5
+; CHECK-BE-NEXT:    mtvsrwa f2, r6
+; CHECK-BE-NEXT:    mtvsrwa f3, r7
+; CHECK-BE-NEXT:    mtvsrwa f4, r8
+; CHECK-BE-NEXT:    mtvsrwa f5, r9
+; CHECK-BE-NEXT:    mtvsrwa f6, r10
+; CHECK-BE-NEXT:    mtvsrwa f7, r11
+; CHECK-BE-NEXT:    mtvsrwa f8, r12
+; CHECK-BE-NEXT:    mtvsrwa f9, r0
+; CHECK-BE-NEXT:    mtvsrwa f10, r30
+; CHECK-BE-NEXT:    ld r30, -32(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwa f11, r29
+; CHECK-BE-NEXT:    ld r29, -40(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwa f12, r28
+; CHECK-BE-NEXT:    ld r28, -48(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwa f13, r27
+; CHECK-BE-NEXT:    ld r27, -56(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwa v2, r26
+; CHECK-BE-NEXT:    ld r26, -64(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    mtvsrwa v3, r25
+; CHECK-BE-NEXT:    ld r25, -72(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    xscvsxddp f0, f0
+; CHECK-BE-NEXT:    xscvsxddp f1, f1
+; CHECK-BE-NEXT:    xscvsxddp f2, f2
+; CHECK-BE-NEXT:    xscvsxddp f3, f3
+; CHECK-BE-NEXT:    xscvsxddp f4, f4
+; CHECK-BE-NEXT:    xscvsxddp f5, f5
+; CHECK-BE-NEXT:    xscvsxddp f6, f6
+; CHECK-BE-NEXT:    xscvsxddp f7, f7
+; CHECK-BE-NEXT:    xscvsxddp f8, f8
+; CHECK-BE-NEXT:    xscvsxddp f9, f9
+; CHECK-BE-NEXT:    xscvsxddp f10, f10
+; CHECK-BE-NEXT:    xscvsxddp f11, f11
+; CHECK-BE-NEXT:    xscvsxddp f12, f12
+; CHECK-BE-NEXT:    xscvsxddp f13, f13
+; CHECK-BE-NEXT:    xscvsxddp f31, v2
+; CHECK-BE-NEXT:    xscvsxddp f30, v3
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-BE-NEXT:    xxmrghd vs2, vs5, vs4
+; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-BE-NEXT:    xxmrghd vs4, vs9, vs8
+; CHECK-BE-NEXT:    xxmrghd vs5, vs11, vs10
+; CHECK-BE-NEXT:    xxmrghd vs6, vs13, vs12
+; CHECK-BE-NEXT:    xxmrghd vs7, vs30, vs31
+; CHECK-BE-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs4, 64(r3)
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs7, 112(r3)
+; CHECK-BE-NEXT:    stxv vs6, 96(r3)
+; CHECK-BE-NEXT:    stxv vs5, 80(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = sitofp <16 x i8> %a to <16 x double>
+  store <16 x double> %0, <16 x double>* %agg.result, align 128
+  ret void
+}

Added: llvm/trunk/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll?rev=347090&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll Fri Nov 16 12:24:10 2018
@@ -0,0 +1,304 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P8
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P9
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE
+
+define i64 @test2elt(i64 %a.coerce) local_unnamed_addr #0 { ; uitofp <2 x i32> -> <2 x float>; the vector is passed and returned as a plain i64
+; CHECK-P8-LABEL: test2elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xvcvuxwsp vs0, v2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xvcvuxwsp vs0, v2
+; CHECK-P9-NEXT:    mfvsrld r3, vs0
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrd f0, r3
+; CHECK-BE-NEXT:    xvcvuxwsp vs0, vs0
+; CHECK-BE-NEXT:    mfvsrd r3, f0
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i64 %a.coerce to <2 x i32> ; view the i64 argument as two i32 elements
+  %1 = uitofp <2 x i32> %0 to <2 x float> ; unsigned conversion under test; all three RUN configurations expect a single xvcvuxwsp (xxswapd appears only in the two powerpc64le expectations)
+  %2 = bitcast <2 x float> %1 to i64 ; pack the two floats back into the i64 return value
+  ret i64 %2
+}
+
+define <4 x float> @test4elt(<4 x i32> %a) local_unnamed_addr #1 { ; uitofp <4 x i32> -> <4 x float>, vector passed and returned directly
+; CHECK-P8-LABEL: test4elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xvcvuxwsp v2, v2
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test4elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xvcvuxwsp v2, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xvcvuxwsp v2, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = uitofp <4 x i32> %a to <4 x float> ; unsigned conversion under test; every RUN configuration expects exactly one xvcvuxwsp followed by blr
+  ret <4 x float> %0
+}
+
+define void @test8elt(<8 x float>* noalias nocapture sret %agg.result, <8 x i32>* nocapture readonly) local_unnamed_addr #2 { ; uitofp <8 x i32> -> <8 x float>, source and result both go through memory
+; CHECK-P8-LABEL: test8elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lvx v2, r4, r5
+; CHECK-P8-NEXT:    xvcvuxwsp v3, v3
+; CHECK-P8-NEXT:    xvcvuxwsp v2, v2
+; CHECK-P8-NEXT:    stvx v3, 0, r3
+; CHECK-P8-NEXT:    stvx v2, r3, r5
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 16(r4)
+; CHECK-P9-NEXT:    lxv vs1, 0(r4)
+; CHECK-P9-NEXT:    xvcvuxwsp vs1, vs1
+; CHECK-P9-NEXT:    xvcvuxwsp vs0, vs0
+; CHECK-P9-NEXT:    stxv vs0, 16(r3)
+; CHECK-P9-NEXT:    stxv vs1, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 16(r4)
+; CHECK-BE-NEXT:    lxv vs1, 0(r4)
+; CHECK-BE-NEXT:    xvcvuxwsp vs1, vs1
+; CHECK-BE-NEXT:    xvcvuxwsp vs0, vs0
+; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    stxv vs1, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <8 x i32>, <8 x i32>* %0, align 32 ; %0 is the unnamed second parameter (pointer to the source vector)
+  %1 = uitofp <8 x i32> %a to <8 x float> ; unsigned conversion under test; the expectations show two xvcvuxwsp, on the pieces at offsets 0 and 16
+  store <8 x float> %1, <8 x float>* %agg.result, align 32 ; result is written through the sret pointer
+  ret void
+}
+
+define void @test16elt(<16 x float>* noalias nocapture sret %agg.result, <16 x i32>* nocapture readonly) local_unnamed_addr #2 { ; uitofp <16 x i32> -> <16 x float>, source and result both go through memory
+; CHECK-P8-LABEL: test16elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    li r7, 48
+; CHECK-P8-NEXT:    lvx v5, 0, r4
+; CHECK-P8-NEXT:    lvx v2, r4, r5
+; CHECK-P8-NEXT:    lvx v3, r4, r6
+; CHECK-P8-NEXT:    lvx v4, r4, r7
+; CHECK-P8-NEXT:    xvcvuxwsp v5, v5
+; CHECK-P8-NEXT:    xvcvuxwsp v2, v2
+; CHECK-P8-NEXT:    xvcvuxwsp v3, v3
+; CHECK-P8-NEXT:    xvcvuxwsp v4, v4
+; CHECK-P8-NEXT:    stvx v5, 0, r3
+; CHECK-P8-NEXT:    stvx v2, r3, r5
+; CHECK-P8-NEXT:    stvx v3, r3, r6
+; CHECK-P8-NEXT:    stvx v4, r3, r7
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 48(r4)
+; CHECK-P9-NEXT:    lxv vs1, 32(r4)
+; CHECK-P9-NEXT:    lxv vs2, 16(r4)
+; CHECK-P9-NEXT:    lxv vs3, 0(r4)
+; CHECK-P9-NEXT:    xvcvuxwsp vs3, vs3
+; CHECK-P9-NEXT:    xvcvuxwsp vs2, vs2
+; CHECK-P9-NEXT:    xvcvuxwsp vs1, vs1
+; CHECK-P9-NEXT:    xvcvuxwsp vs0, vs0
+; CHECK-P9-NEXT:    stxv vs0, 48(r3)
+; CHECK-P9-NEXT:    stxv vs1, 32(r3)
+; CHECK-P9-NEXT:    stxv vs2, 16(r3)
+; CHECK-P9-NEXT:    stxv vs3, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 48(r4)
+; CHECK-BE-NEXT:    lxv vs1, 32(r4)
+; CHECK-BE-NEXT:    lxv vs2, 16(r4)
+; CHECK-BE-NEXT:    lxv vs3, 0(r4)
+; CHECK-BE-NEXT:    xvcvuxwsp vs3, vs3
+; CHECK-BE-NEXT:    xvcvuxwsp vs2, vs2
+; CHECK-BE-NEXT:    xvcvuxwsp vs1, vs1
+; CHECK-BE-NEXT:    xvcvuxwsp vs0, vs0
+; CHECK-BE-NEXT:    stxv vs0, 48(r3)
+; CHECK-BE-NEXT:    stxv vs1, 32(r3)
+; CHECK-BE-NEXT:    stxv vs2, 16(r3)
+; CHECK-BE-NEXT:    stxv vs3, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <16 x i32>, <16 x i32>* %0, align 64 ; %0 is the unnamed second parameter (pointer to the source vector)
+  %1 = uitofp <16 x i32> %a to <16 x float> ; unsigned conversion under test; the expectations show four xvcvuxwsp, on the pieces at offsets 0, 16, 32 and 48
+  store <16 x float> %1, <16 x float>* %agg.result, align 64 ; result is written through the sret pointer
+  ret void
+}
+
+define i64 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 { ; sitofp <2 x i32> -> <2 x float>; the vector is passed and returned as a plain i64
+; CHECK-P8-LABEL: test2elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtvsrd f0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xvcvsxwsp vs0, v2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    mfvsrd r3, f0
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xvcvsxwsp vs0, v2
+; CHECK-P9-NEXT:    mfvsrld r3, vs0
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrd f0, r3
+; CHECK-BE-NEXT:    xvcvsxwsp vs0, vs0
+; CHECK-BE-NEXT:    mfvsrd r3, f0
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = bitcast i64 %a.coerce to <2 x i32> ; view the i64 argument as two i32 elements
+  %1 = sitofp <2 x i32> %0 to <2 x float> ; signed conversion under test; all three RUN configurations expect a single xvcvsxwsp (xxswapd appears only in the two powerpc64le expectations)
+  %2 = bitcast <2 x float> %1 to i64 ; pack the two floats back into the i64 return value
+  ret i64 %2
+}
+
+define <4 x float> @test4elt_signed(<4 x i32> %a) local_unnamed_addr #1 { ; sitofp <4 x i32> -> <4 x float>, vector passed and returned directly
+; CHECK-P8-LABEL: test4elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xvcvsxwsp v2, v2
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test4elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xvcvsxwsp v2, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xvcvsxwsp v2, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = sitofp <4 x i32> %a to <4 x float> ; signed conversion under test; every RUN configuration expects exactly one xvcvsxwsp followed by blr
+  ret <4 x float> %0
+}
+
+define void @test8elt_signed(<8 x float>* noalias nocapture sret %agg.result, <8 x i32>* nocapture readonly) local_unnamed_addr #2 { ; sitofp <8 x i32> -> <8 x float>, source and result both go through memory
+; CHECK-P8-LABEL: test8elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lvx v2, r4, r5
+; CHECK-P8-NEXT:    xvcvsxwsp v3, v3
+; CHECK-P8-NEXT:    xvcvsxwsp v2, v2
+; CHECK-P8-NEXT:    stvx v3, 0, r3
+; CHECK-P8-NEXT:    stvx v2, r3, r5
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 16(r4)
+; CHECK-P9-NEXT:    lxv vs1, 0(r4)
+; CHECK-P9-NEXT:    xvcvsxwsp vs1, vs1
+; CHECK-P9-NEXT:    xvcvsxwsp vs0, vs0
+; CHECK-P9-NEXT:    stxv vs0, 16(r3)
+; CHECK-P9-NEXT:    stxv vs1, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 16(r4)
+; CHECK-BE-NEXT:    lxv vs1, 0(r4)
+; CHECK-BE-NEXT:    xvcvsxwsp vs1, vs1
+; CHECK-BE-NEXT:    xvcvsxwsp vs0, vs0
+; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    stxv vs1, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <8 x i32>, <8 x i32>* %0, align 32 ; %0 is the unnamed second parameter (pointer to the source vector)
+  %1 = sitofp <8 x i32> %a to <8 x float> ; signed conversion under test; the expectations show two xvcvsxwsp, on the pieces at offsets 0 and 16
+  store <8 x float> %1, <8 x float>* %agg.result, align 32 ; result is written through the sret pointer
+  ret void
+}
+
+define void @test16elt_signed(<16 x float>* noalias nocapture sret %agg.result, <16 x i32>* nocapture readonly) local_unnamed_addr #2 { ; sitofp <16 x i32> -> <16 x float>, source and result both go through memory
+; CHECK-P8-LABEL: test16elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    li r7, 48
+; CHECK-P8-NEXT:    lvx v5, 0, r4
+; CHECK-P8-NEXT:    lvx v2, r4, r5
+; CHECK-P8-NEXT:    lvx v3, r4, r6
+; CHECK-P8-NEXT:    lvx v4, r4, r7
+; CHECK-P8-NEXT:    xvcvsxwsp v5, v5
+; CHECK-P8-NEXT:    xvcvsxwsp v2, v2
+; CHECK-P8-NEXT:    xvcvsxwsp v3, v3
+; CHECK-P8-NEXT:    xvcvsxwsp v4, v4
+; CHECK-P8-NEXT:    stvx v5, 0, r3
+; CHECK-P8-NEXT:    stvx v2, r3, r5
+; CHECK-P8-NEXT:    stvx v3, r3, r6
+; CHECK-P8-NEXT:    stvx v4, r3, r7
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs0, 48(r4)
+; CHECK-P9-NEXT:    lxv vs1, 32(r4)
+; CHECK-P9-NEXT:    lxv vs2, 16(r4)
+; CHECK-P9-NEXT:    lxv vs3, 0(r4)
+; CHECK-P9-NEXT:    xvcvsxwsp vs3, vs3
+; CHECK-P9-NEXT:    xvcvsxwsp vs2, vs2
+; CHECK-P9-NEXT:    xvcvsxwsp vs1, vs1
+; CHECK-P9-NEXT:    xvcvsxwsp vs0, vs0
+; CHECK-P9-NEXT:    stxv vs0, 48(r3)
+; CHECK-P9-NEXT:    stxv vs1, 32(r3)
+; CHECK-P9-NEXT:    stxv vs2, 16(r3)
+; CHECK-P9-NEXT:    stxv vs3, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs0, 48(r4)
+; CHECK-BE-NEXT:    lxv vs1, 32(r4)
+; CHECK-BE-NEXT:    lxv vs2, 16(r4)
+; CHECK-BE-NEXT:    lxv vs3, 0(r4)
+; CHECK-BE-NEXT:    xvcvsxwsp vs3, vs3
+; CHECK-BE-NEXT:    xvcvsxwsp vs2, vs2
+; CHECK-BE-NEXT:    xvcvsxwsp vs1, vs1
+; CHECK-BE-NEXT:    xvcvsxwsp vs0, vs0
+; CHECK-BE-NEXT:    stxv vs0, 48(r3)
+; CHECK-BE-NEXT:    stxv vs1, 32(r3)
+; CHECK-BE-NEXT:    stxv vs2, 16(r3)
+; CHECK-BE-NEXT:    stxv vs3, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <16 x i32>, <16 x i32>* %0, align 64 ; %0 is the unnamed second parameter (pointer to the source vector)
+  %1 = sitofp <16 x i32> %a to <16 x float> ; signed conversion under test; the expectations show four xvcvsxwsp, on the pieces at offsets 0, 16, 32 and 48
+  store <16 x float> %1, <16 x float>* %agg.result, align 64 ; result is written through the sret pointer
+  ret void
+}

Added: llvm/trunk/test/CodeGen/PowerPC/vec_conv_i_to_fp_8byte_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_i_to_fp_8byte_elts.ll?rev=347090&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_i_to_fp_8byte_elts.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_i_to_fp_8byte_elts.ll Fri Nov 16 12:24:10 2018
@@ -0,0 +1,438 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P8
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P9
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE
+
+define <2 x double> @test2elt(<2 x i64> %a) local_unnamed_addr #0 { ; uitofp <2 x i64> -> <2 x double>, vector passed and returned directly
+; CHECK-P8-LABEL: test2elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xvcvuxddp v2, v2
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xvcvuxddp v2, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xvcvuxddp v2, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = uitofp <2 x i64> %a to <2 x double> ; unsigned conversion under test; every RUN configuration expects exactly one xvcvuxddp followed by blr
+  ret <2 x double> %0
+}
+
+define void @test4elt(<4 x double>* noalias nocapture sret %agg.result, <4 x i64>* nocapture readonly) local_unnamed_addr #1 { ; uitofp <4 x i64> -> <4 x double>, source and result both go through memory
+; CHECK-P8-LABEL: test4elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    xvcvuxddp vs1, vs1
+; CHECK-P8-NEXT:    xvcvuxddp vs0, vs0
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test4elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv v2, 16(r4)
+; CHECK-P9-NEXT:    lxv v3, 0(r4)
+; CHECK-P9-NEXT:    xvcvuxddp vs0, v3
+; CHECK-P9-NEXT:    xvcvuxddp vs1, v2
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv v2, 16(r4)
+; CHECK-BE-NEXT:    lxv v3, 0(r4)
+; CHECK-BE-NEXT:    xvcvuxddp vs0, v3
+; CHECK-BE-NEXT:    xvcvuxddp vs1, v2
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <4 x i64>, <4 x i64>* %0, align 32 ; %0 is the unnamed second parameter (pointer to the source vector)
+  %1 = uitofp <4 x i64> %a to <4 x double> ; unsigned conversion under test; the expectations show two xvcvuxddp, on the pieces at offsets 0 and 16
+  store <4 x double> %1, <4 x double>* %agg.result, align 32 ; result is written through the sret pointer
+  ret void
+}
+
+define void @test8elt(<8 x double>* noalias nocapture sret %agg.result, <8 x i64>* nocapture readonly) local_unnamed_addr #1 { ; uitofp <8 x i64> -> <8 x double>, source and result both go through memory
+; CHECK-P8-LABEL: test8elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    li r7, 48
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT:    xvcvuxddp vs3, vs3
+; CHECK-P8-NEXT:    xvcvuxddp vs0, vs0
+; CHECK-P8-NEXT:    xvcvuxddp vs1, vs1
+; CHECK-P8-NEXT:    xvcvuxddp vs2, vs2
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv v2, 48(r4)
+; CHECK-P9-NEXT:    lxv v3, 32(r4)
+; CHECK-P9-NEXT:    lxv v4, 16(r4)
+; CHECK-P9-NEXT:    lxv v5, 0(r4)
+; CHECK-P9-NEXT:    xvcvuxddp vs0, v5
+; CHECK-P9-NEXT:    xvcvuxddp vs1, v4
+; CHECK-P9-NEXT:    xvcvuxddp vs2, v3
+; CHECK-P9-NEXT:    xvcvuxddp vs3, v2
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv v2, 48(r4)
+; CHECK-BE-NEXT:    lxv v3, 32(r4)
+; CHECK-BE-NEXT:    lxv v4, 16(r4)
+; CHECK-BE-NEXT:    lxv v5, 0(r4)
+; CHECK-BE-NEXT:    xvcvuxddp vs0, v5
+; CHECK-BE-NEXT:    xvcvuxddp vs1, v4
+; CHECK-BE-NEXT:    xvcvuxddp vs2, v3
+; CHECK-BE-NEXT:    xvcvuxddp vs3, v2
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <8 x i64>, <8 x i64>* %0, align 64 ; %0 is the unnamed second parameter (pointer to the source vector)
+  %1 = uitofp <8 x i64> %a to <8 x double> ; unsigned conversion under test; the expectations show four xvcvuxddp, on the pieces at offsets 0, 16, 32 and 48
+  store <8 x double> %1, <8 x double>* %agg.result, align 64 ; result is written through the sret pointer
+  ret void
+}
+
+define void @test16elt(<16 x double>* noalias nocapture sret %agg.result, <16 x i64>* nocapture readonly) local_unnamed_addr #1 { ; uitofp <16 x i64> -> <16 x double>, source and result both go through memory
+; CHECK-P8-LABEL: test16elt:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    li r7, 64
+; CHECK-P8-NEXT:    li r8, 96
+; CHECK-P8-NEXT:    li r9, 112
+; CHECK-P8-NEXT:    li r10, 80
+; CHECK-P8-NEXT:    li r11, 48
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT:    lxvd2x vs3, r4, r8
+; CHECK-P8-NEXT:    lxvd2x vs4, r4, r9
+; CHECK-P8-NEXT:    lxvd2x vs5, r4, r10
+; CHECK-P8-NEXT:    lxvd2x vs6, r4, r11
+; CHECK-P8-NEXT:    lxvd2x vs7, 0, r4
+; CHECK-P8-NEXT:    xvcvuxddp vs0, vs0
+; CHECK-P8-NEXT:    xvcvuxddp vs1, vs1
+; CHECK-P8-NEXT:    xvcvuxddp vs2, vs2
+; CHECK-P8-NEXT:    xvcvuxddp vs3, vs3
+; CHECK-P8-NEXT:    xvcvuxddp vs4, vs4
+; CHECK-P8-NEXT:    xvcvuxddp vs5, vs5
+; CHECK-P8-NEXT:    xvcvuxddp vs6, vs6
+; CHECK-P8-NEXT:    xvcvuxddp vs7, vs7
+; CHECK-P8-NEXT:    stxvd2x vs4, r3, r9
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r8
+; CHECK-P8-NEXT:    stxvd2x vs5, r3, r10
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs6, r3, r11
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs7, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv v2, 48(r4)
+; CHECK-P9-NEXT:    lxv v3, 32(r4)
+; CHECK-P9-NEXT:    lxv v4, 16(r4)
+; CHECK-P9-NEXT:    lxv v5, 0(r4)
+; CHECK-P9-NEXT:    lxv v0, 112(r4)
+; CHECK-P9-NEXT:    lxv v1, 96(r4)
+; CHECK-P9-NEXT:    lxv v6, 80(r4)
+; CHECK-P9-NEXT:    lxv v7, 64(r4)
+; CHECK-P9-NEXT:    xvcvuxddp vs0, v5
+; CHECK-P9-NEXT:    xvcvuxddp vs1, v4
+; CHECK-P9-NEXT:    xvcvuxddp vs2, v3
+; CHECK-P9-NEXT:    xvcvuxddp vs3, v2
+; CHECK-P9-NEXT:    xvcvuxddp vs4, v7
+; CHECK-P9-NEXT:    xvcvuxddp vs5, v6
+; CHECK-P9-NEXT:    xvcvuxddp vs6, v1
+; CHECK-P9-NEXT:    xvcvuxddp vs7, v0
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    stxv vs7, 112(r3)
+; CHECK-P9-NEXT:    stxv vs6, 96(r3)
+; CHECK-P9-NEXT:    stxv vs5, 80(r3)
+; CHECK-P9-NEXT:    stxv vs4, 64(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv v2, 48(r4)
+; CHECK-BE-NEXT:    lxv v3, 32(r4)
+; CHECK-BE-NEXT:    lxv v4, 16(r4)
+; CHECK-BE-NEXT:    lxv v5, 0(r4)
+; CHECK-BE-NEXT:    lxv v0, 112(r4)
+; CHECK-BE-NEXT:    lxv v1, 96(r4)
+; CHECK-BE-NEXT:    lxv v6, 80(r4)
+; CHECK-BE-NEXT:    lxv v7, 64(r4)
+; CHECK-BE-NEXT:    xvcvuxddp vs0, v5
+; CHECK-BE-NEXT:    xvcvuxddp vs1, v4
+; CHECK-BE-NEXT:    xvcvuxddp vs2, v3
+; CHECK-BE-NEXT:    xvcvuxddp vs3, v2
+; CHECK-BE-NEXT:    xvcvuxddp vs4, v7
+; CHECK-BE-NEXT:    xvcvuxddp vs5, v6
+; CHECK-BE-NEXT:    xvcvuxddp vs6, v1
+; CHECK-BE-NEXT:    xvcvuxddp vs7, v0
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs7, 112(r3)
+; CHECK-BE-NEXT:    stxv vs6, 96(r3)
+; CHECK-BE-NEXT:    stxv vs5, 80(r3)
+; CHECK-BE-NEXT:    stxv vs4, 64(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <16 x i64>, <16 x i64>* %0, align 128 ; %0 is the unnamed second parameter (pointer to the source vector)
+  %1 = uitofp <16 x i64> %a to <16 x double> ; unsigned conversion under test; the expectations show eight xvcvuxddp, on the pieces at offsets 0 through 112 in steps of 16
+  store <16 x double> %1, <16 x double>* %agg.result, align 128 ; result is written through the sret pointer
+  ret void
+}
+
+define <2 x double> @test2elt_signed(<2 x i64> %a) local_unnamed_addr #0 { ; sitofp <2 x i64> -> <2 x double>, vector passed and returned directly
+; CHECK-P8-LABEL: test2elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xvcvsxddp v2, v2
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xvcvsxddp v2, v2
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test2elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xvcvsxddp v2, v2
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = sitofp <2 x i64> %a to <2 x double> ; signed conversion under test; every RUN configuration expects exactly one xvcvsxddp followed by blr
+  ret <2 x double> %0
+}
+
+define void @test4elt_signed(<4 x double>* noalias nocapture sret %agg.result, <4 x i64>* nocapture readonly) local_unnamed_addr #1 { ; sitofp <4 x i64> -> <4 x double>, source and result both go through memory
+; CHECK-P8-LABEL: test4elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    xvcvsxddp vs1, vs1
+; CHECK-P8-NEXT:    xvcvsxddp vs0, vs0
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test4elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv v2, 16(r4)
+; CHECK-P9-NEXT:    lxv v3, 0(r4)
+; CHECK-P9-NEXT:    xvcvsxddp vs0, v3
+; CHECK-P9-NEXT:    xvcvsxddp vs1, v2
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test4elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv v2, 16(r4)
+; CHECK-BE-NEXT:    lxv v3, 0(r4)
+; CHECK-BE-NEXT:    xvcvsxddp vs0, v3
+; CHECK-BE-NEXT:    xvcvsxddp vs1, v2
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <4 x i64>, <4 x i64>* %0, align 32 ; %0 is the unnamed second parameter (pointer to the source vector)
+  %1 = sitofp <4 x i64> %a to <4 x double> ; signed conversion under test; the expectations show two xvcvsxddp, on the pieces at offsets 0 and 16
+  store <4 x double> %1, <4 x double>* %agg.result, align 32 ; result is written through the sret pointer
+  ret void
+}
+
+define void @test8elt_signed(<8 x double>* noalias nocapture sret %agg.result, <8 x i64>* nocapture readonly) local_unnamed_addr #1 { ; sitofp <8 x i64> -> <8 x double>, source and result both go through memory
+; CHECK-P8-LABEL: test8elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    li r7, 48
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT:    xvcvsxddp vs3, vs3
+; CHECK-P8-NEXT:    xvcvsxddp vs0, vs0
+; CHECK-P8-NEXT:    xvcvsxddp vs1, vs1
+; CHECK-P8-NEXT:    xvcvsxddp vs2, vs2
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test8elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv v2, 48(r4)
+; CHECK-P9-NEXT:    lxv v3, 32(r4)
+; CHECK-P9-NEXT:    lxv v4, 16(r4)
+; CHECK-P9-NEXT:    lxv v5, 0(r4)
+; CHECK-P9-NEXT:    xvcvsxddp vs0, v5
+; CHECK-P9-NEXT:    xvcvsxddp vs1, v4
+; CHECK-P9-NEXT:    xvcvsxddp vs2, v3
+; CHECK-P9-NEXT:    xvcvsxddp vs3, v2
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test8elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv v2, 48(r4)
+; CHECK-BE-NEXT:    lxv v3, 32(r4)
+; CHECK-BE-NEXT:    lxv v4, 16(r4)
+; CHECK-BE-NEXT:    lxv v5, 0(r4)
+; CHECK-BE-NEXT:    xvcvsxddp vs0, v5
+; CHECK-BE-NEXT:    xvcvsxddp vs1, v4
+; CHECK-BE-NEXT:    xvcvsxddp vs2, v3
+; CHECK-BE-NEXT:    xvcvsxddp vs3, v2
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <8 x i64>, <8 x i64>* %0, align 64 ; %0 is the unnamed second parameter (pointer to the source vector)
+  %1 = sitofp <8 x i64> %a to <8 x double> ; signed conversion under test; the expectations show four xvcvsxddp, on the pieces at offsets 0, 16, 32 and 48
+  store <8 x double> %1, <8 x double>* %agg.result, align 64 ; result is written through the sret pointer
+  ret void
+}
+
+; test16elt_signed: loads a <16 x i64> through the unnamed pointer argument
+; (%0), converts it to <16 x double> with a single sitofp, and stores the
+; result through the sret pointer %agg.result.
+; The assertions below expect eight xvcvsxddp instructions under each of the
+; three check prefixes (P8, P9, BE), one per 16-byte slice at offsets 0
+; through 112. The P8 sequence materializes the seven non-zero offsets with
+; li and uses indexed lxvd2x/stxvd2x; the P9 and BE sequences use lxv/stxv
+; with displacement addressing.
+; NOTE(review): the RUN lines and attribute group #1 are not visible in this
+; part of the file; which CPU/endianness each prefix selects should be
+; confirmed against them.
+define void @test16elt_signed(<16 x double>* noalias nocapture sret %agg.result, <16 x i64>* nocapture readonly) local_unnamed_addr #1 {
+; CHECK-P8-LABEL: test16elt_signed:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    li r7, 64
+; CHECK-P8-NEXT:    li r8, 96
+; CHECK-P8-NEXT:    li r9, 112
+; CHECK-P8-NEXT:    li r10, 80
+; CHECK-P8-NEXT:    li r11, 48
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT:    lxvd2x vs3, r4, r8
+; CHECK-P8-NEXT:    lxvd2x vs4, r4, r9
+; CHECK-P8-NEXT:    lxvd2x vs5, r4, r10
+; CHECK-P8-NEXT:    lxvd2x vs6, r4, r11
+; CHECK-P8-NEXT:    lxvd2x vs7, 0, r4
+; CHECK-P8-NEXT:    xvcvsxddp vs0, vs0
+; CHECK-P8-NEXT:    xvcvsxddp vs1, vs1
+; CHECK-P8-NEXT:    xvcvsxddp vs2, vs2
+; CHECK-P8-NEXT:    xvcvsxddp vs3, vs3
+; CHECK-P8-NEXT:    xvcvsxddp vs4, vs4
+; CHECK-P8-NEXT:    xvcvsxddp vs5, vs5
+; CHECK-P8-NEXT:    xvcvsxddp vs6, vs6
+; CHECK-P8-NEXT:    xvcvsxddp vs7, vs7
+; CHECK-P8-NEXT:    stxvd2x vs4, r3, r9
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r8
+; CHECK-P8-NEXT:    stxvd2x vs5, r3, r10
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs6, r3, r11
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs7, 0, r3
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P9-LABEL: test16elt_signed:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv v2, 48(r4)
+; CHECK-P9-NEXT:    lxv v3, 32(r4)
+; CHECK-P9-NEXT:    lxv v4, 16(r4)
+; CHECK-P9-NEXT:    lxv v5, 0(r4)
+; CHECK-P9-NEXT:    lxv v0, 112(r4)
+; CHECK-P9-NEXT:    lxv v1, 96(r4)
+; CHECK-P9-NEXT:    lxv v6, 80(r4)
+; CHECK-P9-NEXT:    lxv v7, 64(r4)
+; CHECK-P9-NEXT:    xvcvsxddp vs0, v5
+; CHECK-P9-NEXT:    xvcvsxddp vs1, v4
+; CHECK-P9-NEXT:    xvcvsxddp vs2, v3
+; CHECK-P9-NEXT:    xvcvsxddp vs3, v2
+; CHECK-P9-NEXT:    xvcvsxddp vs4, v7
+; CHECK-P9-NEXT:    xvcvsxddp vs5, v6
+; CHECK-P9-NEXT:    xvcvsxddp vs6, v1
+; CHECK-P9-NEXT:    xvcvsxddp vs7, v0
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    stxv vs7, 112(r3)
+; CHECK-P9-NEXT:    stxv vs6, 96(r3)
+; CHECK-P9-NEXT:    stxv vs5, 80(r3)
+; CHECK-P9-NEXT:    stxv vs4, 64(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-BE-LABEL: test16elt_signed:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv v2, 48(r4)
+; CHECK-BE-NEXT:    lxv v3, 32(r4)
+; CHECK-BE-NEXT:    lxv v4, 16(r4)
+; CHECK-BE-NEXT:    lxv v5, 0(r4)
+; CHECK-BE-NEXT:    lxv v0, 112(r4)
+; CHECK-BE-NEXT:    lxv v1, 96(r4)
+; CHECK-BE-NEXT:    lxv v6, 80(r4)
+; CHECK-BE-NEXT:    lxv v7, 64(r4)
+; CHECK-BE-NEXT:    xvcvsxddp vs0, v5
+; CHECK-BE-NEXT:    xvcvsxddp vs1, v4
+; CHECK-BE-NEXT:    xvcvsxddp vs2, v3
+; CHECK-BE-NEXT:    xvcvsxddp vs3, v2
+; CHECK-BE-NEXT:    xvcvsxddp vs4, v7
+; CHECK-BE-NEXT:    xvcvsxddp vs5, v6
+; CHECK-BE-NEXT:    xvcvsxddp vs6, v1
+; CHECK-BE-NEXT:    xvcvsxddp vs7, v0
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs7, 112(r3)
+; CHECK-BE-NEXT:    stxv vs6, 96(r3)
+; CHECK-BE-NEXT:    stxv vs5, 80(r3)
+; CHECK-BE-NEXT:    stxv vs4, 64(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %a = load <16 x i64>, <16 x i64>* %0, align 128
+  %1 = sitofp <16 x i64> %a to <16 x double>
+  store <16 x double> %1, <16 x double>* %agg.result, align 128
+  ret void
+}




More information about the llvm-commits mailing list