[llvm] r360175 - [PowerPC][NFC] Update build-vector-tests.ll using utils/update_llc_test_checks.py

Jinsong Ji via llvm-commits llvm-commits at lists.llvm.org
Tue May 7 10:29:45 PDT 2019


Author: jsji
Date: Tue May  7 10:29:44 2019
New Revision: 360175

URL: http://llvm.org/viewvc/llvm-project?rev=360175&view=rev
Log:
[PowerPC][NFC] Update build-vector-tests.ll using utils/update_llc_test_checks.py

build-vector-tests.ll is a huge testcase and is hard to maintain; e.g.,
any fundamental change might require updating hundreds of lines. We should
leverage the script to maintain it.

This patch simply runs utils/update_llc_test_checks.py on it. There
should be no missing test points.

Modified:
    llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll

Modified: llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll?rev=360175&r1=360174&r2=360175&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll Tue May  7 10:29:44 2019
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
 ; RUN:   -mtriple=powerpc64-unknown-unknown < %s | FileCheck -allow-deprecated-dag-overlap %s \
 ; RUN:   -check-prefix=P9BE -implicit-check-not frsp
@@ -730,157 +731,233 @@
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @allZeroi() {
+; P9BE-LABEL: allZeroi:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    xxlxor v2, v2, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: allZeroi:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    xxlxor v2, v2, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: allZeroi:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    xxlxor v2, v2, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: allZeroi:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    xxlxor v2, v2, v2
+; P8LE-NEXT:    blr
 entry:
   ret <4 x i32> zeroinitializer
-; P9BE-LABEL: allZeroi
-; P9LE-LABEL: allZeroi
-; P8BE-LABEL: allZeroi
-; P8LE-LABEL: allZeroi
-; P9BE: xxlxor v2, v2, v2
-; P9BE: blr
-; P9LE: xxlxor v2, v2, v2
-; P9LE: blr
-; P8BE: xxlxor v2, v2, v2
-; P8BE: blr
-; P8LE: xxlxor v2, v2, v2
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @allOnei() {
+; P9BE-LABEL: allOnei:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    xxspltib v2, 255
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: allOnei:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    xxspltib v2, 255
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: allOnei:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    vspltisb v2, -1
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: allOnei:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    vspltisb v2, -1
+; P8LE-NEXT:    blr
 entry:
   ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
-; P9BE-LABEL: allOnei
-; P9LE-LABEL: allOnei
-; P8BE-LABEL: allOnei
-; P8LE-LABEL: allOnei
-; P9BE: xxspltib v2, 255
-; P9BE: blr
-; P9LE: xxspltib v2, 255
-; P9LE: blr
-; P8BE: vspltisb v2, -1
-; P8BE: blr
-; P8LE: vspltisb v2, -1
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @spltConst1i() {
+; P9BE-LABEL: spltConst1i:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    vspltisw v2, 1
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltConst1i:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    vspltisw v2, 1
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltConst1i:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    vspltisw v2, 1
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltConst1i:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    vspltisw v2, 1
+; P8LE-NEXT:    blr
 entry:
   ret <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-; P9BE-LABEL: spltConst1i
-; P9LE-LABEL: spltConst1i
-; P8BE-LABEL: spltConst1i
-; P8LE-LABEL: spltConst1i
-; P9BE: vspltisw v2, 1
-; P9BE: blr
-; P9LE: vspltisw v2, 1
-; P9LE: blr
-; P8BE: vspltisw v2, 1
-; P8BE: blr
-; P8LE: vspltisw v2, 1
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @spltConst16ki() {
+; P9BE-LABEL: spltConst16ki:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    vspltisw v2, -15
+; P9BE-NEXT:    vsrw v2, v2, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltConst16ki:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    vspltisw v2, -15
+; P9LE-NEXT:    vsrw v2, v2, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltConst16ki:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    vspltisw v2, -15
+; P8BE-NEXT:    vsrw v2, v2, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltConst16ki:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    vspltisw v2, -15
+; P8LE-NEXT:    vsrw v2, v2, v2
+; P8LE-NEXT:    blr
 entry:
   ret <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
-; P9BE-LABEL: spltConst16ki
-; P9LE-LABEL: spltConst16ki
-; P8BE-LABEL: spltConst16ki
-; P8LE-LABEL: spltConst16ki
-; P9BE: vspltisw v2, -15
-; P9BE: vsrw v2, v2, v2
-; P9BE: blr
-; P9LE: vspltisw v2, -15
-; P9LE: vsrw v2, v2, v2
-; P9LE: blr
-; P8BE: vspltisw v2, -15
-; P8BE: vsrw v2, v2, v2
-; P8BE: blr
-; P8LE: vspltisw v2, -15
-; P8LE: vsrw v2, v2, v2
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @spltConst32ki() {
+; P9BE-LABEL: spltConst32ki:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    vspltisw v2, -16
+; P9BE-NEXT:    vsrw v2, v2, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltConst32ki:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    vspltisw v2, -16
+; P9LE-NEXT:    vsrw v2, v2, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltConst32ki:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    vspltisw v2, -16
+; P8BE-NEXT:    vsrw v2, v2, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltConst32ki:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    vspltisw v2, -16
+; P8LE-NEXT:    vsrw v2, v2, v2
+; P8LE-NEXT:    blr
 entry:
   ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
-; P9BE-LABEL: spltConst32ki
-; P9LE-LABEL: spltConst32ki
-; P8BE-LABEL: spltConst32ki
-; P8LE-LABEL: spltConst32ki
-; P9BE: vspltisw v2, -16
-; P9BE: vsrw v2, v2, v2
-; P9BE: blr
-; P9LE: vspltisw v2, -16
-; P9LE: vsrw v2, v2, v2
-; P9LE: blr
-; P8BE: vspltisw v2, -16
-; P8BE: vsrw v2, v2, v2
-; P8BE: blr
-; P8LE: vspltisw v2, -16
-; P8LE: vsrw v2, v2, v2
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @fromRegsi(i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d) {
+; P9BE-LABEL: fromRegsi:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    rldimi r6, r5, 32, 0
+; P9BE-NEXT:    rldimi r4, r3, 32, 0
+; P9BE-NEXT:    mtvsrdd v2, r4, r6
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromRegsi:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    rldimi r3, r4, 32, 0
+; P9LE-NEXT:    rldimi r5, r6, 32, 0
+; P9LE-NEXT:    mtvsrdd v2, r5, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromRegsi:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    rldimi r6, r5, 32, 0
+; P8BE-NEXT:    rldimi r4, r3, 32, 0
+; P8BE-NEXT:    mtvsrd f0, r6
+; P8BE-NEXT:    mtvsrd f1, r4
+; P8BE-NEXT:    xxmrghd v2, vs1, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromRegsi:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    rldimi r3, r4, 32, 0
+; P8LE-NEXT:    rldimi r5, r6, 32, 0
+; P8LE-NEXT:    mtvsrd f0, r3
+; P8LE-NEXT:    mtvsrd f1, r5
+; P8LE-NEXT:    xxmrghd v2, vs1, vs0
+; P8LE-NEXT:    blr
 entry:
   %vecinit = insertelement <4 x i32> undef, i32 %a, i32 0
   %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1
   %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %c, i32 2
   %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %d, i32 3
   ret <4 x i32> %vecinit3
-; P9BE-LABEL: fromRegsi
-; P9LE-LABEL: fromRegsi
-; P8BE-LABEL: fromRegsi
-; P8LE-LABEL: fromRegsi
-; P9BE-DAG: rldimi r6, r5, 32, 0
-; P9BE-DAG: rldimi r4, r3, 32, 0
-; P9BE: mtvsrdd v2, r4, r6
-; P9BE: blr
-; P9LE-DAG: rldimi r3, r4, 32, 0
-; P9LE-DAG: rldimi r5, r6, 32, 0
-; P9LE: mtvsrdd v2, r5, r3
-; P9LE: blr
-; P8BE-DAG: rldimi r6, r5, 32, 0
-; P8BE-DAG: rldimi r4, r3, 32, 0
-; P8BE-DAG: mtvsrd f[[REG1:[0-9]+]], r6
-; P8BE-DAG: mtvsrd f[[REG2:[0-9]+]], r4
-; P8BE-DAG: xxmrghd v2, vs[[REG2]], vs[[REG1]]
-; P8BE: blr
-; P8LE-DAG: rldimi r3, r4, 32, 0
-; P8LE-DAG: rldimi r5, r6, 32, 0
-; P8LE-DAG: mtvsrd f[[REG1:[0-9]+]], r3
-; P8LE-DAG: mtvsrd f[[REG2:[0-9]+]], r5
-; P8LE-DAG: xxmrghd v2, vs[[REG2]], vs[[REG1]]
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @fromDiffConstsi() {
+; P9BE-LABEL: fromDiffConstsi:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    addis r3, r2, .LCPI6_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI6_0@toc@l
+; P9BE-NEXT:    lxvx v2, 0, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffConstsi:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    addis r3, r2, .LCPI6_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI6_0@toc@l
+; P9LE-NEXT:    lxvx v2, 0, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffConstsi:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addis r3, r2, .LCPI6_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI6_0@toc@l
+; P8BE-NEXT:    lxvw4x v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffConstsi:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addis r3, r2, .LCPI6_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI6_0@toc@l
+; P8LE-NEXT:    lvx v2, 0, r3
+; P8LE-NEXT:    blr
 entry:
   ret <4 x i32> <i32 242, i32 -113, i32 889, i32 19>
-; P9BE-LABEL: fromDiffConstsi
-; P9LE-LABEL: fromDiffConstsi
-; P8BE-LABEL: fromDiffConstsi
-; P8LE-LABEL: fromDiffConstsi
-; P9BE: lxv
-; P9BE: blr
-; P9LE: lxv
-; P9LE: blr
-; P8BE: lxvw4x
-; P8BE: blr
-; P8LE: lvx
-; P8LE-NOT: xxswapd
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromDiffMemConsAi(i32* nocapture readonly %arr) {
+; P9BE-LABEL: fromDiffMemConsAi:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lxv v2, 0(r3)
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemConsAi:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lxv v2, 0(r3)
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemConsAi:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lxvw4x v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemConsAi:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %0 = load i32, i32* %arr, align 4
   %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
@@ -894,23 +971,46 @@ entry:
   %3 = load i32, i32* %arrayidx5, align 4
   %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3
   ret <4 x i32> %vecinit6
-; P9BE-LABEL: fromDiffMemConsAi
-; P9LE-LABEL: fromDiffMemConsAi
-; P8BE-LABEL: fromDiffMemConsAi
-; P8LE-LABEL: fromDiffMemConsAi
-; P9BE: lxv
-; P9BE: blr
-; P9LE: lxv
-; P9LE: blr
-; P8BE: lxvw4x
-; P8BE: blr
-; P8LE: lxvd2x
-; P8LE: xxswapd
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromDiffMemConsDi(i32* nocapture readonly %arr) {
+; P9BE-LABEL: fromDiffMemConsDi:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lxv v2, 0(r3)
+; P9BE-NEXT:    addis r3, r2, .LCPI8_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI8_0@toc@l
+; P9BE-NEXT:    lxvx v3, 0, r3
+; P9BE-NEXT:    vperm v2, v2, v2, v3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemConsDi:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lxv v2, 0(r3)
+; P9LE-NEXT:    addis r3, r2, .LCPI8_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI8_0@toc@l
+; P9LE-NEXT:    lxvx v3, 0, r3
+; P9LE-NEXT:    vperm v2, v2, v2, v3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemConsDi:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addis r4, r2, .LCPI8_0@toc@ha
+; P8BE-NEXT:    lxvw4x v2, 0, r3
+; P8BE-NEXT:    addi r4, r4, .LCPI8_0@toc@l
+; P8BE-NEXT:    lxvw4x v3, 0, r4
+; P8BE-NEXT:    vperm v2, v2, v2, v3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemConsDi:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    addis r4, r2, .LCPI8_0@toc@ha
+; P8LE-NEXT:    addi r3, r4, .LCPI8_0@toc@l
+; P8LE-NEXT:    lvx v2, 0, r3
+; P8LE-NEXT:    xxswapd v3, vs0
+; P8LE-NEXT:    vperm v2, v3, v3, v2
+; P8LE-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds i32, i32* %arr, i64 3
   %0 = load i32, i32* %arrayidx, align 4
@@ -924,31 +1024,34 @@ entry:
   %3 = load i32, i32* %arr, align 4
   %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3
   ret <4 x i32> %vecinit6
-; P9BE-LABEL: fromDiffMemConsDi
-; P9LE-LABEL: fromDiffMemConsDi
-; P8BE-LABEL: fromDiffMemConsDi
-; P8LE-LABEL: fromDiffMemConsDi
-; P9BE: lxv
-; P9BE: lxv
-; P9BE: vperm
-; P9BE: blr
-; P9LE: lxv
-; P9LE: lxv
-; P9LE: vperm
-; P9LE: blr
-; P8BE: lxvw4x
-; P8BE: lxvw4x
-; P8BE: vperm
-; P8BE: blr
-; P8LE: lxvd2x
-; P8LE-DAG: lvx
-; P8LE: xxswapd
-; P8LE: vperm
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromDiffMemVarAi(i32* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromDiffMemVarAi:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 2
+; P9BE-NEXT:    lxvx v2, r3, r4
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemVarAi:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 2
+; P9LE-NEXT:    lxvx v2, r3, r4
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemVarAi:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 2
+; P8BE-NEXT:    lxvw4x v2, r3, r4
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemVarAi:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 2
+; P8LE-NEXT:    lxvd2x vs0, r3, r4
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %idxprom = sext i32 %elem to i64
   %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom
@@ -970,27 +1073,58 @@ entry:
   %3 = load i32, i32* %arrayidx10, align 4
   %vecinit11 = insertelement <4 x i32> %vecinit7, i32 %3, i32 3
   ret <4 x i32> %vecinit11
-; P9BE-LABEL: fromDiffMemVarAi
-; P9LE-LABEL: fromDiffMemVarAi
-; P8BE-LABEL: fromDiffMemVarAi
-; P8LE-LABEL: fromDiffMemVarAi
-; P9BE: sldi r4, r4, 2
-; P9BE: lxvx v2, r3, r4
-; P9BE: blr
-; P9LE: sldi r4, r4, 2
-; P9LE: lxvx v2, r3, r4
-; P9LE: blr
-; P8BE: sldi r4, r4, 2
-; P8BE: lxvw4x {{[vs0-9]+}}, r3, r4
-; P8BE: blr
-; P8LE: sldi r4, r4, 2
-; P8LE: lxvd2x {{[vs0-9]+}}, r3, r4
-; P8LE: xxswapd
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromDiffMemVarDi(i32* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromDiffMemVarDi:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 2
+; P9BE-NEXT:    add r3, r3, r4
+; P9BE-NEXT:    addi r3, r3, -12
+; P9BE-NEXT:    lxvx v2, 0, r3
+; P9BE-NEXT:    addis r3, r2, .LCPI10_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI10_0@toc@l
+; P9BE-NEXT:    lxvx v3, 0, r3
+; P9BE-NEXT:    vperm v2, v2, v2, v3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemVarDi:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 2
+; P9LE-NEXT:    add r3, r3, r4
+; P9LE-NEXT:    addi r3, r3, -12
+; P9LE-NEXT:    lxvx v2, 0, r3
+; P9LE-NEXT:    addis r3, r2, .LCPI10_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI10_0@toc@l
+; P9LE-NEXT:    lxvx v3, 0, r3
+; P9LE-NEXT:    vperm v2, v2, v2, v3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemVarDi:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 2
+; P8BE-NEXT:    addis r5, r2, .LCPI10_0@toc@ha
+; P8BE-NEXT:    add r3, r3, r4
+; P8BE-NEXT:    addi r4, r5, .LCPI10_0@toc@l
+; P8BE-NEXT:    addi r3, r3, -12
+; P8BE-NEXT:    lxvw4x v3, 0, r4
+; P8BE-NEXT:    lxvw4x v2, 0, r3
+; P8BE-NEXT:    vperm v2, v2, v2, v3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemVarDi:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 2
+; P8LE-NEXT:    addis r5, r2, .LCPI10_0@toc@ha
+; P8LE-NEXT:    add r3, r3, r4
+; P8LE-NEXT:    addi r3, r3, -12
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    addi r3, r5, .LCPI10_0@toc@l
+; P8LE-NEXT:    lvx v3, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    vperm v2, v2, v2, v3
+; P8LE-NEXT:    blr
 entry:
   %idxprom = sext i32 %elem to i64
   %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom
@@ -1012,35 +1146,57 @@ entry:
   %3 = load i32, i32* %arrayidx10, align 4
   %vecinit11 = insertelement <4 x i32> %vecinit7, i32 %3, i32 3
   ret <4 x i32> %vecinit11
-; P9BE-LABEL: fromDiffMemVarDi
-; P9LE-LABEL: fromDiffMemVarDi
-; P8BE-LABEL: fromDiffMemVarDi
-; P8LE-LABEL: fromDiffMemVarDi
-; P9BE: sldi {{r[0-9]+}}, r4, 2
-; P9BE-DAG: lxvx {{v[0-9]+}}
-; P9BE-DAG: lxvx
-; P9BE: vperm
-; P9BE: blr
-; P9LE: sldi {{r[0-9]+}}, r4, 2
-; P9LE-DAG: lxvx {{v[0-9]+}}
-; P9LE-DAG: lxvx
-; P9LE: vperm
-; P9LE: blr
-; P8BE: sldi {{r[0-9]+}}, r4, 2
-; P8BE-DAG: lxvw4x {{v[0-9]+}}, 0, r3
-; P8BE-DAG: lxvw4x
-; P8BE: vperm
-; P8BE: blr
-; P8LE: sldi {{r[0-9]+}}, r4, 2
-; P8LE-DAG: lxvd2x
-; P8LE-DAG: lxvd2x
-; P8LE: xxswapd
-; P8LE: vperm
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromRandMemConsi(i32* nocapture readonly %arr) {
+; P9BE-LABEL: fromRandMemConsi:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lwz r4, 16(r3)
+; P9BE-NEXT:    lwz r5, 72(r3)
+; P9BE-NEXT:    lwz r6, 8(r3)
+; P9BE-NEXT:    lwz r3, 352(r3)
+; P9BE-NEXT:    rldimi r3, r6, 32, 0
+; P9BE-NEXT:    rldimi r5, r4, 32, 0
+; P9BE-NEXT:    mtvsrdd v2, r5, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromRandMemConsi:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lwz r4, 16(r3)
+; P9LE-NEXT:    lwz r5, 72(r3)
+; P9LE-NEXT:    lwz r6, 8(r3)
+; P9LE-NEXT:    lwz r3, 352(r3)
+; P9LE-NEXT:    rldimi r4, r5, 32, 0
+; P9LE-NEXT:    rldimi r6, r3, 32, 0
+; P9LE-NEXT:    mtvsrdd v2, r6, r4
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromRandMemConsi:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lwz r4, 8(r3)
+; P8BE-NEXT:    lwz r5, 352(r3)
+; P8BE-NEXT:    lwz r6, 16(r3)
+; P8BE-NEXT:    lwz r3, 72(r3)
+; P8BE-NEXT:    rldimi r5, r4, 32, 0
+; P8BE-NEXT:    rldimi r3, r6, 32, 0
+; P8BE-NEXT:    mtvsrd f0, r5
+; P8BE-NEXT:    mtvsrd f1, r3
+; P8BE-NEXT:    xxmrghd v2, vs1, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromRandMemConsi:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lwz r4, 16(r3)
+; P8LE-NEXT:    lwz r5, 72(r3)
+; P8LE-NEXT:    lwz r6, 8(r3)
+; P8LE-NEXT:    lwz r3, 352(r3)
+; P8LE-NEXT:    rldimi r4, r5, 32, 0
+; P8LE-NEXT:    rldimi r6, r3, 32, 0
+; P8LE-NEXT:    mtvsrd f0, r4
+; P8LE-NEXT:    mtvsrd f1, r6
+; P8LE-NEXT:    xxmrghd v2, vs1, vs0
+; P8LE-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds i32, i32* %arr, i64 4
   %0 = load i32, i32* %arrayidx, align 4
@@ -1055,46 +1211,65 @@ entry:
   %3 = load i32, i32* %arrayidx5, align 4
   %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3
   ret <4 x i32> %vecinit6
-; P9BE-LABEL: fromRandMemConsi
-; P9LE-LABEL: fromRandMemConsi
-; P8BE-LABEL: fromRandMemConsi
-; P8LE-LABEL: fromRandMemConsi
-; P9BE: lwz
-; P9BE: lwz
-; P9BE: lwz
-; P9BE: lwz
-; P9BE: rldimi
-; P9BE: rldimi
-; P9BE: mtvsrdd
-; P9LE: lwz
-; P9LE: lwz
-; P9LE: lwz
-; P9LE: lwz
-; P9LE: rldimi
-; P9LE: rldimi
-; P9LE: mtvsrdd
-; P8BE: lwz
-; P8BE: lwz
-; P8BE: lwz
-; P8BE: lwz
-; P8BE: rldimi
-; P8BE: rldimi
-; P8BE: mtvsrd
-; P8BE: mtvsrd
-; P8BE: xxmrghd
-; P8LE: lwz
-; P8LE: lwz
-; P8LE: lwz
-; P8LE: lwz
-; P8LE: rldimi
-; P8LE: rldimi
-; P8LE: mtvsrd
-; P8LE: mtvsrd
-; P8LE: xxmrghd
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromRandMemVari(i32* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromRandMemVari:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 2
+; P9BE-NEXT:    add r3, r3, r4
+; P9BE-NEXT:    lwz r4, 16(r3)
+; P9BE-NEXT:    lwz r5, 4(r3)
+; P9BE-NEXT:    lwz r6, 8(r3)
+; P9BE-NEXT:    lwz r3, 32(r3)
+; P9BE-NEXT:    rldimi r3, r6, 32, 0
+; P9BE-NEXT:    rldimi r5, r4, 32, 0
+; P9BE-NEXT:    mtvsrdd v2, r5, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromRandMemVari:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 2
+; P9LE-NEXT:    add r3, r3, r4
+; P9LE-NEXT:    lwz r4, 16(r3)
+; P9LE-NEXT:    lwz r5, 4(r3)
+; P9LE-NEXT:    lwz r6, 8(r3)
+; P9LE-NEXT:    lwz r3, 32(r3)
+; P9LE-NEXT:    rldimi r4, r5, 32, 0
+; P9LE-NEXT:    rldimi r6, r3, 32, 0
+; P9LE-NEXT:    mtvsrdd v2, r6, r4
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromRandMemVari:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 2
+; P8BE-NEXT:    add r3, r3, r4
+; P8BE-NEXT:    lwz r4, 8(r3)
+; P8BE-NEXT:    lwz r5, 32(r3)
+; P8BE-NEXT:    lwz r6, 16(r3)
+; P8BE-NEXT:    lwz r3, 4(r3)
+; P8BE-NEXT:    rldimi r5, r4, 32, 0
+; P8BE-NEXT:    rldimi r3, r6, 32, 0
+; P8BE-NEXT:    mtvsrd f0, r5
+; P8BE-NEXT:    mtvsrd f1, r3
+; P8BE-NEXT:    xxmrghd v2, vs1, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromRandMemVari:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 2
+; P8LE-NEXT:    add r3, r3, r4
+; P8LE-NEXT:    lwz r4, 16(r3)
+; P8LE-NEXT:    lwz r5, 4(r3)
+; P8LE-NEXT:    lwz r6, 8(r3)
+; P8LE-NEXT:    lwz r3, 32(r3)
+; P8LE-NEXT:    rldimi r4, r5, 32, 0
+; P8LE-NEXT:    rldimi r6, r3, 32, 0
+; P8LE-NEXT:    mtvsrd f0, r4
+; P8LE-NEXT:    mtvsrd f1, r6
+; P8LE-NEXT:    xxmrghd v2, vs1, vs0
+; P8LE-NEXT:    blr
 entry:
   %add = add nsw i32 %elem, 4
   %idxprom = sext i32 %add to i64
@@ -1117,119 +1292,151 @@ entry:
   %3 = load i32, i32* %arrayidx11, align 4
   %vecinit12 = insertelement <4 x i32> %vecinit8, i32 %3, i32 3
   ret <4 x i32> %vecinit12
-; P9BE-LABEL: fromRandMemVari
-; P9LE-LABEL: fromRandMemVari
-; P8BE-LABEL: fromRandMemVari
-; P8LE-LABEL: fromRandMemVari
-; P9BE: sldi r4, r4, 2
-; P9BE: lwz
-; P9BE: lwz
-; P9BE: lwz
-; P9BE: lwz
-; P9BE: rldimi
-; P9BE: rldimi
-; P9BE: mtvsrdd
-; P9LE: sldi r4, r4, 2
-; P9LE: lwz
-; P9LE: lwz
-; P9LE: lwz
-; P9LE: lwz
-; P9LE: rldimi
-; P9LE: rldimi
-; P9LE: mtvsrdd
-; P8BE: sldi r4, r4, 2
-; P8BE: lwz
-; P8BE: lwz
-; P8BE: lwz
-; P8BE: lwz
-; P8BE: rldimi
-; P8BE: rldimi
-; P8BE: mtvsrd
-; P8BE: mtvsrd
-; P8BE: xxmrghd
-; P8LE: sldi r4, r4, 2
-; P8LE: lwz
-; P8LE: lwz
-; P8LE: lwz
-; P8LE: lwz
-; P8LE: rldimi
-; P8LE: rldimi
-; P8LE: mtvsrd
-; P8LE: mtvsrd
-; P8LE: xxmrghd
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @spltRegVali(i32 signext %val) {
+; P9BE-LABEL: spltRegVali:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    mtvsrws v2, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltRegVali:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    mtvsrws v2, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltRegVali:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    mtvsrwz f0, r3
+; P8BE-NEXT:    xxspltw v2, vs0, 1
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltRegVali:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    mtvsrwz f0, r3
+; P8LE-NEXT:    xxspltw v2, vs0, 1
+; P8LE-NEXT:    blr
 entry:
   %splat.splatinsert = insertelement <4 x i32> undef, i32 %val, i32 0
   %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
   ret <4 x i32> %splat.splat
-; P9BE-LABEL: spltRegVali
-; P9LE-LABEL: spltRegVali
-; P8BE-LABEL: spltRegVali
-; P8LE-LABEL: spltRegVali
-; P9BE: mtvsrws v2, r3
-; P9BE: blr
-; P9LE: mtvsrws v2, r3
-; P9LE: blr
-; P8BE: mtvsrwz {{[vsf0-9]+}}, r3
-; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1
-; P8BE: blr
-; P8LE: mtvsrwz {{[vsf0-9]+}}, r3
-; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @spltMemVali(i32* nocapture readonly %ptr) {
+; P9BE-LABEL: spltMemVali:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lfiwzx f0, 0, r3
+; P9BE-NEXT:    xxsldwi vs0, f0, f0, 1
+; P9BE-NEXT:    xxspltw v2, vs0, 0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltMemVali:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lfiwzx f0, 0, r3
+; P9LE-NEXT:    xxpermdi vs0, f0, f0, 2
+; P9LE-NEXT:    xxspltw v2, vs0, 3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltMemVali:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lfiwzx f0, 0, r3
+; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
+; P8BE-NEXT:    xxspltw v2, vs0, 0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltMemVali:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lfiwzx f0, 0, r3
+; P8LE-NEXT:    xxpermdi vs0, f0, f0, 2
+; P8LE-NEXT:    xxspltw v2, vs0, 3
+; P8LE-NEXT:    blr
 entry:
   %0 = load i32, i32* %ptr, align 4
   %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
   %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
   ret <4 x i32> %splat.splat
-; P9BE-LABEL: spltMemVali
-; P9LE-LABEL: spltMemVali
-; P8BE-LABEL: spltMemVali
-; P8LE-LABEL: spltMemVali
-; P9BE: lfiwzx f0, 0, r3
-; P9BE: xxsldwi vs0, f0, f0, 1
-; P9BE: xxspltw v2, vs0, 0
-; P9BE: blr
-; P9LE: lfiwzx f0, 0, r3
-; P9LE: xxpermdi vs0, f0, f0, 2
-; P9LE: xxspltw v2, vs0, 3
-; P9LE: blr
-; P8BE: lfiwzx f0, 0, r3
-; P8BE: xxsldwi vs0, f0, f0, 1
-; P8BE: xxspltw v2, vs0, 0
-; P8BE: blr
-; P8LE: lfiwzx f0, 0, r3
-; P8LE: xxpermdi vs0, f0, f0, 2
-; P8LE: xxspltw v2, vs0, 3
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @spltCnstConvftoi() {
+; P9BE-LABEL: spltCnstConvftoi:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    vspltisw v2, 4
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltCnstConvftoi:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    vspltisw v2, 4
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltCnstConvftoi:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    vspltisw v2, 4
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltCnstConvftoi:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    vspltisw v2, 4
+; P8LE-NEXT:    blr
 entry:
   ret <4 x i32> <i32 4, i32 4, i32 4, i32 4>
-; P9BE-LABEL: spltCnstConvftoi
-; P9LE-LABEL: spltCnstConvftoi
-; P8BE-LABEL: spltCnstConvftoi
-; P8LE-LABEL: spltCnstConvftoi
-; P9BE: vspltisw v2, 4
-; P9BE: blr
-; P9LE: vspltisw v2, 4
-; P9LE: blr
-; P8BE: vspltisw v2, 4
-; P8BE: blr
-; P8LE: vspltisw v2, 4
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @fromRegsConvftoi(float %a, float %b, float %c, float %d) {
+; P9BE-LABEL: fromRegsConvftoi:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; P9BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P9BE-NEXT:    xxmrghd vs0, vs2, vs4
+; P9BE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
+; P9BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P9BE-NEXT:    xvcvdpsxws v2, vs0
+; P9BE-NEXT:    xxmrghd vs0, vs1, vs3
+; P9BE-NEXT:    xvcvdpsxws v3, vs0
+; P9BE-NEXT:    vmrgew v2, v3, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromRegsConvftoi:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
+; P9LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P9LE-NEXT:    xxmrghd vs0, vs3, vs1
+; P9LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; P9LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P9LE-NEXT:    xvcvdpsxws v2, vs0
+; P9LE-NEXT:    xxmrghd vs0, vs4, vs2
+; P9LE-NEXT:    xvcvdpsxws v3, vs0
+; P9LE-NEXT:    vmrgew v2, v3, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromRegsConvftoi:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P8BE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; P8BE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
+; P8BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8BE-NEXT:    xxmrghd vs0, vs2, vs4
+; P8BE-NEXT:    xxmrghd vs1, vs1, vs3
+; P8BE-NEXT:    xvcvdpsxws v2, vs0
+; P8BE-NEXT:    xvcvdpsxws v3, vs1
+; P8BE-NEXT:    vmrgew v2, v3, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromRegsConvftoi:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P8LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; P8LE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
+; P8LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8LE-NEXT:    xxmrghd vs0, vs3, vs1
+; P8LE-NEXT:    xxmrghd vs1, vs4, vs2
+; P8LE-NEXT:    xvcvdpsxws v2, vs0
+; P8LE-NEXT:    xvcvdpsxws v3, vs1
+; P8LE-NEXT:    vmrgew v2, v3, v2
+; P8LE-NEXT:    blr
 entry:
   %conv = fptosi float %a to i32
   %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
@@ -1240,79 +1447,116 @@ entry:
   %conv5 = fptosi float %d to i32
   %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %conv5, i32 3
   ret <4 x i32> %vecinit6
-; P9BE-LABEL: fromRegsConvftoi
-; P9LE-LABEL: fromRegsConvftoi
-; P8BE-LABEL: fromRegsConvftoi
-; P8LE-LABEL: fromRegsConvftoi
-; P9BE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
-; P9BE: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
-; P9BE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
-; P9BE: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9BE: vmrgew v2, [[REG3]], [[REG4]]
-; P9LE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
-; P9LE: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9LE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
-; P9LE: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
-; P9LE: vmrgew v2, [[REG4]], [[REG3]]
-; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
-; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
-; P8BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P8BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
-; P8BE: vmrgew v2, [[REG3]], [[REG4]]
-; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
-; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
-; P8LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P8LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
-; P8LE: vmrgew v2, [[REG4]], [[REG3]]
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @fromDiffConstsConvftoi() {
+; P9BE-LABEL: fromDiffConstsConvftoi:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    addis r3, r2, .LCPI17_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI17_0@toc@l
+; P9BE-NEXT:    lxvx v2, 0, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffConstsConvftoi:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    addis r3, r2, .LCPI17_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI17_0@toc@l
+; P9LE-NEXT:    lxvx v2, 0, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffConstsConvftoi:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addis r3, r2, .LCPI17_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI17_0@toc@l
+; P8BE-NEXT:    lxvw4x v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffConstsConvftoi:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addis r3, r2, .LCPI17_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI17_0@toc@l
+; P8LE-NEXT:    lvx v2, 0, r3
+; P8LE-NEXT:    blr
 entry:
   ret <4 x i32> <i32 24, i32 234, i32 988, i32 422>
-; P9BE-LABEL: fromDiffConstsConvftoi
-; P9LE-LABEL: fromDiffConstsConvftoi
-; P8BE-LABEL: fromDiffConstsConvftoi
-; P8LE-LABEL: fromDiffConstsConvftoi
-; P9BE: lxv
-; P9BE: blr
-; P9LE: lxv
-; P9LE: blr
-; P8BE: lxvw4x
-; P8BE: blr
-; P8LE: lvx
-; P8LE-NOT: xxswapd
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromDiffMemConsAConvftoi(float* nocapture readonly %ptr) {
+; P9BE-LABEL: fromDiffMemConsAConvftoi:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lxv vs0, 0(r3)
+; P9BE-NEXT:    xvcvspsxws v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemConsAConvftoi:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lxv vs0, 0(r3)
+; P9LE-NEXT:    xvcvspsxws v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemConsAConvftoi:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lxvw4x vs0, 0, r3
+; P8BE-NEXT:    xvcvspsxws v2, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemConsAConvftoi:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    xvcvspsxws v2, v2
+; P8LE-NEXT:    blr
 entry:
   %0 = bitcast float* %ptr to <4 x float>*
   %1 = load <4 x float>, <4 x float>* %0, align 4
   %2 = fptosi <4 x float> %1 to <4 x i32>
   ret <4 x i32> %2
-; P9BE-LABEL: fromDiffMemConsAConvftoi
-; P9LE-LABEL: fromDiffMemConsAConvftoi
-; P8BE-LABEL: fromDiffMemConsAConvftoi
-; P8LE-LABEL: fromDiffMemConsAConvftoi
-; P9BE: lxv [[REG1:[vs0-9]+]], 0(r3)
-; P9BE: xvcvspsxws v2, [[REG1]]
-; P9BE: blr
-; P9LE: lxv [[REG1:[vs0-9]+]], 0(r3)
-; P9LE: xvcvspsxws v2, [[REG1]]
-; P9LE: blr
-; P8BE: lxvw4x [[REG1:[vs0-9]+]], 0, r3
-; P8BE: xvcvspsxws v2, [[REG1]]
-; P8BE: blr
-; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
-; P8LE: xxswapd
-; P8LE: xvcvspsxws v2, v2
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromDiffMemConsDConvftoi(float* nocapture readonly %ptr) {
+; P9BE-LABEL: fromDiffMemConsDConvftoi:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lxv v2, 0(r3)
+; P9BE-NEXT:    addis r3, r2, .LCPI19_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI19_0@toc@l
+; P9BE-NEXT:    lxvx v3, 0, r3
+; P9BE-NEXT:    vperm v2, v2, v2, v3
+; P9BE-NEXT:    xvcvspsxws v2, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemConsDConvftoi:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lxv v2, 0(r3)
+; P9LE-NEXT:    addis r3, r2, .LCPI19_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI19_0@toc@l
+; P9LE-NEXT:    lxvx v3, 0, r3
+; P9LE-NEXT:    vperm v2, v2, v2, v3
+; P9LE-NEXT:    xvcvspsxws v2, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemConsDConvftoi:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addis r4, r2, .LCPI19_0@toc@ha
+; P8BE-NEXT:    lxvw4x v2, 0, r3
+; P8BE-NEXT:    addi r4, r4, .LCPI19_0@toc@l
+; P8BE-NEXT:    lxvw4x v3, 0, r4
+; P8BE-NEXT:    vperm v2, v2, v2, v3
+; P8BE-NEXT:    xvcvspsxws v2, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemConsDConvftoi:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    addis r4, r2, .LCPI19_0@toc@ha
+; P8LE-NEXT:    addi r3, r4, .LCPI19_0@toc@l
+; P8LE-NEXT:    lvx v2, 0, r3
+; P8LE-NEXT:    xxswapd v3, vs0
+; P8LE-NEXT:    vperm v2, v3, v3, v2
+; P8LE-NEXT:    xvcvspsxws v2, v2
+; P8LE-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds float, float* %ptr, i64 3
   %0 = load float, float* %arrayidx, align 4
@@ -1330,35 +1574,69 @@ entry:
   %conv8 = fptosi float %3 to i32
   %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
   ret <4 x i32> %vecinit9
-; P9BE-LABEL: fromDiffMemConsDConvftoi
-; P9LE-LABEL: fromDiffMemConsDConvftoi
-; P8BE-LABEL: fromDiffMemConsDConvftoi
-; P8LE-LABEL: fromDiffMemConsDConvftoi
-; P9BE: lxv
-; P9BE: lxv
-; P9BE: vperm
-; P9BE: xvcvspsxws
-; P9BE: blr
-; P9LE: lxv
-; P9LE: lxv
-; P9LE: vperm
-; P9LE: xvcvspsxws
-; P9LE: blr
-; P8BE: lxvw4x
-; P8BE: lxvw4x
-; P8BE: vperm
-; P8BE: xvcvspsxws
-; P8BE: blr
-; P8LE: lxvd2x
-; P8LE-DAG: lvx
-; P8LE: xxswapd
-; P8LE: vperm
-; P8LE: xvcvspsxws
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromDiffMemVarAConvftoi(float* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromDiffMemVarAConvftoi:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 2
+; P9BE-NEXT:    lfsux f0, r3, r4
+; P9BE-NEXT:    lfs f1, 12(r3)
+; P9BE-NEXT:    lfs f2, 4(r3)
+; P9BE-NEXT:    xxmrghd vs1, vs2, vs1
+; P9BE-NEXT:    xvcvdpsp v2, vs1
+; P9BE-NEXT:    lfs f1, 8(r3)
+; P9BE-NEXT:    xxmrghd vs0, vs0, vs1
+; P9BE-NEXT:    xvcvdpsp v3, vs0
+; P9BE-NEXT:    vmrgew v2, v3, v2
+; P9BE-NEXT:    xvcvspsxws v2, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemVarAConvftoi:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 2
+; P9LE-NEXT:    lfsux f0, r3, r4
+; P9LE-NEXT:    lfs f1, 8(r3)
+; P9LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P9LE-NEXT:    lfs f1, 12(r3)
+; P9LE-NEXT:    xvcvdpsp v2, vs0
+; P9LE-NEXT:    lfs f0, 4(r3)
+; P9LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P9LE-NEXT:    xvcvdpsp v3, vs0
+; P9LE-NEXT:    vmrgew v2, v3, v2
+; P9LE-NEXT:    xvcvspsxws v2, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemVarAConvftoi:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 2
+; P8BE-NEXT:    lfsux f0, r3, r4
+; P8BE-NEXT:    lfs f1, 12(r3)
+; P8BE-NEXT:    lfs f2, 4(r3)
+; P8BE-NEXT:    lfs f3, 8(r3)
+; P8BE-NEXT:    xxmrghd vs1, vs2, vs1
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs3
+; P8BE-NEXT:    xvcvdpsp v2, vs1
+; P8BE-NEXT:    xvcvdpsp v3, vs0
+; P8BE-NEXT:    vmrgew v2, v3, v2
+; P8BE-NEXT:    xvcvspsxws v2, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemVarAConvftoi:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 2
+; P8LE-NEXT:    lfsux f0, r3, r4
+; P8LE-NEXT:    lfs f1, 8(r3)
+; P8LE-NEXT:    lfs f2, 4(r3)
+; P8LE-NEXT:    lfs f3, 12(r3)
+; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P8LE-NEXT:    xxmrghd vs1, vs3, vs2
+; P8LE-NEXT:    xvcvdpsp v2, vs0
+; P8LE-NEXT:    xvcvdpsp v3, vs1
+; P8LE-NEXT:    vmrgew v2, v3, v2
+; P8LE-NEXT:    xvcvspsxws v2, v2
+; P8LE-NEXT:    blr
 entry:
   %idxprom = sext i32 %elem to i64
   %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom
@@ -1384,19 +1662,70 @@ entry:
   %conv13 = fptosi float %3 to i32
   %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3
   ret <4 x i32> %vecinit14
-; P9BE-LABEL: fromDiffMemVarAConvftoi
-; P9LE-LABEL: fromDiffMemVarAConvftoi
-; P8BE-LABEL: fromDiffMemVarAConvftoi
-; P8LE-LABEL: fromDiffMemVarAConvftoi
 ; FIXME: implement finding consecutive loads with pre-inc
-; P9BE: lfsux
-; P9LE: lfsux
-; P8BE: lfsux
-; P8LE: lfsux
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromDiffMemVarDConvftoi(float* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromDiffMemVarDConvftoi:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 2
+; P9BE-NEXT:    lfsux f0, r3, r4
+; P9BE-NEXT:    lfs f1, -12(r3)
+; P9BE-NEXT:    lfs f2, -4(r3)
+; P9BE-NEXT:    xxmrghd vs1, vs2, vs1
+; P9BE-NEXT:    xvcvdpsp v2, vs1
+; P9BE-NEXT:    lfs f1, -8(r3)
+; P9BE-NEXT:    xxmrghd vs0, vs0, vs1
+; P9BE-NEXT:    xvcvdpsp v3, vs0
+; P9BE-NEXT:    vmrgew v2, v3, v2
+; P9BE-NEXT:    xvcvspsxws v2, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemVarDConvftoi:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 2
+; P9LE-NEXT:    lfsux f0, r3, r4
+; P9LE-NEXT:    lfs f1, -8(r3)
+; P9LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P9LE-NEXT:    lfs f1, -12(r3)
+; P9LE-NEXT:    xvcvdpsp v2, vs0
+; P9LE-NEXT:    lfs f0, -4(r3)
+; P9LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P9LE-NEXT:    xvcvdpsp v3, vs0
+; P9LE-NEXT:    vmrgew v2, v3, v2
+; P9LE-NEXT:    xvcvspsxws v2, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemVarDConvftoi:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 2
+; P8BE-NEXT:    lfsux f0, r3, r4
+; P8BE-NEXT:    lfs f1, -12(r3)
+; P8BE-NEXT:    lfs f2, -4(r3)
+; P8BE-NEXT:    lfs f3, -8(r3)
+; P8BE-NEXT:    xxmrghd vs1, vs2, vs1
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs3
+; P8BE-NEXT:    xvcvdpsp v2, vs1
+; P8BE-NEXT:    xvcvdpsp v3, vs0
+; P8BE-NEXT:    vmrgew v2, v3, v2
+; P8BE-NEXT:    xvcvspsxws v2, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemVarDConvftoi:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 2
+; P8LE-NEXT:    lfsux f0, r3, r4
+; P8LE-NEXT:    lfs f1, -8(r3)
+; P8LE-NEXT:    lfs f2, -4(r3)
+; P8LE-NEXT:    lfs f3, -12(r3)
+; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P8LE-NEXT:    xxmrghd vs1, vs3, vs2
+; P8LE-NEXT:    xvcvdpsp v2, vs0
+; P8LE-NEXT:    xvcvdpsp v3, vs1
+; P8LE-NEXT:    vmrgew v2, v3, v2
+; P8LE-NEXT:    xvcvspsxws v2, v2
+; P8LE-NEXT:    blr
 entry:
   %idxprom = sext i32 %elem to i64
   %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom
@@ -1422,86 +1751,154 @@ entry:
   %conv13 = fptosi float %3 to i32
   %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3
   ret <4 x i32> %vecinit14
-; P9BE-LABEL: fromDiffMemVarDConvftoi
-; P9LE-LABEL: fromDiffMemVarDConvftoi
-; P8BE-LABEL: fromDiffMemVarDConvftoi
-; P8LE-LABEL: fromDiffMemVarDConvftoi
 ; FIXME: implement finding consecutive loads with pre-inc
-; P9BE: lfsux
-; P9LE: lfsux
-; P8BE: lfsux
-; P8LE: lfsux
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @spltRegValConvftoi(float %val) {
+; P9BE-LABEL: spltRegValConvftoi:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    xscvdpsxws f0, f1
+; P9BE-NEXT:    xxspltw v2, vs0, 1
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltRegValConvftoi:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    xscvdpsxws f0, f1
+; P9LE-NEXT:    xxspltw v2, vs0, 1
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltRegValConvftoi:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    xscvdpsxws f0, f1
+; P8BE-NEXT:    xxspltw v2, vs0, 1
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltRegValConvftoi:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    xscvdpsxws f0, f1
+; P8LE-NEXT:    xxspltw v2, vs0, 1
+; P8LE-NEXT:    blr
 entry:
   %conv = fptosi float %val to i32
   %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
   %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
   ret <4 x i32> %splat.splat
-; P9BE-LABEL: spltRegValConvftoi
-; P9LE-LABEL: spltRegValConvftoi
-; P8BE-LABEL: spltRegValConvftoi
-; P8LE-LABEL: spltRegValConvftoi
-; P9BE: xscvdpsxws f[[REG1:[0-9]+]], f1
-; P9BE: xxspltw v2, vs[[REG1]], 1
-; P9BE: blr
-; P9LE: xscvdpsxws f[[REG1:[0-9]+]], f1
-; P9LE: xxspltw v2, vs[[REG1]], 1
-; P9LE: blr
-; P8BE: xscvdpsxws f[[REG1:[0-9]+]], f1
-; P8BE: xxspltw v2, vs[[REG1]], 1
-; P8BE: blr
-; P8LE: xscvdpsxws f[[REG1:[0-9]+]], f1
-; P8LE: xxspltw v2, vs[[REG1]], 1
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @spltMemValConvftoi(float* nocapture readonly %ptr) {
+; P9BE-LABEL: spltMemValConvftoi:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lxvwsx vs0, 0, r3
+; P9BE-NEXT:    xvcvspsxws v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltMemValConvftoi:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lxvwsx vs0, 0, r3
+; P9LE-NEXT:    xvcvspsxws v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltMemValConvftoi:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lfsx f0, 0, r3
+; P8BE-NEXT:    xscvdpsxws f0, f0
+; P8BE-NEXT:    xxspltw v2, vs0, 1
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltMemValConvftoi:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lfsx f0, 0, r3
+; P8LE-NEXT:    xscvdpsxws f0, f0
+; P8LE-NEXT:    xxspltw v2, vs0, 1
+; P8LE-NEXT:    blr
 entry:
   %0 = load float, float* %ptr, align 4
   %conv = fptosi float %0 to i32
   %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
   %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
   ret <4 x i32> %splat.splat
-; P9BE-LABEL: spltMemValConvftoi
-; P9LE-LABEL: spltMemValConvftoi
-; P8BE-LABEL: spltMemValConvftoi
-; P8LE-LABEL: spltMemValConvftoi
-; P9BE: lxvwsx [[REG1:[vs0-9]+]], 0, r3
-; P9BE: xvcvspsxws v2, [[REG1]]
-; P9LE: [[REG1:[vs0-9]+]], 0, r3
-; P9LE: xvcvspsxws v2, [[REG1]]
-; P8BE: lfsx [[REG1:f[0-9]+]], 0, r3
-; P8BE: xscvdpsxws f[[REG2:[0-9]+]], [[REG1]]
-; P8BE: xxspltw v2, vs[[REG2]], 1
-; P8LE: lfsx [[REG1:f[0-9]+]], 0, r3
-; P8LE: xscvdpsxws f[[REG2:[vs0-9]+]], [[REG1]]
-; P8LE: xxspltw v2, vs[[REG2]], 1
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @spltCnstConvdtoi() {
+; P9BE-LABEL: spltCnstConvdtoi:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    vspltisw v2, 4
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltCnstConvdtoi:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    vspltisw v2, 4
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltCnstConvdtoi:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    vspltisw v2, 4
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltCnstConvdtoi:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    vspltisw v2, 4
+; P8LE-NEXT:    blr
 entry:
   ret <4 x i32> <i32 4, i32 4, i32 4, i32 4>
-; P9BE-LABEL: spltCnstConvdtoi
-; P9LE-LABEL: spltCnstConvdtoi
-; P8BE-LABEL: spltCnstConvdtoi
-; P8LE-LABEL: spltCnstConvdtoi
-; P9BE: vspltisw v2, 4
-; P9BE: blr
-; P9LE: vspltisw v2, 4
-; P9LE: blr
-; P8BE: vspltisw v2, 4
-; P8BE: blr
-; P8LE: vspltisw v2, 4
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) {
+; P9BE-LABEL: fromRegsConvdtoi:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; P9BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P9BE-NEXT:    xxmrghd vs0, vs2, vs4
+; P9BE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
+; P9BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P9BE-NEXT:    xvcvdpsxws v2, vs0
+; P9BE-NEXT:    xxmrghd vs0, vs1, vs3
+; P9BE-NEXT:    xvcvdpsxws v3, vs0
+; P9BE-NEXT:    vmrgew v2, v3, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromRegsConvdtoi:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
+; P9LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P9LE-NEXT:    xxmrghd vs0, vs3, vs1
+; P9LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; P9LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P9LE-NEXT:    xvcvdpsxws v2, vs0
+; P9LE-NEXT:    xxmrghd vs0, vs4, vs2
+; P9LE-NEXT:    xvcvdpsxws v3, vs0
+; P9LE-NEXT:    vmrgew v2, v3, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromRegsConvdtoi:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P8BE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; P8BE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
+; P8BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8BE-NEXT:    xxmrghd vs0, vs2, vs4
+; P8BE-NEXT:    xxmrghd vs1, vs1, vs3
+; P8BE-NEXT:    xvcvdpsxws v2, vs0
+; P8BE-NEXT:    xvcvdpsxws v3, vs1
+; P8BE-NEXT:    vmrgew v2, v3, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromRegsConvdtoi:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P8LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; P8LE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
+; P8LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8LE-NEXT:    xxmrghd vs0, vs3, vs1
+; P8LE-NEXT:    xxmrghd vs1, vs4, vs2
+; P8LE-NEXT:    xvcvdpsxws v2, vs0
+; P8LE-NEXT:    xvcvdpsxws v3, vs1
+; P8LE-NEXT:    vmrgew v2, v3, v2
+; P8LE-NEXT:    blr
 entry:
   %conv = fptosi double %a to i32
   %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
@@ -1512,53 +1909,90 @@ entry:
   %conv5 = fptosi double %d to i32
   %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %conv5, i32 3
   ret <4 x i32> %vecinit6
-; P9BE-LABEL: fromRegsConvdtoi
-; P9LE-LABEL: fromRegsConvdtoi
-; P8BE-LABEL: fromRegsConvdtoi
-; P8LE-LABEL: fromRegsConvdtoi
-; P9BE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
-; P9BE: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
-; P9BE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
-; P9BE: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9BE: vmrgew v2, [[REG3]], [[REG4]]
-; P9LE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
-; P9LE: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9LE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
-; P9LE: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
-; P9LE: vmrgew v2, [[REG4]], [[REG3]]
-; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
-; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
-; P8BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P8BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
-; P8BE: vmrgew v2, [[REG3]], [[REG4]]
-; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
-; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
-; P8LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P8LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
-; P8LE: vmrgew v2, [[REG4]], [[REG3]]
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @fromDiffConstsConvdtoi() {
+; P9BE-LABEL: fromDiffConstsConvdtoi:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    addis r3, r2, .LCPI26_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI26_0@toc@l
+; P9BE-NEXT:    lxvx v2, 0, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffConstsConvdtoi:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    addis r3, r2, .LCPI26_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI26_0@toc@l
+; P9LE-NEXT:    lxvx v2, 0, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffConstsConvdtoi:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addis r3, r2, .LCPI26_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI26_0@toc@l
+; P8BE-NEXT:    lxvw4x v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffConstsConvdtoi:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addis r3, r2, .LCPI26_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI26_0@toc@l
+; P8LE-NEXT:    lvx v2, 0, r3
+; P8LE-NEXT:    blr
 entry:
   ret <4 x i32> <i32 24, i32 234, i32 988, i32 422>
-; P9BE-LABEL: fromDiffConstsConvdtoi
-; P9LE-LABEL: fromDiffConstsConvdtoi
-; P8BE-LABEL: fromDiffConstsConvdtoi
-; P8LE-LABEL: fromDiffConstsConvdtoi
-; P9BE: lxv
-; P9BE: blr
-; P9LE: lxv
-; P9LE: blr
-; P8BE: lxvw4x
-; P8BE: blr
-; P8LE: lvx
-; P8LE-NOT: xxswapd
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromDiffMemConsAConvdtoi(double* nocapture readonly %ptr) {
+; P9BE-LABEL: fromDiffMemConsAConvdtoi:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lxv vs0, 0(r3)
+; P9BE-NEXT:    lxv vs1, 16(r3)
+; P9BE-NEXT:    xxmrgld vs2, vs0, vs1
+; P9BE-NEXT:    xxmrghd vs0, vs0, vs1
+; P9BE-NEXT:    xvcvdpsxws v2, vs2
+; P9BE-NEXT:    xvcvdpsxws v3, vs0
+; P9BE-NEXT:    vmrgew v2, v3, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemConsAConvdtoi:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lxv vs0, 0(r3)
+; P9LE-NEXT:    lxv vs1, 16(r3)
+; P9LE-NEXT:    xxmrgld vs2, vs1, vs0
+; P9LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P9LE-NEXT:    xvcvdpsxws v2, vs2
+; P9LE-NEXT:    xvcvdpsxws v3, vs0
+; P9LE-NEXT:    vmrgew v2, v3, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemConsAConvdtoi:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    li r4, 16
+; P8BE-NEXT:    lxvd2x vs0, 0, r3
+; P8BE-NEXT:    lxvd2x vs1, r3, r4
+; P8BE-NEXT:    xxmrgld vs2, vs0, vs1
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs1
+; P8BE-NEXT:    xvcvdpsxws v2, vs2
+; P8BE-NEXT:    xvcvdpsxws v3, vs0
+; P8BE-NEXT:    vmrgew v2, v3, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemConsAConvdtoi:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    li r4, 16
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    lxvd2x vs1, r3, r4
+; P8LE-NEXT:    xxswapd vs0, vs0
+; P8LE-NEXT:    xxswapd vs1, vs1
+; P8LE-NEXT:    xxmrgld vs2, vs1, vs0
+; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P8LE-NEXT:    xvcvdpsxws v2, vs2
+; P8LE-NEXT:    xvcvdpsxws v3, vs0
+; P8LE-NEXT:    vmrgew v2, v3, v2
+; P8LE-NEXT:    blr
 entry:
   %0 = bitcast double* %ptr to <2 x double>*
   %1 = load <2 x double>, <2 x double>* %0, align 8
@@ -1569,44 +2003,61 @@ entry:
   %5 = fptosi <2 x double> %4 to <2 x i32>
   %vecinit9 = shufflevector <2 x i32> %2, <2 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   ret <4 x i32> %vecinit9
-; P9BE-LABEL: fromDiffMemConsAConvdtoi
-; P9LE-LABEL: fromDiffMemConsAConvdtoi
-; P8BE-LABEL: fromDiffMemConsAConvdtoi
-; P8LE-LABEL: fromDiffMemConsAConvdtoi
-; P9BE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3)
-; P9BE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
-; P9BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]]
-; P9BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]]
-; P9BE-DAG: xvcvdpsxws [[REG5:[vs0-9]+]], [[REG3]]
-; P9BE-DAG: xvcvdpsxws [[REG6:[vs0-9]+]], [[REG4]]
-; P9BE: vmrgew v2, [[REG6]], [[REG5]]
-; P9LE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3)
-; P9LE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
-; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]]
-; P9LE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG2]], [[REG1]]
-; P9LE-DAG: xvcvdpsxws [[REG5:[vs0-9]+]], [[REG3]]
-; P9LE-DAG: xvcvdpsxws [[REG6:[vs0-9]+]], [[REG4]]
-; P9LE: vmrgew v2, [[REG6]], [[REG5]]
-; P8BE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
-; P8BE: lxvd2x [[REG2:[vs0-9]+]], r3, r4
-; P8BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]]
-; P8BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]]
-; P8BE-DAG: xvcvdpsxws [[REG5:[vs0-9]+]], [[REG3]]
-; P8BE-DAG: xvcvdpsxws [[REG6:[vs0-9]+]], [[REG4]]
-; P8BE: vmrgew v2, [[REG6]], [[REG5]]
-; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
-; P8LE: lxvd2x [[REG2:[vs0-9]+]], r3, r4
-; P8LE-DAG: xxswapd [[REG3:[vs0-9]+]], [[REG1]]
-; P8LE-DAG: xxswapd [[REG4:[vs0-9]+]], [[REG2]]
-; P8LE-DAG: xxmrgld [[REG5:[vs0-9]+]], [[REG4]], [[REG3]]
-; P8LE-DAG: xxmrghd [[REG6:[vs0-9]+]], [[REG4]], [[REG3]]
-; P8LE-DAG: xvcvdpsxws [[REG7:[vs0-9]+]], [[REG5]]
-; P8LE-DAG: xvcvdpsxws [[REG8:[vs0-9]+]], [[REG6]]
-; P8LE: vmrgew v2, [[REG8]], [[REG7]]
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromDiffMemConsDConvdtoi(double* nocapture readonly %ptr) {
+; P9BE-LABEL: fromDiffMemConsDConvdtoi:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lfd f0, 24(r3)
+; P9BE-NEXT:    lfd f1, 16(r3)
+; P9BE-NEXT:    lfd f2, 8(r3)
+; P9BE-NEXT:    lfd f3, 0(r3)
+; P9BE-NEXT:    xxmrghd vs1, vs1, vs3
+; P9BE-NEXT:    xxmrghd vs0, vs0, vs2
+; P9BE-NEXT:    xvcvdpsxws v2, vs1
+; P9BE-NEXT:    xvcvdpsxws v3, vs0
+; P9BE-NEXT:    vmrgew v2, v3, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemConsDConvdtoi:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lfd f0, 24(r3)
+; P9LE-NEXT:    lfd f2, 8(r3)
+; P9LE-NEXT:    lfd f1, 16(r3)
+; P9LE-NEXT:    lfd f3, 0(r3)
+; P9LE-NEXT:    xxmrghd vs0, vs2, vs0
+; P9LE-NEXT:    xvcvdpsxws v2, vs0
+; P9LE-NEXT:    xxmrghd vs0, vs3, vs1
+; P9LE-NEXT:    xvcvdpsxws v3, vs0
+; P9LE-NEXT:    vmrgew v2, v3, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemConsDConvdtoi:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lfdx f3, 0, r3
+; P8BE-NEXT:    lfd f0, 24(r3)
+; P8BE-NEXT:    lfd f1, 8(r3)
+; P8BE-NEXT:    lfd f2, 16(r3)
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs1
+; P8BE-NEXT:    xxmrghd vs1, vs2, vs3
+; P8BE-NEXT:    xvcvdpsxws v2, vs0
+; P8BE-NEXT:    xvcvdpsxws v3, vs1
+; P8BE-NEXT:    vmrgew v2, v2, v3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemConsDConvdtoi:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lfdx f3, 0, r3
+; P8LE-NEXT:    lfd f0, 24(r3)
+; P8LE-NEXT:    lfd f1, 8(r3)
+; P8LE-NEXT:    lfd f2, 16(r3)
+; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P8LE-NEXT:    xxmrghd vs1, vs3, vs2
+; P8LE-NEXT:    xvcvdpsxws v2, vs0
+; P8LE-NEXT:    xvcvdpsxws v3, vs1
+; P8LE-NEXT:    vmrgew v2, v3, v2
+; P8LE-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds double, double* %ptr, i64 3
   %0 = load double, double* %arrayidx, align 8
@@ -1624,50 +2075,65 @@ entry:
   %conv8 = fptosi double %3 to i32
   %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
   ret <4 x i32> %vecinit9
-; P9BE-LABEL: fromDiffMemConsDConvdtoi
-; P9LE-LABEL: fromDiffMemConsDConvdtoi
-; P8BE-LABEL: fromDiffMemConsDConvdtoi
-; P8LE-LABEL: fromDiffMemConsDConvdtoi
-; P9BE: lfd
-; P9BE: lfd
-; P9BE: lfd
-; P9BE: lfd
-; P9BE: xxmrghd
-; P9BE: xxmrghd
-; P9BE: xvcvdpsxws
-; P9BE: xvcvdpsxws
-; P9BE: vmrgew v2
-; P9LE: lfd
-; P9LE: lfd
-; P9LE: lfd
-; P9LE: lfd
-; P9LE: xxmrghd
-; P9LE: xvcvdpsxws
-; P9LE: xxmrghd
-; P9LE: xvcvdpsxws
-; P9LE: vmrgew v2
-; P8BE: lfdx
-; P8BE: lfd
-; P8BE: lfd
-; P8BE: lfd
-; P8BE: xxmrghd
-; P8BE: xxmrghd
-; P8BE: xvcvdpsxws
-; P8BE: xvcvdpsxws
-; P8BE: vmrgew v2
-; P8LE: lfdx
-; P8LE: lfd
-; P8LE: lfd
-; P8LE: lfd
-; P8LE: xxmrghd
-; P8LE: xxmrghd
-; P8LE: xvcvdpsxws
-; P8LE: xvcvdpsxws
-; P8LE: vmrgew v2
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromDiffMemVarAConvdtoi(double* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromDiffMemVarAConvdtoi:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 3
+; P9BE-NEXT:    lfdux f0, r3, r4
+; P9BE-NEXT:    lfd f1, 8(r3)
+; P9BE-NEXT:    lfd f2, 16(r3)
+; P9BE-NEXT:    lfd f3, 24(r3)
+; P9BE-NEXT:    xxmrghd vs1, vs1, vs3
+; P9BE-NEXT:    xxmrghd vs0, vs0, vs2
+; P9BE-NEXT:    xvcvdpsxws v2, vs1
+; P9BE-NEXT:    xvcvdpsxws v3, vs0
+; P9BE-NEXT:    vmrgew v2, v3, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemVarAConvdtoi:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 3
+; P9LE-NEXT:    lfdux f0, r3, r4
+; P9LE-NEXT:    lfd f2, 16(r3)
+; P9LE-NEXT:    lfd f1, 8(r3)
+; P9LE-NEXT:    lfd f3, 24(r3)
+; P9LE-NEXT:    xxmrghd vs0, vs2, vs0
+; P9LE-NEXT:    xvcvdpsxws v2, vs0
+; P9LE-NEXT:    xxmrghd vs0, vs3, vs1
+; P9LE-NEXT:    xvcvdpsxws v3, vs0
+; P9LE-NEXT:    vmrgew v2, v3, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemVarAConvdtoi:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 3
+; P8BE-NEXT:    lfdux f0, r3, r4
+; P8BE-NEXT:    lfd f1, 8(r3)
+; P8BE-NEXT:    lfd f2, 24(r3)
+; P8BE-NEXT:    lfd f3, 16(r3)
+; P8BE-NEXT:    xxmrghd vs1, vs1, vs2
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs3
+; P8BE-NEXT:    xvcvdpsxws v2, vs1
+; P8BE-NEXT:    xvcvdpsxws v3, vs0
+; P8BE-NEXT:    vmrgew v2, v3, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemVarAConvdtoi:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 3
+; P8LE-NEXT:    lfdux f0, r3, r4
+; P8LE-NEXT:    lfd f1, 16(r3)
+; P8LE-NEXT:    lfd f2, 8(r3)
+; P8LE-NEXT:    lfd f3, 24(r3)
+; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P8LE-NEXT:    xxmrghd vs1, vs3, vs2
+; P8LE-NEXT:    xvcvdpsxws v2, vs0
+; P8LE-NEXT:    xvcvdpsxws v3, vs1
+; P8LE-NEXT:    vmrgew v2, v3, v2
+; P8LE-NEXT:    blr
 entry:
   %idxprom = sext i32 %elem to i64
   %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
@@ -1693,50 +2159,65 @@ entry:
   %conv13 = fptosi double %3 to i32
   %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3
   ret <4 x i32> %vecinit14
-; P9BE-LABEL: fromDiffMemVarAConvdtoi
-; P9LE-LABEL: fromDiffMemVarAConvdtoi
-; P8BE-LABEL: fromDiffMemVarAConvdtoi
-; P8LE-LABEL: fromDiffMemVarAConvdtoi
-; P9BE: lfdux
-; P9BE: lfd
-; P9BE: lfd
-; P9BE: lfd
-; P9BE: xxmrghd
-; P9BE: xxmrghd
-; P9BE: xvcvdpsxws
-; P9BE: xvcvdpsxws
-; P9BE: vmrgew v2
-; P9LE: lfdux
-; P9LE: lfd
-; P9LE: lfd
-; P9LE: lfd
-; P9LE: xxmrghd
-; P9LE: xvcvdpsxws
-; P9LE: xxmrghd
-; P9LE: xvcvdpsxws
-; P9LE: vmrgew v2
-; P8BE: lfdux
-; P8BE: lfd
-; P8BE: lfd
-; P8BE: lfd
-; P8BE: xxmrghd
-; P8BE: xxmrghd
-; P8BE: xvcvdpsxws
-; P8BE: xvcvdpsxws
-; P8BE: vmrgew v2
-; P8LE: lfdux
-; P8LE: lfd
-; P8LE: lfd
-; P8LE: lfd
-; P8LE: xxmrghd
-; P8LE: xxmrghd
-; P8LE: xvcvdpsxws
-; P8LE: xvcvdpsxws
-; P8LE: vmrgew v2
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromDiffMemVarDConvdtoi(double* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromDiffMemVarDConvdtoi:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 3
+; P9BE-NEXT:    lfdux f0, r3, r4
+; P9BE-NEXT:    lfd f1, -8(r3)
+; P9BE-NEXT:    lfd f2, -16(r3)
+; P9BE-NEXT:    lfd f3, -24(r3)
+; P9BE-NEXT:    xxmrghd vs1, vs1, vs3
+; P9BE-NEXT:    xxmrghd vs0, vs0, vs2
+; P9BE-NEXT:    xvcvdpsxws v2, vs1
+; P9BE-NEXT:    xvcvdpsxws v3, vs0
+; P9BE-NEXT:    vmrgew v2, v3, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemVarDConvdtoi:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 3
+; P9LE-NEXT:    lfdux f0, r3, r4
+; P9LE-NEXT:    lfd f2, -16(r3)
+; P9LE-NEXT:    lfd f1, -8(r3)
+; P9LE-NEXT:    lfd f3, -24(r3)
+; P9LE-NEXT:    xxmrghd vs0, vs2, vs0
+; P9LE-NEXT:    xvcvdpsxws v2, vs0
+; P9LE-NEXT:    xxmrghd vs0, vs3, vs1
+; P9LE-NEXT:    xvcvdpsxws v3, vs0
+; P9LE-NEXT:    vmrgew v2, v3, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemVarDConvdtoi:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 3
+; P8BE-NEXT:    lfdux f0, r3, r4
+; P8BE-NEXT:    lfd f1, -8(r3)
+; P8BE-NEXT:    lfd f2, -24(r3)
+; P8BE-NEXT:    lfd f3, -16(r3)
+; P8BE-NEXT:    xxmrghd vs1, vs1, vs2
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs3
+; P8BE-NEXT:    xvcvdpsxws v2, vs1
+; P8BE-NEXT:    xvcvdpsxws v3, vs0
+; P8BE-NEXT:    vmrgew v2, v3, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemVarDConvdtoi:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 3
+; P8LE-NEXT:    lfdux f0, r3, r4
+; P8LE-NEXT:    lfd f1, -16(r3)
+; P8LE-NEXT:    lfd f2, -8(r3)
+; P8LE-NEXT:    lfd f3, -24(r3)
+; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P8LE-NEXT:    xxmrghd vs1, vs3, vs2
+; P8LE-NEXT:    xvcvdpsxws v2, vs0
+; P8LE-NEXT:    xvcvdpsxws v3, vs1
+; P8LE-NEXT:    vmrgew v2, v3, v2
+; P8LE-NEXT:    blr
 entry:
   %idxprom = sext i32 %elem to i64
   %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
@@ -1762,255 +2243,305 @@ entry:
   %conv13 = fptosi double %3 to i32
   %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3
   ret <4 x i32> %vecinit14
-; P9BE-LABEL: fromDiffMemVarDConvdtoi
-; P9LE-LABEL: fromDiffMemVarDConvdtoi
-; P8BE-LABEL: fromDiffMemVarDConvdtoi
-; P8LE-LABEL: fromDiffMemVarDConvdtoi
-; P9BE: lfdux
-; P9BE: lfd
-; P9BE: lfd
-; P9BE: lfd
-; P9BE: xxmrghd
-; P9BE: xxmrghd
-; P9BE: xvcvdpsxws
-; P9BE: xvcvdpsxws
-; P9BE: vmrgew v2
-; P9LE: lfdux
-; P9LE: lfd
-; P9LE: lfd
-; P9LE: lfd
-; P9LE: xxmrghd
-; P9LE: xvcvdpsxws
-; P9LE: xxmrghd
-; P9LE: xvcvdpsxws
-; P9LE: vmrgew v2
-; P8BE: lfdux
-; P8BE: lfd
-; P8BE: lfd
-; P8BE: lfd
-; P8BE: xxmrghd
-; P8BE: xxmrghd
-; P8BE: xvcvdpsxws
-; P8BE: xvcvdpsxws
-; P8BE: vmrgew v2
-; P8LE: lfdux
-; P8LE: lfd
-; P8LE: lfd
-; P8LE: lfd
-; P8LE: xxmrghd
-; P8LE: xxmrghd
-; P8LE: xvcvdpsxws
-; P8LE: xvcvdpsxws
-; P8LE: vmrgew v2
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @spltRegValConvdtoi(double %val) {
+; P9BE-LABEL: spltRegValConvdtoi:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    xscvdpsxws f0, f1
+; P9BE-NEXT:    xxspltw v2, vs0, 1
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltRegValConvdtoi:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    xscvdpsxws f0, f1
+; P9LE-NEXT:    xxspltw v2, vs0, 1
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltRegValConvdtoi:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    xscvdpsxws f0, f1
+; P8BE-NEXT:    xxspltw v2, vs0, 1
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltRegValConvdtoi:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    xscvdpsxws f0, f1
+; P8LE-NEXT:    xxspltw v2, vs0, 1
+; P8LE-NEXT:    blr
 entry:
   %conv = fptosi double %val to i32
   %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
   %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
   ret <4 x i32> %splat.splat
-; P9BE-LABEL: spltRegValConvdtoi
-; P9LE-LABEL: spltRegValConvdtoi
-; P8BE-LABEL: spltRegValConvdtoi
-; P8LE-LABEL: spltRegValConvdtoi
-; P9BE: xscvdpsxws
-; P9BE: xxspltw
-; P9BE: blr
-; P9LE: xscvdpsxws
-; P9LE: xxspltw
-; P9LE: blr
-; P8BE: xscvdpsxws
-; P8BE: xxspltw
-; P8BE: blr
-; P8LE: xscvdpsxws
-; P8LE: xxspltw
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @spltMemValConvdtoi(double* nocapture readonly %ptr) {
+; P9BE-LABEL: spltMemValConvdtoi:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lfd f0, 0(r3)
+; P9BE-NEXT:    xscvdpsxws f0, f0
+; P9BE-NEXT:    xxspltw v2, vs0, 1
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltMemValConvdtoi:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lfd f0, 0(r3)
+; P9LE-NEXT:    xscvdpsxws f0, f0
+; P9LE-NEXT:    xxspltw v2, vs0, 1
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltMemValConvdtoi:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lfdx f0, 0, r3
+; P8BE-NEXT:    xscvdpsxws f0, f0
+; P8BE-NEXT:    xxspltw v2, vs0, 1
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltMemValConvdtoi:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lfdx f0, 0, r3
+; P8LE-NEXT:    xscvdpsxws f0, f0
+; P8LE-NEXT:    xxspltw v2, vs0, 1
+; P8LE-NEXT:    blr
 entry:
   %0 = load double, double* %ptr, align 8
   %conv = fptosi double %0 to i32
   %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
   %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
   ret <4 x i32> %splat.splat
-; P9BE-LABEL: spltMemValConvdtoi
-; P9LE-LABEL: spltMemValConvdtoi
-; P8BE-LABEL: spltMemValConvdtoi
-; P8LE-LABEL: spltMemValConvdtoi
-; P9BE: lfd
-; P9BE: xscvdpsxws
-; P9BE: xxspltw
-; P9BE: blr
-; P9LE: lfd
-; P9LE: xscvdpsxws
-; P9LE: xxspltw
-; P9LE: blr
-; P8BE: lfdx
-; P8BE: xscvdpsxws
-; P8BE: xxspltw
-; P8BE: blr
-; P8LE: lfdx
-; P8LE: xscvdpsxws
-; P8LE: xxspltw
-; P8LE: blr
 }
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @allZeroui() {
+; P9BE-LABEL: allZeroui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    xxlxor v2, v2, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: allZeroui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    xxlxor v2, v2, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: allZeroui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    xxlxor v2, v2, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: allZeroui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    xxlxor v2, v2, v2
+; P8LE-NEXT:    blr
 entry:
   ret <4 x i32> zeroinitializer
-; P9BE-LABEL: allZeroui
-; P9LE-LABEL: allZeroui
-; P8BE-LABEL: allZeroui
-; P8LE-LABEL: allZeroui
-; P9BE: xxlxor v2, v2, v2
-; P9BE: blr
-; P9LE: xxlxor v2, v2, v2
-; P9LE: blr
-; P8BE: xxlxor v2, v2, v2
-; P8BE: blr
-; P8LE: xxlxor v2, v2, v2
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @allOneui() {
+; P9BE-LABEL: allOneui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    xxspltib v2, 255
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: allOneui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    xxspltib v2, 255
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: allOneui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    vspltisb v2, -1
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: allOneui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    vspltisb v2, -1
+; P8LE-NEXT:    blr
 entry:
   ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
-; P9BE-LABEL: allOneui
-; P9LE-LABEL: allOneui
-; P8BE-LABEL: allOneui
-; P8LE-LABEL: allOneui
-; P9BE: xxspltib v2, 255
-; P9BE: blr
-; P9LE: xxspltib v2, 255
-; P9LE: blr
-; P8BE: vspltisb v2, -1
-; P8BE: blr
-; P8LE: vspltisb v2, -1
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @spltConst1ui() {
+; P9BE-LABEL: spltConst1ui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    vspltisw v2, 1
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltConst1ui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    vspltisw v2, 1
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltConst1ui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    vspltisw v2, 1
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltConst1ui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    vspltisw v2, 1
+; P8LE-NEXT:    blr
 entry:
   ret <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-; P9BE-LABEL: spltConst1ui
-; P9LE-LABEL: spltConst1ui
-; P8BE-LABEL: spltConst1ui
-; P8LE-LABEL: spltConst1ui
-; P9BE: vspltisw v2, 1
-; P9BE: blr
-; P9LE: vspltisw v2, 1
-; P9LE: blr
-; P8BE: vspltisw v2, 1
-; P8BE: blr
-; P8LE: vspltisw v2, 1
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @spltConst16kui() {
+; P9BE-LABEL: spltConst16kui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    vspltisw v2, -15
+; P9BE-NEXT:    vsrw v2, v2, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltConst16kui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    vspltisw v2, -15
+; P9LE-NEXT:    vsrw v2, v2, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltConst16kui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    vspltisw v2, -15
+; P8BE-NEXT:    vsrw v2, v2, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltConst16kui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    vspltisw v2, -15
+; P8LE-NEXT:    vsrw v2, v2, v2
+; P8LE-NEXT:    blr
 entry:
   ret <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
-; P9BE-LABEL: spltConst16kui
-; P9LE-LABEL: spltConst16kui
-; P8BE-LABEL: spltConst16kui
-; P8LE-LABEL: spltConst16kui
-; P9BE: vspltisw v2, -15
-; P9BE: vsrw v2, v2, v2
-; P9BE: blr
-; P9LE: vspltisw v2, -15
-; P9LE: vsrw v2, v2, v2
-; P9LE: blr
-; P8BE: vspltisw v2, -15
-; P8BE: vsrw v2, v2, v2
-; P8BE: blr
-; P8LE: vspltisw v2, -15
-; P8LE: vsrw v2, v2, v2
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @spltConst32kui() {
+; P9BE-LABEL: spltConst32kui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    vspltisw v2, -16
+; P9BE-NEXT:    vsrw v2, v2, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltConst32kui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    vspltisw v2, -16
+; P9LE-NEXT:    vsrw v2, v2, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltConst32kui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    vspltisw v2, -16
+; P8BE-NEXT:    vsrw v2, v2, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltConst32kui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    vspltisw v2, -16
+; P8LE-NEXT:    vsrw v2, v2, v2
+; P8LE-NEXT:    blr
 entry:
   ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
-; P9BE-LABEL: spltConst32kui
-; P9LE-LABEL: spltConst32kui
-; P8BE-LABEL: spltConst32kui
-; P8LE-LABEL: spltConst32kui
-; P9BE: vspltisw v2, -16
-; P9BE: vsrw v2, v2, v2
-; P9BE: blr
-; P9LE: vspltisw v2, -16
-; P9LE: vsrw v2, v2, v2
-; P9LE: blr
-; P8BE: vspltisw v2, -16
-; P8BE: vsrw v2, v2, v2
-; P8BE: blr
-; P8LE: vspltisw v2, -16
-; P8LE: vsrw v2, v2, v2
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @fromRegsui(i32 zeroext %a, i32 zeroext %b, i32 zeroext %c, i32 zeroext %d) {
+; P9BE-LABEL: fromRegsui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    rldimi r6, r5, 32, 0
+; P9BE-NEXT:    rldimi r4, r3, 32, 0
+; P9BE-NEXT:    mtvsrdd v2, r4, r6
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromRegsui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    rldimi r3, r4, 32, 0
+; P9LE-NEXT:    rldimi r5, r6, 32, 0
+; P9LE-NEXT:    mtvsrdd v2, r5, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromRegsui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    rldimi r6, r5, 32, 0
+; P8BE-NEXT:    rldimi r4, r3, 32, 0
+; P8BE-NEXT:    mtvsrd f0, r6
+; P8BE-NEXT:    mtvsrd f1, r4
+; P8BE-NEXT:    xxmrghd v2, vs1, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromRegsui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    rldimi r3, r4, 32, 0
+; P8LE-NEXT:    rldimi r5, r6, 32, 0
+; P8LE-NEXT:    mtvsrd f0, r3
+; P8LE-NEXT:    mtvsrd f1, r5
+; P8LE-NEXT:    xxmrghd v2, vs1, vs0
+; P8LE-NEXT:    blr
 entry:
   %vecinit = insertelement <4 x i32> undef, i32 %a, i32 0
   %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1
   %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %c, i32 2
   %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %d, i32 3
   ret <4 x i32> %vecinit3
-; P9BE-LABEL: fromRegsui
-; P9LE-LABEL: fromRegsui
-; P8BE-LABEL: fromRegsui
-; P8LE-LABEL: fromRegsui
-; P9BE-DAG: rldimi r6, r5, 32, 0
-; P9BE-DAG: rldimi r4, r3, 32, 0
-; P9BE: mtvsrdd v2, r4, r6
-; P9BE: blr
-; P9LE-DAG: rldimi r3, r4, 32, 0
-; P9LE-DAG: rldimi r5, r6, 32, 0
-; P9LE: mtvsrdd v2, r5, r3
-; P9LE: blr
-; P8BE-DAG: rldimi r6, r5, 32, 0
-; P8BE-DAG: rldimi r4, r3, 32, 0
-; P8BE-DAG: mtvsrd f[[REG1:[0-9]+]], r6
-; P8BE-DAG: mtvsrd f[[REG2:[0-9]+]], r4
-; P8BE-DAG: xxmrghd v2, vs[[REG2]], vs[[REG1]]
-; P8BE: blr
-; P8LE-DAG: rldimi r3, r4, 32, 0
-; P8LE-DAG: rldimi r5, r6, 32, 0
-; P8LE-DAG: mtvsrd f[[REG1:[0-9]+]], r3
-; P8LE-DAG: mtvsrd f[[REG2:[0-9]+]], r5
-; P8LE-DAG: xxmrghd v2, vs[[REG2]], vs[[REG1]]
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @fromDiffConstsui() {
+; P9BE-LABEL: fromDiffConstsui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    addis r3, r2, .LCPI39_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI39_0@toc@l
+; P9BE-NEXT:    lxvx v2, 0, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffConstsui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    addis r3, r2, .LCPI39_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI39_0@toc@l
+; P9LE-NEXT:    lxvx v2, 0, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffConstsui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addis r3, r2, .LCPI39_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI39_0@toc@l
+; P8BE-NEXT:    lxvw4x v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffConstsui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addis r3, r2, .LCPI39_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI39_0@toc@l
+; P8LE-NEXT:    lvx v2, 0, r3
+; P8LE-NEXT:    blr
 entry:
   ret <4 x i32> <i32 242, i32 -113, i32 889, i32 19>
-; P9BE-LABEL: fromDiffConstsui
-; P9LE-LABEL: fromDiffConstsui
-; P8BE-LABEL: fromDiffConstsui
-; P8LE-LABEL: fromDiffConstsui
-; P9BE: lxv
-; P9BE: blr
-; P9LE: lxv
-; P9LE: blr
-; P8BE: lxvw4x
-; P8BE: blr
-; P8LE: lvx
-; P8LE-NOT: xxswapd
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromDiffMemConsAui(i32* nocapture readonly %arr) {
+; P9BE-LABEL: fromDiffMemConsAui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lxv v2, 0(r3)
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemConsAui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lxv v2, 0(r3)
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemConsAui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lxvw4x v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemConsAui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %0 = load i32, i32* %arr, align 4
   %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
@@ -2024,23 +2555,46 @@ entry:
   %3 = load i32, i32* %arrayidx5, align 4
   %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3
   ret <4 x i32> %vecinit6
-; P9BE-LABEL: fromDiffMemConsAui
-; P9LE-LABEL: fromDiffMemConsAui
-; P8BE-LABEL: fromDiffMemConsAui
-; P8LE-LABEL: fromDiffMemConsAui
-; P9BE: lxv
-; P9BE: blr
-; P9LE: lxv
-; P9LE: blr
-; P8BE: lxvw4x
-; P8BE: blr
-; P8LE: lxvd2x
-; P8LE: xxswapd
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromDiffMemConsDui(i32* nocapture readonly %arr) {
+; P9BE-LABEL: fromDiffMemConsDui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lxv v2, 0(r3)
+; P9BE-NEXT:    addis r3, r2, .LCPI41_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI41_0@toc@l
+; P9BE-NEXT:    lxvx v3, 0, r3
+; P9BE-NEXT:    vperm v2, v2, v2, v3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemConsDui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lxv v2, 0(r3)
+; P9LE-NEXT:    addis r3, r2, .LCPI41_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI41_0@toc@l
+; P9LE-NEXT:    lxvx v3, 0, r3
+; P9LE-NEXT:    vperm v2, v2, v2, v3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemConsDui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addis r4, r2, .LCPI41_0@toc@ha
+; P8BE-NEXT:    lxvw4x v2, 0, r3
+; P8BE-NEXT:    addi r4, r4, .LCPI41_0@toc@l
+; P8BE-NEXT:    lxvw4x v3, 0, r4
+; P8BE-NEXT:    vperm v2, v2, v2, v3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemConsDui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    addis r4, r2, .LCPI41_0@toc@ha
+; P8LE-NEXT:    addi r3, r4, .LCPI41_0@toc@l
+; P8LE-NEXT:    lvx v2, 0, r3
+; P8LE-NEXT:    xxswapd v3, vs0
+; P8LE-NEXT:    vperm v2, v3, v3, v2
+; P8LE-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds i32, i32* %arr, i64 3
   %0 = load i32, i32* %arrayidx, align 4
@@ -2054,32 +2608,34 @@ entry:
   %3 = load i32, i32* %arr, align 4
   %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3
   ret <4 x i32> %vecinit6
-; P9BE-LABEL: fromDiffMemConsDui
-; P9LE-LABEL: fromDiffMemConsDui
-; P8BE-LABEL: fromDiffMemConsDui
-; P8LE-LABEL: fromDiffMemConsDui
-; P9BE: lxv
-; P9BE: lxv
-; P9BE: vperm
-; P9BE: blr
-; P9LE: lxv
-; P9LE: lxv
-; P9LE: vperm
-; P9LE: blr
-; P8BE: lxvw4x
-; P8BE: lxvw4x
-; P8BE: vperm
-; P8BE: blr
-; P8LE: lxvd2x
-; P8LE-DAG: lvx
-; P8LE-NOT: xxswapd
-; P8LE: xxswapd
-; P8LE: vperm
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromDiffMemVarAui(i32* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromDiffMemVarAui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 2
+; P9BE-NEXT:    lxvx v2, r3, r4
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemVarAui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 2
+; P9LE-NEXT:    lxvx v2, r3, r4
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemVarAui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 2
+; P8BE-NEXT:    lxvw4x v2, r3, r4
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemVarAui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 2
+; P8LE-NEXT:    lxvd2x vs0, r3, r4
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %idxprom = sext i32 %elem to i64
   %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom
@@ -2101,27 +2657,58 @@ entry:
   %3 = load i32, i32* %arrayidx10, align 4
   %vecinit11 = insertelement <4 x i32> %vecinit7, i32 %3, i32 3
   ret <4 x i32> %vecinit11
-; P9BE-LABEL: fromDiffMemVarAui
-; P9LE-LABEL: fromDiffMemVarAui
-; P8BE-LABEL: fromDiffMemVarAui
-; P8LE-LABEL: fromDiffMemVarAui
-; P9BE: sldi r4, r4, 2
-; P9BE: lxvx v2, r3, r4
-; P9BE: blr
-; P9LE: sldi r4, r4, 2
-; P9LE: lxvx v2, r3, r4
-; P9LE: blr
-; P8BE: sldi r4, r4, 2
-; P8BE: lxvw4x {{[vs0-9]+}}, r3, r4
-; P8BE: blr
-; P8LE: sldi r4, r4, 2
-; P8LE: lxvd2x {{[vs0-9]+}}, r3, r4
-; P8LE: xxswapd
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromDiffMemVarDui(i32* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromDiffMemVarDui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 2
+; P9BE-NEXT:    add r3, r3, r4
+; P9BE-NEXT:    addi r3, r3, -12
+; P9BE-NEXT:    lxvx v2, 0, r3
+; P9BE-NEXT:    addis r3, r2, .LCPI43_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI43_0@toc@l
+; P9BE-NEXT:    lxvx v3, 0, r3
+; P9BE-NEXT:    vperm v2, v2, v2, v3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemVarDui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 2
+; P9LE-NEXT:    add r3, r3, r4
+; P9LE-NEXT:    addi r3, r3, -12
+; P9LE-NEXT:    lxvx v2, 0, r3
+; P9LE-NEXT:    addis r3, r2, .LCPI43_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI43_0@toc@l
+; P9LE-NEXT:    lxvx v3, 0, r3
+; P9LE-NEXT:    vperm v2, v2, v2, v3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemVarDui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 2
+; P8BE-NEXT:    addis r5, r2, .LCPI43_0@toc@ha
+; P8BE-NEXT:    add r3, r3, r4
+; P8BE-NEXT:    addi r4, r5, .LCPI43_0@toc@l
+; P8BE-NEXT:    addi r3, r3, -12
+; P8BE-NEXT:    lxvw4x v3, 0, r4
+; P8BE-NEXT:    lxvw4x v2, 0, r3
+; P8BE-NEXT:    vperm v2, v2, v2, v3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemVarDui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 2
+; P8LE-NEXT:    addis r5, r2, .LCPI43_0@toc@ha
+; P8LE-NEXT:    add r3, r3, r4
+; P8LE-NEXT:    addi r3, r3, -12
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    addi r3, r5, .LCPI43_0@toc@l
+; P8LE-NEXT:    lvx v3, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    vperm v2, v2, v2, v3
+; P8LE-NEXT:    blr
 entry:
   %idxprom = sext i32 %elem to i64
   %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom
@@ -2143,36 +2730,57 @@ entry:
   %3 = load i32, i32* %arrayidx10, align 4
   %vecinit11 = insertelement <4 x i32> %vecinit7, i32 %3, i32 3
   ret <4 x i32> %vecinit11
-; P9BE-LABEL: fromDiffMemVarDui
-; P9LE-LABEL: fromDiffMemVarDui
-; P8BE-LABEL: fromDiffMemVarDui
-; P8LE-LABEL: fromDiffMemVarDui
-; P9BE-DAG: sldi {{r[0-9]+}}, r4, 2
-; P9BE-DAG: addi r3, r3, -12
-; P9BE-DAG: lxvx {{v[0-9]+}}, 0, r3
-; P9BE-DAG: lxvx
-; P9BE: vperm
-; P9BE: blr
-; P9LE-DAG: sldi {{r[0-9]+}}, r4, 2
-; P9LE-DAG: addi r3, r3, -12
-; P9LE-DAG: lxvx {{v[0-9]+}}, 0, r3
-; P9LE-DAG: lxv
-; P9LE: vperm
-; P9LE: blr
-; P8BE-DAG: sldi {{r[0-9]+}}, r4, 2
-; P8BE-DAG: lxvw4x {{v[0-9]+}}, 0, r3
-; P8BE-DAG: lxvw4x
-; P8BE: vperm
-; P8BE: blr
-; P8LE-DAG: sldi {{r[0-9]+}}, r4, 2
-; P8LE-DAG: lvx
-; P8LE-DAG: lvx
-; P8LE: vperm
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromRandMemConsui(i32* nocapture readonly %arr) {
+; P9BE-LABEL: fromRandMemConsui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lwz r4, 16(r3)
+; P9BE-NEXT:    lwz r5, 72(r3)
+; P9BE-NEXT:    lwz r6, 8(r3)
+; P9BE-NEXT:    lwz r3, 352(r3)
+; P9BE-NEXT:    rldimi r3, r6, 32, 0
+; P9BE-NEXT:    rldimi r5, r4, 32, 0
+; P9BE-NEXT:    mtvsrdd v2, r5, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromRandMemConsui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lwz r4, 16(r3)
+; P9LE-NEXT:    lwz r5, 72(r3)
+; P9LE-NEXT:    lwz r6, 8(r3)
+; P9LE-NEXT:    lwz r3, 352(r3)
+; P9LE-NEXT:    rldimi r4, r5, 32, 0
+; P9LE-NEXT:    rldimi r6, r3, 32, 0
+; P9LE-NEXT:    mtvsrdd v2, r6, r4
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromRandMemConsui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lwz r4, 8(r3)
+; P8BE-NEXT:    lwz r5, 352(r3)
+; P8BE-NEXT:    lwz r6, 16(r3)
+; P8BE-NEXT:    lwz r3, 72(r3)
+; P8BE-NEXT:    rldimi r5, r4, 32, 0
+; P8BE-NEXT:    rldimi r3, r6, 32, 0
+; P8BE-NEXT:    mtvsrd f0, r5
+; P8BE-NEXT:    mtvsrd f1, r3
+; P8BE-NEXT:    xxmrghd v2, vs1, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromRandMemConsui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lwz r4, 16(r3)
+; P8LE-NEXT:    lwz r5, 72(r3)
+; P8LE-NEXT:    lwz r6, 8(r3)
+; P8LE-NEXT:    lwz r3, 352(r3)
+; P8LE-NEXT:    rldimi r4, r5, 32, 0
+; P8LE-NEXT:    rldimi r6, r3, 32, 0
+; P8LE-NEXT:    mtvsrd f0, r4
+; P8LE-NEXT:    mtvsrd f1, r6
+; P8LE-NEXT:    xxmrghd v2, vs1, vs0
+; P8LE-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds i32, i32* %arr, i64 4
   %0 = load i32, i32* %arrayidx, align 4
@@ -2187,46 +2795,65 @@ entry:
   %3 = load i32, i32* %arrayidx5, align 4
   %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3
   ret <4 x i32> %vecinit6
-; P9BE-LABEL: fromRandMemConsui
-; P9LE-LABEL: fromRandMemConsui
-; P8BE-LABEL: fromRandMemConsui
-; P8LE-LABEL: fromRandMemConsui
-; P9BE: lwz
-; P9BE: lwz
-; P9BE: lwz
-; P9BE: lwz
-; P9BE: rldimi
-; P9BE: rldimi
-; P9BE: mtvsrdd
-; P9LE: lwz
-; P9LE: lwz
-; P9LE: lwz
-; P9LE: lwz
-; P9LE: rldimi
-; P9LE: rldimi
-; P9LE: mtvsrdd
-; P8BE: lwz
-; P8BE: lwz
-; P8BE: lwz
-; P8BE: lwz
-; P8BE: rldimi
-; P8BE: rldimi
-; P8BE: mtvsrd
-; P8BE: mtvsrd
-; P8BE: xxmrghd
-; P8LE: lwz
-; P8LE: lwz
-; P8LE: lwz
-; P8LE: lwz
-; P8LE: rldimi
-; P8LE: rldimi
-; P8LE: mtvsrd
-; P8LE: mtvsrd
-; P8LE: xxmrghd
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromRandMemVarui(i32* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromRandMemVarui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 2
+; P9BE-NEXT:    add r3, r3, r4
+; P9BE-NEXT:    lwz r4, 16(r3)
+; P9BE-NEXT:    lwz r5, 4(r3)
+; P9BE-NEXT:    lwz r6, 8(r3)
+; P9BE-NEXT:    lwz r3, 32(r3)
+; P9BE-NEXT:    rldimi r3, r6, 32, 0
+; P9BE-NEXT:    rldimi r5, r4, 32, 0
+; P9BE-NEXT:    mtvsrdd v2, r5, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromRandMemVarui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 2
+; P9LE-NEXT:    add r3, r3, r4
+; P9LE-NEXT:    lwz r4, 16(r3)
+; P9LE-NEXT:    lwz r5, 4(r3)
+; P9LE-NEXT:    lwz r6, 8(r3)
+; P9LE-NEXT:    lwz r3, 32(r3)
+; P9LE-NEXT:    rldimi r4, r5, 32, 0
+; P9LE-NEXT:    rldimi r6, r3, 32, 0
+; P9LE-NEXT:    mtvsrdd v2, r6, r4
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromRandMemVarui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 2
+; P8BE-NEXT:    add r3, r3, r4
+; P8BE-NEXT:    lwz r4, 8(r3)
+; P8BE-NEXT:    lwz r5, 32(r3)
+; P8BE-NEXT:    lwz r6, 16(r3)
+; P8BE-NEXT:    lwz r3, 4(r3)
+; P8BE-NEXT:    rldimi r5, r4, 32, 0
+; P8BE-NEXT:    rldimi r3, r6, 32, 0
+; P8BE-NEXT:    mtvsrd f0, r5
+; P8BE-NEXT:    mtvsrd f1, r3
+; P8BE-NEXT:    xxmrghd v2, vs1, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromRandMemVarui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 2
+; P8LE-NEXT:    add r3, r3, r4
+; P8LE-NEXT:    lwz r4, 16(r3)
+; P8LE-NEXT:    lwz r5, 4(r3)
+; P8LE-NEXT:    lwz r6, 8(r3)
+; P8LE-NEXT:    lwz r3, 32(r3)
+; P8LE-NEXT:    rldimi r4, r5, 32, 0
+; P8LE-NEXT:    rldimi r6, r3, 32, 0
+; P8LE-NEXT:    mtvsrd f0, r4
+; P8LE-NEXT:    mtvsrd f1, r6
+; P8LE-NEXT:    xxmrghd v2, vs1, vs0
+; P8LE-NEXT:    blr
 entry:
   %add = add nsw i32 %elem, 4
   %idxprom = sext i32 %add to i64
@@ -2249,119 +2876,151 @@ entry:
   %3 = load i32, i32* %arrayidx11, align 4
   %vecinit12 = insertelement <4 x i32> %vecinit8, i32 %3, i32 3
   ret <4 x i32> %vecinit12
-; P9BE-LABEL: fromRandMemVarui
-; P9LE-LABEL: fromRandMemVarui
-; P8BE-LABEL: fromRandMemVarui
-; P8LE-LABEL: fromRandMemVarui
-; P9BE: sldi r4, r4, 2
-; P9BE: lwz
-; P9BE: lwz
-; P9BE: lwz
-; P9BE: lwz
-; P9BE: rldimi
-; P9BE: rldimi
-; P9BE: mtvsrdd
-; P9LE: sldi r4, r4, 2
-; P9LE: lwz
-; P9LE: lwz
-; P9LE: lwz
-; P9LE: lwz
-; P9LE: rldimi
-; P9LE: rldimi
-; P9LE: mtvsrdd
-; P8BE: sldi r4, r4, 2
-; P8BE: lwz
-; P8BE: lwz
-; P8BE: lwz
-; P8BE: lwz
-; P8BE: rldimi
-; P8BE: rldimi
-; P8BE: mtvsrd
-; P8BE: mtvsrd
-; P8BE: xxmrghd
-; P8LE: sldi r4, r4, 2
-; P8LE: lwz
-; P8LE: lwz
-; P8LE: lwz
-; P8LE: lwz
-; P8LE: rldimi
-; P8LE: rldimi
-; P8LE: mtvsrd
-; P8LE: mtvsrd
-; P8LE: xxmrghd
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @spltRegValui(i32 zeroext %val) {
+; P9BE-LABEL: spltRegValui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    mtvsrws v2, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltRegValui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    mtvsrws v2, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltRegValui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    mtvsrwz f0, r3
+; P8BE-NEXT:    xxspltw v2, vs0, 1
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltRegValui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    mtvsrwz f0, r3
+; P8LE-NEXT:    xxspltw v2, vs0, 1
+; P8LE-NEXT:    blr
 entry:
   %splat.splatinsert = insertelement <4 x i32> undef, i32 %val, i32 0
   %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
   ret <4 x i32> %splat.splat
-; P9BE-LABEL: spltRegValui
-; P9LE-LABEL: spltRegValui
-; P8BE-LABEL: spltRegValui
-; P8LE-LABEL: spltRegValui
-; P9BE: mtvsrws v2, r3
-; P9BE: blr
-; P9LE: mtvsrws v2, r3
-; P9LE: blr
-; P8BE: mtvsrwz {{[vsf0-9]+}}, r3
-; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1
-; P8BE: blr
-; P8LE: mtvsrwz {{[vsf0-9]+}}, r3
-; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @spltMemValui(i32* nocapture readonly %ptr) {
+; P9BE-LABEL: spltMemValui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lfiwzx f0, 0, r3
+; P9BE-NEXT:    xxsldwi vs0, f0, f0, 1
+; P9BE-NEXT:    xxspltw v2, vs0, 0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltMemValui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lfiwzx f0, 0, r3
+; P9LE-NEXT:    xxpermdi vs0, f0, f0, 2
+; P9LE-NEXT:    xxspltw v2, vs0, 3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltMemValui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lfiwzx f0, 0, r3
+; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
+; P8BE-NEXT:    xxspltw v2, vs0, 0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltMemValui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lfiwzx f0, 0, r3
+; P8LE-NEXT:    xxpermdi vs0, f0, f0, 2
+; P8LE-NEXT:    xxspltw v2, vs0, 3
+; P8LE-NEXT:    blr
 entry:
   %0 = load i32, i32* %ptr, align 4
   %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
   %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
   ret <4 x i32> %splat.splat
-; P9BE-LABEL: spltMemValui
-; P9LE-LABEL: spltMemValui
-; P8BE-LABEL: spltMemValui
-; P8LE-LABEL: spltMemValui
-; P9BE: lfiwzx f0, 0, r3
-; P9BE: xxsldwi vs0, f0, f0, 1
-; P9BE: xxspltw v2, vs0, 0
-; P9BE: blr
-; P9LE: lfiwzx f0, 0, r3
-; P9LE: xxpermdi vs0, f0, f0, 2
-; P9LE: xxspltw v2, vs0, 3
-; P9LE: blr
-; P8BE: lfiwzx f0, 0, r3
-; P8BE: xxsldwi vs0, f0, f0, 1
-; P8BE: xxspltw v2, vs0, 0
-; P8BE: blr
-; P8LE: lfiwzx f0, 0, r3
-; P8LE: xxpermdi vs0, f0, f0, 2
-; P8LE: xxspltw v2, vs0, 3
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @spltCnstConvftoui() {
+; P9BE-LABEL: spltCnstConvftoui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    vspltisw v2, 4
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltCnstConvftoui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    vspltisw v2, 4
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltCnstConvftoui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    vspltisw v2, 4
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltCnstConvftoui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    vspltisw v2, 4
+; P8LE-NEXT:    blr
 entry:
   ret <4 x i32> <i32 4, i32 4, i32 4, i32 4>
-; P9BE-LABEL: spltCnstConvftoui
-; P9LE-LABEL: spltCnstConvftoui
-; P8BE-LABEL: spltCnstConvftoui
-; P8LE-LABEL: spltCnstConvftoui
-; P9BE: vspltisw v2, 4
-; P9BE: blr
-; P9LE: vspltisw v2, 4
-; P9LE: blr
-; P8BE: vspltisw v2, 4
-; P8BE: blr
-; P8LE: vspltisw v2, 4
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @fromRegsConvftoui(float %a, float %b, float %c, float %d) {
+; P9BE-LABEL: fromRegsConvftoui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; P9BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P9BE-NEXT:    xxmrghd vs0, vs2, vs4
+; P9BE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
+; P9BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P9BE-NEXT:    xvcvdpuxws v2, vs0
+; P9BE-NEXT:    xxmrghd vs0, vs1, vs3
+; P9BE-NEXT:    xvcvdpuxws v3, vs0
+; P9BE-NEXT:    vmrgew v2, v3, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromRegsConvftoui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
+; P9LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P9LE-NEXT:    xxmrghd vs0, vs3, vs1
+; P9LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; P9LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P9LE-NEXT:    xvcvdpuxws v2, vs0
+; P9LE-NEXT:    xxmrghd vs0, vs4, vs2
+; P9LE-NEXT:    xvcvdpuxws v3, vs0
+; P9LE-NEXT:    vmrgew v2, v3, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromRegsConvftoui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P8BE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; P8BE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
+; P8BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8BE-NEXT:    xxmrghd vs0, vs2, vs4
+; P8BE-NEXT:    xxmrghd vs1, vs1, vs3
+; P8BE-NEXT:    xvcvdpuxws v2, vs0
+; P8BE-NEXT:    xvcvdpuxws v3, vs1
+; P8BE-NEXT:    vmrgew v2, v3, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromRegsConvftoui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P8LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; P8LE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
+; P8LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8LE-NEXT:    xxmrghd vs0, vs3, vs1
+; P8LE-NEXT:    xxmrghd vs1, vs4, vs2
+; P8LE-NEXT:    xvcvdpuxws v2, vs0
+; P8LE-NEXT:    xvcvdpuxws v3, vs1
+; P8LE-NEXT:    vmrgew v2, v3, v2
+; P8LE-NEXT:    blr
 entry:
   %conv = fptoui float %a to i32
   %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
@@ -2372,79 +3031,116 @@ entry:
   %conv5 = fptoui float %d to i32
   %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %conv5, i32 3
   ret <4 x i32> %vecinit6
-; P9BE-LABEL: fromRegsConvftoui
-; P9LE-LABEL: fromRegsConvftoui
-; P8BE-LABEL: fromRegsConvftoui
-; P8LE-LABEL: fromRegsConvftoui
-; P9BE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
-; P9BE: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
-; P9BE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
-; P9BE: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9BE: vmrgew v2, [[REG3]], [[REG4]]
-; P9LE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
-; P9LE: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9LE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
-; P9LE: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
-; P9LE: vmrgew v2, [[REG4]], [[REG3]]
-; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
-; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
-; P8BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P8BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
-; P8BE: vmrgew v2, [[REG3]], [[REG4]]
-; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
-; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
-; P8LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P8LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
-; P8LE: vmrgew v2, [[REG4]], [[REG3]]
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @fromDiffConstsConvftoui() {
+; P9BE-LABEL: fromDiffConstsConvftoui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    addis r3, r2, .LCPI50_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI50_0@toc@l
+; P9BE-NEXT:    lxvx v2, 0, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffConstsConvftoui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    addis r3, r2, .LCPI50_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI50_0@toc@l
+; P9LE-NEXT:    lxvx v2, 0, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffConstsConvftoui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addis r3, r2, .LCPI50_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI50_0@toc@l
+; P8BE-NEXT:    lxvw4x v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffConstsConvftoui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addis r3, r2, .LCPI50_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI50_0@toc@l
+; P8LE-NEXT:    lvx v2, 0, r3
+; P8LE-NEXT:    blr
 entry:
   ret <4 x i32> <i32 24, i32 234, i32 988, i32 422>
-; P9BE-LABEL: fromDiffConstsConvftoui
-; P9LE-LABEL: fromDiffConstsConvftoui
-; P8BE-LABEL: fromDiffConstsConvftoui
-; P8LE-LABEL: fromDiffConstsConvftoui
-; P9BE: lxv
-; P9BE: blr
-; P9LE: lxv
-; P9LE: blr
-; P8BE: lxvw4x
-; P8BE: blr
-; P8LE: lvx
-; P8LE-NOT: xxswapd
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromDiffMemConsAConvftoui(float* nocapture readonly %ptr) {
+; P9BE-LABEL: fromDiffMemConsAConvftoui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lxv vs0, 0(r3)
+; P9BE-NEXT:    xvcvspuxws v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemConsAConvftoui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lxv vs0, 0(r3)
+; P9LE-NEXT:    xvcvspuxws v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemConsAConvftoui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lxvw4x vs0, 0, r3
+; P8BE-NEXT:    xvcvspuxws v2, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemConsAConvftoui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    xvcvspuxws v2, v2
+; P8LE-NEXT:    blr
 entry:
   %0 = bitcast float* %ptr to <4 x float>*
   %1 = load <4 x float>, <4 x float>* %0, align 4
   %2 = fptoui <4 x float> %1 to <4 x i32>
   ret <4 x i32> %2
-; P9BE-LABEL: fromDiffMemConsAConvftoui
-; P9LE-LABEL: fromDiffMemConsAConvftoui
-; P8BE-LABEL: fromDiffMemConsAConvftoui
-; P8LE-LABEL: fromDiffMemConsAConvftoui
-; P9BE: lxv [[REG1:[vs0-9]+]], 0(r3)
-; P9BE: xvcvspuxws v2, [[REG1]]
-; P9BE: blr
-; P9LE: lxv [[REG1:[vs0-9]+]], 0(r3)
-; P9LE: xvcvspuxws v2, [[REG1]]
-; P9LE: blr
-; P8BE: lxvw4x [[REG1:[vs0-9]+]], 0, r3
-; P8BE: xvcvspuxws v2, [[REG1]]
-; P8BE: blr
-; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
-; P8LE: xxswapd v2, [[REG1]]
-; P8LE: xvcvspuxws v2, v2
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromDiffMemConsDConvftoui(float* nocapture readonly %ptr) {
+; P9BE-LABEL: fromDiffMemConsDConvftoui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lxv v2, 0(r3)
+; P9BE-NEXT:    addis r3, r2, .LCPI52_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI52_0@toc@l
+; P9BE-NEXT:    lxvx v3, 0, r3
+; P9BE-NEXT:    vperm v2, v2, v2, v3
+; P9BE-NEXT:    xvcvspuxws v2, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemConsDConvftoui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lxv v2, 0(r3)
+; P9LE-NEXT:    addis r3, r2, .LCPI52_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI52_0@toc@l
+; P9LE-NEXT:    lxvx v3, 0, r3
+; P9LE-NEXT:    vperm v2, v2, v2, v3
+; P9LE-NEXT:    xvcvspuxws v2, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemConsDConvftoui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addis r4, r2, .LCPI52_0@toc@ha
+; P8BE-NEXT:    lxvw4x v2, 0, r3
+; P8BE-NEXT:    addi r4, r4, .LCPI52_0@toc@l
+; P8BE-NEXT:    lxvw4x v3, 0, r4
+; P8BE-NEXT:    vperm v2, v2, v2, v3
+; P8BE-NEXT:    xvcvspuxws v2, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemConsDConvftoui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    addis r4, r2, .LCPI52_0@toc@ha
+; P8LE-NEXT:    addi r3, r4, .LCPI52_0@toc@l
+; P8LE-NEXT:    lvx v2, 0, r3
+; P8LE-NEXT:    xxswapd v3, vs0
+; P8LE-NEXT:    vperm v2, v3, v3, v2
+; P8LE-NEXT:    xvcvspuxws v2, v2
+; P8LE-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds float, float* %ptr, i64 3
   %0 = load float, float* %arrayidx, align 4
@@ -2462,35 +3158,69 @@ entry:
   %conv8 = fptoui float %3 to i32
   %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
   ret <4 x i32> %vecinit9
-; P9BE-LABEL: fromDiffMemConsDConvftoui
-; P9LE-LABEL: fromDiffMemConsDConvftoui
-; P8BE-LABEL: fromDiffMemConsDConvftoui
-; P8LE-LABEL: fromDiffMemConsDConvftoui
-; P9BE: lxv
-; P9BE: lxv
-; P9BE: vperm
-; P9BE: xvcvspuxws
-; P9BE: blr
-; P9LE: lxv
-; P9LE: lxv
-; P9LE: vperm
-; P9LE: xvcvspuxws
-; P9LE: blr
-; P8BE: lxvw4x
-; P8BE: lxvw4x
-; P8BE: vperm
-; P8BE: xvcvspuxws
-; P8BE: blr
-; P8LE-DAG: lxvd2x
-; P8LE-DAG: lvx
-; P8LE: xxswapd
-; P8LE: vperm
-; P8LE: xvcvspuxws
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromDiffMemVarAConvftoui(float* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromDiffMemVarAConvftoui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 2
+; P9BE-NEXT:    lfsux f0, r3, r4
+; P9BE-NEXT:    lfs f1, 12(r3)
+; P9BE-NEXT:    lfs f2, 4(r3)
+; P9BE-NEXT:    xxmrghd vs1, vs2, vs1
+; P9BE-NEXT:    xvcvdpsp v2, vs1
+; P9BE-NEXT:    lfs f1, 8(r3)
+; P9BE-NEXT:    xxmrghd vs0, vs0, vs1
+; P9BE-NEXT:    xvcvdpsp v3, vs0
+; P9BE-NEXT:    vmrgew v2, v3, v2
+; P9BE-NEXT:    xvcvspuxws v2, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemVarAConvftoui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 2
+; P9LE-NEXT:    lfsux f0, r3, r4
+; P9LE-NEXT:    lfs f1, 8(r3)
+; P9LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P9LE-NEXT:    lfs f1, 12(r3)
+; P9LE-NEXT:    xvcvdpsp v2, vs0
+; P9LE-NEXT:    lfs f0, 4(r3)
+; P9LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P9LE-NEXT:    xvcvdpsp v3, vs0
+; P9LE-NEXT:    vmrgew v2, v3, v2
+; P9LE-NEXT:    xvcvspuxws v2, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemVarAConvftoui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 2
+; P8BE-NEXT:    lfsux f0, r3, r4
+; P8BE-NEXT:    lfs f1, 12(r3)
+; P8BE-NEXT:    lfs f2, 4(r3)
+; P8BE-NEXT:    lfs f3, 8(r3)
+; P8BE-NEXT:    xxmrghd vs1, vs2, vs1
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs3
+; P8BE-NEXT:    xvcvdpsp v2, vs1
+; P8BE-NEXT:    xvcvdpsp v3, vs0
+; P8BE-NEXT:    vmrgew v2, v3, v2
+; P8BE-NEXT:    xvcvspuxws v2, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemVarAConvftoui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 2
+; P8LE-NEXT:    lfsux f0, r3, r4
+; P8LE-NEXT:    lfs f1, 8(r3)
+; P8LE-NEXT:    lfs f2, 4(r3)
+; P8LE-NEXT:    lfs f3, 12(r3)
+; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P8LE-NEXT:    xxmrghd vs1, vs3, vs2
+; P8LE-NEXT:    xvcvdpsp v2, vs0
+; P8LE-NEXT:    xvcvdpsp v3, vs1
+; P8LE-NEXT:    vmrgew v2, v3, v2
+; P8LE-NEXT:    xvcvspuxws v2, v2
+; P8LE-NEXT:    blr
 entry:
   %idxprom = sext i32 %elem to i64
   %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom
@@ -2516,19 +3246,70 @@ entry:
   %conv13 = fptoui float %3 to i32
   %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3
   ret <4 x i32> %vecinit14
-; P9BE-LABEL: fromDiffMemVarAConvftoui
-; P9LE-LABEL: fromDiffMemVarAConvftoui
-; P8BE-LABEL: fromDiffMemVarAConvftoui
-; P8LE-LABEL: fromDiffMemVarAConvftoui
 ; FIXME: implement finding consecutive loads with pre-inc
-; P9BE: lfsux
-; P9LE: lfsux
-; P8BE: lfsux
-; P8LE: lfsux
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromDiffMemVarDConvftoui(float* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromDiffMemVarDConvftoui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 2
+; P9BE-NEXT:    lfsux f0, r3, r4
+; P9BE-NEXT:    lfs f1, -12(r3)
+; P9BE-NEXT:    lfs f2, -4(r3)
+; P9BE-NEXT:    xxmrghd vs1, vs2, vs1
+; P9BE-NEXT:    xvcvdpsp v2, vs1
+; P9BE-NEXT:    lfs f1, -8(r3)
+; P9BE-NEXT:    xxmrghd vs0, vs0, vs1
+; P9BE-NEXT:    xvcvdpsp v3, vs0
+; P9BE-NEXT:    vmrgew v2, v3, v2
+; P9BE-NEXT:    xvcvspuxws v2, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemVarDConvftoui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 2
+; P9LE-NEXT:    lfsux f0, r3, r4
+; P9LE-NEXT:    lfs f1, -8(r3)
+; P9LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P9LE-NEXT:    lfs f1, -12(r3)
+; P9LE-NEXT:    xvcvdpsp v2, vs0
+; P9LE-NEXT:    lfs f0, -4(r3)
+; P9LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P9LE-NEXT:    xvcvdpsp v3, vs0
+; P9LE-NEXT:    vmrgew v2, v3, v2
+; P9LE-NEXT:    xvcvspuxws v2, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemVarDConvftoui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 2
+; P8BE-NEXT:    lfsux f0, r3, r4
+; P8BE-NEXT:    lfs f1, -12(r3)
+; P8BE-NEXT:    lfs f2, -4(r3)
+; P8BE-NEXT:    lfs f3, -8(r3)
+; P8BE-NEXT:    xxmrghd vs1, vs2, vs1
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs3
+; P8BE-NEXT:    xvcvdpsp v2, vs1
+; P8BE-NEXT:    xvcvdpsp v3, vs0
+; P8BE-NEXT:    vmrgew v2, v3, v2
+; P8BE-NEXT:    xvcvspuxws v2, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemVarDConvftoui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 2
+; P8LE-NEXT:    lfsux f0, r3, r4
+; P8LE-NEXT:    lfs f1, -8(r3)
+; P8LE-NEXT:    lfs f2, -4(r3)
+; P8LE-NEXT:    lfs f3, -12(r3)
+; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P8LE-NEXT:    xxmrghd vs1, vs3, vs2
+; P8LE-NEXT:    xvcvdpsp v2, vs0
+; P8LE-NEXT:    xvcvdpsp v3, vs1
+; P8LE-NEXT:    vmrgew v2, v3, v2
+; P8LE-NEXT:    xvcvspuxws v2, v2
+; P8LE-NEXT:    blr
 entry:
   %idxprom = sext i32 %elem to i64
   %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom
@@ -2554,86 +3335,154 @@ entry:
   %conv13 = fptoui float %3 to i32
   %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3
   ret <4 x i32> %vecinit14
-; P9BE-LABEL: fromDiffMemVarDConvftoui
-; P9LE-LABEL: fromDiffMemVarDConvftoui
-; P8BE-LABEL: fromDiffMemVarDConvftoui
-; P8LE-LABEL: fromDiffMemVarDConvftoui
 ; FIXME: implement finding consecutive loads with pre-inc
-; P9BE: lfsux
-; P9LE: lfsux
-; P8BE: lfsux
-; P8LE: lfsux
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @spltRegValConvftoui(float %val) {
+; P9BE-LABEL: spltRegValConvftoui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    xscvdpuxws f0, f1
+; P9BE-NEXT:    xxspltw v2, vs0, 1
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltRegValConvftoui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    xscvdpuxws f0, f1
+; P9LE-NEXT:    xxspltw v2, vs0, 1
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltRegValConvftoui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    xscvdpuxws f0, f1
+; P8BE-NEXT:    xxspltw v2, vs0, 1
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltRegValConvftoui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    xscvdpuxws f0, f1
+; P8LE-NEXT:    xxspltw v2, vs0, 1
+; P8LE-NEXT:    blr
 entry:
   %conv = fptoui float %val to i32
   %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
   %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
   ret <4 x i32> %splat.splat
-; P9BE-LABEL: spltRegValConvftoui
-; P9LE-LABEL: spltRegValConvftoui
-; P8BE-LABEL: spltRegValConvftoui
-; P8LE-LABEL: spltRegValConvftoui
-; P9BE: xscvdpuxws f[[REG1:[0-9]+]], f1
-; P9BE: xxspltw v2, vs[[REG1]], 1
-; P9BE: blr
-; P9LE: xscvdpuxws f[[REG1:[0-9]+]], f1
-; P9LE: xxspltw v2, vs[[REG1]], 1
-; P9LE: blr
-; P8BE: xscvdpuxws f[[REG1:[0-9]+]], f1
-; P8BE: xxspltw v2, vs[[REG1]], 1
-; P8BE: blr
-; P8LE: xscvdpuxws f[[REG1:[0-9]+]], f1
-; P8LE: xxspltw v2, vs[[REG1]], 1
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @spltMemValConvftoui(float* nocapture readonly %ptr) {
+; P9BE-LABEL: spltMemValConvftoui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lxvwsx vs0, 0, r3
+; P9BE-NEXT:    xvcvspuxws v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltMemValConvftoui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lxvwsx vs0, 0, r3
+; P9LE-NEXT:    xvcvspuxws v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltMemValConvftoui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lfsx f0, 0, r3
+; P8BE-NEXT:    xscvdpuxws f0, f0
+; P8BE-NEXT:    xxspltw v2, vs0, 1
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltMemValConvftoui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lfsx f0, 0, r3
+; P8LE-NEXT:    xscvdpuxws f0, f0
+; P8LE-NEXT:    xxspltw v2, vs0, 1
+; P8LE-NEXT:    blr
 entry:
   %0 = load float, float* %ptr, align 4
   %conv = fptoui float %0 to i32
   %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
   %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
   ret <4 x i32> %splat.splat
-; P9BE-LABEL: spltMemValConvftoui
-; P9LE-LABEL: spltMemValConvftoui
-; P8BE-LABEL: spltMemValConvftoui
-; P8LE-LABEL: spltMemValConvftoui
-; P9BE: lxvwsx [[REG1:[vs0-9]+]], 0, r3
-; P9BE: xvcvspuxws v2, [[REG1]]
-; P9LE: [[REG1:[vs0-9]+]], 0, r3
-; P9LE: xvcvspuxws v2, [[REG1]]
-; P8BE: lfsx [[REG1:f[0-9]+]], 0, r3
-; P8BE: xscvdpuxws f[[REG2:[0-9]+]], [[REG1]]
-; P8BE: xxspltw v2, vs[[REG2]], 1
-; P8LE: lfsx [[REG1:f[0-9]+]], 0, r3
-; P8LE: xscvdpuxws f[[REG2:[vs0-9]+]], [[REG1]]
-; P8LE: xxspltw v2, vs[[REG2]], 1
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @spltCnstConvdtoui() {
+; P9BE-LABEL: spltCnstConvdtoui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    vspltisw v2, 4
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltCnstConvdtoui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    vspltisw v2, 4
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltCnstConvdtoui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    vspltisw v2, 4
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltCnstConvdtoui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    vspltisw v2, 4
+; P8LE-NEXT:    blr
 entry:
   ret <4 x i32> <i32 4, i32 4, i32 4, i32 4>
-; P9BE-LABEL: spltCnstConvdtoui
-; P9LE-LABEL: spltCnstConvdtoui
-; P8BE-LABEL: spltCnstConvdtoui
-; P8LE-LABEL: spltCnstConvdtoui
-; P9BE: vspltisw v2, 4
-; P9BE: blr
-; P9LE: vspltisw v2, 4
-; P9LE: blr
-; P8BE: vspltisw v2, 4
-; P8BE: blr
-; P8LE: vspltisw v2, 4
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d) {
+; P9BE-LABEL: fromRegsConvdtoui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; P9BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P9BE-NEXT:    xxmrghd vs0, vs2, vs4
+; P9BE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
+; P9BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P9BE-NEXT:    xvcvdpuxws v2, vs0
+; P9BE-NEXT:    xxmrghd vs0, vs1, vs3
+; P9BE-NEXT:    xvcvdpuxws v3, vs0
+; P9BE-NEXT:    vmrgew v2, v3, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromRegsConvdtoui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
+; P9LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P9LE-NEXT:    xxmrghd vs0, vs3, vs1
+; P9LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; P9LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P9LE-NEXT:    xvcvdpuxws v2, vs0
+; P9LE-NEXT:    xxmrghd vs0, vs4, vs2
+; P9LE-NEXT:    xvcvdpuxws v3, vs0
+; P9LE-NEXT:    vmrgew v2, v3, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromRegsConvdtoui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P8BE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; P8BE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
+; P8BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8BE-NEXT:    xxmrghd vs0, vs2, vs4
+; P8BE-NEXT:    xxmrghd vs1, vs1, vs3
+; P8BE-NEXT:    xvcvdpuxws v2, vs0
+; P8BE-NEXT:    xvcvdpuxws v3, vs1
+; P8BE-NEXT:    vmrgew v2, v3, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromRegsConvdtoui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P8LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
+; P8LE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
+; P8LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8LE-NEXT:    xxmrghd vs0, vs3, vs1
+; P8LE-NEXT:    xxmrghd vs1, vs4, vs2
+; P8LE-NEXT:    xvcvdpuxws v2, vs0
+; P8LE-NEXT:    xvcvdpuxws v3, vs1
+; P8LE-NEXT:    vmrgew v2, v3, v2
+; P8LE-NEXT:    blr
 entry:
   %conv = fptoui double %a to i32
   %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
@@ -2644,53 +3493,90 @@ entry:
   %conv5 = fptoui double %d to i32
   %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %conv5, i32 3
   ret <4 x i32> %vecinit6
-; P9BE-LABEL: fromRegsConvdtoui
-; P9LE-LABEL: fromRegsConvdtoui
-; P8BE-LABEL: fromRegsConvdtoui
-; P8LE-LABEL: fromRegsConvdtoui
-; P9BE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
-; P9BE: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
-; P9BE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
-; P9BE: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9BE: vmrgew v2, [[REG3]], [[REG4]]
-; P9LE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
-; P9LE: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9LE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
-; P9LE: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
-; P9LE: vmrgew v2, [[REG4]], [[REG3]]
-; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
-; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
-; P8BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P8BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
-; P8BE: vmrgew v2, [[REG3]], [[REG4]]
-; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
-; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
-; P8LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P8LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
-; P8LE: vmrgew v2, [[REG4]], [[REG3]]
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @fromDiffConstsConvdtoui() {
+; P9BE-LABEL: fromDiffConstsConvdtoui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    addis r3, r2, .LCPI59_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI59_0@toc@l
+; P9BE-NEXT:    lxvx v2, 0, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffConstsConvdtoui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    addis r3, r2, .LCPI59_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI59_0@toc@l
+; P9LE-NEXT:    lxvx v2, 0, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffConstsConvdtoui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addis r3, r2, .LCPI59_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI59_0@toc@l
+; P8BE-NEXT:    lxvw4x v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffConstsConvdtoui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addis r3, r2, .LCPI59_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI59_0@toc@l
+; P8LE-NEXT:    lvx v2, 0, r3
+; P8LE-NEXT:    blr
 entry:
   ret <4 x i32> <i32 24, i32 234, i32 988, i32 422>
-; P9BE-LABEL: fromDiffConstsConvdtoui
-; P9LE-LABEL: fromDiffConstsConvdtoui
-; P8BE-LABEL: fromDiffConstsConvdtoui
-; P8LE-LABEL: fromDiffConstsConvdtoui
-; P9BE: lxv
-; P9BE: blr
-; P9LE: lxv
-; P9LE: blr
-; P8BE: lxvw4x
-; P8BE: blr
-; P8LE: lvx
-; P8LE-NOT: xxswapd
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromDiffMemConsAConvdtoui(double* nocapture readonly %ptr) {
+; P9BE-LABEL: fromDiffMemConsAConvdtoui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lxv vs0, 0(r3)
+; P9BE-NEXT:    lxv vs1, 16(r3)
+; P9BE-NEXT:    xxmrgld vs2, vs0, vs1
+; P9BE-NEXT:    xxmrghd vs0, vs0, vs1
+; P9BE-NEXT:    xvcvdpuxws v2, vs2
+; P9BE-NEXT:    xvcvdpuxws v3, vs0
+; P9BE-NEXT:    vmrgew v2, v3, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemConsAConvdtoui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lxv vs0, 0(r3)
+; P9LE-NEXT:    lxv vs1, 16(r3)
+; P9LE-NEXT:    xxmrgld vs2, vs1, vs0
+; P9LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P9LE-NEXT:    xvcvdpuxws v2, vs2
+; P9LE-NEXT:    xvcvdpuxws v3, vs0
+; P9LE-NEXT:    vmrgew v2, v3, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemConsAConvdtoui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    li r4, 16
+; P8BE-NEXT:    lxvd2x vs0, 0, r3
+; P8BE-NEXT:    lxvd2x vs1, r3, r4
+; P8BE-NEXT:    xxmrgld vs2, vs0, vs1
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs1
+; P8BE-NEXT:    xvcvdpuxws v2, vs2
+; P8BE-NEXT:    xvcvdpuxws v3, vs0
+; P8BE-NEXT:    vmrgew v2, v3, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemConsAConvdtoui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    li r4, 16
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    lxvd2x vs1, r3, r4
+; P8LE-NEXT:    xxswapd vs0, vs0
+; P8LE-NEXT:    xxswapd vs1, vs1
+; P8LE-NEXT:    xxmrgld vs2, vs1, vs0
+; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P8LE-NEXT:    xvcvdpuxws v2, vs2
+; P8LE-NEXT:    xvcvdpuxws v3, vs0
+; P8LE-NEXT:    vmrgew v2, v3, v2
+; P8LE-NEXT:    blr
 entry:
   %0 = bitcast double* %ptr to <2 x double>*
   %1 = load <2 x double>, <2 x double>* %0, align 8
@@ -2701,44 +3587,61 @@ entry:
   %5 = fptoui <2 x double> %4 to <2 x i32>
   %vecinit9 = shufflevector <2 x i32> %2, <2 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   ret <4 x i32> %vecinit9
-; P9BE-LABEL: fromDiffMemConsAConvdtoui
-; P9LE-LABEL: fromDiffMemConsAConvdtoui
-; P8BE-LABEL: fromDiffMemConsAConvdtoui
-; P8LE-LABEL: fromDiffMemConsAConvdtoui
-; P9BE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3)
-; P9BE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
-; P9BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]]
-; P9BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]]
-; P9BE-DAG: xvcvdpuxws [[REG5:[vs0-9]+]], [[REG3]]
-; P9BE-DAG: xvcvdpuxws [[REG6:[vs0-9]+]], [[REG4]]
-; P9BE: vmrgew v2, [[REG6]], [[REG5]]
-; P9LE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3)
-; P9LE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
-; P9LE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG2]], [[REG1]]
-; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]]
-; P9LE-DAG: xvcvdpuxws [[REG5:[vs0-9]+]], [[REG3]]
-; P9LE-DAG: xvcvdpuxws [[REG6:[vs0-9]+]], [[REG4]]
-; P9LE: vmrgew v2, [[REG6]], [[REG5]]
-; P8BE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
-; P8BE: lxvd2x [[REG2:[vs0-9]+]], r3, r4
-; P8BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]]
-; P8BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]]
-; P8BE-DAG: xvcvdpuxws [[REG5:[vs0-9]+]], [[REG3]]
-; P8BE-DAG: xvcvdpuxws [[REG6:[vs0-9]+]], [[REG4]]
-; P8BE: vmrgew v2, [[REG6]], [[REG5]]
-; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
-; P8LE: lxvd2x [[REG2:[vs0-9]+]], r3, r4
-; P8LE-DAG: xxswapd [[REG3:[vs0-9]+]], [[REG1]]
-; P8LE-DAG: xxswapd [[REG4:[vs0-9]+]], [[REG2]]
-; P8LE-DAG: xxmrgld [[REG5:[vs0-9]+]], [[REG4]], [[REG3]]
-; P8LE-DAG: xxmrghd [[REG6:[vs0-9]+]], [[REG4]], [[REG3]]
-; P8LE-DAG: xvcvdpuxws [[REG7:[vs0-9]+]], [[REG5]]
-; P8LE-DAG: xvcvdpuxws [[REG8:[vs0-9]+]], [[REG6]]
-; P8LE: vmrgew v2, [[REG8]], [[REG7]]
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromDiffMemConsDConvdtoui(double* nocapture readonly %ptr) {
+; P9BE-LABEL: fromDiffMemConsDConvdtoui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lfd f0, 24(r3)
+; P9BE-NEXT:    lfd f1, 16(r3)
+; P9BE-NEXT:    lfd f2, 8(r3)
+; P9BE-NEXT:    lfd f3, 0(r3)
+; P9BE-NEXT:    xxmrghd vs1, vs1, vs3
+; P9BE-NEXT:    xxmrghd vs0, vs0, vs2
+; P9BE-NEXT:    xvcvdpuxws v2, vs1
+; P9BE-NEXT:    xvcvdpuxws v3, vs0
+; P9BE-NEXT:    vmrgew v2, v3, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemConsDConvdtoui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lfd f0, 24(r3)
+; P9LE-NEXT:    lfd f2, 8(r3)
+; P9LE-NEXT:    lfd f1, 16(r3)
+; P9LE-NEXT:    lfd f3, 0(r3)
+; P9LE-NEXT:    xxmrghd vs0, vs2, vs0
+; P9LE-NEXT:    xvcvdpuxws v2, vs0
+; P9LE-NEXT:    xxmrghd vs0, vs3, vs1
+; P9LE-NEXT:    xvcvdpuxws v3, vs0
+; P9LE-NEXT:    vmrgew v2, v3, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemConsDConvdtoui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lfdx f3, 0, r3
+; P8BE-NEXT:    lfd f0, 24(r3)
+; P8BE-NEXT:    lfd f1, 8(r3)
+; P8BE-NEXT:    lfd f2, 16(r3)
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs1
+; P8BE-NEXT:    xxmrghd vs1, vs2, vs3
+; P8BE-NEXT:    xvcvdpuxws v2, vs0
+; P8BE-NEXT:    xvcvdpuxws v3, vs1
+; P8BE-NEXT:    vmrgew v2, v2, v3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemConsDConvdtoui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lfdx f3, 0, r3
+; P8LE-NEXT:    lfd f0, 24(r3)
+; P8LE-NEXT:    lfd f1, 8(r3)
+; P8LE-NEXT:    lfd f2, 16(r3)
+; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P8LE-NEXT:    xxmrghd vs1, vs3, vs2
+; P8LE-NEXT:    xvcvdpuxws v2, vs0
+; P8LE-NEXT:    xvcvdpuxws v3, vs1
+; P8LE-NEXT:    vmrgew v2, v3, v2
+; P8LE-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds double, double* %ptr, i64 3
   %0 = load double, double* %arrayidx, align 8
@@ -2756,50 +3659,65 @@ entry:
   %conv8 = fptoui double %3 to i32
   %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
   ret <4 x i32> %vecinit9
-; P9BE-LABEL: fromDiffMemConsDConvdtoui
-; P9LE-LABEL: fromDiffMemConsDConvdtoui
-; P8BE-LABEL: fromDiffMemConsDConvdtoui
-; P8LE-LABEL: fromDiffMemConsDConvdtoui
-; P9BE: lfd
-; P9BE: lfd
-; P9BE: lfd
-; P9BE: lfd
-; P9BE: xxmrghd
-; P9BE: xxmrghd
-; P9BE: xvcvdpuxws
-; P9BE: xvcvdpuxws
-; P9BE: vmrgew v2
-; P9LE: lfd
-; P9LE: lfd
-; P9LE: lfd
-; P9LE: lfd
-; P9LE: xxmrghd
-; P9LE: xvcvdpuxws
-; P9LE: xxmrghd
-; P9LE: xvcvdpuxws
-; P9LE: vmrgew v2
-; P8BE: lfdx
-; P8BE: lfd
-; P8BE: lfd
-; P8BE: lfd
-; P8BE: xxmrghd
-; P8BE: xxmrghd
-; P8BE: xvcvdpuxws
-; P8BE: xvcvdpuxws
-; P8BE: vmrgew v2
-; P8LE: lfdx
-; P8LE: lfd
-; P8LE: lfd
-; P8LE: lfd
-; P8LE: xxmrghd
-; P8LE: xxmrghd
-; P8LE: xvcvdpuxws
-; P8LE: xvcvdpuxws
-; P8LE: vmrgew v2
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromDiffMemVarAConvdtoui(double* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromDiffMemVarAConvdtoui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 3
+; P9BE-NEXT:    lfdux f0, r3, r4
+; P9BE-NEXT:    lfd f1, 8(r3)
+; P9BE-NEXT:    lfd f2, 16(r3)
+; P9BE-NEXT:    lfd f3, 24(r3)
+; P9BE-NEXT:    xxmrghd vs1, vs1, vs3
+; P9BE-NEXT:    xxmrghd vs0, vs0, vs2
+; P9BE-NEXT:    xvcvdpuxws v2, vs1
+; P9BE-NEXT:    xvcvdpuxws v3, vs0
+; P9BE-NEXT:    vmrgew v2, v3, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemVarAConvdtoui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 3
+; P9LE-NEXT:    lfdux f0, r3, r4
+; P9LE-NEXT:    lfd f2, 16(r3)
+; P9LE-NEXT:    lfd f1, 8(r3)
+; P9LE-NEXT:    lfd f3, 24(r3)
+; P9LE-NEXT:    xxmrghd vs0, vs2, vs0
+; P9LE-NEXT:    xvcvdpuxws v2, vs0
+; P9LE-NEXT:    xxmrghd vs0, vs3, vs1
+; P9LE-NEXT:    xvcvdpuxws v3, vs0
+; P9LE-NEXT:    vmrgew v2, v3, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemVarAConvdtoui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 3
+; P8BE-NEXT:    lfdux f0, r3, r4
+; P8BE-NEXT:    lfd f1, 8(r3)
+; P8BE-NEXT:    lfd f2, 24(r3)
+; P8BE-NEXT:    lfd f3, 16(r3)
+; P8BE-NEXT:    xxmrghd vs1, vs1, vs2
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs3
+; P8BE-NEXT:    xvcvdpuxws v2, vs1
+; P8BE-NEXT:    xvcvdpuxws v3, vs0
+; P8BE-NEXT:    vmrgew v2, v3, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemVarAConvdtoui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 3
+; P8LE-NEXT:    lfdux f0, r3, r4
+; P8LE-NEXT:    lfd f1, 16(r3)
+; P8LE-NEXT:    lfd f2, 8(r3)
+; P8LE-NEXT:    lfd f3, 24(r3)
+; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P8LE-NEXT:    xxmrghd vs1, vs3, vs2
+; P8LE-NEXT:    xvcvdpuxws v2, vs0
+; P8LE-NEXT:    xvcvdpuxws v3, vs1
+; P8LE-NEXT:    vmrgew v2, v3, v2
+; P8LE-NEXT:    blr
 entry:
   %idxprom = sext i32 %elem to i64
   %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
@@ -2825,50 +3743,65 @@ entry:
   %conv13 = fptoui double %3 to i32
   %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3
   ret <4 x i32> %vecinit14
-; P9BE-LABEL: fromDiffMemVarAConvdtoui
-; P9LE-LABEL: fromDiffMemVarAConvdtoui
-; P8BE-LABEL: fromDiffMemVarAConvdtoui
-; P8LE-LABEL: fromDiffMemVarAConvdtoui
-; P9BE: lfdux
-; P9BE: lfd
-; P9BE: lfd
-; P9BE: lfd
-; P9BE: xxmrghd
-; P9BE: xxmrghd
-; P9BE: xvcvdpuxws
-; P9BE: xvcvdpuxws
-; P9BE: vmrgew v2
-; P9LE: lfdux
-; P9LE: lfd
-; P9LE: lfd
-; P9LE: lfd
-; P9LE: xxmrghd
-; P9LE: xvcvdpuxws
-; P9LE: xxmrghd
-; P9LE: xvcvdpuxws
-; P9LE: vmrgew v2
-; P8BE: lfdux
-; P8BE: lfd
-; P8BE: lfd
-; P8BE: lfd
-; P8BE: xxmrghd
-; P8BE: xxmrghd
-; P8BE: xvcvdpuxws
-; P8BE: xvcvdpuxws
-; P8BE: vmrgew v2
-; P8LE: lfdux
-; P8LE: lfd
-; P8LE: lfd
-; P8LE: lfd
-; P8LE: xxmrghd
-; P8LE: xxmrghd
-; P8LE: xvcvdpuxws
-; P8LE: xvcvdpuxws
-; P8LE: vmrgew v2
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @fromDiffMemVarDConvdtoui(double* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromDiffMemVarDConvdtoui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 3
+; P9BE-NEXT:    lfdux f0, r3, r4
+; P9BE-NEXT:    lfd f1, -8(r3)
+; P9BE-NEXT:    lfd f2, -16(r3)
+; P9BE-NEXT:    lfd f3, -24(r3)
+; P9BE-NEXT:    xxmrghd vs1, vs1, vs3
+; P9BE-NEXT:    xxmrghd vs0, vs0, vs2
+; P9BE-NEXT:    xvcvdpuxws v2, vs1
+; P9BE-NEXT:    xvcvdpuxws v3, vs0
+; P9BE-NEXT:    vmrgew v2, v3, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemVarDConvdtoui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 3
+; P9LE-NEXT:    lfdux f0, r3, r4
+; P9LE-NEXT:    lfd f2, -16(r3)
+; P9LE-NEXT:    lfd f1, -8(r3)
+; P9LE-NEXT:    lfd f3, -24(r3)
+; P9LE-NEXT:    xxmrghd vs0, vs2, vs0
+; P9LE-NEXT:    xvcvdpuxws v2, vs0
+; P9LE-NEXT:    xxmrghd vs0, vs3, vs1
+; P9LE-NEXT:    xvcvdpuxws v3, vs0
+; P9LE-NEXT:    vmrgew v2, v3, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemVarDConvdtoui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 3
+; P8BE-NEXT:    lfdux f0, r3, r4
+; P8BE-NEXT:    lfd f1, -8(r3)
+; P8BE-NEXT:    lfd f2, -24(r3)
+; P8BE-NEXT:    lfd f3, -16(r3)
+; P8BE-NEXT:    xxmrghd vs1, vs1, vs2
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs3
+; P8BE-NEXT:    xvcvdpuxws v2, vs1
+; P8BE-NEXT:    xvcvdpuxws v3, vs0
+; P8BE-NEXT:    vmrgew v2, v3, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemVarDConvdtoui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 3
+; P8LE-NEXT:    lfdux f0, r3, r4
+; P8LE-NEXT:    lfd f1, -16(r3)
+; P8LE-NEXT:    lfd f2, -8(r3)
+; P8LE-NEXT:    lfd f3, -24(r3)
+; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P8LE-NEXT:    xxmrghd vs1, vs3, vs2
+; P8LE-NEXT:    xvcvdpuxws v2, vs0
+; P8LE-NEXT:    xvcvdpuxws v3, vs1
+; P8LE-NEXT:    vmrgew v2, v3, v2
+; P8LE-NEXT:    blr
 entry:
   %idxprom = sext i32 %elem to i64
   %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
@@ -2894,236 +3827,315 @@ entry:
   %conv13 = fptoui double %3 to i32
   %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3
   ret <4 x i32> %vecinit14
-; P9BE-LABEL: fromDiffMemVarDConvdtoui
-; P9LE-LABEL: fromDiffMemVarDConvdtoui
-; P8BE-LABEL: fromDiffMemVarDConvdtoui
-; P8LE-LABEL: fromDiffMemVarDConvdtoui
-; P9BE: lfdux
-; P9BE: lfd
-; P9BE: lfd
-; P9BE: lfd
-; P9BE: xxmrghd
-; P9BE: xxmrghd
-; P9BE: xvcvdpuxws
-; P9BE: xvcvdpuxws
-; P9BE: vmrgew v2
-; P9LE: lfdux
-; P9LE: lfd
-; P9LE: lfd
-; P9LE: lfd
-; P9LE: xxmrghd
-; P9LE: xvcvdpuxws
-; P9LE: xxmrghd
-; P9LE: xvcvdpuxws
-; P9LE: vmrgew v2
-; P8BE: lfdux
-; P8BE: lfd
-; P8BE: lfd
-; P8BE: lfd
-; P8BE: xxmrghd
-; P8BE: xxmrghd
-; P8BE: xvcvdpuxws
-; P8BE: xvcvdpuxws
-; P8BE: vmrgew v2
-; P8LE: lfdux
-; P8LE: lfd
-; P8LE: lfd
-; P8LE: lfd
-; P8LE: xxmrghd
-; P8LE: xxmrghd
-; P8LE: xvcvdpuxws
-; P8LE: xvcvdpuxws
-; P8LE: vmrgew v2
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @spltRegValConvdtoui(double %val) {
+; P9BE-LABEL: spltRegValConvdtoui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    xscvdpuxws f0, f1
+; P9BE-NEXT:    xxspltw v2, vs0, 1
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltRegValConvdtoui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    xscvdpuxws f0, f1
+; P9LE-NEXT:    xxspltw v2, vs0, 1
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltRegValConvdtoui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    xscvdpuxws f0, f1
+; P8BE-NEXT:    xxspltw v2, vs0, 1
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltRegValConvdtoui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    xscvdpuxws f0, f1
+; P8LE-NEXT:    xxspltw v2, vs0, 1
+; P8LE-NEXT:    blr
 entry:
   %conv = fptoui double %val to i32
   %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
   %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
   ret <4 x i32> %splat.splat
-; P9BE-LABEL: spltRegValConvdtoui
-; P9LE-LABEL: spltRegValConvdtoui
-; P8BE-LABEL: spltRegValConvdtoui
-; P8LE-LABEL: spltRegValConvdtoui
-; P9BE: xscvdpuxws
-; P9BE: xxspltw
-; P9BE: blr
-; P9LE: xscvdpuxws
-; P9LE: xxspltw
-; P9LE: blr
-; P8BE: xscvdpuxws
-; P8BE: xxspltw
-; P8BE: blr
-; P8LE: xscvdpuxws
-; P8LE: xxspltw
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <4 x i32> @spltMemValConvdtoui(double* nocapture readonly %ptr) {
+; P9BE-LABEL: spltMemValConvdtoui:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lfd f0, 0(r3)
+; P9BE-NEXT:    xscvdpuxws f0, f0
+; P9BE-NEXT:    xxspltw v2, vs0, 1
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltMemValConvdtoui:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lfd f0, 0(r3)
+; P9LE-NEXT:    xscvdpuxws f0, f0
+; P9LE-NEXT:    xxspltw v2, vs0, 1
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltMemValConvdtoui:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lfdx f0, 0, r3
+; P8BE-NEXT:    xscvdpuxws f0, f0
+; P8BE-NEXT:    xxspltw v2, vs0, 1
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltMemValConvdtoui:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lfdx f0, 0, r3
+; P8LE-NEXT:    xscvdpuxws f0, f0
+; P8LE-NEXT:    xxspltw v2, vs0, 1
+; P8LE-NEXT:    blr
 entry:
   %0 = load double, double* %ptr, align 8
   %conv = fptoui double %0 to i32
   %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
   %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
   ret <4 x i32> %splat.splat
-; P9BE-LABEL: spltMemValConvdtoui
-; P9LE-LABEL: spltMemValConvdtoui
-; P8BE-LABEL: spltMemValConvdtoui
-; P8LE-LABEL: spltMemValConvdtoui
-; P9BE: lfd
-; P9BE: xscvdpuxws
-; P9BE: xxspltw
-; P9BE: blr
-; P9LE: lfd
-; P9LE: xscvdpuxws
-; P9LE: xxspltw
-; P9LE: blr
-; P8BE: lfdx
-; P8BE: xscvdpuxws
-; P8BE: xxspltw
-; P8BE: blr
-; P8LE: lfdx
-; P8LE: xscvdpuxws
-; P8LE: xxspltw
-; P8LE: blr
 }
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @allZeroll() {
+; P9BE-LABEL: allZeroll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    xxlxor v2, v2, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: allZeroll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    xxlxor v2, v2, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: allZeroll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    xxlxor v2, v2, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: allZeroll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    xxlxor v2, v2, v2
+; P8LE-NEXT:    blr
 entry:
   ret <2 x i64> zeroinitializer
-; P9BE-LABEL: allZeroll
-; P9LE-LABEL: allZeroll
-; P8BE-LABEL: allZeroll
-; P8LE-LABEL: allZeroll
-; P9BE: xxlxor v2, v2, v2
-; P9BE: blr
-; P9LE: xxlxor v2, v2, v2
-; P9LE: blr
-; P8BE: xxlxor v2, v2, v2
-; P8BE: blr
-; P8LE: xxlxor v2, v2, v2
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @allOnell() {
+; P9BE-LABEL: allOnell:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    xxspltib v2, 255
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: allOnell:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    xxspltib v2, 255
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: allOnell:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    vspltisb v2, -1
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: allOnell:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    vspltisb v2, -1
+; P8LE-NEXT:    blr
 entry:
   ret <2 x i64> <i64 -1, i64 -1>
-; P9BE-LABEL: allOnell
-; P9LE-LABEL: allOnell
-; P8BE-LABEL: allOnell
-; P8LE-LABEL: allOnell
-; P9BE: xxspltib v2, 255
-; P9BE: blr
-; P9LE: xxspltib v2, 255
-; P9LE: blr
-; P8BE: vspltisb v2, -1
-; P8BE: blr
-; P8LE: vspltisb v2, -1
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @spltConst1ll() {
+; P9BE-LABEL: spltConst1ll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    addis r3, r2, .LCPI68_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI68_0@toc@l
+; P9BE-NEXT:    lxvx v2, 0, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltConst1ll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    addis r3, r2, .LCPI68_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI68_0@toc@l
+; P9LE-NEXT:    lxvx v2, 0, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltConst1ll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addis r3, r2, .LCPI68_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI68_0@toc@l
+; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltConst1ll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addis r3, r2, .LCPI68_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI68_0@toc@l
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    blr
 entry:
   ret <2 x i64> <i64 1, i64 1>
-; P9BE-LABEL: spltConst1ll
-; P9LE-LABEL: spltConst1ll
-; P8BE-LABEL: spltConst1ll
-; P8LE-LABEL: spltConst1ll
-; P9BE: lxv
-; P9BE: blr
-; P9LE: lxv
-; P9LE: blr
-; P8BE: lxvd2x
-; P8BE: blr
-; P8LE: lxvd2x
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @spltConst16kll() {
+; P9BE-LABEL: spltConst16kll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    addis r3, r2, .LCPI69_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI69_0@toc@l
+; P9BE-NEXT:    lxvx v2, 0, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltConst16kll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    addis r3, r2, .LCPI69_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI69_0@toc@l
+; P9LE-NEXT:    lxvx v2, 0, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltConst16kll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addis r3, r2, .LCPI69_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI69_0@toc@l
+; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltConst16kll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addis r3, r2, .LCPI69_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI69_0@toc@l
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    blr
 entry:
   ret <2 x i64> <i64 32767, i64 32767>
-; P9BE-LABEL: spltConst16kll
-; P9LE-LABEL: spltConst16kll
-; P8BE-LABEL: spltConst16kll
-; P8LE-LABEL: spltConst16kll
-; P9BE: lxv
-; P9BE: blr
-; P9LE: lxv
-; P9LE: blr
-; P8BE: lxvd2x
-; P8BE: blr
-; P8LE: lxvd2x
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @spltConst32kll() {
+; P9BE-LABEL: spltConst32kll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    addis r3, r2, .LCPI70_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI70_0@toc@l
+; P9BE-NEXT:    lxvx v2, 0, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltConst32kll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    addis r3, r2, .LCPI70_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI70_0@toc@l
+; P9LE-NEXT:    lxvx v2, 0, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltConst32kll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addis r3, r2, .LCPI70_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI70_0@toc@l
+; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltConst32kll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addis r3, r2, .LCPI70_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI70_0@toc@l
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    blr
 entry:
   ret <2 x i64> <i64 65535, i64 65535>
-; P9BE-LABEL: spltConst32kll
-; P9LE-LABEL: spltConst32kll
-; P8BE-LABEL: spltConst32kll
-; P8LE-LABEL: spltConst32kll
-; P9BE: lxv
-; P9BE: blr
-; P9LE: lxv
-; P9LE: blr
-; P8BE: lxvd2x
-; P8BE: blr
-; P8LE: lxvd2x
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @fromRegsll(i64 %a, i64 %b) {
+; P9BE-LABEL: fromRegsll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    mtvsrdd v2, r3, r4
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromRegsll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    mtvsrdd v2, r4, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromRegsll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    mtvsrd f0, r4
+; P8BE-NEXT:    mtvsrd f1, r3
+; P8BE-NEXT:    xxmrghd v2, vs1, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromRegsll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    mtvsrd f0, r3
+; P8LE-NEXT:    mtvsrd f1, r4
+; P8LE-NEXT:    xxmrghd v2, vs1, vs0
+; P8LE-NEXT:    blr
 entry:
   %vecinit = insertelement <2 x i64> undef, i64 %a, i32 0
   %vecinit1 = insertelement <2 x i64> %vecinit, i64 %b, i32 1
   ret <2 x i64> %vecinit1
-; P9BE-LABEL: fromRegsll
-; P9LE-LABEL: fromRegsll
-; P8BE-LABEL: fromRegsll
-; P8LE-LABEL: fromRegsll
-; P9BE: mtvsrdd v2, r3, r4
-; P9BE: blr
-; P9LE: mtvsrdd v2, r4, r3
-; P9LE: blr
-; P8BE-DAG: mtvsrd {{[vsf0-9]+}}, r3
-; P8BE-DAG: mtvsrd {{[vsf0-9]+}}, r4
-; P8BE: xxmrghd v2
-; P8BE: blr
-; P8LE-DAG: mtvsrd {{[vsf0-9]+}}, r3
-; P8LE-DAG: mtvsrd {{[vsf0-9]+}}, r4
-; P8LE: xxmrghd v2
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @fromDiffConstsll() {
+; P9BE-LABEL: fromDiffConstsll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    addis r3, r2, .LCPI72_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI72_0@toc@l
+; P9BE-NEXT:    lxvx v2, 0, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffConstsll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    addis r3, r2, .LCPI72_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI72_0@toc@l
+; P9LE-NEXT:    lxvx v2, 0, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffConstsll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addis r3, r2, .LCPI72_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI72_0@toc@l
+; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffConstsll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addis r3, r2, .LCPI72_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI72_0@toc@l
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    blr
 entry:
   ret <2 x i64> <i64 242, i64 -113>
-; P9BE-LABEL: fromDiffConstsll
-; P9LE-LABEL: fromDiffConstsll
-; P8BE-LABEL: fromDiffConstsll
-; P8LE-LABEL: fromDiffConstsll
-; P9BE: lxv
-; P9BE: blr
-; P9LE: lxv
-; P9LE: blr
-; P8BE: lxvd2x
-; P8BE: blr
-; P8LE: lxvd2x
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromDiffMemConsAll(i64* nocapture readonly %arr) {
+; P9BE-LABEL: fromDiffMemConsAll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lxv v2, 0(r3)
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemConsAll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lxv v2, 0(r3)
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemConsAll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemConsAll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %0 = load i64, i64* %arr, align 8
   %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0
@@ -3131,23 +4143,34 @@ entry:
   %1 = load i64, i64* %arrayidx1, align 8
   %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
   ret <2 x i64> %vecinit2
-; P9BE-LABEL: fromDiffMemConsAll
-; P9LE-LABEL: fromDiffMemConsAll
-; P8BE-LABEL: fromDiffMemConsAll
-; P8LE-LABEL: fromDiffMemConsAll
-; P9BE: lxv v2
-; P9BE: blr
-; P9LE: lxv v2
-; P9LE: blr
-; P8BE: lxvd2x v2
-; P8BE: blr
-; P8LE: lxvd2x
-; P8LE: xxswapd v2
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromDiffMemConsDll(i64* nocapture readonly %arr) {
+; P9BE-LABEL: fromDiffMemConsDll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lxv v2, 16(r3)
+; P9BE-NEXT:    xxswapd v2, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemConsDll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lxv v2, 16(r3)
+; P9LE-NEXT:    xxswapd v2, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemConsDll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addi r3, r3, 16
+; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    xxswapd v2, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemConsDll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addi r3, r3, 16
+; P8LE-NEXT:    lxvd2x v2, 0, r3
+; P8LE-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds i64, i64* %arr, i64 3
   %0 = load i64, i64* %arrayidx, align 8
@@ -3156,24 +4179,34 @@ entry:
   %1 = load i64, i64* %arrayidx1, align 8
   %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
   ret <2 x i64> %vecinit2
-; P9BE-LABEL: fromDiffMemConsDll
-; P9LE-LABEL: fromDiffMemConsDll
-; P8BE-LABEL: fromDiffMemConsDll
-; P8LE-LABEL: fromDiffMemConsDll
-; P9BE: lxv v2
-; P9BE: blr
-; P9LE: lxv
-; P9LE: xxswapd v2
-; P9LE: blr
-; P8BE: lxvd2x
-; P8BE: xxswapd v2
-; P8BE-NEXT: blr
-; P8LE: lxvd2x v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromDiffMemVarAll(i64* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromDiffMemVarAll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 3
+; P9BE-NEXT:    lxvx v2, r3, r4
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemVarAll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 3
+; P9LE-NEXT:    lxvx v2, r3, r4
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemVarAll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 3
+; P8BE-NEXT:    lxvd2x v2, r3, r4
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemVarAll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 3
+; P8LE-NEXT:    lxvd2x vs0, r3, r4
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %idxprom = sext i32 %elem to i64
   %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom
@@ -3185,27 +4218,44 @@ entry:
   %1 = load i64, i64* %arrayidx2, align 8
   %vecinit3 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
   ret <2 x i64> %vecinit3
-; P9BE-LABEL: fromDiffMemVarAll
-; P9LE-LABEL: fromDiffMemVarAll
-; P8BE-LABEL: fromDiffMemVarAll
-; P8LE-LABEL: fromDiffMemVarAll
-; P9BE: sldi
-; P9BE: lxvx v2
-; P9BE-NEXT: blr
-; P9LE: sldi
-; P9LE: lxvx v2
-; P9LE-NEXT: blr
-; P8BE: sldi
-; P8BE: lxvd2x v2
-; P8BE-NEXT: blr
-; P8LE: sldi
-; P8LE: lxvd2x
-; P8LE: xxswapd v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromDiffMemVarDll(i64* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromDiffMemVarDll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 3
+; P9BE-NEXT:    add r3, r3, r4
+; P9BE-NEXT:    addi r3, r3, -8
+; P9BE-NEXT:    lxvx v2, 0, r3
+; P9BE-NEXT:    xxswapd v2, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemVarDll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 3
+; P9LE-NEXT:    add r3, r3, r4
+; P9LE-NEXT:    addi r3, r3, -8
+; P9LE-NEXT:    lxvx v2, 0, r3
+; P9LE-NEXT:    xxswapd v2, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemVarDll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 3
+; P8BE-NEXT:    add r3, r3, r4
+; P8BE-NEXT:    addi r3, r3, -8
+; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    xxswapd v2, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemVarDll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 3
+; P8LE-NEXT:    add r3, r3, r4
+; P8LE-NEXT:    addi r3, r3, -8
+; P8LE-NEXT:    lxvd2x v2, 0, r3
+; P8LE-NEXT:    blr
 entry:
   %idxprom = sext i32 %elem to i64
   %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom
@@ -3217,29 +4267,41 @@ entry:
   %1 = load i64, i64* %arrayidx2, align 8
   %vecinit3 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
   ret <2 x i64> %vecinit3
-; P9BE-LABEL: fromDiffMemVarDll
-; P9LE-LABEL: fromDiffMemVarDll
-; P8BE-LABEL: fromDiffMemVarDll
-; P8LE-LABEL: fromDiffMemVarDll
-; P9BE: sldi
-; P9BE: lxv
-; P9BE: xxswapd v2
-; P9BE-NEXT: blr
-; P9LE: sldi
-; P9LE: lxv
-; P9LE: xxswapd v2
-; P9LE-NEXT: blr
-; P8BE: sldi
-; P8BE: lxvd2x
-; P8BE: xxswapd v2
-; P8BE-NEXT: blr
-; P8LE: sldi
-; P8LE: lxvd2x v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromRandMemConsll(i64* nocapture readonly %arr) {
+; P9BE-LABEL: fromRandMemConsll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    ld r4, 32(r3)
+; P9BE-NEXT:    ld r3, 144(r3)
+; P9BE-NEXT:    mtvsrdd v2, r4, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromRandMemConsll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    ld r4, 32(r3)
+; P9LE-NEXT:    ld r3, 144(r3)
+; P9LE-NEXT:    mtvsrdd v2, r3, r4
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromRandMemConsll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    ld r4, 144(r3)
+; P8BE-NEXT:    ld r3, 32(r3)
+; P8BE-NEXT:    mtvsrd f0, r4
+; P8BE-NEXT:    mtvsrd f1, r3
+; P8BE-NEXT:    xxmrghd v2, vs1, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromRandMemConsll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    ld r4, 32(r3)
+; P8LE-NEXT:    ld r3, 144(r3)
+; P8LE-NEXT:    mtvsrd f0, r4
+; P8LE-NEXT:    mtvsrd f1, r3
+; P8LE-NEXT:    xxmrghd v2, vs1, vs0
+; P8LE-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds i64, i64* %arr, i64 4
   %0 = load i64, i64* %arrayidx, align 8
@@ -3248,34 +4310,49 @@ entry:
   %1 = load i64, i64* %arrayidx1, align 8
   %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
   ret <2 x i64> %vecinit2
-; P9BE-LABEL: fromRandMemConsll
-; P9LE-LABEL: fromRandMemConsll
-; P8BE-LABEL: fromRandMemConsll
-; P8LE-LABEL: fromRandMemConsll
-; P9BE: ld
-; P9BE: ld
-; P9BE: mtvsrdd v2
-; P9BE-NEXT: blr
-; P9LE: ld
-; P9LE: ld
-; P9LE: mtvsrdd v2
-; P9LE-NEXT: blr
-; P8BE: ld
-; P8BE: ld
-; P8BE-DAG: mtvsrd
-; P8BE-DAG: mtvsrd
-; P8BE: xxmrghd v2
-; P8BE-NEXT: blr
-; P8LE: ld
-; P8LE: ld
-; P8LE-DAG: mtvsrd
-; P8LE-DAG: mtvsrd
-; P8LE: xxmrghd v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromRandMemVarll(i64* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromRandMemVarll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 3
+; P9BE-NEXT:    add r3, r3, r4
+; P9BE-NEXT:    ld r4, 32(r3)
+; P9BE-NEXT:    ld r3, 8(r3)
+; P9BE-NEXT:    mtvsrdd v2, r4, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromRandMemVarll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 3
+; P9LE-NEXT:    add r3, r3, r4
+; P9LE-NEXT:    ld r4, 32(r3)
+; P9LE-NEXT:    ld r3, 8(r3)
+; P9LE-NEXT:    mtvsrdd v2, r3, r4
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromRandMemVarll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 3
+; P8BE-NEXT:    add r3, r3, r4
+; P8BE-NEXT:    ld r4, 8(r3)
+; P8BE-NEXT:    ld r3, 32(r3)
+; P8BE-NEXT:    mtvsrd f0, r4
+; P8BE-NEXT:    mtvsrd f1, r3
+; P8BE-NEXT:    xxmrghd v2, vs1, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromRandMemVarll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 3
+; P8LE-NEXT:    add r3, r3, r4
+; P8LE-NEXT:    ld r4, 32(r3)
+; P8LE-NEXT:    ld r3, 8(r3)
+; P8LE-NEXT:    mtvsrd f0, r4
+; P8LE-NEXT:    mtvsrd f1, r3
+; P8LE-NEXT:    xxmrghd v2, vs1, vs0
+; P8LE-NEXT:    blr
 entry:
   %add = add nsw i32 %elem, 4
   %idxprom = sext i32 %add to i64
@@ -3288,144 +4365,207 @@ entry:
   %1 = load i64, i64* %arrayidx3, align 8
   %vecinit4 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
   ret <2 x i64> %vecinit4
-; P9BE-LABEL: fromRandMemVarll
-; P9LE-LABEL: fromRandMemVarll
-; P8BE-LABEL: fromRandMemVarll
-; P8LE-LABEL: fromRandMemVarll
-; P9BE: sldi
-; P9BE: ld
-; P9BE: ld
-; P9BE: mtvsrdd v2
-; P9BE-NEXT: blr
-; P9LE: sldi
-; P9LE: ld
-; P9LE: ld
-; P9LE: mtvsrdd v2
-; P9LE-NEXT: blr
-; P8BE: sldi
-; P8BE: ld
-; P8BE: ld
-; P8BE: mtvsrd
-; P8BE: mtvsrd
-; P8BE: xxmrghd v2
-; P8BE-NEXT: blr
-; P8LE: sldi
-; P8LE: ld
-; P8LE: ld
-; P8LE: mtvsrd
-; P8LE: mtvsrd
-; P8LE: xxmrghd v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @spltRegValll(i64 %val) {
+; P9BE-LABEL: spltRegValll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    mtvsrdd v2, r3, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltRegValll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    mtvsrdd v2, r3, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltRegValll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    mtvsrd f0, r3
+; P8BE-NEXT:    xxspltd v2, vs0, 0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltRegValll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    mtvsrd f0, r3
+; P8LE-NEXT:    xxspltd v2, vs0, 0
+; P8LE-NEXT:    blr
 entry:
   %splat.splatinsert = insertelement <2 x i64> undef, i64 %val, i32 0
   %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
   ret <2 x i64> %splat.splat
-; P9BE-LABEL: spltRegValll
-; P9LE-LABEL: spltRegValll
-; P8BE-LABEL: spltRegValll
-; P8LE-LABEL: spltRegValll
-; P9BE: mtvsrdd v2, r3, r3
-; P9BE-NEXT: blr
-; P9LE: mtvsrdd v2, r3, r3
-; P9LE-NEXT: blr
-; P8BE: mtvsrd {{[vsf]+}}[[REG1:[0-9]+]], r3
-; P8BE: xxspltd v2, {{[vsf]+}}[[REG1]], 0
-; P8BE-NEXT: blr
-; P8LE: mtvsrd {{[vsf]+}}[[REG1:[0-9]+]], r3
-; P8LE: xxspltd v2, {{[vsf]+}}[[REG1]], 0
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @spltMemValll(i64* nocapture readonly %ptr) {
+; P9BE-LABEL: spltMemValll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lxvdsx v2, 0, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltMemValll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lxvdsx v2, 0, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltMemValll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lxvdsx v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltMemValll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lxvdsx v2, 0, r3
+; P8LE-NEXT:    blr
 entry:
   %0 = load i64, i64* %ptr, align 8
   %splat.splatinsert = insertelement <2 x i64> undef, i64 %0, i32 0
   %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
   ret <2 x i64> %splat.splat
-; P9BE-LABEL: spltMemValll
-; P9LE-LABEL: spltMemValll
-; P8BE-LABEL: spltMemValll
-; P8LE-LABEL: spltMemValll
-; P9BE: lxvdsx v2
-; P9BE-NEXT: blr
-; P9LE: lxvdsx v2
-; P9LE-NEXT: blr
-; P8BE: lxvdsx v2
-; P8BE-NEXT: blr
-; P8LE: lxvdsx v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @spltCnstConvftoll() {
+; P9BE-LABEL: spltCnstConvftoll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    addis r3, r2, .LCPI81_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI81_0@toc@l
+; P9BE-NEXT:    lxvx v2, 0, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltCnstConvftoll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    addis r3, r2, .LCPI81_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI81_0@toc@l
+; P9LE-NEXT:    lxvx v2, 0, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltCnstConvftoll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addis r3, r2, .LCPI81_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI81_0@toc@l
+; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltCnstConvftoll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addis r3, r2, .LCPI81_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI81_0@toc@l
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    blr
 entry:
   ret <2 x i64> <i64 4, i64 4>
-; P9BE-LABEL: spltCnstConvftoll
-; P9LE-LABEL: spltCnstConvftoll
-; P8BE-LABEL: spltCnstConvftoll
-; P8LE-LABEL: spltCnstConvftoll
-; P9BE: lxv
-; P9BE: blr
-; P9LE: lxv
-; P9LE: blr
-; P8BE: lxvd2x
-; P8BE: blr
-; P8LE: lxvd2x
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @fromRegsConvftoll(float %a, float %b) {
+; P9BE-LABEL: fromRegsConvftoll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P9BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P9BE-NEXT:    xxmrghd vs0, vs1, vs2
+; P9BE-NEXT:    xvcvdpsxds v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromRegsConvftoll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P9LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P9LE-NEXT:    xxmrghd vs0, vs2, vs1
+; P9LE-NEXT:    xvcvdpsxds v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromRegsConvftoll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P8BE-NEXT:    xxmrghd vs0, vs1, vs2
+; P8BE-NEXT:    xvcvdpsxds v2, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromRegsConvftoll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P8LE-NEXT:    xxmrghd vs0, vs2, vs1
+; P8LE-NEXT:    xvcvdpsxds v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %conv = fptosi float %a to i64
   %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
   %conv1 = fptosi float %b to i64
   %vecinit2 = insertelement <2 x i64> %vecinit, i64 %conv1, i32 1
   ret <2 x i64> %vecinit2
-; P9BE-LABEL: fromRegsConvftoll
-; P9LE-LABEL: fromRegsConvftoll
-; P8BE-LABEL: fromRegsConvftoll
-; P8LE-LABEL: fromRegsConvftoll
-; P9BE: xxmrghd
-; P9BE: xvcvdpsxds v2
-; P9BE-NEXT: blr
-; P9LE: xxmrghd
-; P9LE: xvcvdpsxds v2
-; P9LE-NEXT: blr
-; P8BE: xxmrghd
-; P8BE: xvcvdpsxds v2
-; P8BE-NEXT: blr
-; P8LE: xxmrghd
-; P8LE: xvcvdpsxds v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @fromDiffConstsConvftoll() {
+; P9BE-LABEL: fromDiffConstsConvftoll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    addis r3, r2, .LCPI83_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI83_0@toc@l
+; P9BE-NEXT:    lxvx v2, 0, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffConstsConvftoll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    addis r3, r2, .LCPI83_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI83_0@toc@l
+; P9LE-NEXT:    lxvx v2, 0, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffConstsConvftoll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addis r3, r2, .LCPI83_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI83_0@toc@l
+; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffConstsConvftoll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addis r3, r2, .LCPI83_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI83_0@toc@l
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    blr
 entry:
   ret <2 x i64> <i64 24, i64 234>
-; P9BE-LABEL: fromDiffConstsConvftoll
-; P9LE-LABEL: fromDiffConstsConvftoll
-; P8BE-LABEL: fromDiffConstsConvftoll
-; P8LE-LABEL: fromDiffConstsConvftoll
-; P9BE: lxvx v2
-; P9BE: blr
-; P9LE: lxvx v2
-; P9LE: blr
-; P8BE: lxvd2x v2
-; P8BE: blr
-; P8LE: lxvd2x
-; P8LE: xxswapd v2
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromDiffMemConsAConvftoll(float* nocapture readonly %ptr) {
+; P9BE-LABEL: fromDiffMemConsAConvftoll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lfs f0, 0(r3)
+; P9BE-NEXT:    lfs f1, 4(r3)
+; P9BE-NEXT:    xxmrghd vs0, vs0, vs1
+; P9BE-NEXT:    xvcvdpsxds v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemConsAConvftoll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lfs f0, 0(r3)
+; P9LE-NEXT:    lfs f1, 4(r3)
+; P9LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P9LE-NEXT:    xvcvdpsxds v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemConsAConvftoll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lfsx f0, 0, r3
+; P8BE-NEXT:    lfs f1, 4(r3)
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs1
+; P8BE-NEXT:    xvcvdpsxds v2, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemConsAConvftoll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lfsx f0, 0, r3
+; P8LE-NEXT:    lfs f1, 4(r3)
+; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P8LE-NEXT:    xvcvdpsxds v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %0 = load float, float* %ptr, align 4
   %conv = fptosi float %0 to i64
@@ -3435,34 +4575,41 @@ entry:
   %conv2 = fptosi float %1 to i64
   %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
   ret <2 x i64> %vecinit3
-; P9BE-LABEL: fromDiffMemConsAConvftoll
-; P9LE-LABEL: fromDiffMemConsAConvftoll
-; P8BE-LABEL: fromDiffMemConsAConvftoll
-; P8LE-LABEL: fromDiffMemConsAConvftoll
-; P9BE: lfs
-; P9BE: lfs
-; P9BE: xxmrghd
-; P9BE-NEXT: xvcvdpsxds v2
-; P9BE-NEXT: blr
-; P9LE: lfs
-; P9LE: lfs
-; P9LE: xxmrghd
-; P9LE-NEXT: xvcvdpsxds v2
-; P9LE-NEXT: blr
-; P8BE: lfs
-; P8BE: lfs
-; P8BE: xxmrghd
-; P8BE-NEXT: xvcvdpsxds v2
-; P8BE-NEXT: blr
-; P8LE: lfs
-; P8LE: lfs
-; P8LE: xxmrghd
-; P8LE-NEXT: xvcvdpsxds v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromDiffMemConsDConvftoll(float* nocapture readonly %ptr) {
+; P9BE-LABEL: fromDiffMemConsDConvftoll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lfs f0, 12(r3)
+; P9BE-NEXT:    lfs f1, 8(r3)
+; P9BE-NEXT:    xxmrghd vs0, vs0, vs1
+; P9BE-NEXT:    xvcvdpsxds v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemConsDConvftoll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lfs f0, 12(r3)
+; P9LE-NEXT:    lfs f1, 8(r3)
+; P9LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P9LE-NEXT:    xvcvdpsxds v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemConsDConvftoll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lfs f0, 12(r3)
+; P8BE-NEXT:    lfs f1, 8(r3)
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs1
+; P8BE-NEXT:    xvcvdpsxds v2, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemConsDConvftoll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lfs f0, 12(r3)
+; P8LE-NEXT:    lfs f1, 8(r3)
+; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P8LE-NEXT:    xvcvdpsxds v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds float, float* %ptr, i64 3
   %0 = load float, float* %arrayidx, align 4
@@ -3473,34 +4620,45 @@ entry:
   %conv2 = fptosi float %1 to i64
   %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
   ret <2 x i64> %vecinit3
-; P9BE-LABEL: fromDiffMemConsDConvftoll
-; P9LE-LABEL: fromDiffMemConsDConvftoll
-; P8BE-LABEL: fromDiffMemConsDConvftoll
-; P8LE-LABEL: fromDiffMemConsDConvftoll
-; P9BE: lfs
-; P9BE: lfs
-; P9BE: xxmrghd
-; P9BE-NEXT: xvcvdpsxds v2
-; P9BE-NEXT: blr
-; P9LE: lfs
-; P9LE: lfs
-; P9LE: xxmrghd
-; P9LE-NEXT: xvcvdpsxds v2
-; P9LE-NEXT: blr
-; P8BE: lfs
-; P8BE: lfs
-; P8BE: xxmrghd
-; P8BE-NEXT: xvcvdpsxds v2
-; P8BE-NEXT: blr
-; P8LE: lfs
-; P8LE: lfs
-; P8LE: xxmrghd
-; P8LE-NEXT: xvcvdpsxds v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromDiffMemVarAConvftoll(float* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromDiffMemVarAConvftoll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 2
+; P9BE-NEXT:    lfsux f0, r3, r4
+; P9BE-NEXT:    lfs f1, 4(r3)
+; P9BE-NEXT:    xxmrghd vs0, vs0, vs1
+; P9BE-NEXT:    xvcvdpsxds v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemVarAConvftoll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 2
+; P9LE-NEXT:    lfsux f0, r3, r4
+; P9LE-NEXT:    lfs f1, 4(r3)
+; P9LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P9LE-NEXT:    xvcvdpsxds v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemVarAConvftoll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 2
+; P8BE-NEXT:    lfsux f0, r3, r4
+; P8BE-NEXT:    lfs f1, 4(r3)
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs1
+; P8BE-NEXT:    xvcvdpsxds v2, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemVarAConvftoll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 2
+; P8LE-NEXT:    lfsux f0, r3, r4
+; P8LE-NEXT:    lfs f1, 4(r3)
+; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P8LE-NEXT:    xvcvdpsxds v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %idxprom = sext i32 %elem to i64
   %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom
@@ -3514,38 +4672,45 @@ entry:
   %conv3 = fptosi float %1 to i64
   %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1
   ret <2 x i64> %vecinit4
-; P9BE-LABEL: fromDiffMemVarAConvftoll
-; P9LE-LABEL: fromDiffMemVarAConvftoll
-; P8BE-LABEL: fromDiffMemVarAConvftoll
-; P8LE-LABEL: fromDiffMemVarAConvftoll
-; P9BE: sldi
-; P9BE: lfsux
-; P9BE: lfs
-; P9BE: xxmrghd
-; P9BE-NEXT: xvcvdpsxds v2
-; P9BE-NEXT: blr
-; P9LE: sldi
-; P9LE: lfsux
-; P9LE: lfs
-; P9LE: xxmrghd
-; P9LE-NEXT: xvcvdpsxds v2
-; P9LE-NEXT: blr
-; P8BE: sldi
-; P8BE: lfsux
-; P8BE: lfs
-; P8BE: xxmrghd
-; P8BE-NEXT: xvcvdpsxds v2
-; P8BE-NEXT: blr
-; P8LE: sldi
-; P8LE: lfsux
-; P8LE: lfs
-; P8LE: xxmrghd
-; P8LE-NEXT: xvcvdpsxds v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromDiffMemVarDConvftoll(float* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromDiffMemVarDConvftoll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 2
+; P9BE-NEXT:    lfsux f0, r3, r4
+; P9BE-NEXT:    lfs f1, -4(r3)
+; P9BE-NEXT:    xxmrghd vs0, vs0, vs1
+; P9BE-NEXT:    xvcvdpsxds v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemVarDConvftoll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 2
+; P9LE-NEXT:    lfsux f0, r3, r4
+; P9LE-NEXT:    lfs f1, -4(r3)
+; P9LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P9LE-NEXT:    xvcvdpsxds v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemVarDConvftoll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 2
+; P8BE-NEXT:    lfsux f0, r3, r4
+; P8BE-NEXT:    lfs f1, -4(r3)
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs1
+; P8BE-NEXT:    xvcvdpsxds v2, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemVarDConvftoll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 2
+; P8LE-NEXT:    lfsux f0, r3, r4
+; P8LE-NEXT:    lfs f1, -4(r3)
+; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P8LE-NEXT:    xvcvdpsxds v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %idxprom = sext i32 %elem to i64
   %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom
@@ -3559,181 +4724,249 @@ entry:
   %conv3 = fptosi float %1 to i64
   %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1
   ret <2 x i64> %vecinit4
-; P9BE-LABEL: fromDiffMemVarDConvftoll
-; P9LE-LABEL: fromDiffMemVarDConvftoll
-; P8BE-LABEL: fromDiffMemVarDConvftoll
-; P8LE-LABEL: fromDiffMemVarDConvftoll
-; P9BE: sldi
-; P9BE: lfsux
-; P9BE: lfs
-; P9BE: xxmrghd
-; P9BE-NEXT: xvcvdpsxds v2
-; P9BE-NEXT: blr
-; P9LE: sldi
-; P9LE: lfsux
-; P9LE: lfs
-; P9LE: xxmrghd
-; P9LE-NEXT: xvcvdpsxds v2
-; P9LE-NEXT: blr
-; P8BE: sldi
-; P8BE: lfsux
-; P8BE: lfs
-; P8BE: xxmrghd
-; P8BE-NEXT: xvcvdpsxds v2
-; P8BE-NEXT: blr
-; P8LE: sldi
-; P8LE: lfsux
-; P8LE: lfs
-; P8LE: xxmrghd
-; P8LE-NEXT: xvcvdpsxds v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @spltRegValConvftoll(float %val) {
+; P9BE-LABEL: spltRegValConvftoll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    xscvdpsxds f0, f1
+; P9BE-NEXT:    xxspltd v2, f0, 0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltRegValConvftoll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    xscvdpsxds f0, f1
+; P9LE-NEXT:    xxspltd v2, f0, 0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltRegValConvftoll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    xscvdpsxds f0, f1
+; P8BE-NEXT:    xxspltd v2, f0, 0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltRegValConvftoll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    xscvdpsxds f0, f1
+; P8LE-NEXT:    xxspltd v2, f0, 0
+; P8LE-NEXT:    blr
 entry:
   %conv = fptosi float %val to i64
   %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
   %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
   ret <2 x i64> %splat.splat
-; P9BE-LABEL: spltRegValConvftoll
-; P9LE-LABEL: spltRegValConvftoll
-; P8BE-LABEL: spltRegValConvftoll
-; P8LE-LABEL: spltRegValConvftoll
-; P9BE: xscvdpsxds
-; P9BE-NEXT: xxspltd v2
-; P9BE-NEXT: blr
-; P9LE: xscvdpsxds
-; P9LE-NEXT: xxspltd v2
-; P9LE-NEXT: blr
-; P8BE: xscvdpsxds
-; P8BE-NEXT: xxspltd v2
-; P8BE-NEXT: blr
-; P8LE: xscvdpsxds
-; P8LE-NEXT: xxspltd v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @spltMemValConvftoll(float* nocapture readonly %ptr) {
+; P9BE-LABEL: spltMemValConvftoll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lfs f0, 0(r3)
+; P9BE-NEXT:    xscvdpsxds f0, f0
+; P9BE-NEXT:    xxspltd v2, f0, 0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltMemValConvftoll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lfs f0, 0(r3)
+; P9LE-NEXT:    xscvdpsxds f0, f0
+; P9LE-NEXT:    xxspltd v2, f0, 0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltMemValConvftoll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lfsx f0, 0, r3
+; P8BE-NEXT:    xscvdpsxds f0, f0
+; P8BE-NEXT:    xxspltd v2, f0, 0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltMemValConvftoll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lfsx f0, 0, r3
+; P8LE-NEXT:    xscvdpsxds f0, f0
+; P8LE-NEXT:    xxspltd v2, f0, 0
+; P8LE-NEXT:    blr
 entry:
   %0 = load float, float* %ptr, align 4
   %conv = fptosi float %0 to i64
   %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
   %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
   ret <2 x i64> %splat.splat
-; P9BE-LABEL: spltMemValConvftoll
-; P9LE-LABEL: spltMemValConvftoll
-; P8BE-LABEL: spltMemValConvftoll
-; P8LE-LABEL: spltMemValConvftoll
-; P9BE: lfs
-; P9BE-NEXT: xscvdpsxds
-; P9BE-NEXT: xxspltd v2
-; P9BE-NEXT: blr
-; P9LE: lfs
-; P9LE-NEXT: xscvdpsxds
-; P9LE-NEXT: xxspltd v2
-; P9LE-NEXT: blr
-; P8BE: lfs
-; P8BE-NEXT: xscvdpsxds
-; P8BE-NEXT: xxspltd v2
-; P8BE-NEXT: blr
-; P8LE: lfs
-; P8LE-NEXT: xscvdpsxds
-; P8LE-NEXT: xxspltd v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @spltCnstConvdtoll() {
+; P9BE-LABEL: spltCnstConvdtoll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    addis r3, r2, .LCPI90_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI90_0@toc@l
+; P9BE-NEXT:    lxvx v2, 0, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltCnstConvdtoll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    addis r3, r2, .LCPI90_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI90_0@toc@l
+; P9LE-NEXT:    lxvx v2, 0, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltCnstConvdtoll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addis r3, r2, .LCPI90_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI90_0@toc@l
+; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltCnstConvdtoll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addis r3, r2, .LCPI90_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI90_0@toc@l
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    blr
 entry:
   ret <2 x i64> <i64 4, i64 4>
-; P9BE-LABEL: spltCnstConvdtoll
-; P9LE-LABEL: spltCnstConvdtoll
-; P8BE-LABEL: spltCnstConvdtoll
-; P8LE-LABEL: spltCnstConvdtoll
-; P9BE: lxv
-; P9BE: blr
-; P9LE: lxv
-; P9LE: blr
-; P8BE: lxvd2x
-; P8BE: blr
-; P8LE: lxvd2x
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @fromRegsConvdtoll(double %a, double %b) {
+; P9BE-LABEL: fromRegsConvdtoll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P9BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P9BE-NEXT:    xxmrghd vs0, vs1, vs2
+; P9BE-NEXT:    xvcvdpsxds v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromRegsConvdtoll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P9LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P9LE-NEXT:    xxmrghd vs0, vs2, vs1
+; P9LE-NEXT:    xvcvdpsxds v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromRegsConvdtoll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P8BE-NEXT:    xxmrghd vs0, vs1, vs2
+; P8BE-NEXT:    xvcvdpsxds v2, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromRegsConvdtoll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P8LE-NEXT:    xxmrghd vs0, vs2, vs1
+; P8LE-NEXT:    xvcvdpsxds v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %conv = fptosi double %a to i64
   %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
   %conv1 = fptosi double %b to i64
   %vecinit2 = insertelement <2 x i64> %vecinit, i64 %conv1, i32 1
   ret <2 x i64> %vecinit2
-; P9BE-LABEL: fromRegsConvdtoll
-; P9LE-LABEL: fromRegsConvdtoll
-; P8BE-LABEL: fromRegsConvdtoll
-; P8LE-LABEL: fromRegsConvdtoll
-; P9BE: xxmrghd
-; P9BE-NEXT: xvcvdpsxds
-; P9BE-NEXT: blr
-; P9LE: xxmrghd
-; P9LE-NEXT: xvcvdpsxds
-; P9LE-NEXT: blr
-; P8BE: xxmrghd
-; P8BE-NEXT: xvcvdpsxds
-; P8BE-NEXT: blr
-; P8LE: xxmrghd
-; P8LE-NEXT: xvcvdpsxds
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @fromDiffConstsConvdtoll() {
+; P9BE-LABEL: fromDiffConstsConvdtoll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    addis r3, r2, .LCPI92_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI92_0@toc@l
+; P9BE-NEXT:    lxvx v2, 0, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffConstsConvdtoll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    addis r3, r2, .LCPI92_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI92_0@toc@l
+; P9LE-NEXT:    lxvx v2, 0, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffConstsConvdtoll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addis r3, r2, .LCPI92_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI92_0@toc@l
+; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffConstsConvdtoll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addis r3, r2, .LCPI92_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI92_0@toc@l
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    blr
 entry:
   ret <2 x i64> <i64 24, i64 234>
-; P9BE-LABEL: fromDiffConstsConvdtoll
-; P9LE-LABEL: fromDiffConstsConvdtoll
-; P8BE-LABEL: fromDiffConstsConvdtoll
-; P8LE-LABEL: fromDiffConstsConvdtoll
-; P9BE: lxv
-; P9BE: blr
-; P9LE: lxv
-; P9LE: blr
-; P8BE: lxvd2x
-; P8BE: blr
-; P8LE: lxvd2x
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromDiffMemConsAConvdtoll(double* nocapture readonly %ptr) {
+; P9BE-LABEL: fromDiffMemConsAConvdtoll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lxv vs0, 0(r3)
+; P9BE-NEXT:    xvcvdpsxds v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemConsAConvdtoll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lxv vs0, 0(r3)
+; P9LE-NEXT:    xvcvdpsxds v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemConsAConvdtoll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lxvd2x vs0, 0, r3
+; P8BE-NEXT:    xvcvdpsxds v2, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemConsAConvdtoll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd vs0, vs0
+; P8LE-NEXT:    xvcvdpsxds v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %0 = bitcast double* %ptr to <2 x double>*
   %1 = load <2 x double>, <2 x double>* %0, align 8
   %2 = fptosi <2 x double> %1 to <2 x i64>
   ret <2 x i64> %2
-; P9BE-LABEL: fromDiffMemConsAConvdtoll
-; P9LE-LABEL: fromDiffMemConsAConvdtoll
-; P8BE-LABEL: fromDiffMemConsAConvdtoll
-; P8LE-LABEL: fromDiffMemConsAConvdtoll
-; P9BE: lxv
-; P9BE-NEXT: xvcvdpsxds v2
-; P9BE-NEXT: blr
-; P9LE: lxv
-; P9LE-NEXT: xvcvdpsxds v2
-; P9LE-NEXT: blr
-; P8BE: lxvd2x
-; P8BE-NEXT: xvcvdpsxds v2
-; P8BE-NEXT: blr
-; P8LE: lxvd2x
-; P8LE: xxswapd
-; P8LE-NEXT: xvcvdpsxds v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromDiffMemConsDConvdtoll(double* nocapture readonly %ptr) {
+; P9BE-LABEL: fromDiffMemConsDConvdtoll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lxv vs0, 16(r3)
+; P9BE-NEXT:    xxswapd vs0, vs0
+; P9BE-NEXT:    xvcvdpsxds v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemConsDConvdtoll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lxv vs0, 16(r3)
+; P9LE-NEXT:    xxswapd vs0, vs0
+; P9LE-NEXT:    xvcvdpsxds v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemConsDConvdtoll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addi r3, r3, 16
+; P8BE-NEXT:    lxvd2x vs0, 0, r3
+; P8BE-NEXT:    xxswapd vs0, vs0
+; P8BE-NEXT:    xvcvdpsxds v2, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemConsDConvdtoll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addi r3, r3, 16
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xvcvdpsxds v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds double, double* %ptr, i64 3
   %0 = load double, double* %arrayidx, align 8
@@ -3744,29 +4977,38 @@ entry:
   %conv2 = fptosi double %1 to i64
   %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
   ret <2 x i64> %vecinit3
-; P9BE-LABEL: fromDiffMemConsDConvdtoll
-; P9LE-LABEL: fromDiffMemConsDConvdtoll
-; P8BE-LABEL: fromDiffMemConsDConvdtoll
-; P8LE-LABEL: fromDiffMemConsDConvdtoll
-; P9BE: lxv
-; P9BE-NEXT: xxswapd
-; P9BE-NEXT: xvcvdpsxds v2
-; P9BE-NEXT: blr
-; P9LE: lxv
-; P9LE-NEXT: xxswapd
-; P9LE-NEXT: xvcvdpsxds v2
-; P9LE-NEXT: blr
-; P8BE: lxvd2x
-; P8BE-NEXT: xxswapd
-; P8BE-NEXT: xvcvdpsxds v2
-; P8BE-NEXT: blr
-; P8LE: lxvd2x
-; P8LE-NEXT: xvcvdpsxds v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromDiffMemVarAConvdtoll(double* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromDiffMemVarAConvdtoll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 3
+; P9BE-NEXT:    lxvx vs0, r3, r4
+; P9BE-NEXT:    xvcvdpsxds v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemVarAConvdtoll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 3
+; P9LE-NEXT:    lxvx vs0, r3, r4
+; P9LE-NEXT:    xvcvdpsxds v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemVarAConvdtoll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 3
+; P8BE-NEXT:    lxvd2x vs0, r3, r4
+; P8BE-NEXT:    xvcvdpsxds v2, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemVarAConvdtoll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 3
+; P8LE-NEXT:    lxvd2x vs0, r3, r4
+; P8LE-NEXT:    xxswapd vs0, vs0
+; P8LE-NEXT:    xvcvdpsxds v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %idxprom = sext i32 %elem to i64
   %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
@@ -3780,31 +5022,48 @@ entry:
   %conv3 = fptosi double %1 to i64
   %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1
   ret <2 x i64> %vecinit4
-; P9BE-LABEL: fromDiffMemVarAConvdtoll
-; P9LE-LABEL: fromDiffMemVarAConvdtoll
-; P8BE-LABEL: fromDiffMemVarAConvdtoll
-; P8LE-LABEL: fromDiffMemVarAConvdtoll
-; P9BE: sldi
-; P9BE: lxvx
-; P9BE-NEXT: xvcvdpsxds v2
-; P9BE-NEXT: blr
-; P9LE: sldi
-; P9LE: lxvx
-; P9LE-NEXT: xvcvdpsxds v2
-; P9LE-NEXT: blr
-; P8BE: sldi
-; P8BE: lxvd2x
-; P8BE-NEXT: xvcvdpsxds v2
-; P8BE-NEXT: blr
-; P8LE: sldi
-; P8LE: lxvd2x
-; P8LE-NEXT: xxswapd
-; P8LE-NEXT: xvcvdpsxds v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromDiffMemVarDConvdtoll(double* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromDiffMemVarDConvdtoll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 3
+; P9BE-NEXT:    add r3, r3, r4
+; P9BE-NEXT:    addi r3, r3, -8
+; P9BE-NEXT:    lxvx vs0, 0, r3
+; P9BE-NEXT:    xxswapd vs0, vs0
+; P9BE-NEXT:    xvcvdpsxds v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemVarDConvdtoll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 3
+; P9LE-NEXT:    add r3, r3, r4
+; P9LE-NEXT:    addi r3, r3, -8
+; P9LE-NEXT:    lxvx vs0, 0, r3
+; P9LE-NEXT:    xxswapd vs0, vs0
+; P9LE-NEXT:    xvcvdpsxds v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemVarDConvdtoll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 3
+; P8BE-NEXT:    add r3, r3, r4
+; P8BE-NEXT:    addi r3, r3, -8
+; P8BE-NEXT:    lxvd2x vs0, 0, r3
+; P8BE-NEXT:    xxswapd vs0, vs0
+; P8BE-NEXT:    xvcvdpsxds v2, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemVarDConvdtoll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 3
+; P8LE-NEXT:    add r3, r3, r4
+; P8LE-NEXT:    addi r3, r3, -8
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xvcvdpsxds v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %idxprom = sext i32 %elem to i64
   %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
@@ -3818,216 +5077,312 @@ entry:
   %conv3 = fptosi double %1 to i64
   %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1
   ret <2 x i64> %vecinit4
-; P9BE-LABEL: fromDiffMemVarDConvdtoll
-; P9LE-LABEL: fromDiffMemVarDConvdtoll
-; P8BE-LABEL: fromDiffMemVarDConvdtoll
-; P8LE-LABEL: fromDiffMemVarDConvdtoll
-; P9BE: sldi
-; P9BE: lxv
-; P9BE-NEXT: xxswapd
-; P9BE-NEXT: xvcvdpsxds v2
-; P9BE-NEXT: blr
-; P9LE: sldi
-; P9LE: lxv
-; P9LE-NEXT: xxswapd
-; P9LE-NEXT: xvcvdpsxds v2
-; P9LE-NEXT: blr
-; P8BE: sldi
-; P8BE: lxvd2x
-; P8BE-NEXT: xxswapd
-; P8BE-NEXT: xvcvdpsxds v2
-; P8BE-NEXT: blr
-; P8LE: sldi
-; P8LE: lxvd2x
-; P8LE-NEXT: xvcvdpsxds v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @spltRegValConvdtoll(double %val) {
+; P9BE-LABEL: spltRegValConvdtoll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    xscvdpsxds f0, f1
+; P9BE-NEXT:    xxspltd v2, vs0, 0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltRegValConvdtoll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    xscvdpsxds f0, f1
+; P9LE-NEXT:    xxspltd v2, vs0, 0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltRegValConvdtoll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    xscvdpsxds f0, f1
+; P8BE-NEXT:    xxspltd v2, vs0, 0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltRegValConvdtoll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    xscvdpsxds f0, f1
+; P8LE-NEXT:    xxspltd v2, vs0, 0
+; P8LE-NEXT:    blr
 entry:
   %conv = fptosi double %val to i64
   %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
   %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
   ret <2 x i64> %splat.splat
-; P9BE-LABEL: spltRegValConvdtoll
-; P9LE-LABEL: spltRegValConvdtoll
-; P8BE-LABEL: spltRegValConvdtoll
-; P8LE-LABEL: spltRegValConvdtoll
-; P9BE: xscvdpsxds
-; P9BE-NEXT: xxspltd v2
-; P9BE-NEXT: blr
-; P9LE: xscvdpsxds
-; P9LE-NEXT: xxspltd v2
-; P9LE-NEXT: blr
-; P8BE: xscvdpsxds
-; P8BE-NEXT: xxspltd v2
-; P8BE-NEXT: blr
-; P8LE: xscvdpsxds
-; P8LE-NEXT: xxspltd v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @spltMemValConvdtoll(double* nocapture readonly %ptr) {
+; P9BE-LABEL: spltMemValConvdtoll:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lxvdsx vs0, 0, r3
+; P9BE-NEXT:    xvcvdpsxds v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltMemValConvdtoll:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lxvdsx vs0, 0, r3
+; P9LE-NEXT:    xvcvdpsxds v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltMemValConvdtoll:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lxvdsx vs0, 0, r3
+; P8BE-NEXT:    xvcvdpsxds v2, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltMemValConvdtoll:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lxvdsx vs0, 0, r3
+; P8LE-NEXT:    xvcvdpsxds v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %0 = load double, double* %ptr, align 8
   %conv = fptosi double %0 to i64
   %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
   %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
   ret <2 x i64> %splat.splat
-; P9BE-LABEL: spltMemValConvdtoll
-; P9LE-LABEL: spltMemValConvdtoll
-; P8BE-LABEL: spltMemValConvdtoll
-; P8LE-LABEL: spltMemValConvdtoll
-; P9BE: lxvdsx
-; P9BE-NEXT: xvcvdpsxds
-; P9BE-NEXT: blr
-; P9LE: lxvdsx
-; P9LE-NEXT: xvcvdpsxds
-; P9LE-NEXT: blr
-; P8BE: lxvdsx
-; P8BE-NEXT: xvcvdpsxds
-; P8BE-NEXT: blr
-; P8LE: lxvdsx
-; P8LE-NEXT: xvcvdpsxds
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @allZeroull() {
+; P9BE-LABEL: allZeroull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    xxlxor v2, v2, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: allZeroull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    xxlxor v2, v2, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: allZeroull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    xxlxor v2, v2, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: allZeroull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    xxlxor v2, v2, v2
+; P8LE-NEXT:    blr
 entry:
   ret <2 x i64> zeroinitializer
-; P9BE-LABEL: allZeroull
-; P9LE-LABEL: allZeroull
-; P8BE-LABEL: allZeroull
-; P8LE-LABEL: allZeroull
-; P9BE: xxlxor v2, v2, v2
-; P9BE: blr
-; P9LE: xxlxor v2, v2, v2
-; P9LE: blr
-; P8BE: xxlxor v2, v2, v2
-; P8BE: blr
-; P8LE: xxlxor v2, v2, v2
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @allOneull() {
+; P9BE-LABEL: allOneull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    xxspltib v2, 255
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: allOneull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    xxspltib v2, 255
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: allOneull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    vspltisb v2, -1
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: allOneull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    vspltisb v2, -1
+; P8LE-NEXT:    blr
 entry:
   ret <2 x i64> <i64 -1, i64 -1>
-; P9BE-LABEL: allOneull
-; P9LE-LABEL: allOneull
-; P8BE-LABEL: allOneull
-; P8LE-LABEL: allOneull
-; P9BE: xxspltib v2, 255
-; P9BE: blr
-; P9LE: xxspltib v2, 255
-; P9LE: blr
-; P8BE: vspltisb v2, -1
-; P8BE: blr
-; P8LE: vspltisb v2, -1
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @spltConst1ull() {
+; P9BE-LABEL: spltConst1ull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    addis r3, r2, .LCPI101_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI101_0@toc@l
+; P9BE-NEXT:    lxvx v2, 0, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltConst1ull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    addis r3, r2, .LCPI101_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI101_0@toc@l
+; P9LE-NEXT:    lxvx v2, 0, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltConst1ull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addis r3, r2, .LCPI101_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI101_0@toc@l
+; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltConst1ull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addis r3, r2, .LCPI101_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI101_0@toc@l
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    blr
 entry:
   ret <2 x i64> <i64 1, i64 1>
-; P9BE-LABEL: spltConst1ull
-; P9LE-LABEL: spltConst1ull
-; P8BE-LABEL: spltConst1ull
-; P8LE-LABEL: spltConst1ull
-; P9BE: lxv
-; P9BE: blr
-; P9LE: lxv
-; P9LE: blr
-; P8BE: lxvd2x
-; P8BE: blr
-; P8LE: lxvd2x
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @spltConst16kull() {
+; P9BE-LABEL: spltConst16kull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    addis r3, r2, .LCPI102_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI102_0@toc@l
+; P9BE-NEXT:    lxvx v2, 0, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltConst16kull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    addis r3, r2, .LCPI102_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI102_0@toc@l
+; P9LE-NEXT:    lxvx v2, 0, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltConst16kull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addis r3, r2, .LCPI102_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI102_0@toc@l
+; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltConst16kull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addis r3, r2, .LCPI102_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI102_0@toc@l
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    blr
 entry:
   ret <2 x i64> <i64 32767, i64 32767>
-; P9BE-LABEL: spltConst16kull
-; P9LE-LABEL: spltConst16kull
-; P8BE-LABEL: spltConst16kull
-; P8LE-LABEL: spltConst16kull
-; P9BE: lxv
-; P9BE: blr
-; P9LE: lxv
-; P9LE: blr
-; P8BE: lxvd2x
-; P8BE: blr
-; P8LE: lxvd2x
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @spltConst32kull() {
+; P9BE-LABEL: spltConst32kull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    addis r3, r2, .LCPI103_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI103_0@toc@l
+; P9BE-NEXT:    lxvx v2, 0, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltConst32kull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    addis r3, r2, .LCPI103_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI103_0@toc@l
+; P9LE-NEXT:    lxvx v2, 0, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltConst32kull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addis r3, r2, .LCPI103_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI103_0@toc@l
+; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltConst32kull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addis r3, r2, .LCPI103_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI103_0@toc@l
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    blr
 entry:
   ret <2 x i64> <i64 65535, i64 65535>
-; P9BE-LABEL: spltConst32kull
-; P9LE-LABEL: spltConst32kull
-; P8BE-LABEL: spltConst32kull
-; P8LE-LABEL: spltConst32kull
-; P9BE: lxv
-; P9BE: blr
-; P9LE: lxv
-; P9LE: blr
-; P8BE: lxvd2x
-; P8BE: blr
-; P8LE: lxvd2x
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @fromRegsull(i64 %a, i64 %b) {
+; P9BE-LABEL: fromRegsull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    mtvsrdd v2, r3, r4
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromRegsull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    mtvsrdd v2, r4, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromRegsull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    mtvsrd f0, r4
+; P8BE-NEXT:    mtvsrd f1, r3
+; P8BE-NEXT:    xxmrghd v2, vs1, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromRegsull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    mtvsrd f0, r3
+; P8LE-NEXT:    mtvsrd f1, r4
+; P8LE-NEXT:    xxmrghd v2, vs1, vs0
+; P8LE-NEXT:    blr
 entry:
   %vecinit = insertelement <2 x i64> undef, i64 %a, i32 0
   %vecinit1 = insertelement <2 x i64> %vecinit, i64 %b, i32 1
   ret <2 x i64> %vecinit1
-; P9BE-LABEL: fromRegsull
-; P9LE-LABEL: fromRegsull
-; P8BE-LABEL: fromRegsull
-; P8LE-LABEL: fromRegsull
-; P9BE: mtvsrdd v2, r3, r4
-; P9BE: blr
-; P9LE: mtvsrdd v2, r4, r3
-; P9LE: blr
-; P8BE-DAG: mtvsrd {{[vsf0-9]+}}, r3
-; P8BE-DAG: mtvsrd {{[vsf0-9]+}}, r4
-; P8BE: xxmrghd v2
-; P8BE: blr
-; P8LE-DAG: mtvsrd {{[vsf0-9]+}}, r3
-; P8LE-DAG: mtvsrd {{[vsf0-9]+}}, r4
-; P8LE: xxmrghd v2
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @fromDiffConstsull() {
+; P9BE-LABEL: fromDiffConstsull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    addis r3, r2, .LCPI105_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI105_0@toc@l
+; P9BE-NEXT:    lxvx v2, 0, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffConstsull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    addis r3, r2, .LCPI105_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI105_0@toc@l
+; P9LE-NEXT:    lxvx v2, 0, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffConstsull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addis r3, r2, .LCPI105_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI105_0@toc@l
+; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffConstsull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addis r3, r2, .LCPI105_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI105_0@toc@l
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    blr
 entry:
   ret <2 x i64> <i64 242, i64 -113>
-; P9BE-LABEL: fromDiffConstsull
-; P9LE-LABEL: fromDiffConstsull
-; P8BE-LABEL: fromDiffConstsull
-; P8LE-LABEL: fromDiffConstsull
-; P9BE: lxv
-; P9BE: blr
-; P9LE: lxv
-; P9LE: blr
-; P8BE: lxvd2x
-; P8BE: blr
-; P8LE: lxvd2x
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromDiffMemConsAull(i64* nocapture readonly %arr) {
+; P9BE-LABEL: fromDiffMemConsAull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lxv v2, 0(r3)
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemConsAull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lxv v2, 0(r3)
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemConsAull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemConsAull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %0 = load i64, i64* %arr, align 8
   %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0
@@ -4035,23 +5390,34 @@ entry:
   %1 = load i64, i64* %arrayidx1, align 8
   %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
   ret <2 x i64> %vecinit2
-; P9BE-LABEL: fromDiffMemConsAull
-; P9LE-LABEL: fromDiffMemConsAull
-; P8BE-LABEL: fromDiffMemConsAull
-; P8LE-LABEL: fromDiffMemConsAull
-; P9BE: lxv v2
-; P9BE: blr
-; P9LE: lxv v2
-; P9LE: blr
-; P8BE: lxvd2x v2
-; P8BE: blr
-; P8LE: lxvd2x
-; P8LE: xxswapd v2
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromDiffMemConsDull(i64* nocapture readonly %arr) {
+; P9BE-LABEL: fromDiffMemConsDull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lxv v2, 16(r3)
+; P9BE-NEXT:    xxswapd v2, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemConsDull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lxv v2, 16(r3)
+; P9LE-NEXT:    xxswapd v2, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemConsDull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addi r3, r3, 16
+; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    xxswapd v2, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemConsDull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addi r3, r3, 16
+; P8LE-NEXT:    lxvd2x v2, 0, r3
+; P8LE-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds i64, i64* %arr, i64 3
   %0 = load i64, i64* %arrayidx, align 8
@@ -4060,24 +5426,34 @@ entry:
   %1 = load i64, i64* %arrayidx1, align 8
   %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
   ret <2 x i64> %vecinit2
-; P9BE-LABEL: fromDiffMemConsDull
-; P9LE-LABEL: fromDiffMemConsDull
-; P8BE-LABEL: fromDiffMemConsDull
-; P8LE-LABEL: fromDiffMemConsDull
-; P9BE: lxv v2
-; P9BE: blr
-; P9LE: lxv
-; P9LE: xxswapd v2
-; P9LE: blr
-; P8BE: lxvd2x
-; P8BE: xxswapd v2
-; P8BE-NEXT: blr
-; P8LE: lxvd2x v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromDiffMemVarAull(i64* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromDiffMemVarAull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 3
+; P9BE-NEXT:    lxvx v2, r3, r4
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemVarAull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 3
+; P9LE-NEXT:    lxvx v2, r3, r4
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemVarAull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 3
+; P8BE-NEXT:    lxvd2x v2, r3, r4
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemVarAull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 3
+; P8LE-NEXT:    lxvd2x vs0, r3, r4
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %idxprom = sext i32 %elem to i64
   %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom
@@ -4089,27 +5465,44 @@ entry:
   %1 = load i64, i64* %arrayidx2, align 8
   %vecinit3 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
   ret <2 x i64> %vecinit3
-; P9BE-LABEL: fromDiffMemVarAull
-; P9LE-LABEL: fromDiffMemVarAull
-; P8BE-LABEL: fromDiffMemVarAull
-; P8LE-LABEL: fromDiffMemVarAull
-; P9BE: sldi
-; P9BE: lxvx v2
-; P9BE-NEXT: blr
-; P9LE: sldi
-; P9LE: lxvx v2
-; P9LE-NEXT: blr
-; P8BE: sldi
-; P8BE: lxvd2x v2
-; P8BE-NEXT: blr
-; P8LE: sldi
-; P8LE: lxvd2x
-; P8LE: xxswapd v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromDiffMemVarDull(i64* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromDiffMemVarDull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 3
+; P9BE-NEXT:    add r3, r3, r4
+; P9BE-NEXT:    addi r3, r3, -8
+; P9BE-NEXT:    lxvx v2, 0, r3
+; P9BE-NEXT:    xxswapd v2, v2
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemVarDull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 3
+; P9LE-NEXT:    add r3, r3, r4
+; P9LE-NEXT:    addi r3, r3, -8
+; P9LE-NEXT:    lxvx v2, 0, r3
+; P9LE-NEXT:    xxswapd v2, v2
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemVarDull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 3
+; P8BE-NEXT:    add r3, r3, r4
+; P8BE-NEXT:    addi r3, r3, -8
+; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    xxswapd v2, v2
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemVarDull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 3
+; P8LE-NEXT:    add r3, r3, r4
+; P8LE-NEXT:    addi r3, r3, -8
+; P8LE-NEXT:    lxvd2x v2, 0, r3
+; P8LE-NEXT:    blr
 entry:
   %idxprom = sext i32 %elem to i64
   %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom
@@ -4121,29 +5514,41 @@ entry:
   %1 = load i64, i64* %arrayidx2, align 8
   %vecinit3 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
   ret <2 x i64> %vecinit3
-; P9BE-LABEL: fromDiffMemVarDull
-; P9LE-LABEL: fromDiffMemVarDull
-; P8BE-LABEL: fromDiffMemVarDull
-; P8LE-LABEL: fromDiffMemVarDull
-; P9BE: sldi
-; P9BE: lxv
-; P9BE: xxswapd v2
-; P9BE-NEXT: blr
-; P9LE: sldi
-; P9LE: lxv
-; P9LE: xxswapd v2
-; P9LE-NEXT: blr
-; P8BE: sldi
-; P8BE: lxvd2x
-; P8BE: xxswapd v2
-; P8BE-NEXT: blr
-; P8LE: sldi
-; P8LE: lxvd2x v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromRandMemConsull(i64* nocapture readonly %arr) {
+; P9BE-LABEL: fromRandMemConsull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    ld r4, 32(r3)
+; P9BE-NEXT:    ld r3, 144(r3)
+; P9BE-NEXT:    mtvsrdd v2, r4, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromRandMemConsull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    ld r4, 32(r3)
+; P9LE-NEXT:    ld r3, 144(r3)
+; P9LE-NEXT:    mtvsrdd v2, r3, r4
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromRandMemConsull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    ld r4, 144(r3)
+; P8BE-NEXT:    ld r3, 32(r3)
+; P8BE-NEXT:    mtvsrd f0, r4
+; P8BE-NEXT:    mtvsrd f1, r3
+; P8BE-NEXT:    xxmrghd v2, vs1, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromRandMemConsull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    ld r4, 32(r3)
+; P8LE-NEXT:    ld r3, 144(r3)
+; P8LE-NEXT:    mtvsrd f0, r4
+; P8LE-NEXT:    mtvsrd f1, r3
+; P8LE-NEXT:    xxmrghd v2, vs1, vs0
+; P8LE-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds i64, i64* %arr, i64 4
   %0 = load i64, i64* %arrayidx, align 8
@@ -4152,34 +5557,49 @@ entry:
   %1 = load i64, i64* %arrayidx1, align 8
   %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
   ret <2 x i64> %vecinit2
-; P9BE-LABEL: fromRandMemConsull
-; P9LE-LABEL: fromRandMemConsull
-; P8BE-LABEL: fromRandMemConsull
-; P8LE-LABEL: fromRandMemConsull
-; P9BE: ld
-; P9BE: ld
-; P9BE: mtvsrdd v2
-; P9BE-NEXT: blr
-; P9LE: ld
-; P9LE: ld
-; P9LE: mtvsrdd v2
-; P9LE-NEXT: blr
-; P8BE: ld
-; P8BE: ld
-; P8BE-DAG: mtvsrd
-; P8BE-DAG: mtvsrd
-; P8BE: xxmrghd v2
-; P8BE-NEXT: blr
-; P8LE: ld
-; P8LE: ld
-; P8LE-DAG: mtvsrd
-; P8LE-DAG: mtvsrd
-; P8LE: xxmrghd v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromRandMemVarull(i64* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromRandMemVarull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 3
+; P9BE-NEXT:    add r3, r3, r4
+; P9BE-NEXT:    ld r4, 32(r3)
+; P9BE-NEXT:    ld r3, 8(r3)
+; P9BE-NEXT:    mtvsrdd v2, r4, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromRandMemVarull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 3
+; P9LE-NEXT:    add r3, r3, r4
+; P9LE-NEXT:    ld r4, 32(r3)
+; P9LE-NEXT:    ld r3, 8(r3)
+; P9LE-NEXT:    mtvsrdd v2, r3, r4
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromRandMemVarull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 3
+; P8BE-NEXT:    add r3, r3, r4
+; P8BE-NEXT:    ld r4, 8(r3)
+; P8BE-NEXT:    ld r3, 32(r3)
+; P8BE-NEXT:    mtvsrd f0, r4
+; P8BE-NEXT:    mtvsrd f1, r3
+; P8BE-NEXT:    xxmrghd v2, vs1, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromRandMemVarull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 3
+; P8LE-NEXT:    add r3, r3, r4
+; P8LE-NEXT:    ld r4, 32(r3)
+; P8LE-NEXT:    ld r3, 8(r3)
+; P8LE-NEXT:    mtvsrd f0, r4
+; P8LE-NEXT:    mtvsrd f1, r3
+; P8LE-NEXT:    xxmrghd v2, vs1, vs0
+; P8LE-NEXT:    blr
 entry:
   %add = add nsw i32 %elem, 4
   %idxprom = sext i32 %add to i64
@@ -4192,144 +5612,207 @@ entry:
   %1 = load i64, i64* %arrayidx3, align 8
   %vecinit4 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
   ret <2 x i64> %vecinit4
-; P9BE-LABEL: fromRandMemVarull
-; P9LE-LABEL: fromRandMemVarull
-; P8BE-LABEL: fromRandMemVarull
-; P8LE-LABEL: fromRandMemVarull
-; P9BE: sldi
-; P9BE: ld
-; P9BE: ld
-; P9BE: mtvsrdd v2
-; P9BE-NEXT: blr
-; P9LE: sldi
-; P9LE: ld
-; P9LE: ld
-; P9LE: mtvsrdd v2
-; P9LE-NEXT: blr
-; P8BE: sldi
-; P8BE: ld
-; P8BE: ld
-; P8BE: mtvsrd
-; P8BE: mtvsrd
-; P8BE: xxmrghd v2
-; P8BE-NEXT: blr
-; P8LE: sldi
-; P8LE: ld
-; P8LE: ld
-; P8LE: mtvsrd
-; P8LE: mtvsrd
-; P8LE: xxmrghd v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @spltRegValull(i64 %val) {
+; P9BE-LABEL: spltRegValull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    mtvsrdd v2, r3, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltRegValull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    mtvsrdd v2, r3, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltRegValull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    mtvsrd f0, r3
+; P8BE-NEXT:    xxspltd v2, vs0, 0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltRegValull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    mtvsrd f0, r3
+; P8LE-NEXT:    xxspltd v2, vs0, 0
+; P8LE-NEXT:    blr
 entry:
   %splat.splatinsert = insertelement <2 x i64> undef, i64 %val, i32 0
   %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
   ret <2 x i64> %splat.splat
-; P9BE-LABEL: spltRegValull
-; P9LE-LABEL: spltRegValull
-; P8BE-LABEL: spltRegValull
-; P8LE-LABEL: spltRegValull
-; P9BE: mtvsrdd v2, r3, r3
-; P9BE-NEXT: blr
-; P9LE: mtvsrdd v2, r3, r3
-; P9LE-NEXT: blr
-; P8BE: mtvsrd {{[vsf]+}}[[REG1:[0-9]+]], r3
-; P8BE: xxspltd v2, {{[vsf]+}}[[REG1]], 0
-; P8BE-NEXT: blr
-; P8LE: mtvsrd {{[vsf]+}}[[REG1:[0-9]+]], r3
-; P8LE: xxspltd v2, {{[vsf]+}}[[REG1]], 0
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @spltMemValull(i64* nocapture readonly %ptr) {
+; P9BE-LABEL: spltMemValull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lxvdsx v2, 0, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltMemValull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lxvdsx v2, 0, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltMemValull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lxvdsx v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltMemValull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lxvdsx v2, 0, r3
+; P8LE-NEXT:    blr
 entry:
   %0 = load i64, i64* %ptr, align 8
   %splat.splatinsert = insertelement <2 x i64> undef, i64 %0, i32 0
   %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
   ret <2 x i64> %splat.splat
-; P9BE-LABEL: spltMemValull
-; P9LE-LABEL: spltMemValull
-; P8BE-LABEL: spltMemValull
-; P8LE-LABEL: spltMemValull
-; P9BE: lxvdsx v2
-; P9BE-NEXT: blr
-; P9LE: lxvdsx v2
-; P9LE-NEXT: blr
-; P8BE: lxvdsx v2
-; P8BE-NEXT: blr
-; P8LE: lxvdsx v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @spltCnstConvftoull() {
+; P9BE-LABEL: spltCnstConvftoull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    addis r3, r2, .LCPI114_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI114_0@toc@l
+; P9BE-NEXT:    lxvx v2, 0, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltCnstConvftoull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    addis r3, r2, .LCPI114_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI114_0@toc@l
+; P9LE-NEXT:    lxvx v2, 0, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltCnstConvftoull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addis r3, r2, .LCPI114_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI114_0@toc@l
+; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltCnstConvftoull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addis r3, r2, .LCPI114_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI114_0@toc@l
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    blr
 entry:
   ret <2 x i64> <i64 4, i64 4>
-; P9BE-LABEL: spltCnstConvftoull
-; P9LE-LABEL: spltCnstConvftoull
-; P8BE-LABEL: spltCnstConvftoull
-; P8LE-LABEL: spltCnstConvftoull
-; P9BE: lxv
-; P9BE: blr
-; P9LE: lxv
-; P9LE: blr
-; P8BE: lxvd2x
-; P8BE: blr
-; P8LE: lxvd2x
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @fromRegsConvftoull(float %a, float %b) {
+; P9BE-LABEL: fromRegsConvftoull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P9BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P9BE-NEXT:    xxmrghd vs0, vs1, vs2
+; P9BE-NEXT:    xvcvdpuxds v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromRegsConvftoull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P9LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P9LE-NEXT:    xxmrghd vs0, vs2, vs1
+; P9LE-NEXT:    xvcvdpuxds v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromRegsConvftoull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P8BE-NEXT:    xxmrghd vs0, vs1, vs2
+; P8BE-NEXT:    xvcvdpuxds v2, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromRegsConvftoull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P8LE-NEXT:    xxmrghd vs0, vs2, vs1
+; P8LE-NEXT:    xvcvdpuxds v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %conv = fptoui float %a to i64
   %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
   %conv1 = fptoui float %b to i64
   %vecinit2 = insertelement <2 x i64> %vecinit, i64 %conv1, i32 1
   ret <2 x i64> %vecinit2
-; P9BE-LABEL: fromRegsConvftoull
-; P9LE-LABEL: fromRegsConvftoull
-; P8BE-LABEL: fromRegsConvftoull
-; P8LE-LABEL: fromRegsConvftoull
-; P9BE: xxmrghd
-; P9BE: xvcvdpuxds v2
-; P9BE-NEXT: blr
-; P9LE: xxmrghd
-; P9LE: xvcvdpuxds v2
-; P9LE-NEXT: blr
-; P8BE: xxmrghd
-; P8BE: xvcvdpuxds v2
-; P8BE-NEXT: blr
-; P8LE: xxmrghd
-; P8LE: xvcvdpuxds v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @fromDiffConstsConvftoull() {
+; P9BE-LABEL: fromDiffConstsConvftoull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    addis r3, r2, .LCPI116_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI116_0@toc@l
+; P9BE-NEXT:    lxvx v2, 0, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffConstsConvftoull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    addis r3, r2, .LCPI116_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI116_0@toc@l
+; P9LE-NEXT:    lxvx v2, 0, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffConstsConvftoull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addis r3, r2, .LCPI116_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI116_0@toc@l
+; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffConstsConvftoull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addis r3, r2, .LCPI116_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI116_0@toc@l
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    blr
 entry:
   ret <2 x i64> <i64 24, i64 234>
-; P9BE-LABEL: fromDiffConstsConvftoull
-; P9LE-LABEL: fromDiffConstsConvftoull
-; P8BE-LABEL: fromDiffConstsConvftoull
-; P8LE-LABEL: fromDiffConstsConvftoull
-; P9BE: lxvx v2
-; P9BE: blr
-; P9LE: lxvx v2
-; P9LE: blr
-; P8BE: lxvd2x v2
-; P8BE: blr
-; P8LE: lxvd2x
-; P8LE: xxswapd v2
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromDiffMemConsAConvftoull(float* nocapture readonly %ptr) {
+; P9BE-LABEL: fromDiffMemConsAConvftoull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lfs f0, 0(r3)
+; P9BE-NEXT:    lfs f1, 4(r3)
+; P9BE-NEXT:    xxmrghd vs0, vs0, vs1
+; P9BE-NEXT:    xvcvdpuxds v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemConsAConvftoull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lfs f0, 0(r3)
+; P9LE-NEXT:    lfs f1, 4(r3)
+; P9LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P9LE-NEXT:    xvcvdpuxds v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemConsAConvftoull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lfsx f0, 0, r3
+; P8BE-NEXT:    lfs f1, 4(r3)
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs1
+; P8BE-NEXT:    xvcvdpuxds v2, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemConsAConvftoull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lfsx f0, 0, r3
+; P8LE-NEXT:    lfs f1, 4(r3)
+; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P8LE-NEXT:    xvcvdpuxds v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %0 = load float, float* %ptr, align 4
   %conv = fptoui float %0 to i64
@@ -4339,34 +5822,41 @@ entry:
   %conv2 = fptoui float %1 to i64
   %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
   ret <2 x i64> %vecinit3
-; P9BE-LABEL: fromDiffMemConsAConvftoull
-; P9LE-LABEL: fromDiffMemConsAConvftoull
-; P8BE-LABEL: fromDiffMemConsAConvftoull
-; P8LE-LABEL: fromDiffMemConsAConvftoull
-; P9BE: lfs
-; P9BE: lfs
-; P9BE: xxmrghd
-; P9BE-NEXT: xvcvdpuxds v2
-; P9BE-NEXT: blr
-; P9LE: lfs
-; P9LE: lfs
-; P9LE: xxmrghd
-; P9LE-NEXT: xvcvdpuxds v2
-; P9LE-NEXT: blr
-; P8BE: lfs
-; P8BE: lfs
-; P8BE: xxmrghd
-; P8BE-NEXT: xvcvdpuxds v2
-; P8BE-NEXT: blr
-; P8LE: lfs
-; P8LE: lfs
-; P8LE: xxmrghd
-; P8LE-NEXT: xvcvdpuxds v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromDiffMemConsDConvftoull(float* nocapture readonly %ptr) {
+; P9BE-LABEL: fromDiffMemConsDConvftoull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lfs f0, 12(r3)
+; P9BE-NEXT:    lfs f1, 8(r3)
+; P9BE-NEXT:    xxmrghd vs0, vs0, vs1
+; P9BE-NEXT:    xvcvdpuxds v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemConsDConvftoull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lfs f0, 12(r3)
+; P9LE-NEXT:    lfs f1, 8(r3)
+; P9LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P9LE-NEXT:    xvcvdpuxds v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemConsDConvftoull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lfs f0, 12(r3)
+; P8BE-NEXT:    lfs f1, 8(r3)
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs1
+; P8BE-NEXT:    xvcvdpuxds v2, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemConsDConvftoull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lfs f0, 12(r3)
+; P8LE-NEXT:    lfs f1, 8(r3)
+; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P8LE-NEXT:    xvcvdpuxds v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds float, float* %ptr, i64 3
   %0 = load float, float* %arrayidx, align 4
@@ -4377,34 +5867,45 @@ entry:
   %conv2 = fptoui float %1 to i64
   %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
   ret <2 x i64> %vecinit3
-; P9BE-LABEL: fromDiffMemConsDConvftoull
-; P9LE-LABEL: fromDiffMemConsDConvftoull
-; P8BE-LABEL: fromDiffMemConsDConvftoull
-; P8LE-LABEL: fromDiffMemConsDConvftoull
-; P9BE: lfs
-; P9BE: lfs
-; P9BE: xxmrghd
-; P9BE-NEXT: xvcvdpuxds v2
-; P9BE-NEXT: blr
-; P9LE: lfs
-; P9LE: lfs
-; P9LE: xxmrghd
-; P9LE-NEXT: xvcvdpuxds v2
-; P9LE-NEXT: blr
-; P8BE: lfs
-; P8BE: lfs
-; P8BE: xxmrghd
-; P8BE-NEXT: xvcvdpuxds v2
-; P8BE-NEXT: blr
-; P8LE: lfs
-; P8LE: lfs
-; P8LE: xxmrghd
-; P8LE-NEXT: xvcvdpuxds v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromDiffMemVarAConvftoull(float* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromDiffMemVarAConvftoull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 2
+; P9BE-NEXT:    lfsux f0, r3, r4
+; P9BE-NEXT:    lfs f1, 4(r3)
+; P9BE-NEXT:    xxmrghd vs0, vs0, vs1
+; P9BE-NEXT:    xvcvdpuxds v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemVarAConvftoull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 2
+; P9LE-NEXT:    lfsux f0, r3, r4
+; P9LE-NEXT:    lfs f1, 4(r3)
+; P9LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P9LE-NEXT:    xvcvdpuxds v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemVarAConvftoull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 2
+; P8BE-NEXT:    lfsux f0, r3, r4
+; P8BE-NEXT:    lfs f1, 4(r3)
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs1
+; P8BE-NEXT:    xvcvdpuxds v2, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemVarAConvftoull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 2
+; P8LE-NEXT:    lfsux f0, r3, r4
+; P8LE-NEXT:    lfs f1, 4(r3)
+; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P8LE-NEXT:    xvcvdpuxds v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %idxprom = sext i32 %elem to i64
   %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom
@@ -4418,38 +5919,45 @@ entry:
   %conv3 = fptoui float %1 to i64
   %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1
   ret <2 x i64> %vecinit4
-; P9BE-LABEL: fromDiffMemVarAConvftoull
-; P9LE-LABEL: fromDiffMemVarAConvftoull
-; P8BE-LABEL: fromDiffMemVarAConvftoull
-; P8LE-LABEL: fromDiffMemVarAConvftoull
-; P9BE: sldi
-; P9BE: lfsux
-; P9BE: lfs
-; P9BE: xxmrghd
-; P9BE-NEXT: xvcvdpuxds v2
-; P9BE-NEXT: blr
-; P9LE: sldi
-; P9LE: lfsux
-; P9LE: lfs
-; P9LE: xxmrghd
-; P9LE-NEXT: xvcvdpuxds v2
-; P9LE-NEXT: blr
-; P8BE: sldi
-; P8BE: lfsux
-; P8BE: lfs
-; P8BE: xxmrghd
-; P8BE-NEXT: xvcvdpuxds v2
-; P8BE-NEXT: blr
-; P8LE: sldi
-; P8LE: lfsux
-; P8LE: lfs
-; P8LE: xxmrghd
-; P8LE-NEXT: xvcvdpuxds v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromDiffMemVarDConvftoull(float* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromDiffMemVarDConvftoull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 2
+; P9BE-NEXT:    lfsux f0, r3, r4
+; P9BE-NEXT:    lfs f1, -4(r3)
+; P9BE-NEXT:    xxmrghd vs0, vs0, vs1
+; P9BE-NEXT:    xvcvdpuxds v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemVarDConvftoull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 2
+; P9LE-NEXT:    lfsux f0, r3, r4
+; P9LE-NEXT:    lfs f1, -4(r3)
+; P9LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P9LE-NEXT:    xvcvdpuxds v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemVarDConvftoull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 2
+; P8BE-NEXT:    lfsux f0, r3, r4
+; P8BE-NEXT:    lfs f1, -4(r3)
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs1
+; P8BE-NEXT:    xvcvdpuxds v2, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemVarDConvftoull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 2
+; P8LE-NEXT:    lfsux f0, r3, r4
+; P8LE-NEXT:    lfs f1, -4(r3)
+; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
+; P8LE-NEXT:    xvcvdpuxds v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %idxprom = sext i32 %elem to i64
   %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom
@@ -4463,181 +5971,249 @@ entry:
   %conv3 = fptoui float %1 to i64
   %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1
   ret <2 x i64> %vecinit4
-; P9BE-LABEL: fromDiffMemVarDConvftoull
-; P9LE-LABEL: fromDiffMemVarDConvftoull
-; P8BE-LABEL: fromDiffMemVarDConvftoull
-; P8LE-LABEL: fromDiffMemVarDConvftoull
-; P9BE: sldi
-; P9BE: lfsux
-; P9BE: lfs
-; P9BE: xxmrghd
-; P9BE-NEXT: xvcvdpuxds v2
-; P9BE-NEXT: blr
-; P9LE: sldi
-; P9LE: lfsux
-; P9LE: lfs
-; P9LE: xxmrghd
-; P9LE-NEXT: xvcvdpuxds v2
-; P9LE-NEXT: blr
-; P8BE: sldi
-; P8BE: lfsux
-; P8BE: lfs
-; P8BE: xxmrghd
-; P8BE-NEXT: xvcvdpuxds v2
-; P8BE-NEXT: blr
-; P8LE: sldi
-; P8LE: lfsux
-; P8LE: lfs
-; P8LE: xxmrghd
-; P8LE-NEXT: xvcvdpuxds v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @spltRegValConvftoull(float %val) {
+; P9BE-LABEL: spltRegValConvftoull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    xscvdpuxds f0, f1
+; P9BE-NEXT:    xxspltd v2, f0, 0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltRegValConvftoull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    xscvdpuxds f0, f1
+; P9LE-NEXT:    xxspltd v2, f0, 0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltRegValConvftoull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    xscvdpuxds f0, f1
+; P8BE-NEXT:    xxspltd v2, f0, 0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltRegValConvftoull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    xscvdpuxds f0, f1
+; P8LE-NEXT:    xxspltd v2, f0, 0
+; P8LE-NEXT:    blr
 entry:
   %conv = fptoui float %val to i64
   %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
   %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
   ret <2 x i64> %splat.splat
-; P9BE-LABEL: spltRegValConvftoull
-; P9LE-LABEL: spltRegValConvftoull
-; P8BE-LABEL: spltRegValConvftoull
-; P8LE-LABEL: spltRegValConvftoull
-; P9BE: xscvdpuxds
-; P9BE-NEXT: xxspltd v2
-; P9BE-NEXT: blr
-; P9LE: xscvdpuxds
-; P9LE-NEXT: xxspltd v2
-; P9LE-NEXT: blr
-; P8BE: xscvdpuxds
-; P8BE-NEXT: xxspltd v2
-; P8BE-NEXT: blr
-; P8LE: xscvdpuxds
-; P8LE-NEXT: xxspltd v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @spltMemValConvftoull(float* nocapture readonly %ptr) {
+; P9BE-LABEL: spltMemValConvftoull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lfs f0, 0(r3)
+; P9BE-NEXT:    xscvdpuxds f0, f0
+; P9BE-NEXT:    xxspltd v2, f0, 0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltMemValConvftoull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lfs f0, 0(r3)
+; P9LE-NEXT:    xscvdpuxds f0, f0
+; P9LE-NEXT:    xxspltd v2, f0, 0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltMemValConvftoull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lfsx f0, 0, r3
+; P8BE-NEXT:    xscvdpuxds f0, f0
+; P8BE-NEXT:    xxspltd v2, f0, 0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltMemValConvftoull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lfsx f0, 0, r3
+; P8LE-NEXT:    xscvdpuxds f0, f0
+; P8LE-NEXT:    xxspltd v2, f0, 0
+; P8LE-NEXT:    blr
 entry:
   %0 = load float, float* %ptr, align 4
   %conv = fptoui float %0 to i64
   %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
   %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
   ret <2 x i64> %splat.splat
-; P9BE-LABEL: spltMemValConvftoull
-; P9LE-LABEL: spltMemValConvftoull
-; P8BE-LABEL: spltMemValConvftoull
-; P8LE-LABEL: spltMemValConvftoull
-; P9BE: lfs
-; P9BE-NEXT: xscvdpuxds
-; P9BE-NEXT: xxspltd v2
-; P9BE-NEXT: blr
-; P9LE: lfs
-; P9LE-NEXT: xscvdpuxds
-; P9LE-NEXT: xxspltd v2
-; P9LE-NEXT: blr
-; P8BE: lfs
-; P8BE-NEXT: xscvdpuxds
-; P8BE-NEXT: xxspltd v2
-; P8BE-NEXT: blr
-; P8LE: lfs
-; P8LE-NEXT: xscvdpuxds
-; P8LE-NEXT: xxspltd v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @spltCnstConvdtoull() {
+; P9BE-LABEL: spltCnstConvdtoull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    addis r3, r2, .LCPI123_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI123_0@toc@l
+; P9BE-NEXT:    lxvx v2, 0, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltCnstConvdtoull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    addis r3, r2, .LCPI123_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI123_0@toc@l
+; P9LE-NEXT:    lxvx v2, 0, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltCnstConvdtoull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addis r3, r2, .LCPI123_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI123_0@toc@l
+; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltCnstConvdtoull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addis r3, r2, .LCPI123_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI123_0@toc@l
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    blr
 entry:
   ret <2 x i64> <i64 4, i64 4>
-; P9BE-LABEL: spltCnstConvdtoull
-; P9LE-LABEL: spltCnstConvdtoull
-; P8BE-LABEL: spltCnstConvdtoull
-; P8LE-LABEL: spltCnstConvdtoull
-; P9BE: lxv
-; P9BE: blr
-; P9LE: lxv
-; P9LE: blr
-; P8BE: lxvd2x
-; P8BE: blr
-; P8LE: lxvd2x
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @fromRegsConvdtoull(double %a, double %b) {
+; P9BE-LABEL: fromRegsConvdtoull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P9BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P9BE-NEXT:    xxmrghd vs0, vs1, vs2
+; P9BE-NEXT:    xvcvdpuxds v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromRegsConvdtoull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P9LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P9LE-NEXT:    xxmrghd vs0, vs2, vs1
+; P9LE-NEXT:    xvcvdpuxds v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromRegsConvdtoull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P8BE-NEXT:    xxmrghd vs0, vs1, vs2
+; P8BE-NEXT:    xvcvdpuxds v2, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromRegsConvdtoull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P8LE-NEXT:    xxmrghd vs0, vs2, vs1
+; P8LE-NEXT:    xvcvdpuxds v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %conv = fptoui double %a to i64
   %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
   %conv1 = fptoui double %b to i64
   %vecinit2 = insertelement <2 x i64> %vecinit, i64 %conv1, i32 1
   ret <2 x i64> %vecinit2
-; P9BE-LABEL: fromRegsConvdtoull
-; P9LE-LABEL: fromRegsConvdtoull
-; P8BE-LABEL: fromRegsConvdtoull
-; P8LE-LABEL: fromRegsConvdtoull
-; P9BE: xxmrghd
-; P9BE-NEXT: xvcvdpuxds
-; P9BE-NEXT: blr
-; P9LE: xxmrghd
-; P9LE-NEXT: xvcvdpuxds
-; P9LE-NEXT: blr
-; P8BE: xxmrghd
-; P8BE-NEXT: xvcvdpuxds
-; P8BE-NEXT: blr
-; P8LE: xxmrghd
-; P8LE-NEXT: xvcvdpuxds
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @fromDiffConstsConvdtoull() {
+; P9BE-LABEL: fromDiffConstsConvdtoull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    addis r3, r2, .LCPI125_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI125_0@toc@l
+; P9BE-NEXT:    lxvx v2, 0, r3
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffConstsConvdtoull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    addis r3, r2, .LCPI125_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI125_0@toc@l
+; P9LE-NEXT:    lxvx v2, 0, r3
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffConstsConvdtoull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addis r3, r2, .LCPI125_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI125_0@toc@l
+; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffConstsConvdtoull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addis r3, r2, .LCPI125_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI125_0@toc@l
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    blr
 entry:
   ret <2 x i64> <i64 24, i64 234>
-; P9BE-LABEL: fromDiffConstsConvdtoull
-; P9LE-LABEL: fromDiffConstsConvdtoull
-; P8BE-LABEL: fromDiffConstsConvdtoull
-; P8LE-LABEL: fromDiffConstsConvdtoull
-; P9BE: lxv
-; P9BE: blr
-; P9LE: lxv
-; P9LE: blr
-; P8BE: lxvd2x
-; P8BE: blr
-; P8LE: lxvd2x
-; P8LE: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromDiffMemConsAConvdtoull(double* nocapture readonly %ptr) {
+; P9BE-LABEL: fromDiffMemConsAConvdtoull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lxv vs0, 0(r3)
+; P9BE-NEXT:    xvcvdpuxds v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemConsAConvdtoull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lxv vs0, 0(r3)
+; P9LE-NEXT:    xvcvdpuxds v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemConsAConvdtoull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lxvd2x vs0, 0, r3
+; P8BE-NEXT:    xvcvdpuxds v2, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemConsAConvdtoull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd vs0, vs0
+; P8LE-NEXT:    xvcvdpuxds v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %0 = bitcast double* %ptr to <2 x double>*
   %1 = load <2 x double>, <2 x double>* %0, align 8
   %2 = fptoui <2 x double> %1 to <2 x i64>
   ret <2 x i64> %2
-; P9BE-LABEL: fromDiffMemConsAConvdtoull
-; P9LE-LABEL: fromDiffMemConsAConvdtoull
-; P8BE-LABEL: fromDiffMemConsAConvdtoull
-; P8LE-LABEL: fromDiffMemConsAConvdtoull
-; P9BE: lxv
-; P9BE-NEXT: xvcvdpuxds v2
-; P9BE-NEXT: blr
-; P9LE: lxv
-; P9LE-NEXT: xvcvdpuxds v2
-; P9LE-NEXT: blr
-; P8BE: lxvd2x
-; P8BE-NEXT: xvcvdpuxds v2
-; P8BE-NEXT: blr
-; P8LE: lxvd2x
-; P8LE: xxswapd
-; P8LE-NEXT: xvcvdpuxds v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromDiffMemConsDConvdtoull(double* nocapture readonly %ptr) {
+; P9BE-LABEL: fromDiffMemConsDConvdtoull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lxv vs0, 16(r3)
+; P9BE-NEXT:    xxswapd vs0, vs0
+; P9BE-NEXT:    xvcvdpuxds v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemConsDConvdtoull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lxv vs0, 16(r3)
+; P9LE-NEXT:    xxswapd vs0, vs0
+; P9LE-NEXT:    xvcvdpuxds v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemConsDConvdtoull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    addi r3, r3, 16
+; P8BE-NEXT:    lxvd2x vs0, 0, r3
+; P8BE-NEXT:    xxswapd vs0, vs0
+; P8BE-NEXT:    xvcvdpuxds v2, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemConsDConvdtoull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    addi r3, r3, 16
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xvcvdpuxds v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds double, double* %ptr, i64 3
   %0 = load double, double* %arrayidx, align 8
@@ -4648,29 +6224,38 @@ entry:
   %conv2 = fptoui double %1 to i64
   %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
   ret <2 x i64> %vecinit3
-; P9BE-LABEL: fromDiffMemConsDConvdtoull
-; P9LE-LABEL: fromDiffMemConsDConvdtoull
-; P8BE-LABEL: fromDiffMemConsDConvdtoull
-; P8LE-LABEL: fromDiffMemConsDConvdtoull
-; P9BE: lxv
-; P9BE-NEXT: xxswapd
-; P9BE-NEXT: xvcvdpuxds v2
-; P9BE-NEXT: blr
-; P9LE: lxv
-; P9LE-NEXT: xxswapd
-; P9LE-NEXT: xvcvdpuxds v2
-; P9LE-NEXT: blr
-; P8BE: lxvd2x
-; P8BE-NEXT: xxswapd
-; P8BE-NEXT: xvcvdpuxds v2
-; P8BE-NEXT: blr
-; P8LE: lxvd2x
-; P8LE-NEXT: xvcvdpuxds v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromDiffMemVarAConvdtoull(double* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromDiffMemVarAConvdtoull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 3
+; P9BE-NEXT:    lxvx vs0, r3, r4
+; P9BE-NEXT:    xvcvdpuxds v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemVarAConvdtoull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 3
+; P9LE-NEXT:    lxvx vs0, r3, r4
+; P9LE-NEXT:    xvcvdpuxds v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemVarAConvdtoull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 3
+; P8BE-NEXT:    lxvd2x vs0, r3, r4
+; P8BE-NEXT:    xvcvdpuxds v2, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemVarAConvdtoull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 3
+; P8LE-NEXT:    lxvd2x vs0, r3, r4
+; P8LE-NEXT:    xxswapd vs0, vs0
+; P8LE-NEXT:    xvcvdpuxds v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %idxprom = sext i32 %elem to i64
   %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
@@ -4684,31 +6269,48 @@ entry:
   %conv3 = fptoui double %1 to i64
   %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1
   ret <2 x i64> %vecinit4
-; P9BE-LABEL: fromDiffMemVarAConvdtoull
-; P9LE-LABEL: fromDiffMemVarAConvdtoull
-; P8BE-LABEL: fromDiffMemVarAConvdtoull
-; P8LE-LABEL: fromDiffMemVarAConvdtoull
-; P9BE: sldi
-; P9BE: lxvx
-; P9BE-NEXT: xvcvdpuxds v2
-; P9BE-NEXT: blr
-; P9LE: sldi
-; P9LE: lxvx
-; P9LE-NEXT: xvcvdpuxds v2
-; P9LE-NEXT: blr
-; P8BE: sldi
-; P8BE: lxvd2x
-; P8BE-NEXT: xvcvdpuxds v2
-; P8BE-NEXT: blr
-; P8LE: sldi
-; P8LE: lxvd2x
-; P8LE-NEXT: xxswapd
-; P8LE-NEXT: xvcvdpuxds v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @fromDiffMemVarDConvdtoull(double* nocapture readonly %arr, i32 signext %elem) {
+; P9BE-LABEL: fromDiffMemVarDConvdtoull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    sldi r4, r4, 3
+; P9BE-NEXT:    add r3, r3, r4
+; P9BE-NEXT:    addi r3, r3, -8
+; P9BE-NEXT:    lxvx vs0, 0, r3
+; P9BE-NEXT:    xxswapd vs0, vs0
+; P9BE-NEXT:    xvcvdpuxds v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: fromDiffMemVarDConvdtoull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    sldi r4, r4, 3
+; P9LE-NEXT:    add r3, r3, r4
+; P9LE-NEXT:    addi r3, r3, -8
+; P9LE-NEXT:    lxvx vs0, 0, r3
+; P9LE-NEXT:    xxswapd vs0, vs0
+; P9LE-NEXT:    xvcvdpuxds v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: fromDiffMemVarDConvdtoull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    sldi r4, r4, 3
+; P8BE-NEXT:    add r3, r3, r4
+; P8BE-NEXT:    addi r3, r3, -8
+; P8BE-NEXT:    lxvd2x vs0, 0, r3
+; P8BE-NEXT:    xxswapd vs0, vs0
+; P8BE-NEXT:    xvcvdpuxds v2, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: fromDiffMemVarDConvdtoull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    sldi r4, r4, 3
+; P8LE-NEXT:    add r3, r3, r4
+; P8LE-NEXT:    addi r3, r3, -8
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xvcvdpuxds v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %idxprom = sext i32 %elem to i64
   %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
@@ -4722,78 +6324,69 @@ entry:
   %conv3 = fptoui double %1 to i64
   %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1
   ret <2 x i64> %vecinit4
-; P9BE-LABEL: fromDiffMemVarDConvdtoull
-; P9LE-LABEL: fromDiffMemVarDConvdtoull
-; P8BE-LABEL: fromDiffMemVarDConvdtoull
-; P8LE-LABEL: fromDiffMemVarDConvdtoull
-; P9BE: sldi
-; P9BE: lxv
-; P9BE-NEXT: xxswapd
-; P9BE-NEXT: xvcvdpuxds v2
-; P9BE-NEXT: blr
-; P9LE: sldi
-; P9LE: lxv
-; P9LE-NEXT: xxswapd
-; P9LE-NEXT: xvcvdpuxds v2
-; P9LE-NEXT: blr
-; P8BE: sldi
-; P8BE: lxvd2x
-; P8BE-NEXT: xxswapd
-; P8BE-NEXT: xvcvdpuxds v2
-; P8BE-NEXT: blr
-; P8LE: sldi
-; P8LE: lxvd2x
-; P8LE-NEXT: xvcvdpuxds v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @spltRegValConvdtoull(double %val) {
+; P9BE-LABEL: spltRegValConvdtoull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    xscvdpuxds f0, f1
+; P9BE-NEXT:    xxspltd v2, vs0, 0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltRegValConvdtoull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    xscvdpuxds f0, f1
+; P9LE-NEXT:    xxspltd v2, vs0, 0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltRegValConvdtoull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    xscvdpuxds f0, f1
+; P8BE-NEXT:    xxspltd v2, vs0, 0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltRegValConvdtoull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    xscvdpuxds f0, f1
+; P8LE-NEXT:    xxspltd v2, vs0, 0
+; P8LE-NEXT:    blr
 entry:
   %conv = fptoui double %val to i64
   %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
   %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
   ret <2 x i64> %splat.splat
-; P9BE-LABEL: spltRegValConvdtoull
-; P9LE-LABEL: spltRegValConvdtoull
-; P8BE-LABEL: spltRegValConvdtoull
-; P8LE-LABEL: spltRegValConvdtoull
-; P9BE: xscvdpuxds
-; P9BE-NEXT: xxspltd v2
-; P9BE-NEXT: blr
-; P9LE: xscvdpuxds
-; P9LE-NEXT: xxspltd v2
-; P9LE-NEXT: blr
-; P8BE: xscvdpuxds
-; P8BE-NEXT: xxspltd v2
-; P8BE-NEXT: blr
-; P8LE: xscvdpuxds
-; P8LE-NEXT: xxspltd v2
-; P8LE-NEXT: blr
 }
 
 ; Function Attrs: norecurse nounwind readonly
 define <2 x i64> @spltMemValConvdtoull(double* nocapture readonly %ptr) {
+; P9BE-LABEL: spltMemValConvdtoull:
+; P9BE:       # %bb.0: # %entry
+; P9BE-NEXT:    lxvdsx vs0, 0, r3
+; P9BE-NEXT:    xvcvdpuxds v2, vs0
+; P9BE-NEXT:    blr
+;
+; P9LE-LABEL: spltMemValConvdtoull:
+; P9LE:       # %bb.0: # %entry
+; P9LE-NEXT:    lxvdsx vs0, 0, r3
+; P9LE-NEXT:    xvcvdpuxds v2, vs0
+; P9LE-NEXT:    blr
+;
+; P8BE-LABEL: spltMemValConvdtoull:
+; P8BE:       # %bb.0: # %entry
+; P8BE-NEXT:    lxvdsx vs0, 0, r3
+; P8BE-NEXT:    xvcvdpuxds v2, vs0
+; P8BE-NEXT:    blr
+;
+; P8LE-LABEL: spltMemValConvdtoull:
+; P8LE:       # %bb.0: # %entry
+; P8LE-NEXT:    lxvdsx vs0, 0, r3
+; P8LE-NEXT:    xvcvdpuxds v2, vs0
+; P8LE-NEXT:    blr
 entry:
   %0 = load double, double* %ptr, align 8
   %conv = fptoui double %0 to i64
   %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
   %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
   ret <2 x i64> %splat.splat
-; P9BE-LABEL: spltMemValConvdtoull
-; P9LE-LABEL: spltMemValConvdtoull
-; P8BE-LABEL: spltMemValConvdtoull
-; P8LE-LABEL: spltMemValConvdtoull
-; P9BE: lxvdsx
-; P9BE-NEXT: xvcvdpuxds
-; P9BE-NEXT: blr
-; P9LE: lxvdsx
-; P9LE-NEXT: xvcvdpuxds
-; P9LE-NEXT: blr
-; P8BE: lxvdsx
-; P8BE-NEXT: xvcvdpuxds
-; P8BE-NEXT: blr
-; P8LE: lxvdsx
-; P8LE-NEXT: xvcvdpuxds
-; P8LE-NEXT: blr
 }




More information about the llvm-commits mailing list