[llvm] cf0395f - [PowerPC] Fix the xxperm swap requirements

Maryam Moghadas via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 5 18:13:47 PDT 2023


Author: Maryam Moghadas
Date: 2023-04-05T20:13:40-05:00
New Revision: cf0395f81655ead3ac8e7e6c85778316097f2748

URL: https://github.com/llvm/llvm-project/commit/cf0395f81655ead3ac8e7e6c85778316097f2748
DIFF: https://github.com/llvm/llvm-project/commit/cf0395f81655ead3ac8e7e6c85778316097f2748.diff

LOG: [PowerPC] Fix the xxperm swap requirements

This patch fixes the xxperm vector operand swap condition so that the
single-use operand is placed in V2, which prevents an extra copy. It also
fixes the subtarget condition required to exploit xxperm.

Reviewed By: stefanp

Differential Revision: https://reviews.llvm.org/D146632

Added: 
    llvm/test/CodeGen/PowerPC/xxperm-swap.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
    llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
    llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index a16c77a857292..eb3e5e233aab0 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10228,14 +10228,16 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
   if (isLittleEndian)
     std::swap(V1, V2);
 
-  if (Subtarget.isISA3_0() && (V1->hasOneUse() || V2->hasOneUse())) {
+  if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
+      (V1->hasOneUse() || V2->hasOneUse())) {
     LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
                          "XXPERM instead\n");
     Opcode = PPCISD::XXPERM;
 
-    // if V2 is dead, then we swap V1 and V2 so we can
-    // use V2 as the destination instead.
-    if (!V1->hasOneUse() && V2->hasOneUse()) {
+    // The second input to XXPERM is also an output so if the second input has
+    // multiple uses then copying is necessary, as a result we want the
+    // single-use operand to be used as the second input to prevent copying.
+    if (!V2->hasOneUse() && V1->hasOneUse()) {
       std::swap(V1, V2);
       NeedSwap = !NeedSwap;
     }

diff  --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
index 5d49bcae0ceae..4da36c9af5c10 100644
--- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
+++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
@@ -137,21 +137,20 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
 ; P9LE-LABEL: test32:
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    add 5, 3, 4
-; P9LE-NEXT:    lfiwzx 0, 3, 4
+; P9LE-NEXT:    lxsiwzx 2, 3, 4
 ; P9LE-NEXT:    addis 3, 2, .LCPI1_0 at toc@ha
-; P9LE-NEXT:    xxlxor 3, 3, 3
+; P9LE-NEXT:    xxlxor 0, 0, 0
 ; P9LE-NEXT:    vspltisw 4, 8
 ; P9LE-NEXT:    addi 3, 3, .LCPI1_0 at toc@l
 ; P9LE-NEXT:    lxv 1, 0(3)
 ; P9LE-NEXT:    li 3, 4
-; P9LE-NEXT:    xxlxor 2, 2, 2
 ; P9LE-NEXT:    vadduwm 4, 4, 4
-; P9LE-NEXT:    xxperm 3, 0, 1
-; P9LE-NEXT:    lfiwzx 0, 5, 3
+; P9LE-NEXT:    lxsiwzx 3, 5, 3
 ; P9LE-NEXT:    xxperm 2, 0, 1
-; P9LE-NEXT:    vnegw 2, 2
-; P9LE-NEXT:    vslw 2, 2, 4
-; P9LE-NEXT:    vsubuwm 2, 2, 3
+; P9LE-NEXT:    xxperm 3, 0, 1
+; P9LE-NEXT:    vnegw 3, 3
+; P9LE-NEXT:    vslw 3, 3, 4
+; P9LE-NEXT:    vsubuwm 2, 3, 2
 ; P9LE-NEXT:    xxswapd 0, 2
 ; P9LE-NEXT:    stxv 0, 0(3)
 ; P9LE-NEXT:    blr
@@ -159,21 +158,20 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
 ; P9BE-LABEL: test32:
 ; P9BE:       # %bb.0: # %entry
 ; P9BE-NEXT:    add 5, 3, 4
-; P9BE-NEXT:    lfiwzx 0, 3, 4
+; P9BE-NEXT:    lxsiwzx 2, 3, 4
 ; P9BE-NEXT:    addis 3, 2, .LCPI1_0 at toc@ha
-; P9BE-NEXT:    xxlxor 3, 3, 3
+; P9BE-NEXT:    xxlxor 0, 0, 0
 ; P9BE-NEXT:    vspltisw 4, 8
 ; P9BE-NEXT:    addi 3, 3, .LCPI1_0 at toc@l
 ; P9BE-NEXT:    lxv 1, 0(3)
 ; P9BE-NEXT:    li 3, 4
-; P9BE-NEXT:    xxlxor 2, 2, 2
 ; P9BE-NEXT:    vadduwm 4, 4, 4
-; P9BE-NEXT:    xxperm 3, 0, 1
-; P9BE-NEXT:    lfiwzx 0, 5, 3
+; P9BE-NEXT:    lxsiwzx 3, 5, 3
 ; P9BE-NEXT:    xxperm 2, 0, 1
-; P9BE-NEXT:    vnegw 2, 2
-; P9BE-NEXT:    vslw 2, 2, 4
-; P9BE-NEXT:    vsubuwm 2, 2, 3
+; P9BE-NEXT:    xxperm 3, 0, 1
+; P9BE-NEXT:    vnegw 3, 3
+; P9BE-NEXT:    vslw 3, 3, 4
+; P9BE-NEXT:    vsubuwm 2, 3, 2
 ; P9BE-NEXT:    xxswapd 0, 2
 ; P9BE-NEXT:    stxv 0, 0(3)
 ; P9BE-NEXT:    blr
@@ -181,20 +179,19 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
 ; P9BE-AIX-LABEL: test32:
 ; P9BE-AIX:       # %bb.0: # %entry
 ; P9BE-AIX-NEXT:    add 5, 3, 4
-; P9BE-AIX-NEXT:    lfiwzx 0, 3, 4
+; P9BE-AIX-NEXT:    lxsiwzx 2, 3, 4
 ; P9BE-AIX-NEXT:    ld 3, L..C2(2) # %const.0
-; P9BE-AIX-NEXT:    xxlxor 3, 3, 3
-; P9BE-AIX-NEXT:    xxlxor 2, 2, 2
+; P9BE-AIX-NEXT:    xxlxor 0, 0, 0
 ; P9BE-AIX-NEXT:    vspltisw 4, 8
-; P9BE-AIX-NEXT:    vadduwm 4, 4, 4
 ; P9BE-AIX-NEXT:    lxv 1, 0(3)
 ; P9BE-AIX-NEXT:    li 3, 4
-; P9BE-AIX-NEXT:    xxperm 3, 0, 1
-; P9BE-AIX-NEXT:    lfiwzx 0, 5, 3
+; P9BE-AIX-NEXT:    vadduwm 4, 4, 4
+; P9BE-AIX-NEXT:    lxsiwzx 3, 5, 3
 ; P9BE-AIX-NEXT:    xxperm 2, 0, 1
-; P9BE-AIX-NEXT:    vnegw 2, 2
-; P9BE-AIX-NEXT:    vslw 2, 2, 4
-; P9BE-AIX-NEXT:    vsubuwm 2, 2, 3
+; P9BE-AIX-NEXT:    xxperm 3, 0, 1
+; P9BE-AIX-NEXT:    vnegw 3, 3
+; P9BE-AIX-NEXT:    vslw 3, 3, 4
+; P9BE-AIX-NEXT:    vsubuwm 2, 3, 2
 ; P9BE-AIX-NEXT:    xxswapd 0, 2
 ; P9BE-AIX-NEXT:    stxv 0, 0(3)
 ; P9BE-AIX-NEXT:    blr
@@ -202,20 +199,19 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
 ; P9BE-AIX32-LABEL: test32:
 ; P9BE-AIX32:       # %bb.0: # %entry
 ; P9BE-AIX32-NEXT:    add 5, 3, 4
-; P9BE-AIX32-NEXT:    lfiwzx 0, 3, 4
+; P9BE-AIX32-NEXT:    lxsiwzx 2, 3, 4
 ; P9BE-AIX32-NEXT:    lwz 3, L..C2(2) # %const.0
-; P9BE-AIX32-NEXT:    xxlxor 3, 3, 3
-; P9BE-AIX32-NEXT:    xxlxor 2, 2, 2
+; P9BE-AIX32-NEXT:    xxlxor 0, 0, 0
 ; P9BE-AIX32-NEXT:    vspltisw 4, 8
-; P9BE-AIX32-NEXT:    vadduwm 4, 4, 4
 ; P9BE-AIX32-NEXT:    lxv 1, 0(3)
 ; P9BE-AIX32-NEXT:    li 3, 4
-; P9BE-AIX32-NEXT:    xxperm 3, 0, 1
-; P9BE-AIX32-NEXT:    lfiwzx 0, 5, 3
+; P9BE-AIX32-NEXT:    vadduwm 4, 4, 4
+; P9BE-AIX32-NEXT:    lxsiwzx 3, 5, 3
 ; P9BE-AIX32-NEXT:    xxperm 2, 0, 1
-; P9BE-AIX32-NEXT:    vnegw 2, 2
-; P9BE-AIX32-NEXT:    vslw 2, 2, 4
-; P9BE-AIX32-NEXT:    vsubuwm 2, 2, 3
+; P9BE-AIX32-NEXT:    xxperm 3, 0, 1
+; P9BE-AIX32-NEXT:    vnegw 3, 3
+; P9BE-AIX32-NEXT:    vslw 3, 3, 4
+; P9BE-AIX32-NEXT:    vsubuwm 2, 3, 2
 ; P9BE-AIX32-NEXT:    xxswapd 0, 2
 ; P9BE-AIX32-NEXT:    stxv 0, 0(3)
 ; P9BE-AIX32-NEXT:    blr
@@ -272,25 +268,24 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
 ; P9BE-NEXT:    sldi 4, 4, 1
 ; P9BE-NEXT:    li 7, 16
 ; P9BE-NEXT:    add 6, 3, 4
-; P9BE-NEXT:    lxsihzx 0, 6, 7
+; P9BE-NEXT:    lxsihzx 1, 3, 4
+; P9BE-NEXT:    addis 3, 2, .LCPI2_1 at toc@ha
+; P9BE-NEXT:    lxsihzx 2, 6, 7
 ; P9BE-NEXT:    addis 6, 2, .LCPI2_0 at toc@ha
+; P9BE-NEXT:    addi 3, 3, .LCPI2_1 at toc@l
 ; P9BE-NEXT:    addi 6, 6, .LCPI2_0 at toc@l
-; P9BE-NEXT:    lxv 1, 0(6)
+; P9BE-NEXT:    lxv 0, 0(6)
 ; P9BE-NEXT:    li 6, 0
-; P9BE-NEXT:    mtvsrwz 2, 6
-; P9BE-NEXT:    vmr 3, 2
-; P9BE-NEXT:    vsplth 4, 2, 3
-; P9BE-NEXT:    xxperm 3, 0, 1
-; P9BE-NEXT:    lxsihzx 0, 3, 4
-; P9BE-NEXT:    addis 3, 2, .LCPI2_1 at toc@ha
-; P9BE-NEXT:    addi 3, 3, .LCPI2_1 at toc@l
-; P9BE-NEXT:    xxperm 2, 0, 1
+; P9BE-NEXT:    mtvsrwz 3, 6
+; P9BE-NEXT:    xxperm 2, 3, 0
+; P9BE-NEXT:    xxperm 1, 3, 0
+; P9BE-NEXT:    vsplth 3, 3, 3
 ; P9BE-NEXT:    lxv 0, 0(3)
 ; P9BE-NEXT:    li 3, 0
-; P9BE-NEXT:    xxmrghw 2, 4, 2
-; P9BE-NEXT:    xxperm 3, 2, 0
-; P9BE-NEXT:    xxspltw 2, 3, 1
-; P9BE-NEXT:    vadduwm 2, 3, 2
+; P9BE-NEXT:    xxmrghw 3, 3, 1
+; P9BE-NEXT:    xxperm 2, 3, 0
+; P9BE-NEXT:    xxspltw 3, 2, 1
+; P9BE-NEXT:    vadduwm 2, 2, 3
 ; P9BE-NEXT:    vextuwlx 3, 3, 2
 ; P9BE-NEXT:    cmpw 3, 5
 ; P9BE-NEXT:    bgelr+ 0
@@ -301,23 +296,22 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
 ; P9BE-AIX-NEXT:    sldi 4, 4, 1
 ; P9BE-AIX-NEXT:    li 7, 16
 ; P9BE-AIX-NEXT:    add 6, 3, 4
-; P9BE-AIX-NEXT:    lxsihzx 0, 6, 7
-; P9BE-AIX-NEXT:    ld 6, L..C3(2) # %const.0
-; P9BE-AIX-NEXT:    lxv 1, 0(6)
+; P9BE-AIX-NEXT:    lxsihzx 1, 3, 4
+; P9BE-AIX-NEXT:    ld 3, L..C3(2) # %const.1
+; P9BE-AIX-NEXT:    lxsihzx 2, 6, 7
+; P9BE-AIX-NEXT:    ld 6, L..C4(2) # %const.0
+; P9BE-AIX-NEXT:    lxv 0, 0(6)
 ; P9BE-AIX-NEXT:    li 6, 0
-; P9BE-AIX-NEXT:    mtvsrwz 2, 6
-; P9BE-AIX-NEXT:    vmr 3, 2
-; P9BE-AIX-NEXT:    vsplth 4, 2, 3
-; P9BE-AIX-NEXT:    xxperm 3, 0, 1
-; P9BE-AIX-NEXT:    lxsihzx 0, 3, 4
-; P9BE-AIX-NEXT:    ld 3, L..C4(2) # %const.1
-; P9BE-AIX-NEXT:    xxperm 2, 0, 1
+; P9BE-AIX-NEXT:    mtvsrwz 3, 6
+; P9BE-AIX-NEXT:    xxperm 2, 3, 0
+; P9BE-AIX-NEXT:    xxperm 1, 3, 0
+; P9BE-AIX-NEXT:    vsplth 3, 3, 3
 ; P9BE-AIX-NEXT:    lxv 0, 0(3)
 ; P9BE-AIX-NEXT:    li 3, 0
-; P9BE-AIX-NEXT:    xxmrghw 2, 4, 2
-; P9BE-AIX-NEXT:    xxperm 3, 2, 0
-; P9BE-AIX-NEXT:    xxspltw 2, 3, 1
-; P9BE-AIX-NEXT:    vadduwm 2, 3, 2
+; P9BE-AIX-NEXT:    xxmrghw 3, 3, 1
+; P9BE-AIX-NEXT:    xxperm 2, 3, 0
+; P9BE-AIX-NEXT:    xxspltw 3, 2, 1
+; P9BE-AIX-NEXT:    vadduwm 2, 2, 3
 ; P9BE-AIX-NEXT:    vextuwlx 3, 3, 2
 ; P9BE-AIX-NEXT:    cmpw 3, 5
 ; P9BE-AIX-NEXT:    bgelr+ 0
@@ -413,29 +407,26 @@ define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext
 ; P9BE:       # %bb.0: # %entry
 ; P9BE-NEXT:    add 6, 3, 4
 ; P9BE-NEXT:    li 7, 8
+; P9BE-NEXT:    lxsibzx 3, 3, 4
+; P9BE-NEXT:    addis 3, 2, .LCPI3_1 at toc@ha
 ; P9BE-NEXT:    lxsibzx 0, 6, 7
 ; P9BE-NEXT:    addis 6, 2, .LCPI3_0 at toc@ha
+; P9BE-NEXT:    addi 3, 3, .LCPI3_1 at toc@l
 ; P9BE-NEXT:    addi 6, 6, .LCPI3_0 at toc@l
 ; P9BE-NEXT:    lxv 1, 0(6)
 ; P9BE-NEXT:    li 6, 0
 ; P9BE-NEXT:    mtvsrwz 2, 6
-; P9BE-NEXT:    vspltb 3, 2, 7
 ; P9BE-NEXT:    xxperm 0, 2, 1
-; P9BE-NEXT:    lxsibzx 1, 3, 4
-; P9BE-NEXT:    addis 3, 2, .LCPI3_1 at toc@ha
-; P9BE-NEXT:    addi 3, 3, .LCPI3_1 at toc@l
-; P9BE-NEXT:    lxv 2, 0(3)
-; P9BE-NEXT:    addis 3, 2, .LCPI3_2 at toc@ha
-; P9BE-NEXT:    addi 3, 3, .LCPI3_2 at toc@l
-; P9BE-NEXT:    xxperm 2, 1, 2
-; P9BE-NEXT:    xxspltw 1, 3, 0
-; P9BE-NEXT:    vmrghh 2, 2, 3
-; P9BE-NEXT:    xxmrghw 2, 2, 0
+; P9BE-NEXT:    xxperm 3, 2, 1
+; P9BE-NEXT:    vspltb 2, 2, 7
+; P9BE-NEXT:    vmrghh 3, 3, 2
+; P9BE-NEXT:    xxspltw 1, 2, 0
+; P9BE-NEXT:    xxmrghw 3, 3, 0
 ; P9BE-NEXT:    lxv 0, 0(3)
 ; P9BE-NEXT:    li 3, 0
-; P9BE-NEXT:    xxperm 2, 1, 0
-; P9BE-NEXT:    xxspltw 3, 2, 1
-; P9BE-NEXT:    vadduwm 2, 2, 3
+; P9BE-NEXT:    xxperm 3, 1, 0
+; P9BE-NEXT:    xxspltw 2, 3, 1
+; P9BE-NEXT:    vadduwm 2, 3, 2
 ; P9BE-NEXT:    vextuwlx 3, 3, 2
 ; P9BE-NEXT:    cmpw 3, 5
 ; P9BE-NEXT:    bgelr+ 0
@@ -445,26 +436,24 @@ define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext
 ; P9BE-AIX:       # %bb.0: # %entry
 ; P9BE-AIX-NEXT:    add 6, 3, 4
 ; P9BE-AIX-NEXT:    li 7, 8
+; P9BE-AIX-NEXT:    lxsibzx 3, 3, 4
+; P9BE-AIX-NEXT:    ld 3, L..C5(2) # %const.1
 ; P9BE-AIX-NEXT:    lxsibzx 0, 6, 7
-; P9BE-AIX-NEXT:    ld 6, L..C5(2) # %const.0
+; P9BE-AIX-NEXT:    ld 6, L..C6(2) # %const.0
 ; P9BE-AIX-NEXT:    lxv 1, 0(6)
 ; P9BE-AIX-NEXT:    li 6, 0
 ; P9BE-AIX-NEXT:    mtvsrwz 2, 6
-; P9BE-AIX-NEXT:    vspltb 3, 2, 7
 ; P9BE-AIX-NEXT:    xxperm 0, 2, 1
-; P9BE-AIX-NEXT:    lxsibzx 1, 3, 4
-; P9BE-AIX-NEXT:    ld 3, L..C6(2) # %const.1
-; P9BE-AIX-NEXT:    lxv 2, 0(3)
-; P9BE-AIX-NEXT:    ld 3, L..C7(2) # %const.2
-; P9BE-AIX-NEXT:    xxperm 2, 1, 2
-; P9BE-AIX-NEXT:    xxspltw 1, 3, 0
-; P9BE-AIX-NEXT:    vmrghh 2, 2, 3
-; P9BE-AIX-NEXT:    xxmrghw 2, 2, 0
+; P9BE-AIX-NEXT:    xxperm 3, 2, 1
+; P9BE-AIX-NEXT:    vspltb 2, 2, 7
+; P9BE-AIX-NEXT:    vmrghh 3, 3, 2
+; P9BE-AIX-NEXT:    xxspltw 1, 2, 0
+; P9BE-AIX-NEXT:    xxmrghw 3, 3, 0
 ; P9BE-AIX-NEXT:    lxv 0, 0(3)
 ; P9BE-AIX-NEXT:    li 3, 0
-; P9BE-AIX-NEXT:    xxperm 2, 1, 0
-; P9BE-AIX-NEXT:    xxspltw 3, 2, 1
-; P9BE-AIX-NEXT:    vadduwm 2, 2, 3
+; P9BE-AIX-NEXT:    xxperm 3, 1, 0
+; P9BE-AIX-NEXT:    xxspltw 2, 3, 1
+; P9BE-AIX-NEXT:    vadduwm 2, 3, 2
 ; P9BE-AIX-NEXT:    vextuwlx 3, 3, 2
 ; P9BE-AIX-NEXT:    cmpw 3, 5
 ; P9BE-AIX-NEXT:    bgelr+ 0
@@ -474,25 +463,23 @@ define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext
 ; P9BE-AIX32:       # %bb.0: # %entry
 ; P9BE-AIX32-NEXT:    add 6, 3, 4
 ; P9BE-AIX32-NEXT:    li 7, 8
+; P9BE-AIX32-NEXT:    lxsibzx 3, 3, 4
+; P9BE-AIX32-NEXT:    lwz 3, L..C4(2) # %const.1
 ; P9BE-AIX32-NEXT:    lxsibzx 0, 6, 7
-; P9BE-AIX32-NEXT:    lwz 6, L..C4(2) # %const.0
+; P9BE-AIX32-NEXT:    lwz 6, L..C5(2) # %const.0
 ; P9BE-AIX32-NEXT:    lxv 1, 0(6)
 ; P9BE-AIX32-NEXT:    li 6, 0
 ; P9BE-AIX32-NEXT:    mtvsrwz 2, 6
-; P9BE-AIX32-NEXT:    vspltb 3, 2, 7
 ; P9BE-AIX32-NEXT:    xxperm 0, 2, 1
-; P9BE-AIX32-NEXT:    lxsibzx 1, 3, 4
-; P9BE-AIX32-NEXT:    lwz 3, L..C5(2) # %const.1
-; P9BE-AIX32-NEXT:    lxv 2, 0(3)
-; P9BE-AIX32-NEXT:    lwz 3, L..C6(2) # %const.2
-; P9BE-AIX32-NEXT:    xxperm 2, 1, 2
-; P9BE-AIX32-NEXT:    xxspltw 1, 3, 0
-; P9BE-AIX32-NEXT:    vmrghh 2, 2, 3
-; P9BE-AIX32-NEXT:    xxmrghw 2, 2, 0
+; P9BE-AIX32-NEXT:    xxperm 3, 2, 1
+; P9BE-AIX32-NEXT:    vspltb 2, 2, 7
+; P9BE-AIX32-NEXT:    vmrghh 3, 3, 2
+; P9BE-AIX32-NEXT:    xxspltw 1, 2, 0
+; P9BE-AIX32-NEXT:    xxmrghw 3, 3, 0
 ; P9BE-AIX32-NEXT:    lxv 0, 0(3)
-; P9BE-AIX32-NEXT:    xxperm 2, 1, 0
-; P9BE-AIX32-NEXT:    xxspltw 3, 2, 1
-; P9BE-AIX32-NEXT:    vadduwm 2, 2, 3
+; P9BE-AIX32-NEXT:    xxperm 3, 1, 0
+; P9BE-AIX32-NEXT:    xxspltw 2, 3, 1
+; P9BE-AIX32-NEXT:    vadduwm 2, 3, 2
 ; P9BE-AIX32-NEXT:    stxv 2, -16(1)
 ; P9BE-AIX32-NEXT:    lwz 3, -16(1)
 ; P9BE-AIX32-NEXT:    cmpw 3, 5

diff  --git a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
index b90b9aed44e73..500e031fa600a 100644
--- a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
@@ -195,9 +195,9 @@ define <16 x i8> @test_none_v16i8(i8 %arg, ptr nocapture noundef readonly %b) {
 ;
 ; CHECK-LE-P9-LABEL: test_none_v16i8:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    mtfprd f0, r3
+; CHECK-LE-P9-NEXT:    mtvsrd v2, r3
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI2_0 at toc@ha
-; CHECK-LE-P9-NEXT:    lxv v2, 0(r4)
+; CHECK-LE-P9-NEXT:    lxv vs0, 0(r4)
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI2_0 at toc@l
 ; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
@@ -215,9 +215,9 @@ define <16 x i8> @test_none_v16i8(i8 %arg, ptr nocapture noundef readonly %b) {
 ;
 ; CHECK-BE-P9-LABEL: test_none_v16i8:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P9-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI2_0 at toc@ha
-; CHECK-BE-P9-NEXT:    lxv v2, 0(r4)
+; CHECK-BE-P9-NEXT:    lxv vs0, 0(r4)
 ; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI2_0 at toc@l
 ; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
@@ -234,9 +234,9 @@ define <16 x i8> @test_none_v16i8(i8 %arg, ptr nocapture noundef readonly %b) {
 ;
 ; CHECK-AIX-64-P9-LABEL: test_none_v16i8:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT:    mtvsrwz v2, r3
 ; CHECK-AIX-64-P9-NEXT:    ld r3, L..C1(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT:    lxv v2, 0(r4)
+; CHECK-AIX-64-P9-NEXT:    lxv vs0, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
@@ -443,9 +443,9 @@ define <16 x i8> @test_none_v8i16(i16 %arg, ptr nocapture noundef readonly %b) {
 ;
 ; CHECK-LE-P9-LABEL: test_none_v8i16:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    mtfprd f0, r3
+; CHECK-LE-P9-NEXT:    mtvsrd v2, r3
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI5_0 at toc@ha
-; CHECK-LE-P9-NEXT:    lxv v2, 0(r4)
+; CHECK-LE-P9-NEXT:    lxv vs0, 0(r4)
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI5_0 at toc@l
 ; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
@@ -463,9 +463,9 @@ define <16 x i8> @test_none_v8i16(i16 %arg, ptr nocapture noundef readonly %b) {
 ;
 ; CHECK-BE-P9-LABEL: test_none_v8i16:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P9-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI5_0 at toc@ha
-; CHECK-BE-P9-NEXT:    lxv v2, 0(r4)
+; CHECK-BE-P9-NEXT:    lxv vs0, 0(r4)
 ; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI5_0 at toc@l
 ; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
@@ -482,9 +482,9 @@ define <16 x i8> @test_none_v8i16(i16 %arg, ptr nocapture noundef readonly %b) {
 ;
 ; CHECK-AIX-64-P9-LABEL: test_none_v8i16:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT:    mtvsrwz v2, r3
 ; CHECK-AIX-64-P9-NEXT:    ld r3, L..C2(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT:    lxv v2, 0(r4)
+; CHECK-AIX-64-P9-NEXT:    lxv vs0, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
index 25fcd227be687..b94dac42e1fff 100644
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -88,12 +88,12 @@ define <3 x float> @constrained_vector_fdiv_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE9-NEXT:    xxsldwi 2, 35, 35, 3
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xsdivsp 2, 3, 2
-; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xscvdpspn 34, 0
+; PC64LE9-NEXT:    lxv 0, 0(3)
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
 ; PC64LE9-NEXT:    xscvdpspn 2, 2
-; PC64LE9-NEXT:    xxmrghw 34, 1, 2
-; PC64LE9-NEXT:    lxv 1, 0(3)
-; PC64LE9-NEXT:    xxperm 34, 0, 1
+; PC64LE9-NEXT:    xxmrghw 35, 1, 2
+; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %div = call <3 x float> @llvm.experimental.constrained.fdiv.v3f32(
@@ -362,15 +362,15 @@ define <3 x float> @constrained_vector_frem_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI7_0 at toc@ha
+; PC64LE9-NEXT:    xscvdpspn 34, 31
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    addi 3, 3, .LCPI7_0 at toc@l
-; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 31
-; PC64LE9-NEXT:    lxv 1, 0(3)
 ; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    xxperm 34, 0, 1
+; PC64LE9-NEXT:    addi 3, 3, .LCPI7_0 at toc@l
+; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
+; PC64LE9-NEXT:    lxv 0, 0(3)
+; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    addi 1, 1, 80
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -683,14 +683,14 @@ define <3 x float> @constrained_vector_fmul_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xsmulsp 1, 2, 1
 ; PC64LE9-NEXT:    xxsldwi 2, 35, 35, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xscvdpspn 34, 0
+; PC64LE9-NEXT:    lxv 0, 0(3)
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xsmulsp 2, 3, 2
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
 ; PC64LE9-NEXT:    xscvdpspn 2, 2
-; PC64LE9-NEXT:    xxmrghw 34, 1, 2
-; PC64LE9-NEXT:    lxv 1, 0(3)
-; PC64LE9-NEXT:    xxperm 34, 0, 1
+; PC64LE9-NEXT:    xxmrghw 35, 1, 2
+; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %mul = call <3 x float> @llvm.experimental.constrained.fmul.v3f32(
@@ -845,14 +845,14 @@ define <3 x float> @constrained_vector_fadd_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xsaddsp 1, 2, 1
 ; PC64LE9-NEXT:    xxsldwi 2, 35, 35, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xscvdpspn 34, 0
+; PC64LE9-NEXT:    lxv 0, 0(3)
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xsaddsp 2, 3, 2
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
 ; PC64LE9-NEXT:    xscvdpspn 2, 2
-; PC64LE9-NEXT:    xxmrghw 34, 1, 2
-; PC64LE9-NEXT:    lxv 1, 0(3)
-; PC64LE9-NEXT:    xxperm 34, 0, 1
+; PC64LE9-NEXT:    xxmrghw 35, 1, 2
+; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %add = call <3 x float> @llvm.experimental.constrained.fadd.v3f32(
@@ -1007,14 +1007,14 @@ define <3 x float> @constrained_vector_fsub_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xssubsp 1, 2, 1
 ; PC64LE9-NEXT:    xxsldwi 2, 35, 35, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xscvdpspn 34, 0
+; PC64LE9-NEXT:    lxv 0, 0(3)
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xssubsp 2, 3, 2
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
 ; PC64LE9-NEXT:    xscvdpspn 2, 2
-; PC64LE9-NEXT:    xxmrghw 34, 1, 2
-; PC64LE9-NEXT:    lxv 1, 0(3)
-; PC64LE9-NEXT:    xxperm 34, 0, 1
+; PC64LE9-NEXT:    xxmrghw 35, 1, 2
+; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %sub = call <3 x float> @llvm.experimental.constrained.fsub.v3f32(
@@ -1159,10 +1159,10 @@ define <3 x float> @constrained_vector_sqrt_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xssqrtsp 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 2, 2
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxmrghw 34, 1, 2
+; PC64LE9-NEXT:    xscvdpspn 34, 0
+; PC64LE9-NEXT:    xxmrghw 35, 1, 2
 ; PC64LE9-NEXT:    lxv 1, 0(3)
-; PC64LE9-NEXT:    xxperm 34, 0, 1
+; PC64LE9-NEXT:    xxperm 34, 35, 1
 ; PC64LE9-NEXT:    blr
 entry:
   %sqrt = call <3 x float> @llvm.experimental.constrained.sqrt.v3f32(
@@ -1422,15 +1422,15 @@ define <3 x float> @constrained_vector_pow_v3f32(<3 x float> %x, <3 x float> %y)
 ; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI32_0 at toc@ha
+; PC64LE9-NEXT:    xscvdpspn 34, 31
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    addi 3, 3, .LCPI32_0 at toc@l
-; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 31
-; PC64LE9-NEXT:    lxv 1, 0(3)
 ; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    xxperm 34, 0, 1
+; PC64LE9-NEXT:    addi 3, 3, .LCPI32_0 at toc@l
+; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
+; PC64LE9-NEXT:    lxv 0, 0(3)
+; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    addi 1, 1, 80
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -1850,15 +1850,15 @@ define <3 x float> @constrained_vector_powi_v3f32(<3 x float> %x, i32 %y) #0 {
 ; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI37_0 at toc@ha
+; PC64LE9-NEXT:    xscvdpspn 34, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    ld 30, 48(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI37_0 at toc@l
-; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 31
-; PC64LE9-NEXT:    lxv 1, 0(3)
-; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    xxperm 34, 0, 1
+; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
+; PC64LE9-NEXT:    lxv 0, 0(3)
+; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    addi 1, 1, 80
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -2231,14 +2231,14 @@ define <3 x float> @constrained_vector_sin_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI42_0 at toc@ha
+; PC64LE9-NEXT:    xscvdpspn 34, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI42_0 at toc@l
-; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    xscvdpspn 1, 31
+; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
 ; PC64LE9-NEXT:    lxv 0, 0(3)
-; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    xxperm 34, 1, 0
+; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -2582,14 +2582,14 @@ define <3 x float> @constrained_vector_cos_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI47_0 at toc@ha
+; PC64LE9-NEXT:    xscvdpspn 34, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI47_0 at toc@l
-; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    xscvdpspn 1, 31
+; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
 ; PC64LE9-NEXT:    lxv 0, 0(3)
-; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    xxperm 34, 1, 0
+; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -2933,14 +2933,14 @@ define <3 x float> @constrained_vector_exp_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI52_0 at toc@ha
+; PC64LE9-NEXT:    xscvdpspn 34, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI52_0 at toc@l
-; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    xscvdpspn 1, 31
+; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
 ; PC64LE9-NEXT:    lxv 0, 0(3)
-; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    xxperm 34, 1, 0
+; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -3284,14 +3284,14 @@ define <3 x float> @constrained_vector_exp2_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI57_0 at toc@ha
+; PC64LE9-NEXT:    xscvdpspn 34, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI57_0 at toc@l
-; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    xscvdpspn 1, 31
+; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
 ; PC64LE9-NEXT:    lxv 0, 0(3)
-; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    xxperm 34, 1, 0
+; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -3635,14 +3635,14 @@ define <3 x float> @constrained_vector_log_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI62_0 at toc@ha
+; PC64LE9-NEXT:    xscvdpspn 34, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI62_0 at toc@l
-; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    xscvdpspn 1, 31
+; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
 ; PC64LE9-NEXT:    lxv 0, 0(3)
-; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    xxperm 34, 1, 0
+; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -3986,14 +3986,14 @@ define <3 x float> @constrained_vector_log10_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI67_0 at toc@ha
+; PC64LE9-NEXT:    xscvdpspn 34, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI67_0 at toc@l
-; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    xscvdpspn 1, 31
+; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
 ; PC64LE9-NEXT:    lxv 0, 0(3)
-; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    xxperm 34, 1, 0
+; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -4337,14 +4337,14 @@ define <3 x float> @constrained_vector_log2_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI72_0 at toc@ha
+; PC64LE9-NEXT:    xscvdpspn 34, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI72_0 at toc@l
-; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    xscvdpspn 1, 31
+; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
 ; PC64LE9-NEXT:    lxv 0, 0(3)
-; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    xxperm 34, 1, 0
+; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -4599,10 +4599,10 @@ define <3 x float> @constrained_vector_rint_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xsrdpic 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 2, 2
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxmrghw 34, 1, 2
+; PC64LE9-NEXT:    xscvdpspn 34, 0
+; PC64LE9-NEXT:    xxmrghw 35, 1, 2
 ; PC64LE9-NEXT:    lxv 1, 0(3)
-; PC64LE9-NEXT:    xxperm 34, 0, 1
+; PC64LE9-NEXT:    xxperm 34, 35, 1
 ; PC64LE9-NEXT:    blr
  entry:
   %rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32(
@@ -4827,14 +4827,14 @@ define <3 x float> @constrained_vector_nearbyint_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI82_0 at toc@ha
+; PC64LE9-NEXT:    xscvdpspn 34, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI82_0 at toc@l
-; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    xscvdpspn 1, 31
+; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
 ; PC64LE9-NEXT:    lxv 0, 0(3)
-; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    xxperm 34, 1, 0
+; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -5152,15 +5152,15 @@ define <3 x float> @constrained_vector_maxnum_v3f32(<3 x float> %x, <3 x float>
 ; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI87_0@toc@ha
+; PC64LE9-NEXT:    xscvdpspn 34, 31
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    addi 3, 3, .LCPI87_0@toc@l
-; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 31
-; PC64LE9-NEXT:    lxv 1, 0(3)
 ; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    xxperm 34, 0, 1
+; PC64LE9-NEXT:    addi 3, 3, .LCPI87_0@toc@l
+; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
+; PC64LE9-NEXT:    lxv 0, 0(3)
+; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    addi 1, 1, 80
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -5394,15 +5394,15 @@ define <3 x float> @constrained_vector_minnum_v3f32(<3 x float> %x, <3 x float>
 ; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI92_0@toc@ha
+; PC64LE9-NEXT:    xscvdpspn 34, 31
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    addi 3, 3, .LCPI92_0@toc@l
-; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 31
-; PC64LE9-NEXT:    lxv 1, 0(3)
 ; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    xxperm 34, 0, 1
+; PC64LE9-NEXT:    addi 3, 3, .LCPI92_0@toc@l
+; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
+; PC64LE9-NEXT:    lxv 0, 0(3)
+; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    addi 1, 1, 80
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -5587,23 +5587,22 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 34
 ; PC64LE9-NEXT:    xscvspdpn 0, 0
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
-; PC64LE9-NEXT:    xxsldwi 2, 34, 34, 1
-; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xscvdpsxws 0, 0
 ; PC64LE9-NEXT:    xscvdpsxws 1, 1
-; PC64LE9-NEXT:    xscvdpsxws 2, 2
 ; PC64LE9-NEXT:    mffprwz 3, 0
 ; PC64LE9-NEXT:    mtfprwz 0, 3
 ; PC64LE9-NEXT:    mffprwz 3, 1
 ; PC64LE9-NEXT:    mtfprwz 1, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI97_0@toc@ha
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
+; PC64LE9-NEXT:    xxsldwi 1, 34, 34, 1
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI97_0@toc@l
-; PC64LE9-NEXT:    xxmrghw 0, 1, 0
-; PC64LE9-NEXT:    lxv 1, 0(3)
-; PC64LE9-NEXT:    mffprwz 3, 2
-; PC64LE9-NEXT:    mtfprwz 2, 3
-; PC64LE9-NEXT:    xxperm 0, 2, 1
-; PC64LE9-NEXT:    xxlor 34, 0, 0
+; PC64LE9-NEXT:    lxv 0, 0(3)
+; PC64LE9-NEXT:    xscvspdpn 1, 1
+; PC64LE9-NEXT:    xscvdpsxws 1, 1
+; PC64LE9-NEXT:    mffprwz 3, 1
+; PC64LE9-NEXT:    mtvsrwz 34, 3
+; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f32(
@@ -5856,13 +5855,13 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    mffprwz 3, 1
 ; PC64LE9-NEXT:    mtfprwz 1, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI105_0@toc@ha
-; PC64LE9-NEXT:    xxmrghw 34, 1, 0
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
 ; PC64LE9-NEXT:    xscvdpsxws 1, 3
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI105_0@toc@l
 ; PC64LE9-NEXT:    lxv 0, 0(3)
 ; PC64LE9-NEXT:    mffprwz 3, 1
-; PC64LE9-NEXT:    mtfprwz 1, 3
-; PC64LE9-NEXT:    xxperm 34, 1, 0
+; PC64LE9-NEXT:    mtvsrwz 34, 3
+; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f64(
@@ -6084,23 +6083,22 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 34
 ; PC64LE9-NEXT:    xscvspdpn 0, 0
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
-; PC64LE9-NEXT:    xxsldwi 2, 34, 34, 1
-; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xscvdpuxws 0, 0
 ; PC64LE9-NEXT:    xscvdpuxws 1, 1
-; PC64LE9-NEXT:    xscvdpuxws 2, 2
 ; PC64LE9-NEXT:    mffprwz 3, 0
 ; PC64LE9-NEXT:    mtfprwz 0, 3
 ; PC64LE9-NEXT:    mffprwz 3, 1
 ; PC64LE9-NEXT:    mtfprwz 1, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI113_0@toc@ha
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
+; PC64LE9-NEXT:    xxsldwi 1, 34, 34, 1
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI113_0@toc@l
-; PC64LE9-NEXT:    xxmrghw 0, 1, 0
-; PC64LE9-NEXT:    lxv 1, 0(3)
-; PC64LE9-NEXT:    mffprwz 3, 2
-; PC64LE9-NEXT:    mtfprwz 2, 3
-; PC64LE9-NEXT:    xxperm 0, 2, 1
-; PC64LE9-NEXT:    xxlor 34, 0, 0
+; PC64LE9-NEXT:    lxv 0, 0(3)
+; PC64LE9-NEXT:    xscvspdpn 1, 1
+; PC64LE9-NEXT:    xscvdpuxws 1, 1
+; PC64LE9-NEXT:    mffprwz 3, 1
+; PC64LE9-NEXT:    mtvsrwz 34, 3
+; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f32(
@@ -6352,13 +6350,13 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    mffprwz 3, 1
 ; PC64LE9-NEXT:    mtfprwz 1, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI121_0@toc@ha
-; PC64LE9-NEXT:    xxmrghw 34, 1, 0
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
 ; PC64LE9-NEXT:    xscvdpuxws 1, 3
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI121_0@toc@l
 ; PC64LE9-NEXT:    lxv 0, 0(3)
 ; PC64LE9-NEXT:    mffprwz 3, 1
-; PC64LE9-NEXT:    mtfprwz 1, 3
-; PC64LE9-NEXT:    xxperm 34, 1, 0
+; PC64LE9-NEXT:    mtvsrwz 34, 3
+; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f64(
@@ -6563,11 +6561,11 @@ define <3 x float> @constrained_vector_fptrunc_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI129_0@toc@l
 ; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xxmrghw 34, 1, 0
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
 ; PC64LE9-NEXT:    xsrsp 1, 3
 ; PC64LE9-NEXT:    lxv 0, 0(3)
-; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xxperm 34, 1, 0
+; PC64LE9-NEXT:    xscvdpspn 34, 1
+; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(
@@ -6776,10 +6774,10 @@ define <3 x float> @constrained_vector_ceil_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xsrdpip 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 2, 2
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxmrghw 34, 1, 2
+; PC64LE9-NEXT:    xscvdpspn 34, 0
+; PC64LE9-NEXT:    xxmrghw 35, 1, 2
 ; PC64LE9-NEXT:    lxv 1, 0(3)
-; PC64LE9-NEXT:    xxperm 34, 0, 1
+; PC64LE9-NEXT:    xxperm 34, 35, 1
 ; PC64LE9-NEXT:    blr
 entry:
   %ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32(
@@ -6892,10 +6890,10 @@ define <3 x float> @constrained_vector_floor_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xsrdpim 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 2, 2
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxmrghw 34, 1, 2
+; PC64LE9-NEXT:    xscvdpspn 34, 0
+; PC64LE9-NEXT:    xxmrghw 35, 1, 2
 ; PC64LE9-NEXT:    lxv 1, 0(3)
-; PC64LE9-NEXT:    xxperm 34, 0, 1
+; PC64LE9-NEXT:    xxperm 34, 35, 1
 ; PC64LE9-NEXT:    blr
 entry:
   %floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32(
@@ -7007,10 +7005,10 @@ define <3 x float> @constrained_vector_round_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xsrdpi 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 2, 2
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxmrghw 34, 1, 2
+; PC64LE9-NEXT:    xscvdpspn 34, 0
+; PC64LE9-NEXT:    xxmrghw 35, 1, 2
 ; PC64LE9-NEXT:    lxv 1, 0(3)
-; PC64LE9-NEXT:    xxperm 34, 0, 1
+; PC64LE9-NEXT:    xxperm 34, 35, 1
 ; PC64LE9-NEXT:    blr
 entry:
   %round = call <3 x float> @llvm.experimental.constrained.round.v3f32(
@@ -7123,10 +7121,10 @@ define <3 x float> @constrained_vector_trunc_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xsrdpiz 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 2, 2
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxmrghw 34, 1, 2
+; PC64LE9-NEXT:    xscvdpspn 34, 0
+; PC64LE9-NEXT:    xxmrghw 35, 1, 2
 ; PC64LE9-NEXT:    lxv 1, 0(3)
-; PC64LE9-NEXT:    xxperm 34, 0, 1
+; PC64LE9-NEXT:    xxperm 34, 35, 1
 ; PC64LE9-NEXT:    blr
 entry:
   %trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32(
@@ -7481,14 +7479,13 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI161_0@toc@l
 ; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xxmrghw 0, 1, 0
-; PC64LE9-NEXT:    lxv 1, 0(3)
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
+; PC64LE9-NEXT:    lxv 0, 0(3)
 ; PC64LE9-NEXT:    mfvsrwz 3, 34
-; PC64LE9-NEXT:    mtfprwa 2, 3
-; PC64LE9-NEXT:    xscvsxdsp 2, 2
-; PC64LE9-NEXT:    xscvdpspn 2, 2
-; PC64LE9-NEXT:    xxperm 0, 2, 1
-; PC64LE9-NEXT:    xxlor 34, 0, 0
+; PC64LE9-NEXT:    mtfprwa 1, 3
+; PC64LE9-NEXT:    xscvsxdsp 1, 1
+; PC64LE9-NEXT:    xscvdpspn 34, 1
+; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <3 x float>
@@ -7556,12 +7553,12 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 {
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI163_0@toc@l
 ; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xxmrghw 34, 1, 0
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
 ; PC64LE9-NEXT:    mtfprd 1, 5
 ; PC64LE9-NEXT:    lxv 0, 0(3)
 ; PC64LE9-NEXT:    xscvsxdsp 1, 1
-; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xxperm 34, 1, 0
+; PC64LE9-NEXT:    xscvdpspn 34, 1
+; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <3 x float>
@@ -8048,14 +8045,13 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i32(<3 x i32> %x) #0 {
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI179_0@toc@l
 ; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xxmrghw 0, 1, 0
-; PC64LE9-NEXT:    lxv 1, 0(3)
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
+; PC64LE9-NEXT:    lxv 0, 0(3)
 ; PC64LE9-NEXT:    mfvsrwz 3, 34
-; PC64LE9-NEXT:    mtfprwz 2, 3
-; PC64LE9-NEXT:    xscvuxdsp 2, 2
-; PC64LE9-NEXT:    xscvdpspn 2, 2
-; PC64LE9-NEXT:    xxperm 0, 2, 1
-; PC64LE9-NEXT:    xxlor 34, 0, 0
+; PC64LE9-NEXT:    mtfprwz 1, 3
+; PC64LE9-NEXT:    xscvuxdsp 1, 1
+; PC64LE9-NEXT:    xscvdpspn 34, 1
+; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <3 x float>
@@ -8123,12 +8119,12 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI181_0@toc@l
 ; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xxmrghw 34, 1, 0
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
 ; PC64LE9-NEXT:    mtfprd 1, 5
 ; PC64LE9-NEXT:    lxv 0, 0(3)
 ; PC64LE9-NEXT:    xscvuxdsp 1, 1
-; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xxperm 34, 1, 0
+; PC64LE9-NEXT:    xscvdpspn 34, 1
+; PC64LE9-NEXT:    xxperm 34, 35, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <3 x float>

diff  --git a/llvm/test/CodeGen/PowerPC/xxperm-swap.ll b/llvm/test/CodeGen/PowerPC/xxperm-swap.ll
new file mode 100644
index 0000000000000..24d4c07afa016
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/xxperm-swap.ll
@@ -0,0 +1,97 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \
+; RUN:   FileCheck %s --check-prefix=CHECK-LE-P9
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \
+; RUN:   FileCheck %s --check-prefix=CHECK-BE-P9
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \
+; RUN:   FileCheck %s --check-prefix=CHECK-AIX-64-P9
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \
+; RUN:   FileCheck %s --check-prefix=CHECK-AIX-32-P9
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
+define dso_local noundef <16 x i8> @test1(<16 x i8> noundef %burn, <16 x i8> noundef %a, <16 x i8> noundef %b) local_unnamed_addr #0 {
+; CHECK-LE-P9-LABEL: test1:
+; CHECK-LE-P9:       # %bb.0: # %entry
+; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI0_0@toc@l
+; CHECK-LE-P9-NEXT:    lxv vs0, 0(r3)
+; CHECK-LE-P9-NEXT:    xxperm v4, v3, vs0
+; CHECK-LE-P9-NEXT:    vaddubm v2, v4, v3
+; CHECK-LE-P9-NEXT:    blr
+;
+; CHECK-BE-P9-LABEL: test1:
+; CHECK-BE-P9:       # %bb.0: # %entry
+; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI0_0@toc@l
+; CHECK-BE-P9-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm v4, v3, vs0
+; CHECK-BE-P9-NEXT:    vaddubm v2, v4, v3
+; CHECK-BE-P9-NEXT:    blr
+;
+; CHECK-AIX-64-P9-LABEL: test1:
+; CHECK-AIX-64-P9:       # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C0(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    lxv vs0, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm v4, v3, vs0
+; CHECK-AIX-64-P9-NEXT:    vaddubm v2, v4, v3
+; CHECK-AIX-64-P9-NEXT:    blr
+;
+; CHECK-AIX-32-P9-LABEL: test1:
+; CHECK-AIX-32-P9:       # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C0(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT:    lxv vs0, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm v4, v3, vs0
+; CHECK-AIX-32-P9-NEXT:    vaddubm v2, v4, v3
+; CHECK-AIX-32-P9-NEXT:    blr
+entry:
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 18, i32 23, i32 12, i32 22, i32 22, i32 22, i32 22, i32 0, i32 0, i32 0, i32 0, i32 9, i32 9, i32 9, i32 9>
+  %add = add <16 x i8> %shuffle, %a
+  ret <16 x i8> %add
+}
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
+define dso_local noundef <16 x i8> @test2(<16 x i8> noundef %burn, <16 x i8> noundef %a, <16 x i8> noundef %b) local_unnamed_addr #0 {
+; CHECK-LE-P9-LABEL: test2:
+; CHECK-LE-P9:       # %bb.0: # %entry
+; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI1_0@toc@l
+; CHECK-LE-P9-NEXT:    lxv vs0, 0(r3)
+; CHECK-LE-P9-NEXT:    xxperm v3, v4, vs0
+; CHECK-LE-P9-NEXT:    vaddubm v2, v3, v4
+; CHECK-LE-P9-NEXT:    blr
+;
+; CHECK-BE-P9-LABEL: test2:
+; CHECK-BE-P9:       # %bb.0: # %entry
+; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI1_0@toc@l
+; CHECK-BE-P9-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm v3, v4, vs0
+; CHECK-BE-P9-NEXT:    vaddubm v2, v3, v4
+; CHECK-BE-P9-NEXT:    blr
+;
+; CHECK-AIX-64-P9-LABEL: test2:
+; CHECK-AIX-64-P9:       # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C1(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    lxv vs0, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm v3, v4, vs0
+; CHECK-AIX-64-P9-NEXT:    vaddubm v2, v3, v4
+; CHECK-AIX-64-P9-NEXT:    blr
+;
+; CHECK-AIX-32-P9-LABEL: test2:
+; CHECK-AIX-32-P9:       # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C1(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT:    lxv vs0, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm v3, v4, vs0
+; CHECK-AIX-32-P9-NEXT:    vaddubm v2, v3, v4
+; CHECK-AIX-32-P9-NEXT:    blr
+entry:
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 18, i32 23, i32 12, i32 22, i32 22, i32 22, i32 22, i32 0, i32 0, i32 0, i32 0, i32 9, i32 9, i32 9, i32 9>
+  %add = add <16 x i8> %shuffle, %b
+  ret <16 x i8> %add
+}
+
+attributes #0 = {nounwind}


        


More information about the llvm-commits mailing list