[llvm] [PowerPC] Improve pwr7 codegen for v4i8 load (PR #104507)

via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 29 12:12:16 PDT 2024


https://github.com/RolandF77 updated https://github.com/llvm/llvm-project/pull/104507

>From f398149f3d48e37ba8cd01f1266918243b957c60 Mon Sep 17 00:00:00 2001
From: Roland Froese <froese at ca.ibm.com>
Date: Thu, 15 Aug 2024 19:30:45 +0000
Subject: [PATCH 1/2] improve pwr7 v4i8 load

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  29 +++-
 .../build-vector-from-load-and-zeros.ll       | 152 ++++++++----------
 .../PowerPC/canonical-merge-shuffles.ll       |  53 +++---
 llvm/test/CodeGen/PowerPC/load-and-splat.ll   | 127 +++++++--------
 llvm/test/CodeGen/PowerPC/pre-inc-disable.ll  |  24 +--
 .../CodeGen/PowerPC/scalar_vector_test_4.ll   |  42 ++---
 .../CodeGen/PowerPC/test-vector-insert.ll     |  84 ++++------
 .../PowerPC/v16i8_scalar_to_vector_shuffle.ll |  30 ++--
 .../PowerPC/v2i64_scalar_to_vector_shuffle.ll |  48 +++---
 .../PowerPC/v4i32_scalar_to_vector_shuffle.ll | 144 +++++++----------
 .../PowerPC/v8i16_scalar_to_vector_shuffle.ll | 100 +++++-------
 11 files changed, 364 insertions(+), 469 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 8ff9f5a5a991e0..a926b226ba738e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11490,13 +11490,38 @@ SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                  SelectionDAG &DAG) const {
   SDLoc dl(Op);
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  SDValue Op0 = Op.getOperand(0);
+  ReuseLoadInfo RLI;
+  if (Subtarget.hasVSX() && Op.getValueType() == MVT::v4i32 &&
+      Op0.getOpcode() == ISD::LOAD && Op0.getValueType() == MVT::i32 &&
+      Op0.hasOneUse() &&
+      canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) {
+
+    MachineMemOperand *MMO =
+        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
+                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
+    SDValue Ops[] = {RLI.Chain, RLI.Ptr};
+    SDValue Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
+                                           DAG.getVTList(MVT::f64, MVT::Other),
+                                           Ops, MVT::i32, MMO);
+    spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
+
+    SDValue ConvVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Bits);
+    SDValue Bitcast = DAG.getBitcast(MVT::v4i32, ConvVec);
+    unsigned LowIx = Subtarget.isLittleEndian() ? 3 : 1;
+    return DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Bitcast,
+                       DAG.getConstant(LowIx, dl, MVT::i32));
+  }
+
   // Create a stack slot that is 16-byte aligned.
-  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
   int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
 
-  SDValue Val = Op.getOperand(0);
+  SDValue Val = Op0;
   EVT ValVT = Val.getValueType();
   // P10 hardware store forwarding requires that a single store contains all
   // the data for the load. P10 is able to merge a pair of adjacent stores. Try
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
index 6d35a7281de6b4..54312fb5cee330 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
@@ -27,20 +27,18 @@ define  <2 x i64> @build_v2i64_extload_0(ptr nocapture noundef readonly %p) {
 ; PWR7-LE-LABEL: build_v2i64_extload_0:
 ; PWR7-LE:       # %bb.0: # %entry
 ; PWR7-LE-NEXT:    li 4, 0
-; PWR7-LE-NEXT:    lwz 3, 0(3)
 ; PWR7-LE-NEXT:    stw 4, -16(1)
-; PWR7-LE-NEXT:    addis 4, 2, .LCPI0_0@toc@ha
-; PWR7-LE-NEXT:    addi 4, 4, .LCPI0_0@toc@l
-; PWR7-LE-NEXT:    stw 3, -32(1)
-; PWR7-LE-NEXT:    addi 3, 1, -32
-; PWR7-LE-NEXT:    lxvd2x 0, 0, 4
-; PWR7-LE-NEXT:    addi 4, 1, -16
-; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
-; PWR7-LE-NEXT:    xxswapd 34, 0
+; PWR7-LE-NEXT:    lfiwax 0, 0, 3
+; PWR7-LE-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
+; PWR7-LE-NEXT:    addi 3, 3, .LCPI0_0@toc@l
+; PWR7-LE-NEXT:    xxspltd 34, 0, 0
 ; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
-; PWR7-LE-NEXT:    xxswapd 35, 1
+; PWR7-LE-NEXT:    addi 3, 1, -16
+; PWR7-LE-NEXT:    xxswapd 35, 0
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
+; PWR7-LE-NEXT:    xxspltw 34, 34, 3
 ; PWR7-LE-NEXT:    xxswapd 36, 0
-; PWR7-LE-NEXT:    vperm 2, 3, 4, 2
+; PWR7-LE-NEXT:    vperm 2, 4, 2, 3
 ; PWR7-LE-NEXT:    blr
 ;
 ; PWR8-LE-LABEL: build_v2i64_extload_0:
@@ -337,17 +335,14 @@ entry:
 define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) {
 ; PWR7-BE-LABEL: build_v4i32_load_0:
 ; PWR7-BE:       # %bb.0: # %entry
-; PWR7-BE-NEXT:    lwz 3, 0(3)
-; PWR7-BE-NEXT:    xxlxor 36, 36, 36
-; PWR7-BE-NEXT:    sldi 3, 3, 32
-; PWR7-BE-NEXT:    std 3, -32(1)
-; PWR7-BE-NEXT:    std 3, -24(1)
+; PWR7-BE-NEXT:    lfiwax 0, 0, 3
 ; PWR7-BE-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
+; PWR7-BE-NEXT:    xxlxor 36, 36, 36
 ; PWR7-BE-NEXT:    addi 3, 3, .LCPI8_0@toc@l
-; PWR7-BE-NEXT:    lxvw4x 34, 0, 3
-; PWR7-BE-NEXT:    addi 3, 1, -32
 ; PWR7-BE-NEXT:    lxvw4x 35, 0, 3
-; PWR7-BE-NEXT:    vperm 2, 3, 4, 2
+; PWR7-BE-NEXT:    xxlor 34, 0, 0
+; PWR7-BE-NEXT:    xxspltw 34, 34, 1
+; PWR7-BE-NEXT:    vperm 2, 2, 4, 3
 ; PWR7-BE-NEXT:    blr
 ;
 ; PWR8-BE-LABEL: build_v4i32_load_0:
@@ -365,20 +360,18 @@ define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) {
 ; PWR7-LE-LABEL: build_v4i32_load_0:
 ; PWR7-LE:       # %bb.0: # %entry
 ; PWR7-LE-NEXT:    li 4, 0
-; PWR7-LE-NEXT:    lwz 3, 0(3)
 ; PWR7-LE-NEXT:    stw 4, -16(1)
-; PWR7-LE-NEXT:    addis 4, 2, .LCPI8_0@toc@ha
-; PWR7-LE-NEXT:    addi 4, 4, .LCPI8_0@toc@l
-; PWR7-LE-NEXT:    stw 3, -32(1)
-; PWR7-LE-NEXT:    addi 3, 1, -32
-; PWR7-LE-NEXT:    lxvd2x 0, 0, 4
-; PWR7-LE-NEXT:    addi 4, 1, -16
-; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
-; PWR7-LE-NEXT:    xxswapd 34, 0
+; PWR7-LE-NEXT:    lfiwax 0, 0, 3
+; PWR7-LE-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
+; PWR7-LE-NEXT:    addi 3, 3, .LCPI8_0@toc@l
+; PWR7-LE-NEXT:    xxspltd 34, 0, 0
 ; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
-; PWR7-LE-NEXT:    xxswapd 35, 1
+; PWR7-LE-NEXT:    addi 3, 1, -16
+; PWR7-LE-NEXT:    xxswapd 35, 0
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
+; PWR7-LE-NEXT:    xxspltw 34, 34, 3
 ; PWR7-LE-NEXT:    xxswapd 36, 0
-; PWR7-LE-NEXT:    vperm 2, 3, 4, 2
+; PWR7-LE-NEXT:    vperm 2, 4, 2, 3
 ; PWR7-LE-NEXT:    blr
 ;
 ; PWR8-LE-LABEL: build_v4i32_load_0:
@@ -400,17 +393,14 @@ entry:
 define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) {
 ; PWR7-BE-LABEL: build_v4i32_load_1:
 ; PWR7-BE:       # %bb.0: # %entry
-; PWR7-BE-NEXT:    lwz 3, 0(3)
-; PWR7-BE-NEXT:    xxlxor 36, 36, 36
-; PWR7-BE-NEXT:    sldi 3, 3, 32
-; PWR7-BE-NEXT:    std 3, -16(1)
-; PWR7-BE-NEXT:    std 3, -8(1)
+; PWR7-BE-NEXT:    lfiwax 0, 0, 3
 ; PWR7-BE-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
+; PWR7-BE-NEXT:    xxlxor 36, 36, 36
 ; PWR7-BE-NEXT:    addi 3, 3, .LCPI9_0@toc@l
-; PWR7-BE-NEXT:    lxvw4x 34, 0, 3
-; PWR7-BE-NEXT:    addi 3, 1, -16
 ; PWR7-BE-NEXT:    lxvw4x 35, 0, 3
-; PWR7-BE-NEXT:    vperm 2, 4, 3, 2
+; PWR7-BE-NEXT:    xxlor 34, 0, 0
+; PWR7-BE-NEXT:    xxspltw 34, 34, 1
+; PWR7-BE-NEXT:    vperm 2, 4, 2, 3
 ; PWR7-BE-NEXT:    blr
 ;
 ; PWR8-BE-LABEL: build_v4i32_load_1:
@@ -427,20 +417,18 @@ define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) {
 ; PWR7-LE-LABEL: build_v4i32_load_1:
 ; PWR7-LE:       # %bb.0: # %entry
 ; PWR7-LE-NEXT:    li 4, 0
-; PWR7-LE-NEXT:    lwz 3, 0(3)
-; PWR7-LE-NEXT:    stw 4, -32(1)
-; PWR7-LE-NEXT:    addis 4, 2, .LCPI9_0@toc@ha
-; PWR7-LE-NEXT:    addi 4, 4, .LCPI9_0@toc@l
-; PWR7-LE-NEXT:    stw 3, -16(1)
+; PWR7-LE-NEXT:    stw 4, -16(1)
+; PWR7-LE-NEXT:    lfiwax 0, 0, 3
+; PWR7-LE-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
+; PWR7-LE-NEXT:    addi 3, 3, .LCPI9_0@toc@l
+; PWR7-LE-NEXT:    xxspltd 34, 0, 0
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
 ; PWR7-LE-NEXT:    addi 3, 1, -16
-; PWR7-LE-NEXT:    lxvd2x 0, 0, 4
-; PWR7-LE-NEXT:    addi 4, 1, -32
-; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
-; PWR7-LE-NEXT:    xxswapd 34, 0
+; PWR7-LE-NEXT:    xxswapd 35, 0
 ; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
-; PWR7-LE-NEXT:    xxswapd 35, 1
+; PWR7-LE-NEXT:    xxspltw 34, 34, 3
 ; PWR7-LE-NEXT:    xxswapd 36, 0
-; PWR7-LE-NEXT:    vperm 2, 4, 3, 2
+; PWR7-LE-NEXT:    vperm 2, 2, 4, 3
 ; PWR7-LE-NEXT:    blr
 ;
 ; PWR8-LE-LABEL: build_v4i32_load_1:
@@ -463,17 +451,14 @@ entry:
 define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) {
 ; PWR7-BE-LABEL: build_v4i32_load_2:
 ; PWR7-BE:       # %bb.0: # %entry
-; PWR7-BE-NEXT:    lwz 3, 0(3)
-; PWR7-BE-NEXT:    xxlxor 36, 36, 36
-; PWR7-BE-NEXT:    sldi 3, 3, 32
-; PWR7-BE-NEXT:    std 3, -16(1)
-; PWR7-BE-NEXT:    std 3, -8(1)
+; PWR7-BE-NEXT:    lfiwax 0, 0, 3
 ; PWR7-BE-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
+; PWR7-BE-NEXT:    xxlxor 36, 36, 36
 ; PWR7-BE-NEXT:    addi 3, 3, .LCPI10_0@toc@l
-; PWR7-BE-NEXT:    lxvw4x 34, 0, 3
-; PWR7-BE-NEXT:    addi 3, 1, -16
 ; PWR7-BE-NEXT:    lxvw4x 35, 0, 3
-; PWR7-BE-NEXT:    vperm 2, 4, 3, 2
+; PWR7-BE-NEXT:    xxlor 34, 0, 0
+; PWR7-BE-NEXT:    xxspltw 34, 34, 1
+; PWR7-BE-NEXT:    vperm 2, 4, 2, 3
 ; PWR7-BE-NEXT:    blr
 ;
 ; PWR8-BE-LABEL: build_v4i32_load_2:
@@ -491,20 +476,18 @@ define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) {
 ; PWR7-LE-LABEL: build_v4i32_load_2:
 ; PWR7-LE:       # %bb.0: # %entry
 ; PWR7-LE-NEXT:    li 4, 0
-; PWR7-LE-NEXT:    lwz 3, 0(3)
-; PWR7-LE-NEXT:    stw 4, -32(1)
-; PWR7-LE-NEXT:    addis 4, 2, .LCPI10_0@toc@ha
-; PWR7-LE-NEXT:    addi 4, 4, .LCPI10_0@toc@l
-; PWR7-LE-NEXT:    stw 3, -16(1)
+; PWR7-LE-NEXT:    stw 4, -16(1)
+; PWR7-LE-NEXT:    lfiwax 0, 0, 3
+; PWR7-LE-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
+; PWR7-LE-NEXT:    addi 3, 3, .LCPI10_0@toc@l
+; PWR7-LE-NEXT:    xxspltd 34, 0, 0
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
 ; PWR7-LE-NEXT:    addi 3, 1, -16
-; PWR7-LE-NEXT:    lxvd2x 0, 0, 4
-; PWR7-LE-NEXT:    addi 4, 1, -32
-; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
-; PWR7-LE-NEXT:    xxswapd 34, 0
+; PWR7-LE-NEXT:    xxswapd 35, 0
 ; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
-; PWR7-LE-NEXT:    xxswapd 35, 1
+; PWR7-LE-NEXT:    xxspltw 34, 34, 3
 ; PWR7-LE-NEXT:    xxswapd 36, 0
-; PWR7-LE-NEXT:    vperm 2, 4, 3, 2
+; PWR7-LE-NEXT:    vperm 2, 2, 4, 3
 ; PWR7-LE-NEXT:    blr
 ;
 ; PWR8-LE-LABEL: build_v4i32_load_2:
@@ -526,17 +509,14 @@ entry:
 define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) {
 ; PWR7-BE-LABEL: build_v4i32_load_3:
 ; PWR7-BE:       # %bb.0: # %entry
-; PWR7-BE-NEXT:    lwz 3, 0(3)
-; PWR7-BE-NEXT:    xxlxor 36, 36, 36
-; PWR7-BE-NEXT:    sldi 3, 3, 32
-; PWR7-BE-NEXT:    std 3, -16(1)
-; PWR7-BE-NEXT:    std 3, -8(1)
+; PWR7-BE-NEXT:    lfiwax 0, 0, 3
 ; PWR7-BE-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
+; PWR7-BE-NEXT:    xxlxor 36, 36, 36
 ; PWR7-BE-NEXT:    addi 3, 3, .LCPI11_0@toc@l
-; PWR7-BE-NEXT:    lxvw4x 34, 0, 3
-; PWR7-BE-NEXT:    addi 3, 1, -16
 ; PWR7-BE-NEXT:    lxvw4x 35, 0, 3
-; PWR7-BE-NEXT:    vperm 2, 4, 3, 2
+; PWR7-BE-NEXT:    xxlor 34, 0, 0
+; PWR7-BE-NEXT:    xxspltw 34, 34, 1
+; PWR7-BE-NEXT:    vperm 2, 4, 2, 3
 ; PWR7-BE-NEXT:    blr
 ;
 ; PWR8-BE-LABEL: build_v4i32_load_3:
@@ -553,20 +533,18 @@ define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) {
 ; PWR7-LE-LABEL: build_v4i32_load_3:
 ; PWR7-LE:       # %bb.0: # %entry
 ; PWR7-LE-NEXT:    li 4, 0
-; PWR7-LE-NEXT:    lwz 3, 0(3)
-; PWR7-LE-NEXT:    stw 4, -32(1)
-; PWR7-LE-NEXT:    addis 4, 2, .LCPI11_0@toc@ha
-; PWR7-LE-NEXT:    addi 4, 4, .LCPI11_0@toc@l
-; PWR7-LE-NEXT:    stw 3, -16(1)
+; PWR7-LE-NEXT:    stw 4, -16(1)
+; PWR7-LE-NEXT:    lfiwax 0, 0, 3
+; PWR7-LE-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
+; PWR7-LE-NEXT:    addi 3, 3, .LCPI11_0@toc@l
+; PWR7-LE-NEXT:    xxspltd 34, 0, 0
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
 ; PWR7-LE-NEXT:    addi 3, 1, -16
-; PWR7-LE-NEXT:    lxvd2x 0, 0, 4
-; PWR7-LE-NEXT:    addi 4, 1, -32
-; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
-; PWR7-LE-NEXT:    xxswapd 34, 0
+; PWR7-LE-NEXT:    xxswapd 35, 0
 ; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
-; PWR7-LE-NEXT:    xxswapd 35, 1
+; PWR7-LE-NEXT:    xxspltw 34, 34, 3
 ; PWR7-LE-NEXT:    xxswapd 36, 0
-; PWR7-LE-NEXT:    vperm 2, 4, 3, 2
+; PWR7-LE-NEXT:    vperm 2, 2, 4, 3
 ; PWR7-LE-NEXT:    blr
 ;
 ; PWR8-LE-LABEL: build_v4i32_load_3:
diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
index c26f98c5b0495d..5e73999c80b5ac 100644
--- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
+++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
@@ -536,15 +536,12 @@ define dso_local <8 x i16> @testmrglb3(ptr nocapture readonly %a) local_unnamed_
 ;
 ; P8-AIX-32-LABEL: testmrglb3:
 ; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lwz r4, 4(r3)
+; P8-AIX-32-NEXT:    li r4, 4
+; P8-AIX-32-NEXT:    lfiwax f1, 0, r3
 ; P8-AIX-32-NEXT:    xxlxor v3, v3, v3
-; P8-AIX-32-NEXT:    stw r4, -16(r1)
-; P8-AIX-32-NEXT:    lwz r3, 0(r3)
-; P8-AIX-32-NEXT:    stw r3, -32(r1)
-; P8-AIX-32-NEXT:    addi r3, r1, -16
-; P8-AIX-32-NEXT:    lxvw4x vs0, 0, r3
-; P8-AIX-32-NEXT:    addi r3, r1, -32
-; P8-AIX-32-NEXT:    lxvw4x vs1, 0, r3
+; P8-AIX-32-NEXT:    lfiwax f0, r3, r4
+; P8-AIX-32-NEXT:    xxspltw vs1, vs1, 1
+; P8-AIX-32-NEXT:    xxspltw vs0, vs0, 1
 ; P8-AIX-32-NEXT:    xxmrghw v2, vs1, vs0
 ; P8-AIX-32-NEXT:    vmrghb v2, v3, v2
 ; P8-AIX-32-NEXT:    blr
@@ -852,17 +849,15 @@ define dso_local <16 x i8> @no_RAUW_in_combine_during_legalize(ptr nocapture rea
 ;
 ; P8-AIX-32-LABEL: no_RAUW_in_combine_during_legalize:
 ; P8-AIX-32:       # %bb.0: # %entry
+; P8-AIX-32-NEXT:    li r5, 0
 ; P8-AIX-32-NEXT:    slwi r4, r4, 2
 ; P8-AIX-32-NEXT:    xxlxor v3, v3, v3
-; P8-AIX-32-NEXT:    lwzx r3, r3, r4
-; P8-AIX-32-NEXT:    li r4, 0
-; P8-AIX-32-NEXT:    stw r4, -32(r1)
-; P8-AIX-32-NEXT:    stw r3, -16(r1)
-; P8-AIX-32-NEXT:    addi r3, r1, -32
-; P8-AIX-32-NEXT:    lxvw4x vs0, 0, r3
+; P8-AIX-32-NEXT:    stw r5, -16(r1)
+; P8-AIX-32-NEXT:    lfiwax f0, r3, r4
 ; P8-AIX-32-NEXT:    addi r3, r1, -16
 ; P8-AIX-32-NEXT:    lxvw4x vs1, 0, r3
-; P8-AIX-32-NEXT:    xxmrghw v2, vs0, vs1
+; P8-AIX-32-NEXT:    xxspltw vs0, vs0, 1
+; P8-AIX-32-NEXT:    xxmrghw v2, vs1, vs0
 ; P8-AIX-32-NEXT:    vmrghb v2, v2, v3
 ; P8-AIX-32-NEXT:    blr
 entry:
@@ -1026,14 +1021,11 @@ define dso_local <2 x i64> @testSplat8(ptr nocapture readonly %ptr) local_unname
 ;
 ; P8-AIX-32-LABEL: testSplat8:
 ; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lwz r4, 4(r3)
-; P8-AIX-32-NEXT:    stw r4, -16(r1)
-; P8-AIX-32-NEXT:    lwz r3, 0(r3)
-; P8-AIX-32-NEXT:    stw r3, -32(r1)
-; P8-AIX-32-NEXT:    addi r3, r1, -16
-; P8-AIX-32-NEXT:    lxvw4x vs0, 0, r3
-; P8-AIX-32-NEXT:    addi r3, r1, -32
-; P8-AIX-32-NEXT:    lxvw4x vs1, 0, r3
+; P8-AIX-32-NEXT:    li r4, 4
+; P8-AIX-32-NEXT:    lfiwax f1, 0, r3
+; P8-AIX-32-NEXT:    lfiwax f0, r3, r4
+; P8-AIX-32-NEXT:    xxspltw vs1, vs1, 1
+; P8-AIX-32-NEXT:    xxspltw vs0, vs0, 1
 ; P8-AIX-32-NEXT:    xxmrghw vs0, vs1, vs0
 ; P8-AIX-32-NEXT:    xxmrghd v2, vs0, vs0
 ; P8-AIX-32-NEXT:    blr
@@ -1081,17 +1073,14 @@ define <2 x i64> @testSplati64_0(ptr nocapture readonly %ptr) #0 {
 ;
 ; P8-AIX-32-LABEL: testSplati64_0:
 ; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lwz r4, 0(r3)
-; P8-AIX-32-NEXT:    lwz r3, 4(r3)
-; P8-AIX-32-NEXT:    stw r3, -16(r1)
+; P8-AIX-32-NEXT:    li r4, 4
+; P8-AIX-32-NEXT:    lfiwax f0, r3, r4
+; P8-AIX-32-NEXT:    xxspltw v2, vs0, 1
+; P8-AIX-32-NEXT:    lfiwax f0, 0, r3
 ; P8-AIX-32-NEXT:    lwz r3, L..C3(r2) # %const.0
-; P8-AIX-32-NEXT:    stw r4, -32(r1)
-; P8-AIX-32-NEXT:    lxvw4x v2, 0, r3
-; P8-AIX-32-NEXT:    addi r3, r1, -16
-; P8-AIX-32-NEXT:    lxvw4x v3, 0, r3
-; P8-AIX-32-NEXT:    addi r3, r1, -32
 ; P8-AIX-32-NEXT:    lxvw4x v4, 0, r3
-; P8-AIX-32-NEXT:    vperm v2, v4, v3, v2
+; P8-AIX-32-NEXT:    xxspltw v3, vs0, 1
+; P8-AIX-32-NEXT:    vperm v2, v3, v2, v4
 ; P8-AIX-32-NEXT:    blr
 entry:
   %0 = load <1 x i64>, ptr %ptr, align 8
diff --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
index bc68ad2a67bf5d..38f47838a42fbf 100644
--- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll
+++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
@@ -208,47 +208,45 @@ define dso_local void @test4(ptr nocapture %c, ptr nocapture readonly %a) local_
 ;
 ; P9-AIX32-LABEL: test4:
 ; P9-AIX32:       # %bb.0: # %entry
-; P9-AIX32-NEXT:    lwz r5, 24(r4)
-; P9-AIX32-NEXT:    lwz r4, 28(r4)
-; P9-AIX32-NEXT:    stw r4, -16(r1)
+; P9-AIX32-NEXT:    li r5, 28
+; P9-AIX32-NEXT:    lfiwax f0, r4, r5
+; P9-AIX32-NEXT:    li r5, 24
+; P9-AIX32-NEXT:    lfiwax f1, r4, r5
 ; P9-AIX32-NEXT:    lwz r4, L..C0(r2) # %const.0
-; P9-AIX32-NEXT:    stw r5, -32(r1)
-; P9-AIX32-NEXT:    lxv vs1, -16(r1)
-; P9-AIX32-NEXT:    lxv vs2, -32(r1)
-; P9-AIX32-NEXT:    lxv vs0, 0(r4)
-; P9-AIX32-NEXT:    xxperm vs1, vs2, vs0
-; P9-AIX32-NEXT:    stxv vs1, 0(r3)
+; P9-AIX32-NEXT:    xxspltw vs0, vs0, 1
+; P9-AIX32-NEXT:    lxv vs2, 0(r4)
+; P9-AIX32-NEXT:    xxspltw vs1, vs1, 1
+; P9-AIX32-NEXT:    xxperm vs0, vs1, vs2
+; P9-AIX32-NEXT:    stxv vs0, 0(r3)
 ; P9-AIX32-NEXT:    blr
 ;
 ; P8-AIX32-LABEL: test4:
 ; P8-AIX32:       # %bb.0: # %entry
-; P8-AIX32-NEXT:    lwz r5, 24(r4)
-; P8-AIX32-NEXT:    lwz r4, 28(r4)
-; P8-AIX32-NEXT:    stw r4, -16(r1)
+; P8-AIX32-NEXT:    li r5, 28
+; P8-AIX32-NEXT:    lfiwax f0, r4, r5
+; P8-AIX32-NEXT:    li r5, 24
+; P8-AIX32-NEXT:    xxspltw v2, vs0, 1
+; P8-AIX32-NEXT:    lfiwax f0, r4, r5
 ; P8-AIX32-NEXT:    lwz r4, L..C0(r2) # %const.0
-; P8-AIX32-NEXT:    stw r5, -32(r1)
-; P8-AIX32-NEXT:    lxvw4x v2, 0, r4
-; P8-AIX32-NEXT:    addi r4, r1, -16
-; P8-AIX32-NEXT:    lxvw4x v3, 0, r4
-; P8-AIX32-NEXT:    addi r4, r1, -32
 ; P8-AIX32-NEXT:    lxvw4x v4, 0, r4
-; P8-AIX32-NEXT:    vperm v2, v4, v3, v2
+; P8-AIX32-NEXT:    xxspltw v3, vs0, 1
+; P8-AIX32-NEXT:    vperm v2, v3, v2, v4
 ; P8-AIX32-NEXT:    stxvw4x v2, 0, r3
 ; P8-AIX32-NEXT:    blr
 ;
 ; P7-AIX32-LABEL: test4:
 ; P7-AIX32:       # %bb.0: # %entry
-; P7-AIX32-NEXT:    lwz r5, 24(r4)
-; P7-AIX32-NEXT:    lwz r4, 28(r4)
-; P7-AIX32-NEXT:    stw r4, -16(r1)
+; P7-AIX32-NEXT:    li r5, 28
+; P7-AIX32-NEXT:    lfiwax f0, r4, r5
+; P7-AIX32-NEXT:    li r5, 24
+; P7-AIX32-NEXT:    xxlor v2, f0, f0
+; P7-AIX32-NEXT:    xxspltw v2, v2, 1
+; P7-AIX32-NEXT:    lfiwax f0, r4, r5
 ; P7-AIX32-NEXT:    lwz r4, L..C0(r2) # %const.0
-; P7-AIX32-NEXT:    stw r5, -32(r1)
-; P7-AIX32-NEXT:    lxvw4x v2, 0, r4
-; P7-AIX32-NEXT:    addi r4, r1, -16
-; P7-AIX32-NEXT:    lxvw4x v3, 0, r4
-; P7-AIX32-NEXT:    addi r4, r1, -32
 ; P7-AIX32-NEXT:    lxvw4x v4, 0, r4
-; P7-AIX32-NEXT:    vperm v2, v4, v3, v2
+; P7-AIX32-NEXT:    xxlor v3, f0, f0
+; P7-AIX32-NEXT:    xxspltw v3, v3, 1
+; P7-AIX32-NEXT:    vperm v2, v3, v2, v4
 ; P7-AIX32-NEXT:    stxvw4x v2, 0, r3
 ; P7-AIX32-NEXT:    blr
 entry:
@@ -362,47 +360,43 @@ define void @test6(ptr %a, ptr %in) {
 ;
 ; P9-AIX32-LABEL: test6:
 ; P9-AIX32:       # %bb.0: # %entry
-; P9-AIX32-NEXT:    lwz r4, 0(r4)
 ; P9-AIX32-NEXT:    li r5, 0
-; P9-AIX32-NEXT:    stw r5, -32(r1)
-; P9-AIX32-NEXT:    lxv vs1, -32(r1)
-; P9-AIX32-NEXT:    stw r4, -16(r1)
-; P9-AIX32-NEXT:    lwz r4, L..C2(r2) # %const.0
+; P9-AIX32-NEXT:    stw r5, -16(r1)
+; P9-AIX32-NEXT:    lwz r5, L..C2(r2) # %const.0
+; P9-AIX32-NEXT:    lfiwax f1, 0, r4
 ; P9-AIX32-NEXT:    lxv vs2, -16(r1)
-; P9-AIX32-NEXT:    lxv vs0, 0(r4)
-; P9-AIX32-NEXT:    xxperm vs2, vs1, vs0
-; P9-AIX32-NEXT:    stxv vs2, 0(r3)
+; P9-AIX32-NEXT:    lxv vs0, 0(r5)
+; P9-AIX32-NEXT:    xxspltw vs1, vs1, 1
+; P9-AIX32-NEXT:    xxperm vs1, vs2, vs0
+; P9-AIX32-NEXT:    stxv vs1, 0(r3)
 ; P9-AIX32-NEXT:    blr
 ;
 ; P8-AIX32-LABEL: test6:
 ; P8-AIX32:       # %bb.0: # %entry
-; P8-AIX32-NEXT:    lwz r4, 0(r4)
 ; P8-AIX32-NEXT:    li r5, 0
-; P8-AIX32-NEXT:    stw r5, -32(r1)
-; P8-AIX32-NEXT:    stw r4, -16(r1)
+; P8-AIX32-NEXT:    stw r5, -16(r1)
+; P8-AIX32-NEXT:    lfiwax f0, 0, r4
 ; P8-AIX32-NEXT:    lwz r4, L..C2(r2) # %const.0
-; P8-AIX32-NEXT:    lxvw4x v2, 0, r4
-; P8-AIX32-NEXT:    addi r4, r1, -32
 ; P8-AIX32-NEXT:    lxvw4x v3, 0, r4
 ; P8-AIX32-NEXT:    addi r4, r1, -16
 ; P8-AIX32-NEXT:    lxvw4x v4, 0, r4
-; P8-AIX32-NEXT:    vperm v2, v3, v4, v2
+; P8-AIX32-NEXT:    xxspltw v2, vs0, 1
+; P8-AIX32-NEXT:    vperm v2, v4, v2, v3
 ; P8-AIX32-NEXT:    stxvw4x v2, 0, r3
 ; P8-AIX32-NEXT:    blr
 ;
 ; P7-AIX32-LABEL: test6:
 ; P7-AIX32:       # %bb.0: # %entry
-; P7-AIX32-NEXT:    lwz r4, 0(r4)
 ; P7-AIX32-NEXT:    li r5, 0
-; P7-AIX32-NEXT:    stw r5, -32(r1)
-; P7-AIX32-NEXT:    stw r4, -16(r1)
+; P7-AIX32-NEXT:    stw r5, -16(r1)
+; P7-AIX32-NEXT:    lfiwax f0, 0, r4
 ; P7-AIX32-NEXT:    lwz r4, L..C2(r2) # %const.0
-; P7-AIX32-NEXT:    lxvw4x v2, 0, r4
-; P7-AIX32-NEXT:    addi r4, r1, -32
 ; P7-AIX32-NEXT:    lxvw4x v3, 0, r4
 ; P7-AIX32-NEXT:    addi r4, r1, -16
+; P7-AIX32-NEXT:    xxlor v2, f0, f0
 ; P7-AIX32-NEXT:    lxvw4x v4, 0, r4
-; P7-AIX32-NEXT:    vperm v2, v3, v4, v2
+; P7-AIX32-NEXT:    xxspltw v2, v2, 1
+; P7-AIX32-NEXT:    vperm v2, v4, v2, v3
 ; P7-AIX32-NEXT:    stxvw4x v2, 0, r3
 ; P7-AIX32-NEXT:    blr
 entry:
@@ -810,40 +804,35 @@ define <16 x i8> @unadjusted_lxvdsx(ptr %s, ptr %t) {
 ;
 ; P9-AIX32-LABEL: unadjusted_lxvdsx:
 ; P9-AIX32:       # %bb.0: # %entry
-; P9-AIX32-NEXT:    lwz r4, 4(r3)
-; P9-AIX32-NEXT:    stw r4, -16(r1)
-; P9-AIX32-NEXT:    lwz r3, 0(r3)
-; P9-AIX32-NEXT:    lxv vs0, -16(r1)
-; P9-AIX32-NEXT:    stw r3, -32(r1)
-; P9-AIX32-NEXT:    lxv vs1, -32(r1)
+; P9-AIX32-NEXT:    li r4, 4
+; P9-AIX32-NEXT:    lfiwax f1, 0, r3
+; P9-AIX32-NEXT:    xxspltw vs1, vs1, 1
+; P9-AIX32-NEXT:    lfiwax f0, r3, r4
+; P9-AIX32-NEXT:    xxspltw vs0, vs0, 1
 ; P9-AIX32-NEXT:    xxmrghw vs0, vs1, vs0
 ; P9-AIX32-NEXT:    xxmrghd v2, vs0, vs0
 ; P9-AIX32-NEXT:    blr
 ;
 ; P8-AIX32-LABEL: unadjusted_lxvdsx:
 ; P8-AIX32:       # %bb.0: # %entry
-; P8-AIX32-NEXT:    lwz r4, 4(r3)
-; P8-AIX32-NEXT:    stw r4, -16(r1)
-; P8-AIX32-NEXT:    lwz r3, 0(r3)
-; P8-AIX32-NEXT:    stw r3, -32(r1)
-; P8-AIX32-NEXT:    addi r3, r1, -16
-; P8-AIX32-NEXT:    lxvw4x vs0, 0, r3
-; P8-AIX32-NEXT:    addi r3, r1, -32
-; P8-AIX32-NEXT:    lxvw4x vs1, 0, r3
+; P8-AIX32-NEXT:    li r4, 4
+; P8-AIX32-NEXT:    lfiwax f1, 0, r3
+; P8-AIX32-NEXT:    lfiwax f0, r3, r4
+; P8-AIX32-NEXT:    xxspltw vs1, vs1, 1
+; P8-AIX32-NEXT:    xxspltw vs0, vs0, 1
 ; P8-AIX32-NEXT:    xxmrghw vs0, vs1, vs0
 ; P8-AIX32-NEXT:    xxmrghd v2, vs0, vs0
 ; P8-AIX32-NEXT:    blr
 ;
 ; P7-AIX32-LABEL: unadjusted_lxvdsx:
 ; P7-AIX32:       # %bb.0: # %entry
-; P7-AIX32-NEXT:    lwz r4, 4(r3)
-; P7-AIX32-NEXT:    stw r4, -16(r1)
-; P7-AIX32-NEXT:    lwz r3, 0(r3)
-; P7-AIX32-NEXT:    stw r3, -32(r1)
-; P7-AIX32-NEXT:    addi r3, r1, -16
-; P7-AIX32-NEXT:    lxvw4x vs0, 0, r3
-; P7-AIX32-NEXT:    addi r3, r1, -32
-; P7-AIX32-NEXT:    lxvw4x vs1, 0, r3
+; P7-AIX32-NEXT:    li r4, 4
+; P7-AIX32-NEXT:    lfiwax f1, 0, r3
+; P7-AIX32-NEXT:    lfiwax f0, r3, r4
+; P7-AIX32-NEXT:    xxlor v2, f0, f0
+; P7-AIX32-NEXT:    xxspltw vs0, v2, 1
+; P7-AIX32-NEXT:    xxlor v2, f1, f1
+; P7-AIX32-NEXT:    xxspltw vs1, v2, 1
 ; P7-AIX32-NEXT:    xxmrghw vs0, vs1, vs0
 ; P7-AIX32-NEXT:    xxmrghd v2, vs0, vs0
 ; P7-AIX32-NEXT:    blr
diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
index 4da36c9af5c101..db7a0292e036b7 100644
--- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
+++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
@@ -85,23 +85,23 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
 ;
 ; P9BE-AIX32-LABEL: test64:
 ; P9BE-AIX32:       # %bb.0: # %entry
-; P9BE-AIX32-NEXT:    lwzux 4, 3, 4
+; P9BE-AIX32-NEXT:    add 5, 3, 4
+; P9BE-AIX32-NEXT:    lfiwax 0, 3, 4
+; P9BE-AIX32-NEXT:    li 3, 4
 ; P9BE-AIX32-NEXT:    xxlxor 2, 2, 2
 ; P9BE-AIX32-NEXT:    vspltisw 4, 8
-; P9BE-AIX32-NEXT:    stw 4, -48(1)
+; P9BE-AIX32-NEXT:    xxspltw 0, 0, 1
+; P9BE-AIX32-NEXT:    lfiwax 1, 5, 3
+; P9BE-AIX32-NEXT:    lwz 3, L..C0(2) # %const.0
 ; P9BE-AIX32-NEXT:    vadduwm 4, 4, 4
-; P9BE-AIX32-NEXT:    lwz 4, 4(3)
-; P9BE-AIX32-NEXT:    lxv 0, -48(1)
-; P9BE-AIX32-NEXT:    stw 4, -32(1)
-; P9BE-AIX32-NEXT:    lwz 4, L..C0(2) # %const.0
-; P9BE-AIX32-NEXT:    lxv 1, -32(1)
-; P9BE-AIX32-NEXT:    lwz 3, 8(3)
-; P9BE-AIX32-NEXT:    stw 3, -16(1)
-; P9BE-AIX32-NEXT:    lwz 3, L..C1(2) # %const.1
+; P9BE-AIX32-NEXT:    xxspltw 1, 1, 1
 ; P9BE-AIX32-NEXT:    xxmrghw 2, 0, 1
-; P9BE-AIX32-NEXT:    lxv 0, 0(4)
+; P9BE-AIX32-NEXT:    lxv 0, 0(3)
+; P9BE-AIX32-NEXT:    li 3, 8
 ; P9BE-AIX32-NEXT:    xxperm 2, 2, 0
-; P9BE-AIX32-NEXT:    lxv 0, -16(1)
+; P9BE-AIX32-NEXT:    lfiwax 0, 5, 3
+; P9BE-AIX32-NEXT:    lwz 3, L..C1(2) # %const.1
+; P9BE-AIX32-NEXT:    xxspltw 0, 0, 1
 ; P9BE-AIX32-NEXT:    xxmrghw 3, 1, 0
 ; P9BE-AIX32-NEXT:    lxv 0, 0(3)
 ; P9BE-AIX32-NEXT:    xxperm 3, 3, 0
diff --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
index 25e1baa28f7ef3..be2a5d039d04ff 100644
--- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
@@ -73,13 +73,11 @@ define <4 x i32> @s2v_test1(ptr nocapture readonly %int32, <4 x i32> %vec)  {
 ;
 ; P8-AIX-32-LABEL: s2v_test1:
 ; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lwz r3, 0(r3)
-; P8-AIX-32-NEXT:    stw r3, -16(r1)
+; P8-AIX-32-NEXT:    lfiwax f0, 0, r3
 ; P8-AIX-32-NEXT:    lwz r3, L..C0(r2) # %const.0
-; P8-AIX-32-NEXT:    lxvw4x v3, 0, r3
-; P8-AIX-32-NEXT:    addi r3, r1, -16
 ; P8-AIX-32-NEXT:    lxvw4x v4, 0, r3
-; P8-AIX-32-NEXT:    vperm v2, v4, v2, v3
+; P8-AIX-32-NEXT:    xxspltw v3, vs0, 1
+; P8-AIX-32-NEXT:    vperm v2, v3, v2, v4
 ; P8-AIX-32-NEXT:    blr
 entry:
   %0 = load i32, ptr %int32, align 4
@@ -142,13 +140,12 @@ define <4 x i32> @s2v_test2(ptr nocapture readonly %int32, <4 x i32> %vec)  {
 ;
 ; P8-AIX-32-LABEL: s2v_test2:
 ; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lwz r3, 4(r3)
-; P8-AIX-32-NEXT:    stw r3, -16(r1)
+; P8-AIX-32-NEXT:    addi r3, r3, 4
+; P8-AIX-32-NEXT:    lfiwax f0, 0, r3
 ; P8-AIX-32-NEXT:    lwz r3, L..C1(r2) # %const.0
-; P8-AIX-32-NEXT:    lxvw4x v3, 0, r3
-; P8-AIX-32-NEXT:    addi r3, r1, -16
 ; P8-AIX-32-NEXT:    lxvw4x v4, 0, r3
-; P8-AIX-32-NEXT:    vperm v2, v4, v2, v3
+; P8-AIX-32-NEXT:    xxspltw v3, vs0, 1
+; P8-AIX-32-NEXT:    vperm v2, v3, v2, v4
 ; P8-AIX-32-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds i32, ptr %int32, i64 1
@@ -224,13 +221,11 @@ define <4 x i32> @s2v_test3(ptr nocapture readonly %int32, <4 x i32> %vec, i32 s
 ; P8-AIX-32-LABEL: s2v_test3:
 ; P8-AIX-32:       # %bb.0: # %entry
 ; P8-AIX-32-NEXT:    slwi r4, r4, 2
-; P8-AIX-32-NEXT:    lwzx r3, r3, r4
-; P8-AIX-32-NEXT:    stw r3, -16(r1)
+; P8-AIX-32-NEXT:    lfiwax f0, r3, r4
 ; P8-AIX-32-NEXT:    lwz r3, L..C2(r2) # %const.0
-; P8-AIX-32-NEXT:    lxvw4x v3, 0, r3
-; P8-AIX-32-NEXT:    addi r3, r1, -16
 ; P8-AIX-32-NEXT:    lxvw4x v4, 0, r3
-; P8-AIX-32-NEXT:    vperm v2, v4, v2, v3
+; P8-AIX-32-NEXT:    xxspltw v3, vs0, 1
+; P8-AIX-32-NEXT:    vperm v2, v3, v2, v4
 ; P8-AIX-32-NEXT:    blr
 entry:
   %idxprom = sext i32 %Idx to i64
@@ -295,13 +290,12 @@ define <4 x i32> @s2v_test4(ptr nocapture readonly %int32, <4 x i32> %vec)  {
 ;
 ; P8-AIX-32-LABEL: s2v_test4:
 ; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lwz r3, 4(r3)
-; P8-AIX-32-NEXT:    stw r3, -16(r1)
+; P8-AIX-32-NEXT:    addi r3, r3, 4
+; P8-AIX-32-NEXT:    lfiwax f0, 0, r3
 ; P8-AIX-32-NEXT:    lwz r3, L..C3(r2) # %const.0
-; P8-AIX-32-NEXT:    lxvw4x v3, 0, r3
-; P8-AIX-32-NEXT:    addi r3, r1, -16
 ; P8-AIX-32-NEXT:    lxvw4x v4, 0, r3
-; P8-AIX-32-NEXT:    vperm v2, v4, v2, v3
+; P8-AIX-32-NEXT:    xxspltw v3, vs0, 1
+; P8-AIX-32-NEXT:    vperm v2, v3, v2, v4
 ; P8-AIX-32-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds i32, ptr %int32, i64 1
@@ -362,13 +356,11 @@ define <4 x i32> @s2v_test5(<4 x i32> %vec, ptr nocapture readonly %ptr1)  {
 ;
 ; P8-AIX-32-LABEL: s2v_test5:
 ; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lwz r3, 0(r3)
-; P8-AIX-32-NEXT:    stw r3, -16(r1)
+; P8-AIX-32-NEXT:    lfiwax f0, 0, r3
 ; P8-AIX-32-NEXT:    lwz r3, L..C4(r2) # %const.0
-; P8-AIX-32-NEXT:    lxvw4x v3, 0, r3
-; P8-AIX-32-NEXT:    addi r3, r1, -16
 ; P8-AIX-32-NEXT:    lxvw4x v4, 0, r3
-; P8-AIX-32-NEXT:    vperm v2, v4, v2, v3
+; P8-AIX-32-NEXT:    xxspltw v3, vs0, 1
+; P8-AIX-32-NEXT:    vperm v2, v3, v2, v4
 ; P8-AIX-32-NEXT:    blr
 entry:
   %0 = load i32, ptr %ptr1, align 4
diff --git a/llvm/test/CodeGen/PowerPC/test-vector-insert.ll b/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
index 73b4ad8a507b82..00ebd279e6f667 100644
--- a/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
+++ b/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
@@ -26,16 +26,14 @@ define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
 ; CHECK-LE-P7-NEXT:    xscvdpsxws f0, f1
 ; CHECK-LE-P7-NEXT:    addi r3, r1, -4
 ; CHECK-LE-P7-NEXT:    stfiwx f0, 0, r3
-; CHECK-LE-P7-NEXT:    lwz r3, -4(r1)
-; CHECK-LE-P7-NEXT:    stw r3, -32(r1)
+; CHECK-LE-P7-NEXT:    lfiwax f0, 0, r3
 ; CHECK-LE-P7-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
 ; CHECK-LE-P7-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-LE-P7-NEXT:    addi r3, r1, -32
-; CHECK-LE-P7-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P7-NEXT:    xxspltd v3, vs0, 0
 ; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-LE-P7-NEXT:    xxswapd v4, vs0
-; CHECK-LE-P7-NEXT:    vperm v2, v4, v2, v3
+; CHECK-LE-P7-NEXT:    xxspltw v3, v3, 3
+; CHECK-LE-P7-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-LE-P7-NEXT:    blr
 ;
 ; CHECK-LE-P8-LABEL: test:
@@ -59,16 +57,13 @@ define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
 ; CHECK-BE-P7-NEXT:    xscvdpsxws f0, f1
 ; CHECK-BE-P7-NEXT:    addi r3, r1, -4
 ; CHECK-BE-P7-NEXT:    stfiwx f0, 0, r3
-; CHECK-BE-P7-NEXT:    lwz r3, -4(r1)
-; CHECK-BE-P7-NEXT:    sldi r3, r3, 32
-; CHECK-BE-P7-NEXT:    std r3, -32(r1)
-; CHECK-BE-P7-NEXT:    std r3, -24(r1)
+; CHECK-BE-P7-NEXT:    lfiwax f0, 0, r3
 ; CHECK-BE-P7-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
 ; CHECK-BE-P7-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; CHECK-BE-P7-NEXT:    lxvw4x v3, 0, r3
-; CHECK-BE-P7-NEXT:    addi r3, r1, -32
 ; CHECK-BE-P7-NEXT:    lxvw4x v4, 0, r3
-; CHECK-BE-P7-NEXT:    vperm v2, v2, v4, v3
+; CHECK-BE-P7-NEXT:    xxlor v3, f0, f0
+; CHECK-BE-P7-NEXT:    xxspltw v3, v3, 1
+; CHECK-BE-P7-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P7-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test:
@@ -97,16 +92,14 @@ define dso_local <4 x i32> @test2(<4 x i32> %a, float %b) {
 ; CHECK-LE-P7-NEXT:    xscvdpsxws f0, f1
 ; CHECK-LE-P7-NEXT:    addi r3, r1, -4
 ; CHECK-LE-P7-NEXT:    stfiwx f0, 0, r3
-; CHECK-LE-P7-NEXT:    lwz r3, -4(r1)
-; CHECK-LE-P7-NEXT:    stw r3, -32(r1)
+; CHECK-LE-P7-NEXT:    lfiwax f0, 0, r3
 ; CHECK-LE-P7-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
 ; CHECK-LE-P7-NEXT:    addi r3, r3, .LCPI1_0@toc@l
-; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-LE-P7-NEXT:    addi r3, r1, -32
-; CHECK-LE-P7-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P7-NEXT:    xxspltd v3, vs0, 0
 ; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-LE-P7-NEXT:    xxswapd v4, vs0
-; CHECK-LE-P7-NEXT:    vperm v2, v4, v2, v3
+; CHECK-LE-P7-NEXT:    xxspltw v3, v3, 3
+; CHECK-LE-P7-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-LE-P7-NEXT:    blr
 ;
 ; CHECK-LE-P8-LABEL: test2:
@@ -130,16 +123,13 @@ define dso_local <4 x i32> @test2(<4 x i32> %a, float %b) {
 ; CHECK-BE-P7-NEXT:    xscvdpsxws f0, f1
 ; CHECK-BE-P7-NEXT:    addi r3, r1, -4
 ; CHECK-BE-P7-NEXT:    stfiwx f0, 0, r3
-; CHECK-BE-P7-NEXT:    lwz r3, -4(r1)
-; CHECK-BE-P7-NEXT:    sldi r3, r3, 32
-; CHECK-BE-P7-NEXT:    std r3, -32(r1)
-; CHECK-BE-P7-NEXT:    std r3, -24(r1)
+; CHECK-BE-P7-NEXT:    lfiwax f0, 0, r3
 ; CHECK-BE-P7-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
 ; CHECK-BE-P7-NEXT:    addi r3, r3, .LCPI1_0@toc@l
-; CHECK-BE-P7-NEXT:    lxvw4x v3, 0, r3
-; CHECK-BE-P7-NEXT:    addi r3, r1, -32
 ; CHECK-BE-P7-NEXT:    lxvw4x v4, 0, r3
-; CHECK-BE-P7-NEXT:    vperm v2, v2, v4, v3
+; CHECK-BE-P7-NEXT:    xxlor v3, f0, f0
+; CHECK-BE-P7-NEXT:    xxspltw v3, v3, 1
+; CHECK-BE-P7-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P7-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test2:
@@ -168,16 +158,14 @@ define dso_local <4 x i32> @test3(<4 x i32> %a, double %b) {
 ; CHECK-LE-P7-NEXT:    xscvdpuxws f0, f1
 ; CHECK-LE-P7-NEXT:    addi r3, r1, -4
 ; CHECK-LE-P7-NEXT:    stfiwx f0, 0, r3
-; CHECK-LE-P7-NEXT:    lwz r3, -4(r1)
-; CHECK-LE-P7-NEXT:    stw r3, -32(r1)
+; CHECK-LE-P7-NEXT:    lfiwax f0, 0, r3
 ; CHECK-LE-P7-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
 ; CHECK-LE-P7-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-LE-P7-NEXT:    addi r3, r1, -32
-; CHECK-LE-P7-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P7-NEXT:    xxspltd v3, vs0, 0
 ; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-LE-P7-NEXT:    xxswapd v4, vs0
-; CHECK-LE-P7-NEXT:    vperm v2, v4, v2, v3
+; CHECK-LE-P7-NEXT:    xxspltw v3, v3, 3
+; CHECK-LE-P7-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-LE-P7-NEXT:    blr
 ;
 ; CHECK-LE-P8-LABEL: test3:
@@ -201,16 +189,13 @@ define dso_local <4 x i32> @test3(<4 x i32> %a, double %b) {
 ; CHECK-BE-P7-NEXT:    xscvdpuxws f0, f1
 ; CHECK-BE-P7-NEXT:    addi r3, r1, -4
 ; CHECK-BE-P7-NEXT:    stfiwx f0, 0, r3
-; CHECK-BE-P7-NEXT:    lwz r3, -4(r1)
-; CHECK-BE-P7-NEXT:    sldi r3, r3, 32
-; CHECK-BE-P7-NEXT:    std r3, -32(r1)
-; CHECK-BE-P7-NEXT:    std r3, -24(r1)
+; CHECK-BE-P7-NEXT:    lfiwax f0, 0, r3
 ; CHECK-BE-P7-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
 ; CHECK-BE-P7-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; CHECK-BE-P7-NEXT:    lxvw4x v3, 0, r3
-; CHECK-BE-P7-NEXT:    addi r3, r1, -32
 ; CHECK-BE-P7-NEXT:    lxvw4x v4, 0, r3
-; CHECK-BE-P7-NEXT:    vperm v2, v2, v4, v3
+; CHECK-BE-P7-NEXT:    xxlor v3, f0, f0
+; CHECK-BE-P7-NEXT:    xxspltw v3, v3, 1
+; CHECK-BE-P7-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P7-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test3:
@@ -239,16 +224,14 @@ define dso_local <4 x i32> @test4(<4 x i32> %a, float %b) {
 ; CHECK-LE-P7-NEXT:    xscvdpuxws f0, f1
 ; CHECK-LE-P7-NEXT:    addi r3, r1, -4
 ; CHECK-LE-P7-NEXT:    stfiwx f0, 0, r3
-; CHECK-LE-P7-NEXT:    lwz r3, -4(r1)
-; CHECK-LE-P7-NEXT:    stw r3, -32(r1)
+; CHECK-LE-P7-NEXT:    lfiwax f0, 0, r3
 ; CHECK-LE-P7-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
 ; CHECK-LE-P7-NEXT:    addi r3, r3, .LCPI3_0@toc@l
-; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-LE-P7-NEXT:    addi r3, r1, -32
-; CHECK-LE-P7-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P7-NEXT:    xxspltd v3, vs0, 0
 ; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-LE-P7-NEXT:    xxswapd v4, vs0
-; CHECK-LE-P7-NEXT:    vperm v2, v4, v2, v3
+; CHECK-LE-P7-NEXT:    xxspltw v3, v3, 3
+; CHECK-LE-P7-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-LE-P7-NEXT:    blr
 ;
 ; CHECK-LE-P8-LABEL: test4:
@@ -272,16 +255,13 @@ define dso_local <4 x i32> @test4(<4 x i32> %a, float %b) {
 ; CHECK-BE-P7-NEXT:    xscvdpuxws f0, f1
 ; CHECK-BE-P7-NEXT:    addi r3, r1, -4
 ; CHECK-BE-P7-NEXT:    stfiwx f0, 0, r3
-; CHECK-BE-P7-NEXT:    lwz r3, -4(r1)
-; CHECK-BE-P7-NEXT:    sldi r3, r3, 32
-; CHECK-BE-P7-NEXT:    std r3, -32(r1)
-; CHECK-BE-P7-NEXT:    std r3, -24(r1)
+; CHECK-BE-P7-NEXT:    lfiwax f0, 0, r3
 ; CHECK-BE-P7-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
 ; CHECK-BE-P7-NEXT:    addi r3, r3, .LCPI3_0@toc@l
-; CHECK-BE-P7-NEXT:    lxvw4x v3, 0, r3
-; CHECK-BE-P7-NEXT:    addi r3, r1, -32
 ; CHECK-BE-P7-NEXT:    lxvw4x v4, 0, r3
-; CHECK-BE-P7-NEXT:    vperm v2, v2, v4, v3
+; CHECK-BE-P7-NEXT:    xxlor v3, f0, f0
+; CHECK-BE-P7-NEXT:    xxspltw v3, v3, 1
+; CHECK-BE-P7-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P7-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test4:
diff --git a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
index 11cc8abd2c7fa3..9159095d1decea 100644
--- a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
@@ -2045,31 +2045,27 @@ define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lxsiwzx v2, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 4(r4)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT:    li r5, 4
+; CHECK-AIX-32-P8-NEXT:    lfiwax f1, 0, r4
+; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C9(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, r4, r5
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
-; CHECK-AIX-32-P8-NEXT:    xxmrghw v3, vs1, vs0
-; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs1, vs1, 1
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P8-NEXT:    xxmrghw v2, vs1, vs0
+; CHECK-AIX-32-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r3
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 4(r4)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -32(r1)
+; CHECK-AIX-32-P9-NEXT:    li r3, 4
+; CHECK-AIX-32-P9-NEXT:    lfiwax f2, 0, r4
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs2, vs2, 1
+; CHECK-AIX-32-P9-NEXT:    lfiwax f1, r4, r3
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C5(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    lxv vs2, -32(r1)
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw v2, vs2, vs1
 ; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
diff --git a/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
index 8bb71e073e8146..be507204651df4 100644
--- a/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
@@ -1685,43 +1685,33 @@ define <2 x i64> @test_v2i64_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r5, 4(r3)
-; CHECK-AIX-32-P8-NEXT:    stw r5, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 4(r4)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -48(r1)
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -64(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -48
+; CHECK-AIX-32-P8-NEXT:    li r5, 4
+; CHECK-AIX-32-P8-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lfiwax f3, 0, r4
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, r3, r5
+; CHECK-AIX-32-P8-NEXT:    lfiwax f2, r4, r5
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs1, vs1, 1
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs3, vs3, 1
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs2, vs2, 1
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw v2, vs1, vs0
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -64
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs3, vs2
 ; CHECK-AIX-32-P8-NEXT:    xxmrghd v3, v2, vs0
 ; CHECK-AIX-32-P8-NEXT:    vaddudm v2, v3, v2
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lwz r5, 4(r3)
-; CHECK-AIX-32-P9-NEXT:    stw r5, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 4(r4)
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -48(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r4)
+; CHECK-AIX-32-P9-NEXT:    li r5, 4
+; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
+; CHECK-AIX-32-P9-NEXT:    lfiwax f0, r3, r5
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs0, vs0, 1
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw v2, vs1, vs0
-; CHECK-AIX-32-P9-NEXT:    lxv vs0, -48(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -64(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -64(r1)
+; CHECK-AIX-32-P9-NEXT:    lfiwax f0, r4, r5
+; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r4
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P9-NEXT:    xxmrghd v3, v2, vs0
 ; CHECK-AIX-32-P9-NEXT:    vaddudm v2, v3, v2
diff --git a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
index 4ca55d276647bf..b5192d31252a75 100644
--- a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
@@ -743,25 +743,22 @@ define void @test_v8i16_v4i32(ptr %a) {
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    sth r4, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r4
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    sth r4, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs0, vs1
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    sth r4, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    lxv vs0, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    sth r4, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
@@ -842,25 +839,22 @@ define void @test_v8i16_v2i64(ptr %a) {
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    sth r4, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r4
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    sth r4, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs0, vs1
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    sth r4, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    lxv vs0, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    sth r4, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
@@ -1030,25 +1024,22 @@ define void @test_v4i32_v8i16(ptr %a) {
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    sth r4, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r4
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    sth r4, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    sth r4, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    lxv vs0, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    sth r4, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
@@ -1125,26 +1116,20 @@ define void @test_v4i32_v2i64(ptr %a) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    stw r4, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lwz r4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    stw r4, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT:    lfiwax f0, 0, r3
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
@@ -1212,14 +1197,11 @@ define void @test_v2i64_v2i64(ptr %a) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r4, 4(r3)
-; CHECK-AIX-32-P8-NEXT:    stw r4, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT:    li r4, 4
+; CHECK-AIX-32-P8-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, r3, r4
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs1, vs1, 1
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    lfiwzx f1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxspltw vs1, vs1, 1
@@ -1229,12 +1211,11 @@ define void @test_v2i64_v2i64(ptr %a) {
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lwz r4, 4(r3)
-; CHECK-AIX-32-P9-NEXT:    stw r4, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT:    li r4, 4
+; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
+; CHECK-AIX-32-P9-NEXT:    lfiwax f0, r3, r4
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs0, vs0, 1
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P9-NEXT:    lxvwsx vs1, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
@@ -1308,26 +1289,20 @@ define void @test_v2i64_v4i32(ptr %a) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    stw r4, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v2i64_v4i32:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lwz r4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r4, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT:    lfiwax f0, 0, r3
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
@@ -1407,25 +1382,22 @@ define void @test_v2i64_v8i16(ptr %a) {
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    sth r4, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r4
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    sth r4, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    sth r4, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    lxv vs0, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    sth r4, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
diff --git a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
index 201bc5be545068..623c7edf521565 100644
--- a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
@@ -654,17 +654,14 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_none:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r4, 4(r3)
+; CHECK-AIX-32-P8-NEXT:    li r4, 4
+; CHECK-AIX-32-P8-NEXT:    lfiwax f1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxlxor v4, v4, v4
-; CHECK-AIX-32-P8-NEXT:    stw r4, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, r3, r4
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C6(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs1, vs1, 1
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw v2, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r3
@@ -672,14 +669,13 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v2i64_none:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lwz r4, 4(r3)
+; CHECK-AIX-32-P9-NEXT:    li r4, 4
+; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    xxlxor vs2, vs2, vs2
-; CHECK-AIX-32-P9-NEXT:    stw r4, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -32(r1)
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
+; CHECK-AIX-32-P9-NEXT:    lfiwax f0, r3, r4
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C5(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs0, vs0, 1
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    xxperm vs0, vs2, vs1
@@ -847,24 +843,21 @@ define <16 x i8> @test_v8i16_v4i32(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    sth r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v2, v3
+; CHECK-AIX-32-P8-NEXT:    xxspltw v2, vs0, 1
+; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    sth r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P9-NEXT:    lxv v2, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv v3, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    sth r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    lfiwax f0, 0, r4
+; CHECK-AIX-32-P9-NEXT:    lxv v2, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    xxspltw v3, vs0, 1
 ; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
@@ -937,24 +930,21 @@ define <16 x i8> @test_v8i16_v2i64(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    sth r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v2, v3
+; CHECK-AIX-32-P8-NEXT:    xxspltw v2, vs0, 1
+; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    sth r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P9-NEXT:    lxv v2, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv v3, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    sth r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    lfiwax f0, 0, r4
+; CHECK-AIX-32-P9-NEXT:    lxv v2, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    xxspltw v3, vs0, 1
 ; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
@@ -1149,24 +1139,21 @@ define <16 x i8> @test_v4i32_v8i16(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    sth r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
+; CHECK-AIX-32-P8-NEXT:    xxspltw v2, vs0, 1
+; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v2, v3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    sth r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P9-NEXT:    lxv v2, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv v3, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    sth r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    lfiwax f0, 0, r4
+; CHECK-AIX-32-P9-NEXT:    lxv v2, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    xxspltw v3, vs0, 1
 ; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
@@ -1519,24 +1506,21 @@ define <16 x i8> @test_v2i64_v8i16(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    sth r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
+; CHECK-AIX-32-P8-NEXT:    xxspltw v2, vs0, 1
+; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v2, v3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    sth r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P9-NEXT:    lxv v2, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv v3, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    sth r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    lfiwax f0, 0, r4
+; CHECK-AIX-32-P9-NEXT:    lxv v2, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    xxspltw v3, vs0, 1
 ; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:

>From f1969c91c5fafaec8ff3c9e925980086b0e7348e Mon Sep 17 00:00:00 2001
From: Roland Froese <froese at ca.ibm.com>
Date: Thu, 29 Aug 2024 19:22:13 +0000
Subject: [PATCH 2/2] address comments

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   | 21 ++--
 .../build-vector-from-load-and-zeros.ll       | 95 +++++++++----------
 .../PowerPC/canonical-merge-shuffles.ll       | 14 +--
 llvm/test/CodeGen/PowerPC/load-and-splat.ll   | 50 ++++------
 llvm/test/CodeGen/PowerPC/pre-inc-disable.ll  |  9 +-
 .../CodeGen/PowerPC/scalar_vector_test_4.ll   | 10 +-
 .../CodeGen/PowerPC/test-vector-insert.ll     | 80 +++++++---------
 .../PowerPC/v16i8_scalar_to_vector_shuffle.ll | 10 +-
 .../PowerPC/v2i64_scalar_to_vector_shuffle.ll | 20 ++--
 .../PowerPC/v4i32_scalar_to_vector_shuffle.ll | 50 ++++------
 .../PowerPC/v8i16_scalar_to_vector_shuffle.ll | 30 +++---
 11 files changed, 166 insertions(+), 223 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index a926b226ba738e..981288c49fc133 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11494,25 +11494,20 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
   MachineFunction &MF = DAG.getMachineFunction();
   SDValue Op0 = Op.getOperand(0);
   ReuseLoadInfo RLI;
-  if (Subtarget.hasVSX() && Op.getValueType() == MVT::v4i32 &&
-      Op0.getOpcode() == ISD::LOAD && Op0.getValueType() == MVT::i32 &&
-      Op0.hasOneUse() &&
+  if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
+      Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
+      Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
       canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) {
 
     MachineMemOperand *MMO =
         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
-    SDValue Ops[] = {RLI.Chain, RLI.Ptr};
-    SDValue Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
-                                           DAG.getVTList(MVT::f64, MVT::Other),
-                                           Ops, MVT::i32, MMO);
+    SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
+    SDValue Bits = DAG.getMemIntrinsicNode(
+        PPCISD::LD_SPLAT, dl, DAG.getVTList(MVT::v4i32, MVT::Other), Ops,
+        MVT::i32, MMO);
     spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
-
-    SDValue ConvVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Bits);
-    SDValue Bitcast = DAG.getBitcast(MVT::v4i32, ConvVec);
-    unsigned LowIx = Subtarget.isLittleEndian() ? 3 : 1;
-    return DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Bitcast,
-                       DAG.getConstant(LowIx, dl, MVT::i32));
+    return Bits.getValue(0);
   }
 
   // Create a stack slot that is 16-byte aligned.
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
index 54312fb5cee330..fba6725e2b2a3f 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
@@ -28,17 +28,16 @@ define  <2 x i64> @build_v2i64_extload_0(ptr nocapture noundef readonly %p) {
 ; PWR7-LE:       # %bb.0: # %entry
 ; PWR7-LE-NEXT:    li 4, 0
 ; PWR7-LE-NEXT:    stw 4, -16(1)
-; PWR7-LE-NEXT:    lfiwax 0, 0, 3
-; PWR7-LE-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
-; PWR7-LE-NEXT:    addi 3, 3, .LCPI0_0@toc@l
-; PWR7-LE-NEXT:    xxspltd 34, 0, 0
-; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
+; PWR7-LE-NEXT:    addis 4, 2, .LCPI0_0@toc@ha
+; PWR7-LE-NEXT:    lfiwzx 0, 0, 3
 ; PWR7-LE-NEXT:    addi 3, 1, -16
-; PWR7-LE-NEXT:    xxswapd 35, 0
+; PWR7-LE-NEXT:    addi 4, 4, .LCPI0_0@toc@l
+; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
+; PWR7-LE-NEXT:    xxspltw 35, 0, 1
 ; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
-; PWR7-LE-NEXT:    xxspltw 34, 34, 3
+; PWR7-LE-NEXT:    xxswapd 34, 1
 ; PWR7-LE-NEXT:    xxswapd 36, 0
-; PWR7-LE-NEXT:    vperm 2, 4, 2, 3
+; PWR7-LE-NEXT:    vperm 2, 4, 3, 2
 ; PWR7-LE-NEXT:    blr
 ;
 ; PWR8-LE-LABEL: build_v2i64_extload_0:
@@ -335,13 +334,12 @@ entry:
 define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) {
 ; PWR7-BE-LABEL: build_v4i32_load_0:
 ; PWR7-BE:       # %bb.0: # %entry
-; PWR7-BE-NEXT:    lfiwax 0, 0, 3
+; PWR7-BE-NEXT:    lfiwzx 0, 0, 3
 ; PWR7-BE-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
 ; PWR7-BE-NEXT:    xxlxor 36, 36, 36
 ; PWR7-BE-NEXT:    addi 3, 3, .LCPI8_0@toc@l
 ; PWR7-BE-NEXT:    lxvw4x 35, 0, 3
-; PWR7-BE-NEXT:    xxlor 34, 0, 0
-; PWR7-BE-NEXT:    xxspltw 34, 34, 1
+; PWR7-BE-NEXT:    xxspltw 34, 0, 1
 ; PWR7-BE-NEXT:    vperm 2, 2, 4, 3
 ; PWR7-BE-NEXT:    blr
 ;
@@ -361,17 +359,16 @@ define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) {
 ; PWR7-LE:       # %bb.0: # %entry
 ; PWR7-LE-NEXT:    li 4, 0
 ; PWR7-LE-NEXT:    stw 4, -16(1)
-; PWR7-LE-NEXT:    lfiwax 0, 0, 3
-; PWR7-LE-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
-; PWR7-LE-NEXT:    addi 3, 3, .LCPI8_0@toc@l
-; PWR7-LE-NEXT:    xxspltd 34, 0, 0
-; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
+; PWR7-LE-NEXT:    addis 4, 2, .LCPI8_0@toc@ha
+; PWR7-LE-NEXT:    lfiwzx 0, 0, 3
 ; PWR7-LE-NEXT:    addi 3, 1, -16
-; PWR7-LE-NEXT:    xxswapd 35, 0
+; PWR7-LE-NEXT:    addi 4, 4, .LCPI8_0@toc@l
+; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
+; PWR7-LE-NEXT:    xxspltw 35, 0, 1
 ; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
-; PWR7-LE-NEXT:    xxspltw 34, 34, 3
+; PWR7-LE-NEXT:    xxswapd 34, 1
 ; PWR7-LE-NEXT:    xxswapd 36, 0
-; PWR7-LE-NEXT:    vperm 2, 4, 2, 3
+; PWR7-LE-NEXT:    vperm 2, 4, 3, 2
 ; PWR7-LE-NEXT:    blr
 ;
 ; PWR8-LE-LABEL: build_v4i32_load_0:
@@ -393,13 +390,12 @@ entry:
 define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) {
 ; PWR7-BE-LABEL: build_v4i32_load_1:
 ; PWR7-BE:       # %bb.0: # %entry
-; PWR7-BE-NEXT:    lfiwax 0, 0, 3
+; PWR7-BE-NEXT:    lfiwzx 0, 0, 3
 ; PWR7-BE-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
 ; PWR7-BE-NEXT:    xxlxor 36, 36, 36
 ; PWR7-BE-NEXT:    addi 3, 3, .LCPI9_0@toc@l
 ; PWR7-BE-NEXT:    lxvw4x 35, 0, 3
-; PWR7-BE-NEXT:    xxlor 34, 0, 0
-; PWR7-BE-NEXT:    xxspltw 34, 34, 1
+; PWR7-BE-NEXT:    xxspltw 34, 0, 1
 ; PWR7-BE-NEXT:    vperm 2, 4, 2, 3
 ; PWR7-BE-NEXT:    blr
 ;
@@ -418,17 +414,16 @@ define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) {
 ; PWR7-LE:       # %bb.0: # %entry
 ; PWR7-LE-NEXT:    li 4, 0
 ; PWR7-LE-NEXT:    stw 4, -16(1)
-; PWR7-LE-NEXT:    lfiwax 0, 0, 3
-; PWR7-LE-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
-; PWR7-LE-NEXT:    addi 3, 3, .LCPI9_0@toc@l
-; PWR7-LE-NEXT:    xxspltd 34, 0, 0
-; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
+; PWR7-LE-NEXT:    addis 4, 2, .LCPI9_0@toc@ha
+; PWR7-LE-NEXT:    lfiwzx 0, 0, 3
 ; PWR7-LE-NEXT:    addi 3, 1, -16
-; PWR7-LE-NEXT:    xxswapd 35, 0
+; PWR7-LE-NEXT:    addi 4, 4, .LCPI9_0@toc@l
+; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
+; PWR7-LE-NEXT:    xxspltw 35, 0, 1
 ; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
-; PWR7-LE-NEXT:    xxspltw 34, 34, 3
+; PWR7-LE-NEXT:    xxswapd 34, 1
 ; PWR7-LE-NEXT:    xxswapd 36, 0
-; PWR7-LE-NEXT:    vperm 2, 2, 4, 3
+; PWR7-LE-NEXT:    vperm 2, 3, 4, 2
 ; PWR7-LE-NEXT:    blr
 ;
 ; PWR8-LE-LABEL: build_v4i32_load_1:
@@ -451,13 +446,12 @@ entry:
 define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) {
 ; PWR7-BE-LABEL: build_v4i32_load_2:
 ; PWR7-BE:       # %bb.0: # %entry
-; PWR7-BE-NEXT:    lfiwax 0, 0, 3
+; PWR7-BE-NEXT:    lfiwzx 0, 0, 3
 ; PWR7-BE-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
 ; PWR7-BE-NEXT:    xxlxor 36, 36, 36
 ; PWR7-BE-NEXT:    addi 3, 3, .LCPI10_0@toc@l
 ; PWR7-BE-NEXT:    lxvw4x 35, 0, 3
-; PWR7-BE-NEXT:    xxlor 34, 0, 0
-; PWR7-BE-NEXT:    xxspltw 34, 34, 1
+; PWR7-BE-NEXT:    xxspltw 34, 0, 1
 ; PWR7-BE-NEXT:    vperm 2, 4, 2, 3
 ; PWR7-BE-NEXT:    blr
 ;
@@ -477,17 +471,16 @@ define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) {
 ; PWR7-LE:       # %bb.0: # %entry
 ; PWR7-LE-NEXT:    li 4, 0
 ; PWR7-LE-NEXT:    stw 4, -16(1)
-; PWR7-LE-NEXT:    lfiwax 0, 0, 3
-; PWR7-LE-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
-; PWR7-LE-NEXT:    addi 3, 3, .LCPI10_0@toc@l
-; PWR7-LE-NEXT:    xxspltd 34, 0, 0
-; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
+; PWR7-LE-NEXT:    addis 4, 2, .LCPI10_0@toc@ha
+; PWR7-LE-NEXT:    lfiwzx 0, 0, 3
 ; PWR7-LE-NEXT:    addi 3, 1, -16
-; PWR7-LE-NEXT:    xxswapd 35, 0
+; PWR7-LE-NEXT:    addi 4, 4, .LCPI10_0@toc@l
+; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
+; PWR7-LE-NEXT:    xxspltw 35, 0, 1
 ; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
-; PWR7-LE-NEXT:    xxspltw 34, 34, 3
+; PWR7-LE-NEXT:    xxswapd 34, 1
 ; PWR7-LE-NEXT:    xxswapd 36, 0
-; PWR7-LE-NEXT:    vperm 2, 2, 4, 3
+; PWR7-LE-NEXT:    vperm 2, 3, 4, 2
 ; PWR7-LE-NEXT:    blr
 ;
 ; PWR8-LE-LABEL: build_v4i32_load_2:
@@ -509,13 +502,12 @@ entry:
 define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) {
 ; PWR7-BE-LABEL: build_v4i32_load_3:
 ; PWR7-BE:       # %bb.0: # %entry
-; PWR7-BE-NEXT:    lfiwax 0, 0, 3
+; PWR7-BE-NEXT:    lfiwzx 0, 0, 3
 ; PWR7-BE-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
 ; PWR7-BE-NEXT:    xxlxor 36, 36, 36
 ; PWR7-BE-NEXT:    addi 3, 3, .LCPI11_0@toc@l
 ; PWR7-BE-NEXT:    lxvw4x 35, 0, 3
-; PWR7-BE-NEXT:    xxlor 34, 0, 0
-; PWR7-BE-NEXT:    xxspltw 34, 34, 1
+; PWR7-BE-NEXT:    xxspltw 34, 0, 1
 ; PWR7-BE-NEXT:    vperm 2, 4, 2, 3
 ; PWR7-BE-NEXT:    blr
 ;
@@ -534,17 +526,16 @@ define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) {
 ; PWR7-LE:       # %bb.0: # %entry
 ; PWR7-LE-NEXT:    li 4, 0
 ; PWR7-LE-NEXT:    stw 4, -16(1)
-; PWR7-LE-NEXT:    lfiwax 0, 0, 3
-; PWR7-LE-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
-; PWR7-LE-NEXT:    addi 3, 3, .LCPI11_0@toc@l
-; PWR7-LE-NEXT:    xxspltd 34, 0, 0
-; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
+; PWR7-LE-NEXT:    addis 4, 2, .LCPI11_0@toc@ha
+; PWR7-LE-NEXT:    lfiwzx 0, 0, 3
 ; PWR7-LE-NEXT:    addi 3, 1, -16
-; PWR7-LE-NEXT:    xxswapd 35, 0
+; PWR7-LE-NEXT:    addi 4, 4, .LCPI11_0@toc@l
+; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
+; PWR7-LE-NEXT:    xxspltw 35, 0, 1
 ; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
-; PWR7-LE-NEXT:    xxspltw 34, 34, 3
+; PWR7-LE-NEXT:    xxswapd 34, 1
 ; PWR7-LE-NEXT:    xxswapd 36, 0
-; PWR7-LE-NEXT:    vperm 2, 2, 4, 3
+; PWR7-LE-NEXT:    vperm 2, 3, 4, 2
 ; PWR7-LE-NEXT:    blr
 ;
 ; PWR8-LE-LABEL: build_v4i32_load_3:
diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
index 5e73999c80b5ac..e1159e56e23ebe 100644
--- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
+++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
@@ -537,9 +537,9 @@ define dso_local <8 x i16> @testmrglb3(ptr nocapture readonly %a) local_unnamed_
 ; P8-AIX-32-LABEL: testmrglb3:
 ; P8-AIX-32:       # %bb.0: # %entry
 ; P8-AIX-32-NEXT:    li r4, 4
-; P8-AIX-32-NEXT:    lfiwax f1, 0, r3
+; P8-AIX-32-NEXT:    lfiwzx f1, 0, r3
 ; P8-AIX-32-NEXT:    xxlxor v3, v3, v3
-; P8-AIX-32-NEXT:    lfiwax f0, r3, r4
+; P8-AIX-32-NEXT:    lfiwzx f0, r3, r4
 ; P8-AIX-32-NEXT:    xxspltw vs1, vs1, 1
 ; P8-AIX-32-NEXT:    xxspltw vs0, vs0, 1
 ; P8-AIX-32-NEXT:    xxmrghw v2, vs1, vs0
@@ -853,7 +853,7 @@ define dso_local <16 x i8> @no_RAUW_in_combine_during_legalize(ptr nocapture rea
 ; P8-AIX-32-NEXT:    slwi r4, r4, 2
 ; P8-AIX-32-NEXT:    xxlxor v3, v3, v3
 ; P8-AIX-32-NEXT:    stw r5, -16(r1)
-; P8-AIX-32-NEXT:    lfiwax f0, r3, r4
+; P8-AIX-32-NEXT:    lfiwzx f0, r3, r4
 ; P8-AIX-32-NEXT:    addi r3, r1, -16
 ; P8-AIX-32-NEXT:    lxvw4x vs1, 0, r3
 ; P8-AIX-32-NEXT:    xxspltw vs0, vs0, 1
@@ -1022,8 +1022,8 @@ define dso_local <2 x i64> @testSplat8(ptr nocapture readonly %ptr) local_unname
 ; P8-AIX-32-LABEL: testSplat8:
 ; P8-AIX-32:       # %bb.0: # %entry
 ; P8-AIX-32-NEXT:    li r4, 4
-; P8-AIX-32-NEXT:    lfiwax f1, 0, r3
-; P8-AIX-32-NEXT:    lfiwax f0, r3, r4
+; P8-AIX-32-NEXT:    lfiwzx f1, 0, r3
+; P8-AIX-32-NEXT:    lfiwzx f0, r3, r4
 ; P8-AIX-32-NEXT:    xxspltw vs1, vs1, 1
 ; P8-AIX-32-NEXT:    xxspltw vs0, vs0, 1
 ; P8-AIX-32-NEXT:    xxmrghw vs0, vs1, vs0
@@ -1074,9 +1074,9 @@ define <2 x i64> @testSplati64_0(ptr nocapture readonly %ptr) #0 {
 ; P8-AIX-32-LABEL: testSplati64_0:
 ; P8-AIX-32:       # %bb.0: # %entry
 ; P8-AIX-32-NEXT:    li r4, 4
-; P8-AIX-32-NEXT:    lfiwax f0, r3, r4
+; P8-AIX-32-NEXT:    lfiwzx f0, r3, r4
 ; P8-AIX-32-NEXT:    xxspltw v2, vs0, 1
-; P8-AIX-32-NEXT:    lfiwax f0, 0, r3
+; P8-AIX-32-NEXT:    lfiwzx f0, 0, r3
 ; P8-AIX-32-NEXT:    lwz r3, L..C3(r2) # %const.0
 ; P8-AIX-32-NEXT:    lxvw4x v4, 0, r3
 ; P8-AIX-32-NEXT:    xxspltw v3, vs0, 1
diff --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
index 38f47838a42fbf..c9ee3a51f41724 100644
--- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll
+++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
@@ -209,13 +209,11 @@ define dso_local void @test4(ptr nocapture %c, ptr nocapture readonly %a) local_
 ; P9-AIX32-LABEL: test4:
 ; P9-AIX32:       # %bb.0: # %entry
 ; P9-AIX32-NEXT:    li r5, 28
-; P9-AIX32-NEXT:    lfiwax f0, r4, r5
+; P9-AIX32-NEXT:    lxvwsx vs0, r4, r5
 ; P9-AIX32-NEXT:    li r5, 24
-; P9-AIX32-NEXT:    lfiwax f1, r4, r5
+; P9-AIX32-NEXT:    lxvwsx vs1, r4, r5
 ; P9-AIX32-NEXT:    lwz r4, L..C0(r2) # %const.0
-; P9-AIX32-NEXT:    xxspltw vs0, vs0, 1
 ; P9-AIX32-NEXT:    lxv vs2, 0(r4)
-; P9-AIX32-NEXT:    xxspltw vs1, vs1, 1
 ; P9-AIX32-NEXT:    xxperm vs0, vs1, vs2
 ; P9-AIX32-NEXT:    stxv vs0, 0(r3)
 ; P9-AIX32-NEXT:    blr
@@ -223,10 +221,10 @@ define dso_local void @test4(ptr nocapture %c, ptr nocapture readonly %a) local_
 ; P8-AIX32-LABEL: test4:
 ; P8-AIX32:       # %bb.0: # %entry
 ; P8-AIX32-NEXT:    li r5, 28
-; P8-AIX32-NEXT:    lfiwax f0, r4, r5
+; P8-AIX32-NEXT:    lfiwzx f0, r4, r5
 ; P8-AIX32-NEXT:    li r5, 24
 ; P8-AIX32-NEXT:    xxspltw v2, vs0, 1
-; P8-AIX32-NEXT:    lfiwax f0, r4, r5
+; P8-AIX32-NEXT:    lfiwzx f0, r4, r5
 ; P8-AIX32-NEXT:    lwz r4, L..C0(r2) # %const.0
 ; P8-AIX32-NEXT:    lxvw4x v4, 0, r4
 ; P8-AIX32-NEXT:    xxspltw v3, vs0, 1
@@ -237,15 +235,13 @@ define dso_local void @test4(ptr nocapture %c, ptr nocapture readonly %a) local_
 ; P7-AIX32-LABEL: test4:
 ; P7-AIX32:       # %bb.0: # %entry
 ; P7-AIX32-NEXT:    li r5, 28
-; P7-AIX32-NEXT:    lfiwax f0, r4, r5
+; P7-AIX32-NEXT:    lfiwzx f0, r4, r5
 ; P7-AIX32-NEXT:    li r5, 24
-; P7-AIX32-NEXT:    xxlor v2, f0, f0
-; P7-AIX32-NEXT:    xxspltw v2, v2, 1
-; P7-AIX32-NEXT:    lfiwax f0, r4, r5
+; P7-AIX32-NEXT:    xxspltw v2, vs0, 1
+; P7-AIX32-NEXT:    lfiwzx f0, r4, r5
 ; P7-AIX32-NEXT:    lwz r4, L..C0(r2) # %const.0
 ; P7-AIX32-NEXT:    lxvw4x v4, 0, r4
-; P7-AIX32-NEXT:    xxlor v3, f0, f0
-; P7-AIX32-NEXT:    xxspltw v3, v3, 1
+; P7-AIX32-NEXT:    xxspltw v3, vs0, 1
 ; P7-AIX32-NEXT:    vperm v2, v3, v2, v4
 ; P7-AIX32-NEXT:    stxvw4x v2, 0, r3
 ; P7-AIX32-NEXT:    blr
@@ -363,10 +359,9 @@ define void @test6(ptr %a, ptr %in) {
 ; P9-AIX32-NEXT:    li r5, 0
 ; P9-AIX32-NEXT:    stw r5, -16(r1)
 ; P9-AIX32-NEXT:    lwz r5, L..C2(r2) # %const.0
-; P9-AIX32-NEXT:    lfiwax f1, 0, r4
+; P9-AIX32-NEXT:    lxvwsx vs1, 0, r4
 ; P9-AIX32-NEXT:    lxv vs2, -16(r1)
 ; P9-AIX32-NEXT:    lxv vs0, 0(r5)
-; P9-AIX32-NEXT:    xxspltw vs1, vs1, 1
 ; P9-AIX32-NEXT:    xxperm vs1, vs2, vs0
 ; P9-AIX32-NEXT:    stxv vs1, 0(r3)
 ; P9-AIX32-NEXT:    blr
@@ -375,7 +370,7 @@ define void @test6(ptr %a, ptr %in) {
 ; P8-AIX32:       # %bb.0: # %entry
 ; P8-AIX32-NEXT:    li r5, 0
 ; P8-AIX32-NEXT:    stw r5, -16(r1)
-; P8-AIX32-NEXT:    lfiwax f0, 0, r4
+; P8-AIX32-NEXT:    lfiwzx f0, 0, r4
 ; P8-AIX32-NEXT:    lwz r4, L..C2(r2) # %const.0
 ; P8-AIX32-NEXT:    lxvw4x v3, 0, r4
 ; P8-AIX32-NEXT:    addi r4, r1, -16
@@ -389,13 +384,12 @@ define void @test6(ptr %a, ptr %in) {
 ; P7-AIX32:       # %bb.0: # %entry
 ; P7-AIX32-NEXT:    li r5, 0
 ; P7-AIX32-NEXT:    stw r5, -16(r1)
-; P7-AIX32-NEXT:    lfiwax f0, 0, r4
+; P7-AIX32-NEXT:    lfiwzx f0, 0, r4
 ; P7-AIX32-NEXT:    lwz r4, L..C2(r2) # %const.0
 ; P7-AIX32-NEXT:    lxvw4x v3, 0, r4
 ; P7-AIX32-NEXT:    addi r4, r1, -16
-; P7-AIX32-NEXT:    xxlor v2, f0, f0
 ; P7-AIX32-NEXT:    lxvw4x v4, 0, r4
-; P7-AIX32-NEXT:    xxspltw v2, v2, 1
+; P7-AIX32-NEXT:    xxspltw v2, vs0, 1
 ; P7-AIX32-NEXT:    vperm v2, v4, v2, v3
 ; P7-AIX32-NEXT:    stxvw4x v2, 0, r3
 ; P7-AIX32-NEXT:    blr
@@ -805,10 +799,8 @@ define <16 x i8> @unadjusted_lxvdsx(ptr %s, ptr %t) {
 ; P9-AIX32-LABEL: unadjusted_lxvdsx:
 ; P9-AIX32:       # %bb.0: # %entry
 ; P9-AIX32-NEXT:    li r4, 4
-; P9-AIX32-NEXT:    lfiwax f1, 0, r3
-; P9-AIX32-NEXT:    xxspltw vs1, vs1, 1
-; P9-AIX32-NEXT:    lfiwax f0, r3, r4
-; P9-AIX32-NEXT:    xxspltw vs0, vs0, 1
+; P9-AIX32-NEXT:    lxvwsx vs1, 0, r3
+; P9-AIX32-NEXT:    lxvwsx vs0, r3, r4
 ; P9-AIX32-NEXT:    xxmrghw vs0, vs1, vs0
 ; P9-AIX32-NEXT:    xxmrghd v2, vs0, vs0
 ; P9-AIX32-NEXT:    blr
@@ -816,8 +808,8 @@ define <16 x i8> @unadjusted_lxvdsx(ptr %s, ptr %t) {
 ; P8-AIX32-LABEL: unadjusted_lxvdsx:
 ; P8-AIX32:       # %bb.0: # %entry
 ; P8-AIX32-NEXT:    li r4, 4
-; P8-AIX32-NEXT:    lfiwax f1, 0, r3
-; P8-AIX32-NEXT:    lfiwax f0, r3, r4
+; P8-AIX32-NEXT:    lfiwzx f1, 0, r3
+; P8-AIX32-NEXT:    lfiwzx f0, r3, r4
 ; P8-AIX32-NEXT:    xxspltw vs1, vs1, 1
 ; P8-AIX32-NEXT:    xxspltw vs0, vs0, 1
 ; P8-AIX32-NEXT:    xxmrghw vs0, vs1, vs0
@@ -827,12 +819,10 @@ define <16 x i8> @unadjusted_lxvdsx(ptr %s, ptr %t) {
 ; P7-AIX32-LABEL: unadjusted_lxvdsx:
 ; P7-AIX32:       # %bb.0: # %entry
 ; P7-AIX32-NEXT:    li r4, 4
-; P7-AIX32-NEXT:    lfiwax f1, 0, r3
-; P7-AIX32-NEXT:    lfiwax f0, r3, r4
-; P7-AIX32-NEXT:    xxlor v2, f0, f0
-; P7-AIX32-NEXT:    xxspltw vs0, v2, 1
-; P7-AIX32-NEXT:    xxlor v2, f1, f1
-; P7-AIX32-NEXT:    xxspltw vs1, v2, 1
+; P7-AIX32-NEXT:    lfiwzx f1, 0, r3
+; P7-AIX32-NEXT:    lfiwzx f0, r3, r4
+; P7-AIX32-NEXT:    xxspltw vs1, vs1, 1
+; P7-AIX32-NEXT:    xxspltw vs0, vs0, 1
 ; P7-AIX32-NEXT:    xxmrghw vs0, vs1, vs0
 ; P7-AIX32-NEXT:    xxmrghd v2, vs0, vs0
 ; P7-AIX32-NEXT:    blr
diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
index db7a0292e036b7..4435484ae0b947 100644
--- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
+++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
@@ -86,22 +86,19 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
 ; P9BE-AIX32-LABEL: test64:
 ; P9BE-AIX32:       # %bb.0: # %entry
 ; P9BE-AIX32-NEXT:    add 5, 3, 4
-; P9BE-AIX32-NEXT:    lfiwax 0, 3, 4
+; P9BE-AIX32-NEXT:    lxvwsx 0, 3, 4
 ; P9BE-AIX32-NEXT:    li 3, 4
 ; P9BE-AIX32-NEXT:    xxlxor 2, 2, 2
 ; P9BE-AIX32-NEXT:    vspltisw 4, 8
-; P9BE-AIX32-NEXT:    xxspltw 0, 0, 1
-; P9BE-AIX32-NEXT:    lfiwax 1, 5, 3
+; P9BE-AIX32-NEXT:    lxvwsx 1, 5, 3
 ; P9BE-AIX32-NEXT:    lwz 3, L..C0(2) # %const.0
 ; P9BE-AIX32-NEXT:    vadduwm 4, 4, 4
-; P9BE-AIX32-NEXT:    xxspltw 1, 1, 1
 ; P9BE-AIX32-NEXT:    xxmrghw 2, 0, 1
 ; P9BE-AIX32-NEXT:    lxv 0, 0(3)
 ; P9BE-AIX32-NEXT:    li 3, 8
 ; P9BE-AIX32-NEXT:    xxperm 2, 2, 0
-; P9BE-AIX32-NEXT:    lfiwax 0, 5, 3
+; P9BE-AIX32-NEXT:    lxvwsx 0, 5, 3
 ; P9BE-AIX32-NEXT:    lwz 3, L..C1(2) # %const.1
-; P9BE-AIX32-NEXT:    xxspltw 0, 0, 1
 ; P9BE-AIX32-NEXT:    xxmrghw 3, 1, 0
 ; P9BE-AIX32-NEXT:    lxv 0, 0(3)
 ; P9BE-AIX32-NEXT:    xxperm 3, 3, 0
diff --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
index be2a5d039d04ff..c8e0d0d25f4f7e 100644
--- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
@@ -73,7 +73,7 @@ define <4 x i32> @s2v_test1(ptr nocapture readonly %int32, <4 x i32> %vec)  {
 ;
 ; P8-AIX-32-LABEL: s2v_test1:
 ; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lfiwax f0, 0, r3
+; P8-AIX-32-NEXT:    lfiwzx f0, 0, r3
 ; P8-AIX-32-NEXT:    lwz r3, L..C0(r2) # %const.0
 ; P8-AIX-32-NEXT:    lxvw4x v4, 0, r3
 ; P8-AIX-32-NEXT:    xxspltw v3, vs0, 1
@@ -141,7 +141,7 @@ define <4 x i32> @s2v_test2(ptr nocapture readonly %int32, <4 x i32> %vec)  {
 ; P8-AIX-32-LABEL: s2v_test2:
 ; P8-AIX-32:       # %bb.0: # %entry
 ; P8-AIX-32-NEXT:    addi r3, r3, 4
-; P8-AIX-32-NEXT:    lfiwax f0, 0, r3
+; P8-AIX-32-NEXT:    lfiwzx f0, 0, r3
 ; P8-AIX-32-NEXT:    lwz r3, L..C1(r2) # %const.0
 ; P8-AIX-32-NEXT:    lxvw4x v4, 0, r3
 ; P8-AIX-32-NEXT:    xxspltw v3, vs0, 1
@@ -221,7 +221,7 @@ define <4 x i32> @s2v_test3(ptr nocapture readonly %int32, <4 x i32> %vec, i32 s
 ; P8-AIX-32-LABEL: s2v_test3:
 ; P8-AIX-32:       # %bb.0: # %entry
 ; P8-AIX-32-NEXT:    slwi r4, r4, 2
-; P8-AIX-32-NEXT:    lfiwax f0, r3, r4
+; P8-AIX-32-NEXT:    lfiwzx f0, r3, r4
 ; P8-AIX-32-NEXT:    lwz r3, L..C2(r2) # %const.0
 ; P8-AIX-32-NEXT:    lxvw4x v4, 0, r3
 ; P8-AIX-32-NEXT:    xxspltw v3, vs0, 1
@@ -291,7 +291,7 @@ define <4 x i32> @s2v_test4(ptr nocapture readonly %int32, <4 x i32> %vec)  {
 ; P8-AIX-32-LABEL: s2v_test4:
 ; P8-AIX-32:       # %bb.0: # %entry
 ; P8-AIX-32-NEXT:    addi r3, r3, 4
-; P8-AIX-32-NEXT:    lfiwax f0, 0, r3
+; P8-AIX-32-NEXT:    lfiwzx f0, 0, r3
 ; P8-AIX-32-NEXT:    lwz r3, L..C3(r2) # %const.0
 ; P8-AIX-32-NEXT:    lxvw4x v4, 0, r3
 ; P8-AIX-32-NEXT:    xxspltw v3, vs0, 1
@@ -356,7 +356,7 @@ define <4 x i32> @s2v_test5(<4 x i32> %vec, ptr nocapture readonly %ptr1)  {
 ;
 ; P8-AIX-32-LABEL: s2v_test5:
 ; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lfiwax f0, 0, r3
+; P8-AIX-32-NEXT:    lfiwzx f0, 0, r3
 ; P8-AIX-32-NEXT:    lwz r3, L..C4(r2) # %const.0
 ; P8-AIX-32-NEXT:    lxvw4x v4, 0, r3
 ; P8-AIX-32-NEXT:    xxspltw v3, vs0, 1
diff --git a/llvm/test/CodeGen/PowerPC/test-vector-insert.ll b/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
index 00ebd279e6f667..47fa6f2a5b4d29 100644
--- a/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
+++ b/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
@@ -25,15 +25,14 @@ define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
 ; CHECK-LE-P7:       # %bb.0: # %entry
 ; CHECK-LE-P7-NEXT:    xscvdpsxws f0, f1
 ; CHECK-LE-P7-NEXT:    addi r3, r1, -4
+; CHECK-LE-P7-NEXT:    addis r4, r2, .LCPI0_0@toc@ha
+; CHECK-LE-P7-NEXT:    addi r4, r4, .LCPI0_0@toc@l
 ; CHECK-LE-P7-NEXT:    stfiwx f0, 0, r3
-; CHECK-LE-P7-NEXT:    lfiwax f0, 0, r3
-; CHECK-LE-P7-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-LE-P7-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; CHECK-LE-P7-NEXT:    xxspltd v3, vs0, 0
-; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-LE-P7-NEXT:    xxswapd v4, vs0
-; CHECK-LE-P7-NEXT:    xxspltw v3, v3, 3
-; CHECK-LE-P7-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-LE-P7-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P7-NEXT:    lfiwzx f0, 0, r3
+; CHECK-LE-P7-NEXT:    xxspltw v4, vs0, 1
+; CHECK-LE-P7-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P7-NEXT:    blr
 ;
 ; CHECK-LE-P8-LABEL: test:
@@ -57,12 +56,11 @@ define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
 ; CHECK-BE-P7-NEXT:    xscvdpsxws f0, f1
 ; CHECK-BE-P7-NEXT:    addi r3, r1, -4
 ; CHECK-BE-P7-NEXT:    stfiwx f0, 0, r3
-; CHECK-BE-P7-NEXT:    lfiwax f0, 0, r3
+; CHECK-BE-P7-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-BE-P7-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
 ; CHECK-BE-P7-NEXT:    addi r3, r3, .LCPI0_0@toc@l
 ; CHECK-BE-P7-NEXT:    lxvw4x v4, 0, r3
-; CHECK-BE-P7-NEXT:    xxlor v3, f0, f0
-; CHECK-BE-P7-NEXT:    xxspltw v3, v3, 1
+; CHECK-BE-P7-NEXT:    xxspltw v3, vs0, 1
 ; CHECK-BE-P7-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P7-NEXT:    blr
 ;
@@ -91,15 +89,14 @@ define dso_local <4 x i32> @test2(<4 x i32> %a, float %b) {
 ; CHECK-LE-P7:       # %bb.0: # %entry
 ; CHECK-LE-P7-NEXT:    xscvdpsxws f0, f1
 ; CHECK-LE-P7-NEXT:    addi r3, r1, -4
+; CHECK-LE-P7-NEXT:    addis r4, r2, .LCPI1_0@toc@ha
+; CHECK-LE-P7-NEXT:    addi r4, r4, .LCPI1_0@toc@l
 ; CHECK-LE-P7-NEXT:    stfiwx f0, 0, r3
-; CHECK-LE-P7-NEXT:    lfiwax f0, 0, r3
-; CHECK-LE-P7-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
-; CHECK-LE-P7-NEXT:    addi r3, r3, .LCPI1_0@toc@l
-; CHECK-LE-P7-NEXT:    xxspltd v3, vs0, 0
-; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-LE-P7-NEXT:    xxswapd v4, vs0
-; CHECK-LE-P7-NEXT:    xxspltw v3, v3, 3
-; CHECK-LE-P7-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-LE-P7-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P7-NEXT:    lfiwzx f0, 0, r3
+; CHECK-LE-P7-NEXT:    xxspltw v4, vs0, 1
+; CHECK-LE-P7-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P7-NEXT:    blr
 ;
 ; CHECK-LE-P8-LABEL: test2:
@@ -123,12 +120,11 @@ define dso_local <4 x i32> @test2(<4 x i32> %a, float %b) {
 ; CHECK-BE-P7-NEXT:    xscvdpsxws f0, f1
 ; CHECK-BE-P7-NEXT:    addi r3, r1, -4
 ; CHECK-BE-P7-NEXT:    stfiwx f0, 0, r3
-; CHECK-BE-P7-NEXT:    lfiwax f0, 0, r3
+; CHECK-BE-P7-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-BE-P7-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
 ; CHECK-BE-P7-NEXT:    addi r3, r3, .LCPI1_0@toc@l
 ; CHECK-BE-P7-NEXT:    lxvw4x v4, 0, r3
-; CHECK-BE-P7-NEXT:    xxlor v3, f0, f0
-; CHECK-BE-P7-NEXT:    xxspltw v3, v3, 1
+; CHECK-BE-P7-NEXT:    xxspltw v3, vs0, 1
 ; CHECK-BE-P7-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P7-NEXT:    blr
 ;
@@ -157,15 +153,14 @@ define dso_local <4 x i32> @test3(<4 x i32> %a, double %b) {
 ; CHECK-LE-P7:       # %bb.0: # %entry
 ; CHECK-LE-P7-NEXT:    xscvdpuxws f0, f1
 ; CHECK-LE-P7-NEXT:    addi r3, r1, -4
+; CHECK-LE-P7-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
+; CHECK-LE-P7-NEXT:    addi r4, r4, .LCPI2_0@toc@l
 ; CHECK-LE-P7-NEXT:    stfiwx f0, 0, r3
-; CHECK-LE-P7-NEXT:    lfiwax f0, 0, r3
-; CHECK-LE-P7-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
-; CHECK-LE-P7-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; CHECK-LE-P7-NEXT:    xxspltd v3, vs0, 0
-; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-LE-P7-NEXT:    xxswapd v4, vs0
-; CHECK-LE-P7-NEXT:    xxspltw v3, v3, 3
-; CHECK-LE-P7-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-LE-P7-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P7-NEXT:    lfiwzx f0, 0, r3
+; CHECK-LE-P7-NEXT:    xxspltw v4, vs0, 1
+; CHECK-LE-P7-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P7-NEXT:    blr
 ;
 ; CHECK-LE-P8-LABEL: test3:
@@ -189,12 +184,11 @@ define dso_local <4 x i32> @test3(<4 x i32> %a, double %b) {
 ; CHECK-BE-P7-NEXT:    xscvdpuxws f0, f1
 ; CHECK-BE-P7-NEXT:    addi r3, r1, -4
 ; CHECK-BE-P7-NEXT:    stfiwx f0, 0, r3
-; CHECK-BE-P7-NEXT:    lfiwax f0, 0, r3
+; CHECK-BE-P7-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-BE-P7-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
 ; CHECK-BE-P7-NEXT:    addi r3, r3, .LCPI2_0@toc@l
 ; CHECK-BE-P7-NEXT:    lxvw4x v4, 0, r3
-; CHECK-BE-P7-NEXT:    xxlor v3, f0, f0
-; CHECK-BE-P7-NEXT:    xxspltw v3, v3, 1
+; CHECK-BE-P7-NEXT:    xxspltw v3, vs0, 1
 ; CHECK-BE-P7-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P7-NEXT:    blr
 ;
@@ -223,15 +217,14 @@ define dso_local <4 x i32> @test4(<4 x i32> %a, float %b) {
 ; CHECK-LE-P7:       # %bb.0: # %entry
 ; CHECK-LE-P7-NEXT:    xscvdpuxws f0, f1
 ; CHECK-LE-P7-NEXT:    addi r3, r1, -4
+; CHECK-LE-P7-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
+; CHECK-LE-P7-NEXT:    addi r4, r4, .LCPI3_0@toc@l
 ; CHECK-LE-P7-NEXT:    stfiwx f0, 0, r3
-; CHECK-LE-P7-NEXT:    lfiwax f0, 0, r3
-; CHECK-LE-P7-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-LE-P7-NEXT:    addi r3, r3, .LCPI3_0@toc@l
-; CHECK-LE-P7-NEXT:    xxspltd v3, vs0, 0
-; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-LE-P7-NEXT:    xxswapd v4, vs0
-; CHECK-LE-P7-NEXT:    xxspltw v3, v3, 3
-; CHECK-LE-P7-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-LE-P7-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P7-NEXT:    lfiwzx f0, 0, r3
+; CHECK-LE-P7-NEXT:    xxspltw v4, vs0, 1
+; CHECK-LE-P7-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P7-NEXT:    blr
 ;
 ; CHECK-LE-P8-LABEL: test4:
@@ -255,12 +248,11 @@ define dso_local <4 x i32> @test4(<4 x i32> %a, float %b) {
 ; CHECK-BE-P7-NEXT:    xscvdpuxws f0, f1
 ; CHECK-BE-P7-NEXT:    addi r3, r1, -4
 ; CHECK-BE-P7-NEXT:    stfiwx f0, 0, r3
-; CHECK-BE-P7-NEXT:    lfiwax f0, 0, r3
+; CHECK-BE-P7-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-BE-P7-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
 ; CHECK-BE-P7-NEXT:    addi r3, r3, .LCPI3_0@toc@l
 ; CHECK-BE-P7-NEXT:    lxvw4x v4, 0, r3
-; CHECK-BE-P7-NEXT:    xxlor v3, f0, f0
-; CHECK-BE-P7-NEXT:    xxspltw v3, v3, 1
+; CHECK-BE-P7-NEXT:    xxspltw v3, vs0, 1
 ; CHECK-BE-P7-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P7-NEXT:    blr
 ;
diff --git a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
index 9159095d1decea..31d0960e19f4ef 100644
--- a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
@@ -2046,10 +2046,10 @@ define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    li r5, 4
-; CHECK-AIX-32-P8-NEXT:    lfiwax f1, 0, r4
+; CHECK-AIX-32-P8-NEXT:    lfiwzx f1, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C9(r2) # %const.0
-; CHECK-AIX-32-P8-NEXT:    lfiwax f0, r4, r5
+; CHECK-AIX-32-P8-NEXT:    lfiwzx f0, r4, r5
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
@@ -2061,11 +2061,9 @@ define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    li r3, 4
-; CHECK-AIX-32-P9-NEXT:    lfiwax f2, 0, r4
-; CHECK-AIX-32-P9-NEXT:    xxspltw vs2, vs2, 1
-; CHECK-AIX-32-P9-NEXT:    lfiwax f1, r4, r3
+; CHECK-AIX-32-P9-NEXT:    lxvwsx vs2, 0, r4
+; CHECK-AIX-32-P9-NEXT:    lxvwsx vs1, r4, r3
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C5(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw v2, vs2, vs1
 ; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
diff --git a/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
index be507204651df4..56c8c128ba9f40 100644
--- a/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
@@ -1686,10 +1686,10 @@ define <2 x i64> @test_v2i64_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    li r5, 4
-; CHECK-AIX-32-P8-NEXT:    lfiwax f1, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lfiwax f3, 0, r4
-; CHECK-AIX-32-P8-NEXT:    lfiwax f0, r3, r5
-; CHECK-AIX-32-P8-NEXT:    lfiwax f2, r4, r5
+; CHECK-AIX-32-P8-NEXT:    lfiwzx f1, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lfiwzx f3, 0, r4
+; CHECK-AIX-32-P8-NEXT:    lfiwzx f0, r3, r5
+; CHECK-AIX-32-P8-NEXT:    lfiwzx f2, r4, r5
 ; CHECK-AIX-32-P8-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P8-NEXT:    xxspltw vs3, vs3, 1
 ; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
@@ -1703,15 +1703,11 @@ define <2 x i64> @test_v2i64_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    li r5, 4
-; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
-; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
-; CHECK-AIX-32-P9-NEXT:    lfiwax f0, r3, r5
-; CHECK-AIX-32-P9-NEXT:    xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P9-NEXT:    lxvwsx vs1, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lxvwsx vs0, r3, r5
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw v2, vs1, vs0
-; CHECK-AIX-32-P9-NEXT:    lfiwax f0, r4, r5
-; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r4
-; CHECK-AIX-32-P9-NEXT:    xxspltw vs0, vs0, 1
-; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
+; CHECK-AIX-32-P9-NEXT:    lxvwsx vs0, r4, r5
+; CHECK-AIX-32-P9-NEXT:    lxvwsx vs1, 0, r4
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P9-NEXT:    xxmrghd v3, v2, vs0
 ; CHECK-AIX-32-P9-NEXT:    vaddudm v2, v3, v2
diff --git a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
index b5192d31252a75..c8e7b20e4b8c37 100644
--- a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
@@ -744,7 +744,7 @@ define void @test_v8i16_v4i32(ptr %a) {
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r3)
 ; CHECK-AIX-32-P8-NEXT:    sth r4, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
@@ -756,9 +756,8 @@ define void @test_v8i16_v4i32(ptr %a) {
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lhz r4, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    sth r4, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
+; CHECK-AIX-32-P9-NEXT:    lxvwsx vs1, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
@@ -840,7 +839,7 @@ define void @test_v8i16_v2i64(ptr %a) {
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r3)
 ; CHECK-AIX-32-P8-NEXT:    sth r4, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
@@ -852,9 +851,8 @@ define void @test_v8i16_v2i64(ptr %a) {
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lhz r4, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    sth r4, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
+; CHECK-AIX-32-P9-NEXT:    lxvwsx vs1, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
@@ -1025,7 +1023,7 @@ define void @test_v4i32_v8i16(ptr %a) {
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r3)
 ; CHECK-AIX-32-P8-NEXT:    sth r4, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
@@ -1037,9 +1035,8 @@ define void @test_v4i32_v8i16(ptr %a) {
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lhz r4, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    sth r4, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
+; CHECK-AIX-32-P9-NEXT:    lxvwsx vs1, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
@@ -1116,8 +1113,8 @@ define void @test_v4i32_v2i64(ptr %a) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lfiwzx f0, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lfiwzx f1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
 ; CHECK-AIX-32-P8-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
@@ -1126,10 +1123,8 @@ define void @test_v4i32_v2i64(ptr %a) {
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lfiwax f0, 0, r3
-; CHECK-AIX-32-P9-NEXT:    xxspltw vs0, vs0, 1
-; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
-; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
+; CHECK-AIX-32-P9-NEXT:    lxvwsx vs0, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lxvwsx vs1, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
@@ -1198,8 +1193,8 @@ define void @test_v2i64_v2i64(ptr %a) {
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    li r4, 4
-; CHECK-AIX-32-P8-NEXT:    lfiwax f1, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lfiwax f0, r3, r4
+; CHECK-AIX-32-P8-NEXT:    lfiwzx f1, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lfiwzx f0, r3, r4
 ; CHECK-AIX-32-P8-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
@@ -1212,10 +1207,8 @@ define void @test_v2i64_v2i64(ptr %a) {
 ; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    li r4, 4
-; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
-; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
-; CHECK-AIX-32-P9-NEXT:    lfiwax f0, r3, r4
-; CHECK-AIX-32-P9-NEXT:    xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P9-NEXT:    lxvwsx vs1, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lxvwsx vs0, r3, r4
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P9-NEXT:    lxvwsx vs1, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
@@ -1289,8 +1282,8 @@ define void @test_v2i64_v4i32(ptr %a) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lfiwzx f0, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lfiwzx f1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
 ; CHECK-AIX-32-P8-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
@@ -1299,10 +1292,8 @@ define void @test_v2i64_v4i32(ptr %a) {
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v2i64_v4i32:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lfiwax f0, 0, r3
-; CHECK-AIX-32-P9-NEXT:    xxspltw vs0, vs0, 1
-; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
-; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
+; CHECK-AIX-32-P9-NEXT:    lxvwsx vs0, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lxvwsx vs1, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
@@ -1383,7 +1374,7 @@ define void @test_v2i64_v8i16(ptr %a) {
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r3)
 ; CHECK-AIX-32-P8-NEXT:    sth r4, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
@@ -1395,9 +1386,8 @@ define void @test_v2i64_v8i16(ptr %a) {
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lhz r4, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    sth r4, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
+; CHECK-AIX-32-P9-NEXT:    lxvwsx vs1, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
diff --git a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
index 623c7edf521565..e1aa531db449e5 100644
--- a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
@@ -655,9 +655,9 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_none:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    li r4, 4
-; CHECK-AIX-32-P8-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lfiwzx f1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxlxor v4, v4, v4
-; CHECK-AIX-32-P8-NEXT:    lfiwax f0, r3, r4
+; CHECK-AIX-32-P8-NEXT:    lfiwzx f0, r3, r4
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C6(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxspltw vs1, vs1, 1
@@ -670,12 +670,10 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
 ; CHECK-AIX-32-P9-LABEL: test_v2i64_none:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    li r4, 4
-; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lxvwsx vs1, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    xxlxor vs2, vs2, vs2
-; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
-; CHECK-AIX-32-P9-NEXT:    lfiwax f0, r3, r4
+; CHECK-AIX-32-P9-NEXT:    lxvwsx vs0, r3, r4
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C5(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    xxspltw vs0, vs0, 1
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    xxperm vs0, vs2, vs1
@@ -845,7 +843,7 @@ define <16 x i8> @test_v8i16_v4i32(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
 ; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r4
+; CHECK-AIX-32-P8-NEXT:    lfiwzx f0, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxspltw v2, vs0, 1
 ; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
@@ -855,9 +853,8 @@ define <16 x i8> @test_v8i16_v4i32(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lhz r3, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    sth r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lfiwax f0, 0, r4
 ; CHECK-AIX-32-P9-NEXT:    lxv v2, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    xxspltw v3, vs0, 1
+; CHECK-AIX-32-P9-NEXT:    lxvwsx v3, 0, r4
 ; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
@@ -932,7 +929,7 @@ define <16 x i8> @test_v8i16_v2i64(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
 ; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r4
+; CHECK-AIX-32-P8-NEXT:    lfiwzx f0, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxspltw v2, vs0, 1
 ; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
@@ -942,9 +939,8 @@ define <16 x i8> @test_v8i16_v2i64(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lhz r3, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    sth r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lfiwax f0, 0, r4
 ; CHECK-AIX-32-P9-NEXT:    lxv v2, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    xxspltw v3, vs0, 1
+; CHECK-AIX-32-P9-NEXT:    lxvwsx v3, 0, r4
 ; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
@@ -1141,7 +1137,7 @@ define <16 x i8> @test_v4i32_v8i16(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
 ; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r4
+; CHECK-AIX-32-P8-NEXT:    lfiwzx f0, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxspltw v2, vs0, 1
 ; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v2, v3
@@ -1151,9 +1147,8 @@ define <16 x i8> @test_v4i32_v8i16(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lhz r3, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    sth r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lfiwax f0, 0, r4
 ; CHECK-AIX-32-P9-NEXT:    lxv v2, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    xxspltw v3, vs0, 1
+; CHECK-AIX-32-P9-NEXT:    lxvwsx v3, 0, r4
 ; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
@@ -1508,7 +1503,7 @@ define <16 x i8> @test_v2i64_v8i16(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
 ; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r4
+; CHECK-AIX-32-P8-NEXT:    lfiwzx f0, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxspltw v2, vs0, 1
 ; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v2, v3
@@ -1518,9 +1513,8 @@ define <16 x i8> @test_v2i64_v8i16(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lhz r3, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    sth r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lfiwax f0, 0, r4
 ; CHECK-AIX-32-P9-NEXT:    lxv v2, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    xxspltw v3, vs0, 1
+; CHECK-AIX-32-P9-NEXT:    lxvwsx v3, 0, r4
 ; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:



More information about the llvm-commits mailing list