[llvm] [PowerPC] Improve pwr7 codegen for v4i8 load (PR #104507)

via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 15 14:10:39 PDT 2024


https://github.com/RolandF77 created https://github.com/llvm/llvm-project/pull/104507

There are no partial vector loads on pwr7 so current v4i8 codegen is an int load then store to vector sized temp and re-load as vector. Try to use lfiwax to load 32 bits into an FP reg and take advantage of VSX FP and vector reg sharing to move the result to the right vector position.

>From f398149f3d48e37ba8cd01f1266918243b957c60 Mon Sep 17 00:00:00 2001
From: Roland Froese <froese at ca.ibm.com>
Date: Thu, 15 Aug 2024 19:30:45 +0000
Subject: [PATCH] improve pwr7 v4i8 load

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  29 +++-
 .../build-vector-from-load-and-zeros.ll       | 152 ++++++++----------
 .../PowerPC/canonical-merge-shuffles.ll       |  53 +++---
 llvm/test/CodeGen/PowerPC/load-and-splat.ll   | 127 +++++++--------
 llvm/test/CodeGen/PowerPC/pre-inc-disable.ll  |  24 +--
 .../CodeGen/PowerPC/scalar_vector_test_4.ll   |  42 ++---
 .../CodeGen/PowerPC/test-vector-insert.ll     |  84 ++++------
 .../PowerPC/v16i8_scalar_to_vector_shuffle.ll |  30 ++--
 .../PowerPC/v2i64_scalar_to_vector_shuffle.ll |  48 +++---
 .../PowerPC/v4i32_scalar_to_vector_shuffle.ll | 144 +++++++----------
 .../PowerPC/v8i16_scalar_to_vector_shuffle.ll | 100 +++++-------
 11 files changed, 364 insertions(+), 469 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 8ff9f5a5a991e0..a926b226ba738e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11490,13 +11490,38 @@ SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                  SelectionDAG &DAG) const {
   SDLoc dl(Op);
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  SDValue Op0 = Op.getOperand(0);
+  ReuseLoadInfo RLI;
+  if (Subtarget.hasVSX() && Op.getValueType() == MVT::v4i32 &&
+      Op0.getOpcode() == ISD::LOAD && Op0.getValueType() == MVT::i32 &&
+      Op0.hasOneUse() &&
+      canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) {
+
+    MachineMemOperand *MMO =
+        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
+                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
+    SDValue Ops[] = {RLI.Chain, RLI.Ptr};
+    SDValue Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
+                                           DAG.getVTList(MVT::f64, MVT::Other),
+                                           Ops, MVT::i32, MMO);
+    spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
+
+    SDValue ConvVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Bits);
+    SDValue Bitcast = DAG.getBitcast(MVT::v4i32, ConvVec);
+    unsigned LowIx = Subtarget.isLittleEndian() ? 3 : 1;
+    return DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Bitcast,
+                       DAG.getConstant(LowIx, dl, MVT::i32));
+  }
+
   // Create a stack slot that is 16-byte aligned.
-  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
   int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
 
-  SDValue Val = Op.getOperand(0);
+  SDValue Val = Op0;
   EVT ValVT = Val.getValueType();
   // P10 hardware store forwarding requires that a single store contains all
   // the data for the load. P10 is able to merge a pair of adjacent stores. Try
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
index 6d35a7281de6b4..54312fb5cee330 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
@@ -27,20 +27,18 @@ define  <2 x i64> @build_v2i64_extload_0(ptr nocapture noundef readonly %p) {
 ; PWR7-LE-LABEL: build_v2i64_extload_0:
 ; PWR7-LE:       # %bb.0: # %entry
 ; PWR7-LE-NEXT:    li 4, 0
-; PWR7-LE-NEXT:    lwz 3, 0(3)
 ; PWR7-LE-NEXT:    stw 4, -16(1)
-; PWR7-LE-NEXT:    addis 4, 2, .LCPI0_0 at toc@ha
-; PWR7-LE-NEXT:    addi 4, 4, .LCPI0_0 at toc@l
-; PWR7-LE-NEXT:    stw 3, -32(1)
-; PWR7-LE-NEXT:    addi 3, 1, -32
-; PWR7-LE-NEXT:    lxvd2x 0, 0, 4
-; PWR7-LE-NEXT:    addi 4, 1, -16
-; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
-; PWR7-LE-NEXT:    xxswapd 34, 0
+; PWR7-LE-NEXT:    lfiwax 0, 0, 3
+; PWR7-LE-NEXT:    addis 3, 2, .LCPI0_0 at toc@ha
+; PWR7-LE-NEXT:    addi 3, 3, .LCPI0_0 at toc@l
+; PWR7-LE-NEXT:    xxspltd 34, 0, 0
 ; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
-; PWR7-LE-NEXT:    xxswapd 35, 1
+; PWR7-LE-NEXT:    addi 3, 1, -16
+; PWR7-LE-NEXT:    xxswapd 35, 0
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
+; PWR7-LE-NEXT:    xxspltw 34, 34, 3
 ; PWR7-LE-NEXT:    xxswapd 36, 0
-; PWR7-LE-NEXT:    vperm 2, 3, 4, 2
+; PWR7-LE-NEXT:    vperm 2, 4, 2, 3
 ; PWR7-LE-NEXT:    blr
 ;
 ; PWR8-LE-LABEL: build_v2i64_extload_0:
@@ -337,17 +335,14 @@ entry:
 define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) {
 ; PWR7-BE-LABEL: build_v4i32_load_0:
 ; PWR7-BE:       # %bb.0: # %entry
-; PWR7-BE-NEXT:    lwz 3, 0(3)
-; PWR7-BE-NEXT:    xxlxor 36, 36, 36
-; PWR7-BE-NEXT:    sldi 3, 3, 32
-; PWR7-BE-NEXT:    std 3, -32(1)
-; PWR7-BE-NEXT:    std 3, -24(1)
+; PWR7-BE-NEXT:    lfiwax 0, 0, 3
 ; PWR7-BE-NEXT:    addis 3, 2, .LCPI8_0 at toc@ha
+; PWR7-BE-NEXT:    xxlxor 36, 36, 36
 ; PWR7-BE-NEXT:    addi 3, 3, .LCPI8_0 at toc@l
-; PWR7-BE-NEXT:    lxvw4x 34, 0, 3
-; PWR7-BE-NEXT:    addi 3, 1, -32
 ; PWR7-BE-NEXT:    lxvw4x 35, 0, 3
-; PWR7-BE-NEXT:    vperm 2, 3, 4, 2
+; PWR7-BE-NEXT:    xxlor 34, 0, 0
+; PWR7-BE-NEXT:    xxspltw 34, 34, 1
+; PWR7-BE-NEXT:    vperm 2, 2, 4, 3
 ; PWR7-BE-NEXT:    blr
 ;
 ; PWR8-BE-LABEL: build_v4i32_load_0:
@@ -365,20 +360,18 @@ define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) {
 ; PWR7-LE-LABEL: build_v4i32_load_0:
 ; PWR7-LE:       # %bb.0: # %entry
 ; PWR7-LE-NEXT:    li 4, 0
-; PWR7-LE-NEXT:    lwz 3, 0(3)
 ; PWR7-LE-NEXT:    stw 4, -16(1)
-; PWR7-LE-NEXT:    addis 4, 2, .LCPI8_0 at toc@ha
-; PWR7-LE-NEXT:    addi 4, 4, .LCPI8_0 at toc@l
-; PWR7-LE-NEXT:    stw 3, -32(1)
-; PWR7-LE-NEXT:    addi 3, 1, -32
-; PWR7-LE-NEXT:    lxvd2x 0, 0, 4
-; PWR7-LE-NEXT:    addi 4, 1, -16
-; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
-; PWR7-LE-NEXT:    xxswapd 34, 0
+; PWR7-LE-NEXT:    lfiwax 0, 0, 3
+; PWR7-LE-NEXT:    addis 3, 2, .LCPI8_0 at toc@ha
+; PWR7-LE-NEXT:    addi 3, 3, .LCPI8_0 at toc@l
+; PWR7-LE-NEXT:    xxspltd 34, 0, 0
 ; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
-; PWR7-LE-NEXT:    xxswapd 35, 1
+; PWR7-LE-NEXT:    addi 3, 1, -16
+; PWR7-LE-NEXT:    xxswapd 35, 0
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
+; PWR7-LE-NEXT:    xxspltw 34, 34, 3
 ; PWR7-LE-NEXT:    xxswapd 36, 0
-; PWR7-LE-NEXT:    vperm 2, 3, 4, 2
+; PWR7-LE-NEXT:    vperm 2, 4, 2, 3
 ; PWR7-LE-NEXT:    blr
 ;
 ; PWR8-LE-LABEL: build_v4i32_load_0:
@@ -400,17 +393,14 @@ entry:
 define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) {
 ; PWR7-BE-LABEL: build_v4i32_load_1:
 ; PWR7-BE:       # %bb.0: # %entry
-; PWR7-BE-NEXT:    lwz 3, 0(3)
-; PWR7-BE-NEXT:    xxlxor 36, 36, 36
-; PWR7-BE-NEXT:    sldi 3, 3, 32
-; PWR7-BE-NEXT:    std 3, -16(1)
-; PWR7-BE-NEXT:    std 3, -8(1)
+; PWR7-BE-NEXT:    lfiwax 0, 0, 3
 ; PWR7-BE-NEXT:    addis 3, 2, .LCPI9_0 at toc@ha
+; PWR7-BE-NEXT:    xxlxor 36, 36, 36
 ; PWR7-BE-NEXT:    addi 3, 3, .LCPI9_0 at toc@l
-; PWR7-BE-NEXT:    lxvw4x 34, 0, 3
-; PWR7-BE-NEXT:    addi 3, 1, -16
 ; PWR7-BE-NEXT:    lxvw4x 35, 0, 3
-; PWR7-BE-NEXT:    vperm 2, 4, 3, 2
+; PWR7-BE-NEXT:    xxlor 34, 0, 0
+; PWR7-BE-NEXT:    xxspltw 34, 34, 1
+; PWR7-BE-NEXT:    vperm 2, 4, 2, 3
 ; PWR7-BE-NEXT:    blr
 ;
 ; PWR8-BE-LABEL: build_v4i32_load_1:
@@ -427,20 +417,18 @@ define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) {
 ; PWR7-LE-LABEL: build_v4i32_load_1:
 ; PWR7-LE:       # %bb.0: # %entry
 ; PWR7-LE-NEXT:    li 4, 0
-; PWR7-LE-NEXT:    lwz 3, 0(3)
-; PWR7-LE-NEXT:    stw 4, -32(1)
-; PWR7-LE-NEXT:    addis 4, 2, .LCPI9_0 at toc@ha
-; PWR7-LE-NEXT:    addi 4, 4, .LCPI9_0 at toc@l
-; PWR7-LE-NEXT:    stw 3, -16(1)
+; PWR7-LE-NEXT:    stw 4, -16(1)
+; PWR7-LE-NEXT:    lfiwax 0, 0, 3
+; PWR7-LE-NEXT:    addis 3, 2, .LCPI9_0 at toc@ha
+; PWR7-LE-NEXT:    addi 3, 3, .LCPI9_0 at toc@l
+; PWR7-LE-NEXT:    xxspltd 34, 0, 0
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
 ; PWR7-LE-NEXT:    addi 3, 1, -16
-; PWR7-LE-NEXT:    lxvd2x 0, 0, 4
-; PWR7-LE-NEXT:    addi 4, 1, -32
-; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
-; PWR7-LE-NEXT:    xxswapd 34, 0
+; PWR7-LE-NEXT:    xxswapd 35, 0
 ; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
-; PWR7-LE-NEXT:    xxswapd 35, 1
+; PWR7-LE-NEXT:    xxspltw 34, 34, 3
 ; PWR7-LE-NEXT:    xxswapd 36, 0
-; PWR7-LE-NEXT:    vperm 2, 4, 3, 2
+; PWR7-LE-NEXT:    vperm 2, 2, 4, 3
 ; PWR7-LE-NEXT:    blr
 ;
 ; PWR8-LE-LABEL: build_v4i32_load_1:
@@ -463,17 +451,14 @@ entry:
 define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) {
 ; PWR7-BE-LABEL: build_v4i32_load_2:
 ; PWR7-BE:       # %bb.0: # %entry
-; PWR7-BE-NEXT:    lwz 3, 0(3)
-; PWR7-BE-NEXT:    xxlxor 36, 36, 36
-; PWR7-BE-NEXT:    sldi 3, 3, 32
-; PWR7-BE-NEXT:    std 3, -16(1)
-; PWR7-BE-NEXT:    std 3, -8(1)
+; PWR7-BE-NEXT:    lfiwax 0, 0, 3
 ; PWR7-BE-NEXT:    addis 3, 2, .LCPI10_0 at toc@ha
+; PWR7-BE-NEXT:    xxlxor 36, 36, 36
 ; PWR7-BE-NEXT:    addi 3, 3, .LCPI10_0 at toc@l
-; PWR7-BE-NEXT:    lxvw4x 34, 0, 3
-; PWR7-BE-NEXT:    addi 3, 1, -16
 ; PWR7-BE-NEXT:    lxvw4x 35, 0, 3
-; PWR7-BE-NEXT:    vperm 2, 4, 3, 2
+; PWR7-BE-NEXT:    xxlor 34, 0, 0
+; PWR7-BE-NEXT:    xxspltw 34, 34, 1
+; PWR7-BE-NEXT:    vperm 2, 4, 2, 3
 ; PWR7-BE-NEXT:    blr
 ;
 ; PWR8-BE-LABEL: build_v4i32_load_2:
@@ -491,20 +476,18 @@ define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) {
 ; PWR7-LE-LABEL: build_v4i32_load_2:
 ; PWR7-LE:       # %bb.0: # %entry
 ; PWR7-LE-NEXT:    li 4, 0
-; PWR7-LE-NEXT:    lwz 3, 0(3)
-; PWR7-LE-NEXT:    stw 4, -32(1)
-; PWR7-LE-NEXT:    addis 4, 2, .LCPI10_0 at toc@ha
-; PWR7-LE-NEXT:    addi 4, 4, .LCPI10_0 at toc@l
-; PWR7-LE-NEXT:    stw 3, -16(1)
+; PWR7-LE-NEXT:    stw 4, -16(1)
+; PWR7-LE-NEXT:    lfiwax 0, 0, 3
+; PWR7-LE-NEXT:    addis 3, 2, .LCPI10_0 at toc@ha
+; PWR7-LE-NEXT:    addi 3, 3, .LCPI10_0 at toc@l
+; PWR7-LE-NEXT:    xxspltd 34, 0, 0
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
 ; PWR7-LE-NEXT:    addi 3, 1, -16
-; PWR7-LE-NEXT:    lxvd2x 0, 0, 4
-; PWR7-LE-NEXT:    addi 4, 1, -32
-; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
-; PWR7-LE-NEXT:    xxswapd 34, 0
+; PWR7-LE-NEXT:    xxswapd 35, 0
 ; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
-; PWR7-LE-NEXT:    xxswapd 35, 1
+; PWR7-LE-NEXT:    xxspltw 34, 34, 3
 ; PWR7-LE-NEXT:    xxswapd 36, 0
-; PWR7-LE-NEXT:    vperm 2, 4, 3, 2
+; PWR7-LE-NEXT:    vperm 2, 2, 4, 3
 ; PWR7-LE-NEXT:    blr
 ;
 ; PWR8-LE-LABEL: build_v4i32_load_2:
@@ -526,17 +509,14 @@ entry:
 define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) {
 ; PWR7-BE-LABEL: build_v4i32_load_3:
 ; PWR7-BE:       # %bb.0: # %entry
-; PWR7-BE-NEXT:    lwz 3, 0(3)
-; PWR7-BE-NEXT:    xxlxor 36, 36, 36
-; PWR7-BE-NEXT:    sldi 3, 3, 32
-; PWR7-BE-NEXT:    std 3, -16(1)
-; PWR7-BE-NEXT:    std 3, -8(1)
+; PWR7-BE-NEXT:    lfiwax 0, 0, 3
 ; PWR7-BE-NEXT:    addis 3, 2, .LCPI11_0 at toc@ha
+; PWR7-BE-NEXT:    xxlxor 36, 36, 36
 ; PWR7-BE-NEXT:    addi 3, 3, .LCPI11_0 at toc@l
-; PWR7-BE-NEXT:    lxvw4x 34, 0, 3
-; PWR7-BE-NEXT:    addi 3, 1, -16
 ; PWR7-BE-NEXT:    lxvw4x 35, 0, 3
-; PWR7-BE-NEXT:    vperm 2, 4, 3, 2
+; PWR7-BE-NEXT:    xxlor 34, 0, 0
+; PWR7-BE-NEXT:    xxspltw 34, 34, 1
+; PWR7-BE-NEXT:    vperm 2, 4, 2, 3
 ; PWR7-BE-NEXT:    blr
 ;
 ; PWR8-BE-LABEL: build_v4i32_load_3:
@@ -553,20 +533,18 @@ define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) {
 ; PWR7-LE-LABEL: build_v4i32_load_3:
 ; PWR7-LE:       # %bb.0: # %entry
 ; PWR7-LE-NEXT:    li 4, 0
-; PWR7-LE-NEXT:    lwz 3, 0(3)
-; PWR7-LE-NEXT:    stw 4, -32(1)
-; PWR7-LE-NEXT:    addis 4, 2, .LCPI11_0 at toc@ha
-; PWR7-LE-NEXT:    addi 4, 4, .LCPI11_0 at toc@l
-; PWR7-LE-NEXT:    stw 3, -16(1)
+; PWR7-LE-NEXT:    stw 4, -16(1)
+; PWR7-LE-NEXT:    lfiwax 0, 0, 3
+; PWR7-LE-NEXT:    addis 3, 2, .LCPI11_0 at toc@ha
+; PWR7-LE-NEXT:    addi 3, 3, .LCPI11_0 at toc@l
+; PWR7-LE-NEXT:    xxspltd 34, 0, 0
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
 ; PWR7-LE-NEXT:    addi 3, 1, -16
-; PWR7-LE-NEXT:    lxvd2x 0, 0, 4
-; PWR7-LE-NEXT:    addi 4, 1, -32
-; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
-; PWR7-LE-NEXT:    xxswapd 34, 0
+; PWR7-LE-NEXT:    xxswapd 35, 0
 ; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
-; PWR7-LE-NEXT:    xxswapd 35, 1
+; PWR7-LE-NEXT:    xxspltw 34, 34, 3
 ; PWR7-LE-NEXT:    xxswapd 36, 0
-; PWR7-LE-NEXT:    vperm 2, 4, 3, 2
+; PWR7-LE-NEXT:    vperm 2, 2, 4, 3
 ; PWR7-LE-NEXT:    blr
 ;
 ; PWR8-LE-LABEL: build_v4i32_load_3:
diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
index c26f98c5b0495d..5e73999c80b5ac 100644
--- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
+++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
@@ -536,15 +536,12 @@ define dso_local <8 x i16> @testmrglb3(ptr nocapture readonly %a) local_unnamed_
 ;
 ; P8-AIX-32-LABEL: testmrglb3:
 ; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lwz r4, 4(r3)
+; P8-AIX-32-NEXT:    li r4, 4
+; P8-AIX-32-NEXT:    lfiwax f1, 0, r3
 ; P8-AIX-32-NEXT:    xxlxor v3, v3, v3
-; P8-AIX-32-NEXT:    stw r4, -16(r1)
-; P8-AIX-32-NEXT:    lwz r3, 0(r3)
-; P8-AIX-32-NEXT:    stw r3, -32(r1)
-; P8-AIX-32-NEXT:    addi r3, r1, -16
-; P8-AIX-32-NEXT:    lxvw4x vs0, 0, r3
-; P8-AIX-32-NEXT:    addi r3, r1, -32
-; P8-AIX-32-NEXT:    lxvw4x vs1, 0, r3
+; P8-AIX-32-NEXT:    lfiwax f0, r3, r4
+; P8-AIX-32-NEXT:    xxspltw vs1, vs1, 1
+; P8-AIX-32-NEXT:    xxspltw vs0, vs0, 1
 ; P8-AIX-32-NEXT:    xxmrghw v2, vs1, vs0
 ; P8-AIX-32-NEXT:    vmrghb v2, v3, v2
 ; P8-AIX-32-NEXT:    blr
@@ -852,17 +849,15 @@ define dso_local <16 x i8> @no_RAUW_in_combine_during_legalize(ptr nocapture rea
 ;
 ; P8-AIX-32-LABEL: no_RAUW_in_combine_during_legalize:
 ; P8-AIX-32:       # %bb.0: # %entry
+; P8-AIX-32-NEXT:    li r5, 0
 ; P8-AIX-32-NEXT:    slwi r4, r4, 2
 ; P8-AIX-32-NEXT:    xxlxor v3, v3, v3
-; P8-AIX-32-NEXT:    lwzx r3, r3, r4
-; P8-AIX-32-NEXT:    li r4, 0
-; P8-AIX-32-NEXT:    stw r4, -32(r1)
-; P8-AIX-32-NEXT:    stw r3, -16(r1)
-; P8-AIX-32-NEXT:    addi r3, r1, -32
-; P8-AIX-32-NEXT:    lxvw4x vs0, 0, r3
+; P8-AIX-32-NEXT:    stw r5, -16(r1)
+; P8-AIX-32-NEXT:    lfiwax f0, r3, r4
 ; P8-AIX-32-NEXT:    addi r3, r1, -16
 ; P8-AIX-32-NEXT:    lxvw4x vs1, 0, r3
-; P8-AIX-32-NEXT:    xxmrghw v2, vs0, vs1
+; P8-AIX-32-NEXT:    xxspltw vs0, vs0, 1
+; P8-AIX-32-NEXT:    xxmrghw v2, vs1, vs0
 ; P8-AIX-32-NEXT:    vmrghb v2, v2, v3
 ; P8-AIX-32-NEXT:    blr
 entry:
@@ -1026,14 +1021,11 @@ define dso_local <2 x i64> @testSplat8(ptr nocapture readonly %ptr) local_unname
 ;
 ; P8-AIX-32-LABEL: testSplat8:
 ; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lwz r4, 4(r3)
-; P8-AIX-32-NEXT:    stw r4, -16(r1)
-; P8-AIX-32-NEXT:    lwz r3, 0(r3)
-; P8-AIX-32-NEXT:    stw r3, -32(r1)
-; P8-AIX-32-NEXT:    addi r3, r1, -16
-; P8-AIX-32-NEXT:    lxvw4x vs0, 0, r3
-; P8-AIX-32-NEXT:    addi r3, r1, -32
-; P8-AIX-32-NEXT:    lxvw4x vs1, 0, r3
+; P8-AIX-32-NEXT:    li r4, 4
+; P8-AIX-32-NEXT:    lfiwax f1, 0, r3
+; P8-AIX-32-NEXT:    lfiwax f0, r3, r4
+; P8-AIX-32-NEXT:    xxspltw vs1, vs1, 1
+; P8-AIX-32-NEXT:    xxspltw vs0, vs0, 1
 ; P8-AIX-32-NEXT:    xxmrghw vs0, vs1, vs0
 ; P8-AIX-32-NEXT:    xxmrghd v2, vs0, vs0
 ; P8-AIX-32-NEXT:    blr
@@ -1081,17 +1073,14 @@ define <2 x i64> @testSplati64_0(ptr nocapture readonly %ptr) #0 {
 ;
 ; P8-AIX-32-LABEL: testSplati64_0:
 ; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lwz r4, 0(r3)
-; P8-AIX-32-NEXT:    lwz r3, 4(r3)
-; P8-AIX-32-NEXT:    stw r3, -16(r1)
+; P8-AIX-32-NEXT:    li r4, 4
+; P8-AIX-32-NEXT:    lfiwax f0, r3, r4
+; P8-AIX-32-NEXT:    xxspltw v2, vs0, 1
+; P8-AIX-32-NEXT:    lfiwax f0, 0, r3
 ; P8-AIX-32-NEXT:    lwz r3, L..C3(r2) # %const.0
-; P8-AIX-32-NEXT:    stw r4, -32(r1)
-; P8-AIX-32-NEXT:    lxvw4x v2, 0, r3
-; P8-AIX-32-NEXT:    addi r3, r1, -16
-; P8-AIX-32-NEXT:    lxvw4x v3, 0, r3
-; P8-AIX-32-NEXT:    addi r3, r1, -32
 ; P8-AIX-32-NEXT:    lxvw4x v4, 0, r3
-; P8-AIX-32-NEXT:    vperm v2, v4, v3, v2
+; P8-AIX-32-NEXT:    xxspltw v3, vs0, 1
+; P8-AIX-32-NEXT:    vperm v2, v3, v2, v4
 ; P8-AIX-32-NEXT:    blr
 entry:
   %0 = load <1 x i64>, ptr %ptr, align 8
diff --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
index bc68ad2a67bf5d..38f47838a42fbf 100644
--- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll
+++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
@@ -208,47 +208,45 @@ define dso_local void @test4(ptr nocapture %c, ptr nocapture readonly %a) local_
 ;
 ; P9-AIX32-LABEL: test4:
 ; P9-AIX32:       # %bb.0: # %entry
-; P9-AIX32-NEXT:    lwz r5, 24(r4)
-; P9-AIX32-NEXT:    lwz r4, 28(r4)
-; P9-AIX32-NEXT:    stw r4, -16(r1)
+; P9-AIX32-NEXT:    li r5, 28
+; P9-AIX32-NEXT:    lfiwax f0, r4, r5
+; P9-AIX32-NEXT:    li r5, 24
+; P9-AIX32-NEXT:    lfiwax f1, r4, r5
 ; P9-AIX32-NEXT:    lwz r4, L..C0(r2) # %const.0
-; P9-AIX32-NEXT:    stw r5, -32(r1)
-; P9-AIX32-NEXT:    lxv vs1, -16(r1)
-; P9-AIX32-NEXT:    lxv vs2, -32(r1)
-; P9-AIX32-NEXT:    lxv vs0, 0(r4)
-; P9-AIX32-NEXT:    xxperm vs1, vs2, vs0
-; P9-AIX32-NEXT:    stxv vs1, 0(r3)
+; P9-AIX32-NEXT:    xxspltw vs0, vs0, 1
+; P9-AIX32-NEXT:    lxv vs2, 0(r4)
+; P9-AIX32-NEXT:    xxspltw vs1, vs1, 1
+; P9-AIX32-NEXT:    xxperm vs0, vs1, vs2
+; P9-AIX32-NEXT:    stxv vs0, 0(r3)
 ; P9-AIX32-NEXT:    blr
 ;
 ; P8-AIX32-LABEL: test4:
 ; P8-AIX32:       # %bb.0: # %entry
-; P8-AIX32-NEXT:    lwz r5, 24(r4)
-; P8-AIX32-NEXT:    lwz r4, 28(r4)
-; P8-AIX32-NEXT:    stw r4, -16(r1)
+; P8-AIX32-NEXT:    li r5, 28
+; P8-AIX32-NEXT:    lfiwax f0, r4, r5
+; P8-AIX32-NEXT:    li r5, 24
+; P8-AIX32-NEXT:    xxspltw v2, vs0, 1
+; P8-AIX32-NEXT:    lfiwax f0, r4, r5
 ; P8-AIX32-NEXT:    lwz r4, L..C0(r2) # %const.0
-; P8-AIX32-NEXT:    stw r5, -32(r1)
-; P8-AIX32-NEXT:    lxvw4x v2, 0, r4
-; P8-AIX32-NEXT:    addi r4, r1, -16
-; P8-AIX32-NEXT:    lxvw4x v3, 0, r4
-; P8-AIX32-NEXT:    addi r4, r1, -32
 ; P8-AIX32-NEXT:    lxvw4x v4, 0, r4
-; P8-AIX32-NEXT:    vperm v2, v4, v3, v2
+; P8-AIX32-NEXT:    xxspltw v3, vs0, 1
+; P8-AIX32-NEXT:    vperm v2, v3, v2, v4
 ; P8-AIX32-NEXT:    stxvw4x v2, 0, r3
 ; P8-AIX32-NEXT:    blr
 ;
 ; P7-AIX32-LABEL: test4:
 ; P7-AIX32:       # %bb.0: # %entry
-; P7-AIX32-NEXT:    lwz r5, 24(r4)
-; P7-AIX32-NEXT:    lwz r4, 28(r4)
-; P7-AIX32-NEXT:    stw r4, -16(r1)
+; P7-AIX32-NEXT:    li r5, 28
+; P7-AIX32-NEXT:    lfiwax f0, r4, r5
+; P7-AIX32-NEXT:    li r5, 24
+; P7-AIX32-NEXT:    xxlor v2, f0, f0
+; P7-AIX32-NEXT:    xxspltw v2, v2, 1
+; P7-AIX32-NEXT:    lfiwax f0, r4, r5
 ; P7-AIX32-NEXT:    lwz r4, L..C0(r2) # %const.0
-; P7-AIX32-NEXT:    stw r5, -32(r1)
-; P7-AIX32-NEXT:    lxvw4x v2, 0, r4
-; P7-AIX32-NEXT:    addi r4, r1, -16
-; P7-AIX32-NEXT:    lxvw4x v3, 0, r4
-; P7-AIX32-NEXT:    addi r4, r1, -32
 ; P7-AIX32-NEXT:    lxvw4x v4, 0, r4
-; P7-AIX32-NEXT:    vperm v2, v4, v3, v2
+; P7-AIX32-NEXT:    xxlor v3, f0, f0
+; P7-AIX32-NEXT:    xxspltw v3, v3, 1
+; P7-AIX32-NEXT:    vperm v2, v3, v2, v4
 ; P7-AIX32-NEXT:    stxvw4x v2, 0, r3
 ; P7-AIX32-NEXT:    blr
 entry:
@@ -362,47 +360,43 @@ define void @test6(ptr %a, ptr %in) {
 ;
 ; P9-AIX32-LABEL: test6:
 ; P9-AIX32:       # %bb.0: # %entry
-; P9-AIX32-NEXT:    lwz r4, 0(r4)
 ; P9-AIX32-NEXT:    li r5, 0
-; P9-AIX32-NEXT:    stw r5, -32(r1)
-; P9-AIX32-NEXT:    lxv vs1, -32(r1)
-; P9-AIX32-NEXT:    stw r4, -16(r1)
-; P9-AIX32-NEXT:    lwz r4, L..C2(r2) # %const.0
+; P9-AIX32-NEXT:    stw r5, -16(r1)
+; P9-AIX32-NEXT:    lwz r5, L..C2(r2) # %const.0
+; P9-AIX32-NEXT:    lfiwax f1, 0, r4
 ; P9-AIX32-NEXT:    lxv vs2, -16(r1)
-; P9-AIX32-NEXT:    lxv vs0, 0(r4)
-; P9-AIX32-NEXT:    xxperm vs2, vs1, vs0
-; P9-AIX32-NEXT:    stxv vs2, 0(r3)
+; P9-AIX32-NEXT:    lxv vs0, 0(r5)
+; P9-AIX32-NEXT:    xxspltw vs1, vs1, 1
+; P9-AIX32-NEXT:    xxperm vs1, vs2, vs0
+; P9-AIX32-NEXT:    stxv vs1, 0(r3)
 ; P9-AIX32-NEXT:    blr
 ;
 ; P8-AIX32-LABEL: test6:
 ; P8-AIX32:       # %bb.0: # %entry
-; P8-AIX32-NEXT:    lwz r4, 0(r4)
 ; P8-AIX32-NEXT:    li r5, 0
-; P8-AIX32-NEXT:    stw r5, -32(r1)
-; P8-AIX32-NEXT:    stw r4, -16(r1)
+; P8-AIX32-NEXT:    stw r5, -16(r1)
+; P8-AIX32-NEXT:    lfiwax f0, 0, r4
 ; P8-AIX32-NEXT:    lwz r4, L..C2(r2) # %const.0
-; P8-AIX32-NEXT:    lxvw4x v2, 0, r4
-; P8-AIX32-NEXT:    addi r4, r1, -32
 ; P8-AIX32-NEXT:    lxvw4x v3, 0, r4
 ; P8-AIX32-NEXT:    addi r4, r1, -16
 ; P8-AIX32-NEXT:    lxvw4x v4, 0, r4
-; P8-AIX32-NEXT:    vperm v2, v3, v4, v2
+; P8-AIX32-NEXT:    xxspltw v2, vs0, 1
+; P8-AIX32-NEXT:    vperm v2, v4, v2, v3
 ; P8-AIX32-NEXT:    stxvw4x v2, 0, r3
 ; P8-AIX32-NEXT:    blr
 ;
 ; P7-AIX32-LABEL: test6:
 ; P7-AIX32:       # %bb.0: # %entry
-; P7-AIX32-NEXT:    lwz r4, 0(r4)
 ; P7-AIX32-NEXT:    li r5, 0
-; P7-AIX32-NEXT:    stw r5, -32(r1)
-; P7-AIX32-NEXT:    stw r4, -16(r1)
+; P7-AIX32-NEXT:    stw r5, -16(r1)
+; P7-AIX32-NEXT:    lfiwax f0, 0, r4
 ; P7-AIX32-NEXT:    lwz r4, L..C2(r2) # %const.0
-; P7-AIX32-NEXT:    lxvw4x v2, 0, r4
-; P7-AIX32-NEXT:    addi r4, r1, -32
 ; P7-AIX32-NEXT:    lxvw4x v3, 0, r4
 ; P7-AIX32-NEXT:    addi r4, r1, -16
+; P7-AIX32-NEXT:    xxlor v2, f0, f0
 ; P7-AIX32-NEXT:    lxvw4x v4, 0, r4
-; P7-AIX32-NEXT:    vperm v2, v3, v4, v2
+; P7-AIX32-NEXT:    xxspltw v2, v2, 1
+; P7-AIX32-NEXT:    vperm v2, v4, v2, v3
 ; P7-AIX32-NEXT:    stxvw4x v2, 0, r3
 ; P7-AIX32-NEXT:    blr
 entry:
@@ -810,40 +804,35 @@ define <16 x i8> @unadjusted_lxvdsx(ptr %s, ptr %t) {
 ;
 ; P9-AIX32-LABEL: unadjusted_lxvdsx:
 ; P9-AIX32:       # %bb.0: # %entry
-; P9-AIX32-NEXT:    lwz r4, 4(r3)
-; P9-AIX32-NEXT:    stw r4, -16(r1)
-; P9-AIX32-NEXT:    lwz r3, 0(r3)
-; P9-AIX32-NEXT:    lxv vs0, -16(r1)
-; P9-AIX32-NEXT:    stw r3, -32(r1)
-; P9-AIX32-NEXT:    lxv vs1, -32(r1)
+; P9-AIX32-NEXT:    li r4, 4
+; P9-AIX32-NEXT:    lfiwax f1, 0, r3
+; P9-AIX32-NEXT:    xxspltw vs1, vs1, 1
+; P9-AIX32-NEXT:    lfiwax f0, r3, r4
+; P9-AIX32-NEXT:    xxspltw vs0, vs0, 1
 ; P9-AIX32-NEXT:    xxmrghw vs0, vs1, vs0
 ; P9-AIX32-NEXT:    xxmrghd v2, vs0, vs0
 ; P9-AIX32-NEXT:    blr
 ;
 ; P8-AIX32-LABEL: unadjusted_lxvdsx:
 ; P8-AIX32:       # %bb.0: # %entry
-; P8-AIX32-NEXT:    lwz r4, 4(r3)
-; P8-AIX32-NEXT:    stw r4, -16(r1)
-; P8-AIX32-NEXT:    lwz r3, 0(r3)
-; P8-AIX32-NEXT:    stw r3, -32(r1)
-; P8-AIX32-NEXT:    addi r3, r1, -16
-; P8-AIX32-NEXT:    lxvw4x vs0, 0, r3
-; P8-AIX32-NEXT:    addi r3, r1, -32
-; P8-AIX32-NEXT:    lxvw4x vs1, 0, r3
+; P8-AIX32-NEXT:    li r4, 4
+; P8-AIX32-NEXT:    lfiwax f1, 0, r3
+; P8-AIX32-NEXT:    lfiwax f0, r3, r4
+; P8-AIX32-NEXT:    xxspltw vs1, vs1, 1
+; P8-AIX32-NEXT:    xxspltw vs0, vs0, 1
 ; P8-AIX32-NEXT:    xxmrghw vs0, vs1, vs0
 ; P8-AIX32-NEXT:    xxmrghd v2, vs0, vs0
 ; P8-AIX32-NEXT:    blr
 ;
 ; P7-AIX32-LABEL: unadjusted_lxvdsx:
 ; P7-AIX32:       # %bb.0: # %entry
-; P7-AIX32-NEXT:    lwz r4, 4(r3)
-; P7-AIX32-NEXT:    stw r4, -16(r1)
-; P7-AIX32-NEXT:    lwz r3, 0(r3)
-; P7-AIX32-NEXT:    stw r3, -32(r1)
-; P7-AIX32-NEXT:    addi r3, r1, -16
-; P7-AIX32-NEXT:    lxvw4x vs0, 0, r3
-; P7-AIX32-NEXT:    addi r3, r1, -32
-; P7-AIX32-NEXT:    lxvw4x vs1, 0, r3
+; P7-AIX32-NEXT:    li r4, 4
+; P7-AIX32-NEXT:    lfiwax f1, 0, r3
+; P7-AIX32-NEXT:    lfiwax f0, r3, r4
+; P7-AIX32-NEXT:    xxlor v2, f0, f0
+; P7-AIX32-NEXT:    xxspltw vs0, v2, 1
+; P7-AIX32-NEXT:    xxlor v2, f1, f1
+; P7-AIX32-NEXT:    xxspltw vs1, v2, 1
 ; P7-AIX32-NEXT:    xxmrghw vs0, vs1, vs0
 ; P7-AIX32-NEXT:    xxmrghd v2, vs0, vs0
 ; P7-AIX32-NEXT:    blr
diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
index 4da36c9af5c101..db7a0292e036b7 100644
--- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
+++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
@@ -85,23 +85,23 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
 ;
 ; P9BE-AIX32-LABEL: test64:
 ; P9BE-AIX32:       # %bb.0: # %entry
-; P9BE-AIX32-NEXT:    lwzux 4, 3, 4
+; P9BE-AIX32-NEXT:    add 5, 3, 4
+; P9BE-AIX32-NEXT:    lfiwax 0, 3, 4
+; P9BE-AIX32-NEXT:    li 3, 4
 ; P9BE-AIX32-NEXT:    xxlxor 2, 2, 2
 ; P9BE-AIX32-NEXT:    vspltisw 4, 8
-; P9BE-AIX32-NEXT:    stw 4, -48(1)
+; P9BE-AIX32-NEXT:    xxspltw 0, 0, 1
+; P9BE-AIX32-NEXT:    lfiwax 1, 5, 3
+; P9BE-AIX32-NEXT:    lwz 3, L..C0(2) # %const.0
 ; P9BE-AIX32-NEXT:    vadduwm 4, 4, 4
-; P9BE-AIX32-NEXT:    lwz 4, 4(3)
-; P9BE-AIX32-NEXT:    lxv 0, -48(1)
-; P9BE-AIX32-NEXT:    stw 4, -32(1)
-; P9BE-AIX32-NEXT:    lwz 4, L..C0(2) # %const.0
-; P9BE-AIX32-NEXT:    lxv 1, -32(1)
-; P9BE-AIX32-NEXT:    lwz 3, 8(3)
-; P9BE-AIX32-NEXT:    stw 3, -16(1)
-; P9BE-AIX32-NEXT:    lwz 3, L..C1(2) # %const.1
+; P9BE-AIX32-NEXT:    xxspltw 1, 1, 1
 ; P9BE-AIX32-NEXT:    xxmrghw 2, 0, 1
-; P9BE-AIX32-NEXT:    lxv 0, 0(4)
+; P9BE-AIX32-NEXT:    lxv 0, 0(3)
+; P9BE-AIX32-NEXT:    li 3, 8
 ; P9BE-AIX32-NEXT:    xxperm 2, 2, 0
-; P9BE-AIX32-NEXT:    lxv 0, -16(1)
+; P9BE-AIX32-NEXT:    lfiwax 0, 5, 3
+; P9BE-AIX32-NEXT:    lwz 3, L..C1(2) # %const.1
+; P9BE-AIX32-NEXT:    xxspltw 0, 0, 1
 ; P9BE-AIX32-NEXT:    xxmrghw 3, 1, 0
 ; P9BE-AIX32-NEXT:    lxv 0, 0(3)
 ; P9BE-AIX32-NEXT:    xxperm 3, 3, 0
diff --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
index 25e1baa28f7ef3..be2a5d039d04ff 100644
--- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
@@ -73,13 +73,11 @@ define <4 x i32> @s2v_test1(ptr nocapture readonly %int32, <4 x i32> %vec)  {
 ;
 ; P8-AIX-32-LABEL: s2v_test1:
 ; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lwz r3, 0(r3)
-; P8-AIX-32-NEXT:    stw r3, -16(r1)
+; P8-AIX-32-NEXT:    lfiwax f0, 0, r3
 ; P8-AIX-32-NEXT:    lwz r3, L..C0(r2) # %const.0
-; P8-AIX-32-NEXT:    lxvw4x v3, 0, r3
-; P8-AIX-32-NEXT:    addi r3, r1, -16
 ; P8-AIX-32-NEXT:    lxvw4x v4, 0, r3
-; P8-AIX-32-NEXT:    vperm v2, v4, v2, v3
+; P8-AIX-32-NEXT:    xxspltw v3, vs0, 1
+; P8-AIX-32-NEXT:    vperm v2, v3, v2, v4
 ; P8-AIX-32-NEXT:    blr
 entry:
   %0 = load i32, ptr %int32, align 4
@@ -142,13 +140,12 @@ define <4 x i32> @s2v_test2(ptr nocapture readonly %int32, <4 x i32> %vec)  {
 ;
 ; P8-AIX-32-LABEL: s2v_test2:
 ; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lwz r3, 4(r3)
-; P8-AIX-32-NEXT:    stw r3, -16(r1)
+; P8-AIX-32-NEXT:    addi r3, r3, 4
+; P8-AIX-32-NEXT:    lfiwax f0, 0, r3
 ; P8-AIX-32-NEXT:    lwz r3, L..C1(r2) # %const.0
-; P8-AIX-32-NEXT:    lxvw4x v3, 0, r3
-; P8-AIX-32-NEXT:    addi r3, r1, -16
 ; P8-AIX-32-NEXT:    lxvw4x v4, 0, r3
-; P8-AIX-32-NEXT:    vperm v2, v4, v2, v3
+; P8-AIX-32-NEXT:    xxspltw v3, vs0, 1
+; P8-AIX-32-NEXT:    vperm v2, v3, v2, v4
 ; P8-AIX-32-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds i32, ptr %int32, i64 1
@@ -224,13 +221,11 @@ define <4 x i32> @s2v_test3(ptr nocapture readonly %int32, <4 x i32> %vec, i32 s
 ; P8-AIX-32-LABEL: s2v_test3:
 ; P8-AIX-32:       # %bb.0: # %entry
 ; P8-AIX-32-NEXT:    slwi r4, r4, 2
-; P8-AIX-32-NEXT:    lwzx r3, r3, r4
-; P8-AIX-32-NEXT:    stw r3, -16(r1)
+; P8-AIX-32-NEXT:    lfiwax f0, r3, r4
 ; P8-AIX-32-NEXT:    lwz r3, L..C2(r2) # %const.0
-; P8-AIX-32-NEXT:    lxvw4x v3, 0, r3
-; P8-AIX-32-NEXT:    addi r3, r1, -16
 ; P8-AIX-32-NEXT:    lxvw4x v4, 0, r3
-; P8-AIX-32-NEXT:    vperm v2, v4, v2, v3
+; P8-AIX-32-NEXT:    xxspltw v3, vs0, 1
+; P8-AIX-32-NEXT:    vperm v2, v3, v2, v4
 ; P8-AIX-32-NEXT:    blr
 entry:
   %idxprom = sext i32 %Idx to i64
@@ -295,13 +290,12 @@ define <4 x i32> @s2v_test4(ptr nocapture readonly %int32, <4 x i32> %vec)  {
 ;
 ; P8-AIX-32-LABEL: s2v_test4:
 ; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lwz r3, 4(r3)
-; P8-AIX-32-NEXT:    stw r3, -16(r1)
+; P8-AIX-32-NEXT:    addi r3, r3, 4
+; P8-AIX-32-NEXT:    lfiwax f0, 0, r3
 ; P8-AIX-32-NEXT:    lwz r3, L..C3(r2) # %const.0
-; P8-AIX-32-NEXT:    lxvw4x v3, 0, r3
-; P8-AIX-32-NEXT:    addi r3, r1, -16
 ; P8-AIX-32-NEXT:    lxvw4x v4, 0, r3
-; P8-AIX-32-NEXT:    vperm v2, v4, v2, v3
+; P8-AIX-32-NEXT:    xxspltw v3, vs0, 1
+; P8-AIX-32-NEXT:    vperm v2, v3, v2, v4
 ; P8-AIX-32-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds i32, ptr %int32, i64 1
@@ -362,13 +356,11 @@ define <4 x i32> @s2v_test5(<4 x i32> %vec, ptr nocapture readonly %ptr1)  {
 ;
 ; P8-AIX-32-LABEL: s2v_test5:
 ; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lwz r3, 0(r3)
-; P8-AIX-32-NEXT:    stw r3, -16(r1)
+; P8-AIX-32-NEXT:    lfiwax f0, 0, r3
 ; P8-AIX-32-NEXT:    lwz r3, L..C4(r2) # %const.0
-; P8-AIX-32-NEXT:    lxvw4x v3, 0, r3
-; P8-AIX-32-NEXT:    addi r3, r1, -16
 ; P8-AIX-32-NEXT:    lxvw4x v4, 0, r3
-; P8-AIX-32-NEXT:    vperm v2, v4, v2, v3
+; P8-AIX-32-NEXT:    xxspltw v3, vs0, 1
+; P8-AIX-32-NEXT:    vperm v2, v3, v2, v4
 ; P8-AIX-32-NEXT:    blr
 entry:
   %0 = load i32, ptr %ptr1, align 4
diff --git a/llvm/test/CodeGen/PowerPC/test-vector-insert.ll b/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
index 73b4ad8a507b82..00ebd279e6f667 100644
--- a/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
+++ b/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
@@ -26,16 +26,14 @@ define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
 ; CHECK-LE-P7-NEXT:    xscvdpsxws f0, f1
 ; CHECK-LE-P7-NEXT:    addi r3, r1, -4
 ; CHECK-LE-P7-NEXT:    stfiwx f0, 0, r3
-; CHECK-LE-P7-NEXT:    lwz r3, -4(r1)
-; CHECK-LE-P7-NEXT:    stw r3, -32(r1)
+; CHECK-LE-P7-NEXT:    lfiwax f0, 0, r3
 ; CHECK-LE-P7-NEXT:    addis r3, r2, .LCPI0_0 at toc@ha
 ; CHECK-LE-P7-NEXT:    addi r3, r3, .LCPI0_0 at toc@l
-; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-LE-P7-NEXT:    addi r3, r1, -32
-; CHECK-LE-P7-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P7-NEXT:    xxspltd v3, vs0, 0
 ; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-LE-P7-NEXT:    xxswapd v4, vs0
-; CHECK-LE-P7-NEXT:    vperm v2, v4, v2, v3
+; CHECK-LE-P7-NEXT:    xxspltw v3, v3, 3
+; CHECK-LE-P7-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-LE-P7-NEXT:    blr
 ;
 ; CHECK-LE-P8-LABEL: test:
@@ -59,16 +57,13 @@ define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
 ; CHECK-BE-P7-NEXT:    xscvdpsxws f0, f1
 ; CHECK-BE-P7-NEXT:    addi r3, r1, -4
 ; CHECK-BE-P7-NEXT:    stfiwx f0, 0, r3
-; CHECK-BE-P7-NEXT:    lwz r3, -4(r1)
-; CHECK-BE-P7-NEXT:    sldi r3, r3, 32
-; CHECK-BE-P7-NEXT:    std r3, -32(r1)
-; CHECK-BE-P7-NEXT:    std r3, -24(r1)
+; CHECK-BE-P7-NEXT:    lfiwax f0, 0, r3
 ; CHECK-BE-P7-NEXT:    addis r3, r2, .LCPI0_0 at toc@ha
 ; CHECK-BE-P7-NEXT:    addi r3, r3, .LCPI0_0 at toc@l
-; CHECK-BE-P7-NEXT:    lxvw4x v3, 0, r3
-; CHECK-BE-P7-NEXT:    addi r3, r1, -32
 ; CHECK-BE-P7-NEXT:    lxvw4x v4, 0, r3
-; CHECK-BE-P7-NEXT:    vperm v2, v2, v4, v3
+; CHECK-BE-P7-NEXT:    xxlor v3, f0, f0
+; CHECK-BE-P7-NEXT:    xxspltw v3, v3, 1
+; CHECK-BE-P7-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P7-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test:
@@ -97,16 +92,14 @@ define dso_local <4 x i32> @test2(<4 x i32> %a, float %b) {
 ; CHECK-LE-P7-NEXT:    xscvdpsxws f0, f1
 ; CHECK-LE-P7-NEXT:    addi r3, r1, -4
 ; CHECK-LE-P7-NEXT:    stfiwx f0, 0, r3
-; CHECK-LE-P7-NEXT:    lwz r3, -4(r1)
-; CHECK-LE-P7-NEXT:    stw r3, -32(r1)
+; CHECK-LE-P7-NEXT:    lfiwax f0, 0, r3
 ; CHECK-LE-P7-NEXT:    addis r3, r2, .LCPI1_0 at toc@ha
 ; CHECK-LE-P7-NEXT:    addi r3, r3, .LCPI1_0 at toc@l
-; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-LE-P7-NEXT:    addi r3, r1, -32
-; CHECK-LE-P7-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P7-NEXT:    xxspltd v3, vs0, 0
 ; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-LE-P7-NEXT:    xxswapd v4, vs0
-; CHECK-LE-P7-NEXT:    vperm v2, v4, v2, v3
+; CHECK-LE-P7-NEXT:    xxspltw v3, v3, 3
+; CHECK-LE-P7-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-LE-P7-NEXT:    blr
 ;
 ; CHECK-LE-P8-LABEL: test2:
@@ -130,16 +123,13 @@ define dso_local <4 x i32> @test2(<4 x i32> %a, float %b) {
 ; CHECK-BE-P7-NEXT:    xscvdpsxws f0, f1
 ; CHECK-BE-P7-NEXT:    addi r3, r1, -4
 ; CHECK-BE-P7-NEXT:    stfiwx f0, 0, r3
-; CHECK-BE-P7-NEXT:    lwz r3, -4(r1)
-; CHECK-BE-P7-NEXT:    sldi r3, r3, 32
-; CHECK-BE-P7-NEXT:    std r3, -32(r1)
-; CHECK-BE-P7-NEXT:    std r3, -24(r1)
+; CHECK-BE-P7-NEXT:    lfiwax f0, 0, r3
 ; CHECK-BE-P7-NEXT:    addis r3, r2, .LCPI1_0 at toc@ha
 ; CHECK-BE-P7-NEXT:    addi r3, r3, .LCPI1_0 at toc@l
-; CHECK-BE-P7-NEXT:    lxvw4x v3, 0, r3
-; CHECK-BE-P7-NEXT:    addi r3, r1, -32
 ; CHECK-BE-P7-NEXT:    lxvw4x v4, 0, r3
-; CHECK-BE-P7-NEXT:    vperm v2, v2, v4, v3
+; CHECK-BE-P7-NEXT:    xxlor v3, f0, f0
+; CHECK-BE-P7-NEXT:    xxspltw v3, v3, 1
+; CHECK-BE-P7-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P7-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test2:
@@ -168,16 +158,14 @@ define dso_local <4 x i32> @test3(<4 x i32> %a, double %b) {
 ; CHECK-LE-P7-NEXT:    xscvdpuxws f0, f1
 ; CHECK-LE-P7-NEXT:    addi r3, r1, -4
 ; CHECK-LE-P7-NEXT:    stfiwx f0, 0, r3
-; CHECK-LE-P7-NEXT:    lwz r3, -4(r1)
-; CHECK-LE-P7-NEXT:    stw r3, -32(r1)
+; CHECK-LE-P7-NEXT:    lfiwax f0, 0, r3
 ; CHECK-LE-P7-NEXT:    addis r3, r2, .LCPI2_0 at toc@ha
 ; CHECK-LE-P7-NEXT:    addi r3, r3, .LCPI2_0 at toc@l
-; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-LE-P7-NEXT:    addi r3, r1, -32
-; CHECK-LE-P7-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P7-NEXT:    xxspltd v3, vs0, 0
 ; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-LE-P7-NEXT:    xxswapd v4, vs0
-; CHECK-LE-P7-NEXT:    vperm v2, v4, v2, v3
+; CHECK-LE-P7-NEXT:    xxspltw v3, v3, 3
+; CHECK-LE-P7-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-LE-P7-NEXT:    blr
 ;
 ; CHECK-LE-P8-LABEL: test3:
@@ -201,16 +189,13 @@ define dso_local <4 x i32> @test3(<4 x i32> %a, double %b) {
 ; CHECK-BE-P7-NEXT:    xscvdpuxws f0, f1
 ; CHECK-BE-P7-NEXT:    addi r3, r1, -4
 ; CHECK-BE-P7-NEXT:    stfiwx f0, 0, r3
-; CHECK-BE-P7-NEXT:    lwz r3, -4(r1)
-; CHECK-BE-P7-NEXT:    sldi r3, r3, 32
-; CHECK-BE-P7-NEXT:    std r3, -32(r1)
-; CHECK-BE-P7-NEXT:    std r3, -24(r1)
+; CHECK-BE-P7-NEXT:    lfiwax f0, 0, r3
 ; CHECK-BE-P7-NEXT:    addis r3, r2, .LCPI2_0 at toc@ha
 ; CHECK-BE-P7-NEXT:    addi r3, r3, .LCPI2_0 at toc@l
-; CHECK-BE-P7-NEXT:    lxvw4x v3, 0, r3
-; CHECK-BE-P7-NEXT:    addi r3, r1, -32
 ; CHECK-BE-P7-NEXT:    lxvw4x v4, 0, r3
-; CHECK-BE-P7-NEXT:    vperm v2, v2, v4, v3
+; CHECK-BE-P7-NEXT:    xxlor v3, f0, f0
+; CHECK-BE-P7-NEXT:    xxspltw v3, v3, 1
+; CHECK-BE-P7-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P7-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test3:
@@ -239,16 +224,14 @@ define dso_local <4 x i32> @test4(<4 x i32> %a, float %b) {
 ; CHECK-LE-P7-NEXT:    xscvdpuxws f0, f1
 ; CHECK-LE-P7-NEXT:    addi r3, r1, -4
 ; CHECK-LE-P7-NEXT:    stfiwx f0, 0, r3
-; CHECK-LE-P7-NEXT:    lwz r3, -4(r1)
-; CHECK-LE-P7-NEXT:    stw r3, -32(r1)
+; CHECK-LE-P7-NEXT:    lfiwax f0, 0, r3
 ; CHECK-LE-P7-NEXT:    addis r3, r2, .LCPI3_0 at toc@ha
 ; CHECK-LE-P7-NEXT:    addi r3, r3, .LCPI3_0 at toc@l
-; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-LE-P7-NEXT:    addi r3, r1, -32
-; CHECK-LE-P7-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P7-NEXT:    xxspltd v3, vs0, 0
 ; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-LE-P7-NEXT:    xxswapd v4, vs0
-; CHECK-LE-P7-NEXT:    vperm v2, v4, v2, v3
+; CHECK-LE-P7-NEXT:    xxspltw v3, v3, 3
+; CHECK-LE-P7-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-LE-P7-NEXT:    blr
 ;
 ; CHECK-LE-P8-LABEL: test4:
@@ -272,16 +255,13 @@ define dso_local <4 x i32> @test4(<4 x i32> %a, float %b) {
 ; CHECK-BE-P7-NEXT:    xscvdpuxws f0, f1
 ; CHECK-BE-P7-NEXT:    addi r3, r1, -4
 ; CHECK-BE-P7-NEXT:    stfiwx f0, 0, r3
-; CHECK-BE-P7-NEXT:    lwz r3, -4(r1)
-; CHECK-BE-P7-NEXT:    sldi r3, r3, 32
-; CHECK-BE-P7-NEXT:    std r3, -32(r1)
-; CHECK-BE-P7-NEXT:    std r3, -24(r1)
+; CHECK-BE-P7-NEXT:    lfiwax f0, 0, r3
 ; CHECK-BE-P7-NEXT:    addis r3, r2, .LCPI3_0 at toc@ha
 ; CHECK-BE-P7-NEXT:    addi r3, r3, .LCPI3_0 at toc@l
-; CHECK-BE-P7-NEXT:    lxvw4x v3, 0, r3
-; CHECK-BE-P7-NEXT:    addi r3, r1, -32
 ; CHECK-BE-P7-NEXT:    lxvw4x v4, 0, r3
-; CHECK-BE-P7-NEXT:    vperm v2, v2, v4, v3
+; CHECK-BE-P7-NEXT:    xxlor v3, f0, f0
+; CHECK-BE-P7-NEXT:    xxspltw v3, v3, 1
+; CHECK-BE-P7-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P7-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test4:
diff --git a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
index 11cc8abd2c7fa3..9159095d1decea 100644
--- a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
@@ -2045,31 +2045,27 @@ define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lxsiwzx v2, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 4(r4)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT:    li r5, 4
+; CHECK-AIX-32-P8-NEXT:    lfiwax f1, 0, r4
+; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C9(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, r4, r5
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
-; CHECK-AIX-32-P8-NEXT:    xxmrghw v3, vs1, vs0
-; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs1, vs1, 1
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P8-NEXT:    xxmrghw v2, vs1, vs0
+; CHECK-AIX-32-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r3
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 4(r4)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -32(r1)
+; CHECK-AIX-32-P9-NEXT:    li r3, 4
+; CHECK-AIX-32-P9-NEXT:    lfiwax f2, 0, r4
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs2, vs2, 1
+; CHECK-AIX-32-P9-NEXT:    lfiwax f1, r4, r3
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C5(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    lxv vs2, -32(r1)
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw v2, vs2, vs1
 ; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
diff --git a/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
index 8bb71e073e8146..be507204651df4 100644
--- a/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
@@ -1685,43 +1685,33 @@ define <2 x i64> @test_v2i64_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r5, 4(r3)
-; CHECK-AIX-32-P8-NEXT:    stw r5, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 4(r4)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -48(r1)
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -64(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -48
+; CHECK-AIX-32-P8-NEXT:    li r5, 4
+; CHECK-AIX-32-P8-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lfiwax f3, 0, r4
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, r3, r5
+; CHECK-AIX-32-P8-NEXT:    lfiwax f2, r4, r5
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs1, vs1, 1
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs3, vs3, 1
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs2, vs2, 1
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw v2, vs1, vs0
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -64
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs3, vs2
 ; CHECK-AIX-32-P8-NEXT:    xxmrghd v3, v2, vs0
 ; CHECK-AIX-32-P8-NEXT:    vaddudm v2, v3, v2
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lwz r5, 4(r3)
-; CHECK-AIX-32-P9-NEXT:    stw r5, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 4(r4)
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -48(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r4)
+; CHECK-AIX-32-P9-NEXT:    li r5, 4
+; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
+; CHECK-AIX-32-P9-NEXT:    lfiwax f0, r3, r5
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs0, vs0, 1
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw v2, vs1, vs0
-; CHECK-AIX-32-P9-NEXT:    lxv vs0, -48(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -64(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -64(r1)
+; CHECK-AIX-32-P9-NEXT:    lfiwax f0, r4, r5
+; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r4
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P9-NEXT:    xxmrghd v3, v2, vs0
 ; CHECK-AIX-32-P9-NEXT:    vaddudm v2, v3, v2
diff --git a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
index 4ca55d276647bf..b5192d31252a75 100644
--- a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
@@ -743,25 +743,22 @@ define void @test_v8i16_v4i32(ptr %a) {
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    sth r4, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r4
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    sth r4, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs0, vs1
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    sth r4, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    lxv vs0, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    sth r4, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
@@ -842,25 +839,22 @@ define void @test_v8i16_v2i64(ptr %a) {
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    sth r4, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r4
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    sth r4, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs0, vs1
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    sth r4, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    lxv vs0, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    sth r4, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
@@ -1030,25 +1024,22 @@ define void @test_v4i32_v8i16(ptr %a) {
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    sth r4, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r4
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    sth r4, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    sth r4, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    lxv vs0, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    sth r4, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
@@ -1125,26 +1116,20 @@ define void @test_v4i32_v2i64(ptr %a) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    stw r4, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lwz r4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    stw r4, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT:    lfiwax f0, 0, r3
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
@@ -1212,14 +1197,11 @@ define void @test_v2i64_v2i64(ptr %a) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r4, 4(r3)
-; CHECK-AIX-32-P8-NEXT:    stw r4, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT:    li r4, 4
+; CHECK-AIX-32-P8-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, r3, r4
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs1, vs1, 1
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    lfiwzx f1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxspltw vs1, vs1, 1
@@ -1229,12 +1211,11 @@ define void @test_v2i64_v2i64(ptr %a) {
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lwz r4, 4(r3)
-; CHECK-AIX-32-P9-NEXT:    stw r4, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT:    li r4, 4
+; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
+; CHECK-AIX-32-P9-NEXT:    lfiwax f0, r3, r4
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs0, vs0, 1
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P9-NEXT:    lxvwsx vs1, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
@@ -1308,26 +1289,20 @@ define void @test_v2i64_v4i32(ptr %a) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    stw r4, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v2i64_v4i32:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lwz r4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r4, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT:    lfiwax f0, 0, r3
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
@@ -1407,25 +1382,22 @@ define void @test_v2i64_v8i16(ptr %a) {
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    sth r4, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r4
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    sth r4, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    sth r4, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    lxv vs0, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    sth r4, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
diff --git a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
index 201bc5be545068..623c7edf521565 100644
--- a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
@@ -654,17 +654,14 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_none:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r4, 4(r3)
+; CHECK-AIX-32-P8-NEXT:    li r4, 4
+; CHECK-AIX-32-P8-NEXT:    lfiwax f1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxlxor v4, v4, v4
-; CHECK-AIX-32-P8-NEXT:    stw r4, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, r3, r4
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C6(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs1, vs1, 1
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw v2, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r3
@@ -672,14 +669,13 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v2i64_none:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lwz r4, 4(r3)
+; CHECK-AIX-32-P9-NEXT:    li r4, 4
+; CHECK-AIX-32-P9-NEXT:    lfiwax f1, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    xxlxor vs2, vs2, vs2
-; CHECK-AIX-32-P9-NEXT:    stw r4, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -32(r1)
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs1, vs1, 1
+; CHECK-AIX-32-P9-NEXT:    lfiwax f0, r3, r4
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C5(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT:    xxspltw vs0, vs0, 1
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    xxperm vs0, vs2, vs1
@@ -847,24 +843,21 @@ define <16 x i8> @test_v8i16_v4i32(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    sth r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v2, v3
+; CHECK-AIX-32-P8-NEXT:    xxspltw v2, vs0, 1
+; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    sth r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P9-NEXT:    lxv v2, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv v3, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    sth r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    lfiwax f0, 0, r4
+; CHECK-AIX-32-P9-NEXT:    lxv v2, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    xxspltw v3, vs0, 1
 ; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
@@ -937,24 +930,21 @@ define <16 x i8> @test_v8i16_v2i64(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    sth r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v2, v3
+; CHECK-AIX-32-P8-NEXT:    xxspltw v2, vs0, 1
+; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    sth r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P9-NEXT:    lxv v2, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv v3, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    sth r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    lfiwax f0, 0, r4
+; CHECK-AIX-32-P9-NEXT:    lxv v2, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    xxspltw v3, vs0, 1
 ; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
@@ -1149,24 +1139,21 @@ define <16 x i8> @test_v4i32_v8i16(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    sth r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
+; CHECK-AIX-32-P8-NEXT:    xxspltw v2, vs0, 1
+; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v2, v3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    sth r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P9-NEXT:    lxv v2, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv v3, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    sth r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    lfiwax f0, 0, r4
+; CHECK-AIX-32-P9-NEXT:    lxv v2, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    xxspltw v3, vs0, 1
 ; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
@@ -1519,24 +1506,21 @@ define <16 x i8> @test_v2i64_v8i16(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    sth r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lfiwax f0, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
+; CHECK-AIX-32-P8-NEXT:    xxspltw v2, vs0, 1
+; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v2, v3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    sth r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P9-NEXT:    lxv v2, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv v3, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    sth r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    lfiwax f0, 0, r4
+; CHECK-AIX-32-P9-NEXT:    lxv v2, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    xxspltw v3, vs0, 1
 ; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:



More information about the llvm-commits mailing list