[llvm] 26ba186 - [PowerPC] Improve pwr7 codegen for v4i8 load (#104507)
via llvm-commits
llvm-commits@lists.llvm.org
Wed Sep 4 09:55:30 PDT 2024
Author: RolandF77
Date: 2024-09-04T12:55:27-04:00
New Revision: 26ba186bd0a22fac7d08ed566b00c03236b6b7a9
URL: https://github.com/llvm/llvm-project/commit/26ba186bd0a22fac7d08ed566b00c03236b6b7a9
DIFF: https://github.com/llvm/llvm-project/commit/26ba186bd0a22fac7d08ed566b00c03236b6b7a9.diff
LOG: [PowerPC] Improve pwr7 codegen for v4i8 load (#104507)
There are no partial vector loads on pwr7, so the current v4i8 codegen is an
int load, then a store to a vector-sized temp, and a re-load as a vector. Try
to use lfiwax to load 32 bits into an FP reg and take advantage of VSX FP
and vector reg sharing to move the result to the right vector position.
Added:
Modified:
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
llvm/test/CodeGen/PowerPC/load-and-splat.ll
llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
llvm/test/CodeGen/PowerPC/test-vector-insert.ll
llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 8a0858e2462520..f1bd14d7ee0116 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11492,13 +11492,33 @@ SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ SDValue Op0 = Op.getOperand(0);
+ ReuseLoadInfo RLI;
+ if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
+ Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
+ Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
+ canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) {
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
+ RLI.Alignment, RLI.AAInfo, RLI.Ranges);
+ SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
+ SDValue Bits = DAG.getMemIntrinsicNode(
+ PPCISD::LD_SPLAT, dl, DAG.getVTList(MVT::v4i32, MVT::Other), Ops,
+ MVT::i32, MMO);
+ spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
+ return Bits.getValue(0);
+ }
+
// Create a stack slot that is 16-byte aligned.
- MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
- SDValue Val = Op.getOperand(0);
+ SDValue Val = Op0;
EVT ValVT = Val.getValueType();
// P10 hardware store forwarding requires that a single store contains all
// the data for the load. P10 is able to merge a pair of adjacent stores. Try
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
index 6d35a7281de6b4..fba6725e2b2a3f 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
@@ -27,20 +27,17 @@ define <2 x i64> @build_v2i64_extload_0(ptr nocapture noundef readonly %p) {
; PWR7-LE-LABEL: build_v2i64_extload_0:
; PWR7-LE: # %bb.0: # %entry
; PWR7-LE-NEXT: li 4, 0
-; PWR7-LE-NEXT: lwz 3, 0(3)
; PWR7-LE-NEXT: stw 4, -16(1)
; PWR7-LE-NEXT: addis 4, 2, .LCPI0_0@toc@ha
+; PWR7-LE-NEXT: lfiwzx 0, 0, 3
+; PWR7-LE-NEXT: addi 3, 1, -16
; PWR7-LE-NEXT: addi 4, 4, .LCPI0_0@toc@l
-; PWR7-LE-NEXT: stw 3, -32(1)
-; PWR7-LE-NEXT: addi 3, 1, -32
-; PWR7-LE-NEXT: lxvd2x 0, 0, 4
-; PWR7-LE-NEXT: addi 4, 1, -16
; PWR7-LE-NEXT: lxvd2x 1, 0, 4
-; PWR7-LE-NEXT: xxswapd 34, 0
+; PWR7-LE-NEXT: xxspltw 35, 0, 1
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
-; PWR7-LE-NEXT: xxswapd 35, 1
+; PWR7-LE-NEXT: xxswapd 34, 1
; PWR7-LE-NEXT: xxswapd 36, 0
-; PWR7-LE-NEXT: vperm 2, 3, 4, 2
+; PWR7-LE-NEXT: vperm 2, 4, 3, 2
; PWR7-LE-NEXT: blr
;
; PWR8-LE-LABEL: build_v2i64_extload_0:
@@ -337,17 +334,13 @@ entry:
define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) {
; PWR7-BE-LABEL: build_v4i32_load_0:
; PWR7-BE: # %bb.0: # %entry
-; PWR7-BE-NEXT: lwz 3, 0(3)
-; PWR7-BE-NEXT: xxlxor 36, 36, 36
-; PWR7-BE-NEXT: sldi 3, 3, 32
-; PWR7-BE-NEXT: std 3, -32(1)
-; PWR7-BE-NEXT: std 3, -24(1)
+; PWR7-BE-NEXT: lfiwzx 0, 0, 3
; PWR7-BE-NEXT: addis 3, 2, .LCPI8_0@toc@ha
+; PWR7-BE-NEXT: xxlxor 36, 36, 36
; PWR7-BE-NEXT: addi 3, 3, .LCPI8_0@toc@l
-; PWR7-BE-NEXT: lxvw4x 34, 0, 3
-; PWR7-BE-NEXT: addi 3, 1, -32
; PWR7-BE-NEXT: lxvw4x 35, 0, 3
-; PWR7-BE-NEXT: vperm 2, 3, 4, 2
+; PWR7-BE-NEXT: xxspltw 34, 0, 1
+; PWR7-BE-NEXT: vperm 2, 2, 4, 3
; PWR7-BE-NEXT: blr
;
; PWR8-BE-LABEL: build_v4i32_load_0:
@@ -365,20 +358,17 @@ define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) {
; PWR7-LE-LABEL: build_v4i32_load_0:
; PWR7-LE: # %bb.0: # %entry
; PWR7-LE-NEXT: li 4, 0
-; PWR7-LE-NEXT: lwz 3, 0(3)
; PWR7-LE-NEXT: stw 4, -16(1)
; PWR7-LE-NEXT: addis 4, 2, .LCPI8_0@toc@ha
+; PWR7-LE-NEXT: lfiwzx 0, 0, 3
+; PWR7-LE-NEXT: addi 3, 1, -16
; PWR7-LE-NEXT: addi 4, 4, .LCPI8_0@toc@l
-; PWR7-LE-NEXT: stw 3, -32(1)
-; PWR7-LE-NEXT: addi 3, 1, -32
-; PWR7-LE-NEXT: lxvd2x 0, 0, 4
-; PWR7-LE-NEXT: addi 4, 1, -16
; PWR7-LE-NEXT: lxvd2x 1, 0, 4
-; PWR7-LE-NEXT: xxswapd 34, 0
+; PWR7-LE-NEXT: xxspltw 35, 0, 1
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
-; PWR7-LE-NEXT: xxswapd 35, 1
+; PWR7-LE-NEXT: xxswapd 34, 1
; PWR7-LE-NEXT: xxswapd 36, 0
-; PWR7-LE-NEXT: vperm 2, 3, 4, 2
+; PWR7-LE-NEXT: vperm 2, 4, 3, 2
; PWR7-LE-NEXT: blr
;
; PWR8-LE-LABEL: build_v4i32_load_0:
@@ -400,17 +390,13 @@ entry:
define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) {
; PWR7-BE-LABEL: build_v4i32_load_1:
; PWR7-BE: # %bb.0: # %entry
-; PWR7-BE-NEXT: lwz 3, 0(3)
-; PWR7-BE-NEXT: xxlxor 36, 36, 36
-; PWR7-BE-NEXT: sldi 3, 3, 32
-; PWR7-BE-NEXT: std 3, -16(1)
-; PWR7-BE-NEXT: std 3, -8(1)
+; PWR7-BE-NEXT: lfiwzx 0, 0, 3
; PWR7-BE-NEXT: addis 3, 2, .LCPI9_0@toc@ha
+; PWR7-BE-NEXT: xxlxor 36, 36, 36
; PWR7-BE-NEXT: addi 3, 3, .LCPI9_0@toc@l
-; PWR7-BE-NEXT: lxvw4x 34, 0, 3
-; PWR7-BE-NEXT: addi 3, 1, -16
; PWR7-BE-NEXT: lxvw4x 35, 0, 3
-; PWR7-BE-NEXT: vperm 2, 4, 3, 2
+; PWR7-BE-NEXT: xxspltw 34, 0, 1
+; PWR7-BE-NEXT: vperm 2, 4, 2, 3
; PWR7-BE-NEXT: blr
;
; PWR8-BE-LABEL: build_v4i32_load_1:
@@ -427,20 +413,17 @@ define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) {
; PWR7-LE-LABEL: build_v4i32_load_1:
; PWR7-LE: # %bb.0: # %entry
; PWR7-LE-NEXT: li 4, 0
-; PWR7-LE-NEXT: lwz 3, 0(3)
-; PWR7-LE-NEXT: stw 4, -32(1)
+; PWR7-LE-NEXT: stw 4, -16(1)
; PWR7-LE-NEXT: addis 4, 2, .LCPI9_0@toc@ha
-; PWR7-LE-NEXT: addi 4, 4, .LCPI9_0@toc@l
-; PWR7-LE-NEXT: stw 3, -16(1)
+; PWR7-LE-NEXT: lfiwzx 0, 0, 3
; PWR7-LE-NEXT: addi 3, 1, -16
-; PWR7-LE-NEXT: lxvd2x 0, 0, 4
-; PWR7-LE-NEXT: addi 4, 1, -32
+; PWR7-LE-NEXT: addi 4, 4, .LCPI9_0@toc@l
; PWR7-LE-NEXT: lxvd2x 1, 0, 4
-; PWR7-LE-NEXT: xxswapd 34, 0
+; PWR7-LE-NEXT: xxspltw 35, 0, 1
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
-; PWR7-LE-NEXT: xxswapd 35, 1
+; PWR7-LE-NEXT: xxswapd 34, 1
; PWR7-LE-NEXT: xxswapd 36, 0
-; PWR7-LE-NEXT: vperm 2, 4, 3, 2
+; PWR7-LE-NEXT: vperm 2, 3, 4, 2
; PWR7-LE-NEXT: blr
;
; PWR8-LE-LABEL: build_v4i32_load_1:
@@ -463,17 +446,13 @@ entry:
define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) {
; PWR7-BE-LABEL: build_v4i32_load_2:
; PWR7-BE: # %bb.0: # %entry
-; PWR7-BE-NEXT: lwz 3, 0(3)
-; PWR7-BE-NEXT: xxlxor 36, 36, 36
-; PWR7-BE-NEXT: sldi 3, 3, 32
-; PWR7-BE-NEXT: std 3, -16(1)
-; PWR7-BE-NEXT: std 3, -8(1)
+; PWR7-BE-NEXT: lfiwzx 0, 0, 3
; PWR7-BE-NEXT: addis 3, 2, .LCPI10_0@toc@ha
+; PWR7-BE-NEXT: xxlxor 36, 36, 36
; PWR7-BE-NEXT: addi 3, 3, .LCPI10_0@toc@l
-; PWR7-BE-NEXT: lxvw4x 34, 0, 3
-; PWR7-BE-NEXT: addi 3, 1, -16
; PWR7-BE-NEXT: lxvw4x 35, 0, 3
-; PWR7-BE-NEXT: vperm 2, 4, 3, 2
+; PWR7-BE-NEXT: xxspltw 34, 0, 1
+; PWR7-BE-NEXT: vperm 2, 4, 2, 3
; PWR7-BE-NEXT: blr
;
; PWR8-BE-LABEL: build_v4i32_load_2:
@@ -491,20 +470,17 @@ define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) {
; PWR7-LE-LABEL: build_v4i32_load_2:
; PWR7-LE: # %bb.0: # %entry
; PWR7-LE-NEXT: li 4, 0
-; PWR7-LE-NEXT: lwz 3, 0(3)
-; PWR7-LE-NEXT: stw 4, -32(1)
+; PWR7-LE-NEXT: stw 4, -16(1)
; PWR7-LE-NEXT: addis 4, 2, .LCPI10_0@toc@ha
-; PWR7-LE-NEXT: addi 4, 4, .LCPI10_0@toc@l
-; PWR7-LE-NEXT: stw 3, -16(1)
+; PWR7-LE-NEXT: lfiwzx 0, 0, 3
; PWR7-LE-NEXT: addi 3, 1, -16
-; PWR7-LE-NEXT: lxvd2x 0, 0, 4
-; PWR7-LE-NEXT: addi 4, 1, -32
+; PWR7-LE-NEXT: addi 4, 4, .LCPI10_0@toc@l
; PWR7-LE-NEXT: lxvd2x 1, 0, 4
-; PWR7-LE-NEXT: xxswapd 34, 0
+; PWR7-LE-NEXT: xxspltw 35, 0, 1
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
-; PWR7-LE-NEXT: xxswapd 35, 1
+; PWR7-LE-NEXT: xxswapd 34, 1
; PWR7-LE-NEXT: xxswapd 36, 0
-; PWR7-LE-NEXT: vperm 2, 4, 3, 2
+; PWR7-LE-NEXT: vperm 2, 3, 4, 2
; PWR7-LE-NEXT: blr
;
; PWR8-LE-LABEL: build_v4i32_load_2:
@@ -526,17 +502,13 @@ entry:
define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) {
; PWR7-BE-LABEL: build_v4i32_load_3:
; PWR7-BE: # %bb.0: # %entry
-; PWR7-BE-NEXT: lwz 3, 0(3)
-; PWR7-BE-NEXT: xxlxor 36, 36, 36
-; PWR7-BE-NEXT: sldi 3, 3, 32
-; PWR7-BE-NEXT: std 3, -16(1)
-; PWR7-BE-NEXT: std 3, -8(1)
+; PWR7-BE-NEXT: lfiwzx 0, 0, 3
; PWR7-BE-NEXT: addis 3, 2, .LCPI11_0@toc@ha
+; PWR7-BE-NEXT: xxlxor 36, 36, 36
; PWR7-BE-NEXT: addi 3, 3, .LCPI11_0@toc@l
-; PWR7-BE-NEXT: lxvw4x 34, 0, 3
-; PWR7-BE-NEXT: addi 3, 1, -16
; PWR7-BE-NEXT: lxvw4x 35, 0, 3
-; PWR7-BE-NEXT: vperm 2, 4, 3, 2
+; PWR7-BE-NEXT: xxspltw 34, 0, 1
+; PWR7-BE-NEXT: vperm 2, 4, 2, 3
; PWR7-BE-NEXT: blr
;
; PWR8-BE-LABEL: build_v4i32_load_3:
@@ -553,20 +525,17 @@ define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) {
; PWR7-LE-LABEL: build_v4i32_load_3:
; PWR7-LE: # %bb.0: # %entry
; PWR7-LE-NEXT: li 4, 0
-; PWR7-LE-NEXT: lwz 3, 0(3)
-; PWR7-LE-NEXT: stw 4, -32(1)
+; PWR7-LE-NEXT: stw 4, -16(1)
; PWR7-LE-NEXT: addis 4, 2, .LCPI11_0@toc@ha
-; PWR7-LE-NEXT: addi 4, 4, .LCPI11_0@toc@l
-; PWR7-LE-NEXT: stw 3, -16(1)
+; PWR7-LE-NEXT: lfiwzx 0, 0, 3
; PWR7-LE-NEXT: addi 3, 1, -16
-; PWR7-LE-NEXT: lxvd2x 0, 0, 4
-; PWR7-LE-NEXT: addi 4, 1, -32
+; PWR7-LE-NEXT: addi 4, 4, .LCPI11_0@toc@l
; PWR7-LE-NEXT: lxvd2x 1, 0, 4
-; PWR7-LE-NEXT: xxswapd 34, 0
+; PWR7-LE-NEXT: xxspltw 35, 0, 1
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
-; PWR7-LE-NEXT: xxswapd 35, 1
+; PWR7-LE-NEXT: xxswapd 34, 1
; PWR7-LE-NEXT: xxswapd 36, 0
-; PWR7-LE-NEXT: vperm 2, 4, 3, 2
+; PWR7-LE-NEXT: vperm 2, 3, 4, 2
; PWR7-LE-NEXT: blr
;
; PWR8-LE-LABEL: build_v4i32_load_3:
diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
index c26f98c5b0495d..e1159e56e23ebe 100644
--- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
+++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
@@ -536,15 +536,12 @@ define dso_local <8 x i16> @testmrglb3(ptr nocapture readonly %a) local_unnamed_
;
; P8-AIX-32-LABEL: testmrglb3:
; P8-AIX-32: # %bb.0: # %entry
-; P8-AIX-32-NEXT: lwz r4, 4(r3)
+; P8-AIX-32-NEXT: li r4, 4
+; P8-AIX-32-NEXT: lfiwzx f1, 0, r3
; P8-AIX-32-NEXT: xxlxor v3, v3, v3
-; P8-AIX-32-NEXT: stw r4, -16(r1)
-; P8-AIX-32-NEXT: lwz r3, 0(r3)
-; P8-AIX-32-NEXT: stw r3, -32(r1)
-; P8-AIX-32-NEXT: addi r3, r1, -16
-; P8-AIX-32-NEXT: lxvw4x vs0, 0, r3
-; P8-AIX-32-NEXT: addi r3, r1, -32
-; P8-AIX-32-NEXT: lxvw4x vs1, 0, r3
+; P8-AIX-32-NEXT: lfiwzx f0, r3, r4
+; P8-AIX-32-NEXT: xxspltw vs1, vs1, 1
+; P8-AIX-32-NEXT: xxspltw vs0, vs0, 1
; P8-AIX-32-NEXT: xxmrghw v2, vs1, vs0
; P8-AIX-32-NEXT: vmrghb v2, v3, v2
; P8-AIX-32-NEXT: blr
@@ -852,17 +849,15 @@ define dso_local <16 x i8> @no_RAUW_in_combine_during_legalize(ptr nocapture rea
;
; P8-AIX-32-LABEL: no_RAUW_in_combine_during_legalize:
; P8-AIX-32: # %bb.0: # %entry
+; P8-AIX-32-NEXT: li r5, 0
; P8-AIX-32-NEXT: slwi r4, r4, 2
; P8-AIX-32-NEXT: xxlxor v3, v3, v3
-; P8-AIX-32-NEXT: lwzx r3, r3, r4
-; P8-AIX-32-NEXT: li r4, 0
-; P8-AIX-32-NEXT: stw r4, -32(r1)
-; P8-AIX-32-NEXT: stw r3, -16(r1)
-; P8-AIX-32-NEXT: addi r3, r1, -32
-; P8-AIX-32-NEXT: lxvw4x vs0, 0, r3
+; P8-AIX-32-NEXT: stw r5, -16(r1)
+; P8-AIX-32-NEXT: lfiwzx f0, r3, r4
; P8-AIX-32-NEXT: addi r3, r1, -16
; P8-AIX-32-NEXT: lxvw4x vs1, 0, r3
-; P8-AIX-32-NEXT: xxmrghw v2, vs0, vs1
+; P8-AIX-32-NEXT: xxspltw vs0, vs0, 1
+; P8-AIX-32-NEXT: xxmrghw v2, vs1, vs0
; P8-AIX-32-NEXT: vmrghb v2, v2, v3
; P8-AIX-32-NEXT: blr
entry:
@@ -1026,14 +1021,11 @@ define dso_local <2 x i64> @testSplat8(ptr nocapture readonly %ptr) local_unname
;
; P8-AIX-32-LABEL: testSplat8:
; P8-AIX-32: # %bb.0: # %entry
-; P8-AIX-32-NEXT: lwz r4, 4(r3)
-; P8-AIX-32-NEXT: stw r4, -16(r1)
-; P8-AIX-32-NEXT: lwz r3, 0(r3)
-; P8-AIX-32-NEXT: stw r3, -32(r1)
-; P8-AIX-32-NEXT: addi r3, r1, -16
-; P8-AIX-32-NEXT: lxvw4x vs0, 0, r3
-; P8-AIX-32-NEXT: addi r3, r1, -32
-; P8-AIX-32-NEXT: lxvw4x vs1, 0, r3
+; P8-AIX-32-NEXT: li r4, 4
+; P8-AIX-32-NEXT: lfiwzx f1, 0, r3
+; P8-AIX-32-NEXT: lfiwzx f0, r3, r4
+; P8-AIX-32-NEXT: xxspltw vs1, vs1, 1
+; P8-AIX-32-NEXT: xxspltw vs0, vs0, 1
; P8-AIX-32-NEXT: xxmrghw vs0, vs1, vs0
; P8-AIX-32-NEXT: xxmrghd v2, vs0, vs0
; P8-AIX-32-NEXT: blr
@@ -1081,17 +1073,14 @@ define <2 x i64> @testSplati64_0(ptr nocapture readonly %ptr) #0 {
;
; P8-AIX-32-LABEL: testSplati64_0:
; P8-AIX-32: # %bb.0: # %entry
-; P8-AIX-32-NEXT: lwz r4, 0(r3)
-; P8-AIX-32-NEXT: lwz r3, 4(r3)
-; P8-AIX-32-NEXT: stw r3, -16(r1)
+; P8-AIX-32-NEXT: li r4, 4
+; P8-AIX-32-NEXT: lfiwzx f0, r3, r4
+; P8-AIX-32-NEXT: xxspltw v2, vs0, 1
+; P8-AIX-32-NEXT: lfiwzx f0, 0, r3
; P8-AIX-32-NEXT: lwz r3, L..C3(r2) # %const.0
-; P8-AIX-32-NEXT: stw r4, -32(r1)
-; P8-AIX-32-NEXT: lxvw4x v2, 0, r3
-; P8-AIX-32-NEXT: addi r3, r1, -16
-; P8-AIX-32-NEXT: lxvw4x v3, 0, r3
-; P8-AIX-32-NEXT: addi r3, r1, -32
; P8-AIX-32-NEXT: lxvw4x v4, 0, r3
-; P8-AIX-32-NEXT: vperm v2, v4, v3, v2
+; P8-AIX-32-NEXT: xxspltw v3, vs0, 1
+; P8-AIX-32-NEXT: vperm v2, v3, v2, v4
; P8-AIX-32-NEXT: blr
entry:
%0 = load <1 x i64>, ptr %ptr, align 8
diff --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
index bc68ad2a67bf5d..c9ee3a51f41724 100644
--- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll
+++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
@@ -208,47 +208,41 @@ define dso_local void @test4(ptr nocapture %c, ptr nocapture readonly %a) local_
;
; P9-AIX32-LABEL: test4:
; P9-AIX32: # %bb.0: # %entry
-; P9-AIX32-NEXT: lwz r5, 24(r4)
-; P9-AIX32-NEXT: lwz r4, 28(r4)
-; P9-AIX32-NEXT: stw r4, -16(r1)
+; P9-AIX32-NEXT: li r5, 28
+; P9-AIX32-NEXT: lxvwsx vs0, r4, r5
+; P9-AIX32-NEXT: li r5, 24
+; P9-AIX32-NEXT: lxvwsx vs1, r4, r5
; P9-AIX32-NEXT: lwz r4, L..C0(r2) # %const.0
-; P9-AIX32-NEXT: stw r5, -32(r1)
-; P9-AIX32-NEXT: lxv vs1, -16(r1)
-; P9-AIX32-NEXT: lxv vs2, -32(r1)
-; P9-AIX32-NEXT: lxv vs0, 0(r4)
-; P9-AIX32-NEXT: xxperm vs1, vs2, vs0
-; P9-AIX32-NEXT: stxv vs1, 0(r3)
+; P9-AIX32-NEXT: lxv vs2, 0(r4)
+; P9-AIX32-NEXT: xxperm vs0, vs1, vs2
+; P9-AIX32-NEXT: stxv vs0, 0(r3)
; P9-AIX32-NEXT: blr
;
; P8-AIX32-LABEL: test4:
; P8-AIX32: # %bb.0: # %entry
-; P8-AIX32-NEXT: lwz r5, 24(r4)
-; P8-AIX32-NEXT: lwz r4, 28(r4)
-; P8-AIX32-NEXT: stw r4, -16(r1)
+; P8-AIX32-NEXT: li r5, 28
+; P8-AIX32-NEXT: lfiwzx f0, r4, r5
+; P8-AIX32-NEXT: li r5, 24
+; P8-AIX32-NEXT: xxspltw v2, vs0, 1
+; P8-AIX32-NEXT: lfiwzx f0, r4, r5
; P8-AIX32-NEXT: lwz r4, L..C0(r2) # %const.0
-; P8-AIX32-NEXT: stw r5, -32(r1)
-; P8-AIX32-NEXT: lxvw4x v2, 0, r4
-; P8-AIX32-NEXT: addi r4, r1, -16
-; P8-AIX32-NEXT: lxvw4x v3, 0, r4
-; P8-AIX32-NEXT: addi r4, r1, -32
; P8-AIX32-NEXT: lxvw4x v4, 0, r4
-; P8-AIX32-NEXT: vperm v2, v4, v3, v2
+; P8-AIX32-NEXT: xxspltw v3, vs0, 1
+; P8-AIX32-NEXT: vperm v2, v3, v2, v4
; P8-AIX32-NEXT: stxvw4x v2, 0, r3
; P8-AIX32-NEXT: blr
;
; P7-AIX32-LABEL: test4:
; P7-AIX32: # %bb.0: # %entry
-; P7-AIX32-NEXT: lwz r5, 24(r4)
-; P7-AIX32-NEXT: lwz r4, 28(r4)
-; P7-AIX32-NEXT: stw r4, -16(r1)
+; P7-AIX32-NEXT: li r5, 28
+; P7-AIX32-NEXT: lfiwzx f0, r4, r5
+; P7-AIX32-NEXT: li r5, 24
+; P7-AIX32-NEXT: xxspltw v2, vs0, 1
+; P7-AIX32-NEXT: lfiwzx f0, r4, r5
; P7-AIX32-NEXT: lwz r4, L..C0(r2) # %const.0
-; P7-AIX32-NEXT: stw r5, -32(r1)
-; P7-AIX32-NEXT: lxvw4x v2, 0, r4
-; P7-AIX32-NEXT: addi r4, r1, -16
-; P7-AIX32-NEXT: lxvw4x v3, 0, r4
-; P7-AIX32-NEXT: addi r4, r1, -32
; P7-AIX32-NEXT: lxvw4x v4, 0, r4
-; P7-AIX32-NEXT: vperm v2, v4, v3, v2
+; P7-AIX32-NEXT: xxspltw v3, vs0, 1
+; P7-AIX32-NEXT: vperm v2, v3, v2, v4
; P7-AIX32-NEXT: stxvw4x v2, 0, r3
; P7-AIX32-NEXT: blr
entry:
@@ -362,47 +356,41 @@ define void @test6(ptr %a, ptr %in) {
;
; P9-AIX32-LABEL: test6:
; P9-AIX32: # %bb.0: # %entry
-; P9-AIX32-NEXT: lwz r4, 0(r4)
; P9-AIX32-NEXT: li r5, 0
-; P9-AIX32-NEXT: stw r5, -32(r1)
-; P9-AIX32-NEXT: lxv vs1, -32(r1)
-; P9-AIX32-NEXT: stw r4, -16(r1)
-; P9-AIX32-NEXT: lwz r4, L..C2(r2) # %const.0
+; P9-AIX32-NEXT: stw r5, -16(r1)
+; P9-AIX32-NEXT: lwz r5, L..C2(r2) # %const.0
+; P9-AIX32-NEXT: lxvwsx vs1, 0, r4
; P9-AIX32-NEXT: lxv vs2, -16(r1)
-; P9-AIX32-NEXT: lxv vs0, 0(r4)
-; P9-AIX32-NEXT: xxperm vs2, vs1, vs0
-; P9-AIX32-NEXT: stxv vs2, 0(r3)
+; P9-AIX32-NEXT: lxv vs0, 0(r5)
+; P9-AIX32-NEXT: xxperm vs1, vs2, vs0
+; P9-AIX32-NEXT: stxv vs1, 0(r3)
; P9-AIX32-NEXT: blr
;
; P8-AIX32-LABEL: test6:
; P8-AIX32: # %bb.0: # %entry
-; P8-AIX32-NEXT: lwz r4, 0(r4)
; P8-AIX32-NEXT: li r5, 0
-; P8-AIX32-NEXT: stw r5, -32(r1)
-; P8-AIX32-NEXT: stw r4, -16(r1)
+; P8-AIX32-NEXT: stw r5, -16(r1)
+; P8-AIX32-NEXT: lfiwzx f0, 0, r4
; P8-AIX32-NEXT: lwz r4, L..C2(r2) # %const.0
-; P8-AIX32-NEXT: lxvw4x v2, 0, r4
-; P8-AIX32-NEXT: addi r4, r1, -32
; P8-AIX32-NEXT: lxvw4x v3, 0, r4
; P8-AIX32-NEXT: addi r4, r1, -16
; P8-AIX32-NEXT: lxvw4x v4, 0, r4
-; P8-AIX32-NEXT: vperm v2, v3, v4, v2
+; P8-AIX32-NEXT: xxspltw v2, vs0, 1
+; P8-AIX32-NEXT: vperm v2, v4, v2, v3
; P8-AIX32-NEXT: stxvw4x v2, 0, r3
; P8-AIX32-NEXT: blr
;
; P7-AIX32-LABEL: test6:
; P7-AIX32: # %bb.0: # %entry
-; P7-AIX32-NEXT: lwz r4, 0(r4)
; P7-AIX32-NEXT: li r5, 0
-; P7-AIX32-NEXT: stw r5, -32(r1)
-; P7-AIX32-NEXT: stw r4, -16(r1)
+; P7-AIX32-NEXT: stw r5, -16(r1)
+; P7-AIX32-NEXT: lfiwzx f0, 0, r4
; P7-AIX32-NEXT: lwz r4, L..C2(r2) # %const.0
-; P7-AIX32-NEXT: lxvw4x v2, 0, r4
-; P7-AIX32-NEXT: addi r4, r1, -32
; P7-AIX32-NEXT: lxvw4x v3, 0, r4
; P7-AIX32-NEXT: addi r4, r1, -16
; P7-AIX32-NEXT: lxvw4x v4, 0, r4
-; P7-AIX32-NEXT: vperm v2, v3, v4, v2
+; P7-AIX32-NEXT: xxspltw v2, vs0, 1
+; P7-AIX32-NEXT: vperm v2, v4, v2, v3
; P7-AIX32-NEXT: stxvw4x v2, 0, r3
; P7-AIX32-NEXT: blr
entry:
@@ -810,40 +798,31 @@ define <16 x i8> @unadjusted_lxvdsx(ptr %s, ptr %t) {
;
; P9-AIX32-LABEL: unadjusted_lxvdsx:
; P9-AIX32: # %bb.0: # %entry
-; P9-AIX32-NEXT: lwz r4, 4(r3)
-; P9-AIX32-NEXT: stw r4, -16(r1)
-; P9-AIX32-NEXT: lwz r3, 0(r3)
-; P9-AIX32-NEXT: lxv vs0, -16(r1)
-; P9-AIX32-NEXT: stw r3, -32(r1)
-; P9-AIX32-NEXT: lxv vs1, -32(r1)
+; P9-AIX32-NEXT: li r4, 4
+; P9-AIX32-NEXT: lxvwsx vs1, 0, r3
+; P9-AIX32-NEXT: lxvwsx vs0, r3, r4
; P9-AIX32-NEXT: xxmrghw vs0, vs1, vs0
; P9-AIX32-NEXT: xxmrghd v2, vs0, vs0
; P9-AIX32-NEXT: blr
;
; P8-AIX32-LABEL: unadjusted_lxvdsx:
; P8-AIX32: # %bb.0: # %entry
-; P8-AIX32-NEXT: lwz r4, 4(r3)
-; P8-AIX32-NEXT: stw r4, -16(r1)
-; P8-AIX32-NEXT: lwz r3, 0(r3)
-; P8-AIX32-NEXT: stw r3, -32(r1)
-; P8-AIX32-NEXT: addi r3, r1, -16
-; P8-AIX32-NEXT: lxvw4x vs0, 0, r3
-; P8-AIX32-NEXT: addi r3, r1, -32
-; P8-AIX32-NEXT: lxvw4x vs1, 0, r3
+; P8-AIX32-NEXT: li r4, 4
+; P8-AIX32-NEXT: lfiwzx f1, 0, r3
+; P8-AIX32-NEXT: lfiwzx f0, r3, r4
+; P8-AIX32-NEXT: xxspltw vs1, vs1, 1
+; P8-AIX32-NEXT: xxspltw vs0, vs0, 1
; P8-AIX32-NEXT: xxmrghw vs0, vs1, vs0
; P8-AIX32-NEXT: xxmrghd v2, vs0, vs0
; P8-AIX32-NEXT: blr
;
; P7-AIX32-LABEL: unadjusted_lxvdsx:
; P7-AIX32: # %bb.0: # %entry
-; P7-AIX32-NEXT: lwz r4, 4(r3)
-; P7-AIX32-NEXT: stw r4, -16(r1)
-; P7-AIX32-NEXT: lwz r3, 0(r3)
-; P7-AIX32-NEXT: stw r3, -32(r1)
-; P7-AIX32-NEXT: addi r3, r1, -16
-; P7-AIX32-NEXT: lxvw4x vs0, 0, r3
-; P7-AIX32-NEXT: addi r3, r1, -32
-; P7-AIX32-NEXT: lxvw4x vs1, 0, r3
+; P7-AIX32-NEXT: li r4, 4
+; P7-AIX32-NEXT: lfiwzx f1, 0, r3
+; P7-AIX32-NEXT: lfiwzx f0, r3, r4
+; P7-AIX32-NEXT: xxspltw vs1, vs1, 1
+; P7-AIX32-NEXT: xxspltw vs0, vs0, 1
; P7-AIX32-NEXT: xxmrghw vs0, vs1, vs0
; P7-AIX32-NEXT: xxmrghd v2, vs0, vs0
; P7-AIX32-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
index 4da36c9af5c101..4435484ae0b947 100644
--- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
+++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
@@ -85,23 +85,20 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
;
; P9BE-AIX32-LABEL: test64:
; P9BE-AIX32: # %bb.0: # %entry
-; P9BE-AIX32-NEXT: lwzux 4, 3, 4
+; P9BE-AIX32-NEXT: add 5, 3, 4
+; P9BE-AIX32-NEXT: lxvwsx 0, 3, 4
+; P9BE-AIX32-NEXT: li 3, 4
; P9BE-AIX32-NEXT: xxlxor 2, 2, 2
; P9BE-AIX32-NEXT: vspltisw 4, 8
-; P9BE-AIX32-NEXT: stw 4, -48(1)
+; P9BE-AIX32-NEXT: lxvwsx 1, 5, 3
+; P9BE-AIX32-NEXT: lwz 3, L..C0(2) # %const.0
; P9BE-AIX32-NEXT: vadduwm 4, 4, 4
-; P9BE-AIX32-NEXT: lwz 4, 4(3)
-; P9BE-AIX32-NEXT: lxv 0, -48(1)
-; P9BE-AIX32-NEXT: stw 4, -32(1)
-; P9BE-AIX32-NEXT: lwz 4, L..C0(2) # %const.0
-; P9BE-AIX32-NEXT: lxv 1, -32(1)
-; P9BE-AIX32-NEXT: lwz 3, 8(3)
-; P9BE-AIX32-NEXT: stw 3, -16(1)
-; P9BE-AIX32-NEXT: lwz 3, L..C1(2) # %const.1
; P9BE-AIX32-NEXT: xxmrghw 2, 0, 1
-; P9BE-AIX32-NEXT: lxv 0, 0(4)
+; P9BE-AIX32-NEXT: lxv 0, 0(3)
+; P9BE-AIX32-NEXT: li 3, 8
; P9BE-AIX32-NEXT: xxperm 2, 2, 0
-; P9BE-AIX32-NEXT: lxv 0, -16(1)
+; P9BE-AIX32-NEXT: lxvwsx 0, 5, 3
+; P9BE-AIX32-NEXT: lwz 3, L..C1(2) # %const.1
; P9BE-AIX32-NEXT: xxmrghw 3, 1, 0
; P9BE-AIX32-NEXT: lxv 0, 0(3)
; P9BE-AIX32-NEXT: xxperm 3, 3, 0
diff --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
index 25e1baa28f7ef3..c8e0d0d25f4f7e 100644
--- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
@@ -73,13 +73,11 @@ define <4 x i32> @s2v_test1(ptr nocapture readonly %int32, <4 x i32> %vec) {
;
; P8-AIX-32-LABEL: s2v_test1:
; P8-AIX-32: # %bb.0: # %entry
-; P8-AIX-32-NEXT: lwz r3, 0(r3)
-; P8-AIX-32-NEXT: stw r3, -16(r1)
+; P8-AIX-32-NEXT: lfiwzx f0, 0, r3
; P8-AIX-32-NEXT: lwz r3, L..C0(r2) # %const.0
-; P8-AIX-32-NEXT: lxvw4x v3, 0, r3
-; P8-AIX-32-NEXT: addi r3, r1, -16
; P8-AIX-32-NEXT: lxvw4x v4, 0, r3
-; P8-AIX-32-NEXT: vperm v2, v4, v2, v3
+; P8-AIX-32-NEXT: xxspltw v3, vs0, 1
+; P8-AIX-32-NEXT: vperm v2, v3, v2, v4
; P8-AIX-32-NEXT: blr
entry:
%0 = load i32, ptr %int32, align 4
@@ -142,13 +140,12 @@ define <4 x i32> @s2v_test2(ptr nocapture readonly %int32, <4 x i32> %vec) {
;
; P8-AIX-32-LABEL: s2v_test2:
; P8-AIX-32: # %bb.0: # %entry
-; P8-AIX-32-NEXT: lwz r3, 4(r3)
-; P8-AIX-32-NEXT: stw r3, -16(r1)
+; P8-AIX-32-NEXT: addi r3, r3, 4
+; P8-AIX-32-NEXT: lfiwzx f0, 0, r3
; P8-AIX-32-NEXT: lwz r3, L..C1(r2) # %const.0
-; P8-AIX-32-NEXT: lxvw4x v3, 0, r3
-; P8-AIX-32-NEXT: addi r3, r1, -16
; P8-AIX-32-NEXT: lxvw4x v4, 0, r3
-; P8-AIX-32-NEXT: vperm v2, v4, v2, v3
+; P8-AIX-32-NEXT: xxspltw v3, vs0, 1
+; P8-AIX-32-NEXT: vperm v2, v3, v2, v4
; P8-AIX-32-NEXT: blr
entry:
%arrayidx = getelementptr inbounds i32, ptr %int32, i64 1
@@ -224,13 +221,11 @@ define <4 x i32> @s2v_test3(ptr nocapture readonly %int32, <4 x i32> %vec, i32 s
; P8-AIX-32-LABEL: s2v_test3:
; P8-AIX-32: # %bb.0: # %entry
; P8-AIX-32-NEXT: slwi r4, r4, 2
-; P8-AIX-32-NEXT: lwzx r3, r3, r4
-; P8-AIX-32-NEXT: stw r3, -16(r1)
+; P8-AIX-32-NEXT: lfiwzx f0, r3, r4
; P8-AIX-32-NEXT: lwz r3, L..C2(r2) # %const.0
-; P8-AIX-32-NEXT: lxvw4x v3, 0, r3
-; P8-AIX-32-NEXT: addi r3, r1, -16
; P8-AIX-32-NEXT: lxvw4x v4, 0, r3
-; P8-AIX-32-NEXT: vperm v2, v4, v2, v3
+; P8-AIX-32-NEXT: xxspltw v3, vs0, 1
+; P8-AIX-32-NEXT: vperm v2, v3, v2, v4
; P8-AIX-32-NEXT: blr
entry:
%idxprom = sext i32 %Idx to i64
@@ -295,13 +290,12 @@ define <4 x i32> @s2v_test4(ptr nocapture readonly %int32, <4 x i32> %vec) {
;
; P8-AIX-32-LABEL: s2v_test4:
; P8-AIX-32: # %bb.0: # %entry
-; P8-AIX-32-NEXT: lwz r3, 4(r3)
-; P8-AIX-32-NEXT: stw r3, -16(r1)
+; P8-AIX-32-NEXT: addi r3, r3, 4
+; P8-AIX-32-NEXT: lfiwzx f0, 0, r3
; P8-AIX-32-NEXT: lwz r3, L..C3(r2) # %const.0
-; P8-AIX-32-NEXT: lxvw4x v3, 0, r3
-; P8-AIX-32-NEXT: addi r3, r1, -16
; P8-AIX-32-NEXT: lxvw4x v4, 0, r3
-; P8-AIX-32-NEXT: vperm v2, v4, v2, v3
+; P8-AIX-32-NEXT: xxspltw v3, vs0, 1
+; P8-AIX-32-NEXT: vperm v2, v3, v2, v4
; P8-AIX-32-NEXT: blr
entry:
%arrayidx = getelementptr inbounds i32, ptr %int32, i64 1
@@ -362,13 +356,11 @@ define <4 x i32> @s2v_test5(<4 x i32> %vec, ptr nocapture readonly %ptr1) {
;
; P8-AIX-32-LABEL: s2v_test5:
; P8-AIX-32: # %bb.0: # %entry
-; P8-AIX-32-NEXT: lwz r3, 0(r3)
-; P8-AIX-32-NEXT: stw r3, -16(r1)
+; P8-AIX-32-NEXT: lfiwzx f0, 0, r3
; P8-AIX-32-NEXT: lwz r3, L..C4(r2) # %const.0
-; P8-AIX-32-NEXT: lxvw4x v3, 0, r3
-; P8-AIX-32-NEXT: addi r3, r1, -16
; P8-AIX-32-NEXT: lxvw4x v4, 0, r3
-; P8-AIX-32-NEXT: vperm v2, v4, v2, v3
+; P8-AIX-32-NEXT: xxspltw v3, vs0, 1
+; P8-AIX-32-NEXT: vperm v2, v3, v2, v4
; P8-AIX-32-NEXT: blr
entry:
%0 = load i32, ptr %ptr1, align 4
diff --git a/llvm/test/CodeGen/PowerPC/test-vector-insert.ll b/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
index 73b4ad8a507b82..47fa6f2a5b4d29 100644
--- a/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
+++ b/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
@@ -25,16 +25,13 @@ define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
; CHECK-LE-P7: # %bb.0: # %entry
; CHECK-LE-P7-NEXT: xscvdpsxws f0, f1
; CHECK-LE-P7-NEXT: addi r3, r1, -4
+; CHECK-LE-P7-NEXT: addis r4, r2, .LCPI0_0@toc@ha
+; CHECK-LE-P7-NEXT: addi r4, r4, .LCPI0_0@toc@l
; CHECK-LE-P7-NEXT: stfiwx f0, 0, r3
-; CHECK-LE-P7-NEXT: lwz r3, -4(r1)
-; CHECK-LE-P7-NEXT: stw r3, -32(r1)
-; CHECK-LE-P7-NEXT: addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-LE-P7-NEXT: addi r3, r3, .LCPI0_0@toc@l
-; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r3
-; CHECK-LE-P7-NEXT: addi r3, r1, -32
+; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P7-NEXT: xxswapd v3, vs0
-; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r3
-; CHECK-LE-P7-NEXT: xxswapd v4, vs0
+; CHECK-LE-P7-NEXT: lfiwzx f0, 0, r3
+; CHECK-LE-P7-NEXT: xxspltw v4, vs0, 1
; CHECK-LE-P7-NEXT: vperm v2, v4, v2, v3
; CHECK-LE-P7-NEXT: blr
;
@@ -59,16 +56,12 @@ define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
; CHECK-BE-P7-NEXT: xscvdpsxws f0, f1
; CHECK-BE-P7-NEXT: addi r3, r1, -4
; CHECK-BE-P7-NEXT: stfiwx f0, 0, r3
-; CHECK-BE-P7-NEXT: lwz r3, -4(r1)
-; CHECK-BE-P7-NEXT: sldi r3, r3, 32
-; CHECK-BE-P7-NEXT: std r3, -32(r1)
-; CHECK-BE-P7-NEXT: std r3, -24(r1)
+; CHECK-BE-P7-NEXT: lfiwzx f0, 0, r3
; CHECK-BE-P7-NEXT: addis r3, r2, .LCPI0_0@toc@ha
; CHECK-BE-P7-NEXT: addi r3, r3, .LCPI0_0@toc@l
-; CHECK-BE-P7-NEXT: lxvw4x v3, 0, r3
-; CHECK-BE-P7-NEXT: addi r3, r1, -32
; CHECK-BE-P7-NEXT: lxvw4x v4, 0, r3
-; CHECK-BE-P7-NEXT: vperm v2, v2, v4, v3
+; CHECK-BE-P7-NEXT: xxspltw v3, vs0, 1
+; CHECK-BE-P7-NEXT: vperm v2, v2, v3, v4
; CHECK-BE-P7-NEXT: blr
;
; CHECK-BE-P8-LABEL: test:
@@ -96,16 +89,13 @@ define dso_local <4 x i32> @test2(<4 x i32> %a, float %b) {
; CHECK-LE-P7: # %bb.0: # %entry
; CHECK-LE-P7-NEXT: xscvdpsxws f0, f1
; CHECK-LE-P7-NEXT: addi r3, r1, -4
+; CHECK-LE-P7-NEXT: addis r4, r2, .LCPI1_0@toc@ha
+; CHECK-LE-P7-NEXT: addi r4, r4, .LCPI1_0@toc@l
; CHECK-LE-P7-NEXT: stfiwx f0, 0, r3
-; CHECK-LE-P7-NEXT: lwz r3, -4(r1)
-; CHECK-LE-P7-NEXT: stw r3, -32(r1)
-; CHECK-LE-P7-NEXT: addis r3, r2, .LCPI1_0@toc@ha
-; CHECK-LE-P7-NEXT: addi r3, r3, .LCPI1_0@toc@l
-; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r3
-; CHECK-LE-P7-NEXT: addi r3, r1, -32
+; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P7-NEXT: xxswapd v3, vs0
-; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r3
-; CHECK-LE-P7-NEXT: xxswapd v4, vs0
+; CHECK-LE-P7-NEXT: lfiwzx f0, 0, r3
+; CHECK-LE-P7-NEXT: xxspltw v4, vs0, 1
; CHECK-LE-P7-NEXT: vperm v2, v4, v2, v3
; CHECK-LE-P7-NEXT: blr
;
@@ -130,16 +120,12 @@ define dso_local <4 x i32> @test2(<4 x i32> %a, float %b) {
; CHECK-BE-P7-NEXT: xscvdpsxws f0, f1
; CHECK-BE-P7-NEXT: addi r3, r1, -4
; CHECK-BE-P7-NEXT: stfiwx f0, 0, r3
-; CHECK-BE-P7-NEXT: lwz r3, -4(r1)
-; CHECK-BE-P7-NEXT: sldi r3, r3, 32
-; CHECK-BE-P7-NEXT: std r3, -32(r1)
-; CHECK-BE-P7-NEXT: std r3, -24(r1)
+; CHECK-BE-P7-NEXT: lfiwzx f0, 0, r3
; CHECK-BE-P7-NEXT: addis r3, r2, .LCPI1_0@toc@ha
; CHECK-BE-P7-NEXT: addi r3, r3, .LCPI1_0@toc@l
-; CHECK-BE-P7-NEXT: lxvw4x v3, 0, r3
-; CHECK-BE-P7-NEXT: addi r3, r1, -32
; CHECK-BE-P7-NEXT: lxvw4x v4, 0, r3
-; CHECK-BE-P7-NEXT: vperm v2, v2, v4, v3
+; CHECK-BE-P7-NEXT: xxspltw v3, vs0, 1
+; CHECK-BE-P7-NEXT: vperm v2, v2, v3, v4
; CHECK-BE-P7-NEXT: blr
;
; CHECK-BE-P8-LABEL: test2:
@@ -167,16 +153,13 @@ define dso_local <4 x i32> @test3(<4 x i32> %a, double %b) {
; CHECK-LE-P7: # %bb.0: # %entry
; CHECK-LE-P7-NEXT: xscvdpuxws f0, f1
; CHECK-LE-P7-NEXT: addi r3, r1, -4
+; CHECK-LE-P7-NEXT: addis r4, r2, .LCPI2_0@toc@ha
+; CHECK-LE-P7-NEXT: addi r4, r4, .LCPI2_0@toc@l
; CHECK-LE-P7-NEXT: stfiwx f0, 0, r3
-; CHECK-LE-P7-NEXT: lwz r3, -4(r1)
-; CHECK-LE-P7-NEXT: stw r3, -32(r1)
-; CHECK-LE-P7-NEXT: addis r3, r2, .LCPI2_0@toc@ha
-; CHECK-LE-P7-NEXT: addi r3, r3, .LCPI2_0@toc@l
-; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r3
-; CHECK-LE-P7-NEXT: addi r3, r1, -32
+; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P7-NEXT: xxswapd v3, vs0
-; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r3
-; CHECK-LE-P7-NEXT: xxswapd v4, vs0
+; CHECK-LE-P7-NEXT: lfiwzx f0, 0, r3
+; CHECK-LE-P7-NEXT: xxspltw v4, vs0, 1
; CHECK-LE-P7-NEXT: vperm v2, v4, v2, v3
; CHECK-LE-P7-NEXT: blr
;
@@ -201,16 +184,12 @@ define dso_local <4 x i32> @test3(<4 x i32> %a, double %b) {
; CHECK-BE-P7-NEXT: xscvdpuxws f0, f1
; CHECK-BE-P7-NEXT: addi r3, r1, -4
; CHECK-BE-P7-NEXT: stfiwx f0, 0, r3
-; CHECK-BE-P7-NEXT: lwz r3, -4(r1)
-; CHECK-BE-P7-NEXT: sldi r3, r3, 32
-; CHECK-BE-P7-NEXT: std r3, -32(r1)
-; CHECK-BE-P7-NEXT: std r3, -24(r1)
+; CHECK-BE-P7-NEXT: lfiwzx f0, 0, r3
; CHECK-BE-P7-NEXT: addis r3, r2, .LCPI2_0@toc@ha
; CHECK-BE-P7-NEXT: addi r3, r3, .LCPI2_0@toc@l
-; CHECK-BE-P7-NEXT: lxvw4x v3, 0, r3
-; CHECK-BE-P7-NEXT: addi r3, r1, -32
; CHECK-BE-P7-NEXT: lxvw4x v4, 0, r3
-; CHECK-BE-P7-NEXT: vperm v2, v2, v4, v3
+; CHECK-BE-P7-NEXT: xxspltw v3, vs0, 1
+; CHECK-BE-P7-NEXT: vperm v2, v2, v3, v4
; CHECK-BE-P7-NEXT: blr
;
; CHECK-BE-P8-LABEL: test3:
@@ -238,16 +217,13 @@ define dso_local <4 x i32> @test4(<4 x i32> %a, float %b) {
; CHECK-LE-P7: # %bb.0: # %entry
; CHECK-LE-P7-NEXT: xscvdpuxws f0, f1
; CHECK-LE-P7-NEXT: addi r3, r1, -4
+; CHECK-LE-P7-NEXT: addis r4, r2, .LCPI3_0@toc@ha
+; CHECK-LE-P7-NEXT: addi r4, r4, .LCPI3_0@toc@l
; CHECK-LE-P7-NEXT: stfiwx f0, 0, r3
-; CHECK-LE-P7-NEXT: lwz r3, -4(r1)
-; CHECK-LE-P7-NEXT: stw r3, -32(r1)
-; CHECK-LE-P7-NEXT: addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-LE-P7-NEXT: addi r3, r3, .LCPI3_0@toc@l
-; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r3
-; CHECK-LE-P7-NEXT: addi r3, r1, -32
+; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P7-NEXT: xxswapd v3, vs0
-; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r3
-; CHECK-LE-P7-NEXT: xxswapd v4, vs0
+; CHECK-LE-P7-NEXT: lfiwzx f0, 0, r3
+; CHECK-LE-P7-NEXT: xxspltw v4, vs0, 1
; CHECK-LE-P7-NEXT: vperm v2, v4, v2, v3
; CHECK-LE-P7-NEXT: blr
;
@@ -272,16 +248,12 @@ define dso_local <4 x i32> @test4(<4 x i32> %a, float %b) {
; CHECK-BE-P7-NEXT: xscvdpuxws f0, f1
; CHECK-BE-P7-NEXT: addi r3, r1, -4
; CHECK-BE-P7-NEXT: stfiwx f0, 0, r3
-; CHECK-BE-P7-NEXT: lwz r3, -4(r1)
-; CHECK-BE-P7-NEXT: sldi r3, r3, 32
-; CHECK-BE-P7-NEXT: std r3, -32(r1)
-; CHECK-BE-P7-NEXT: std r3, -24(r1)
+; CHECK-BE-P7-NEXT: lfiwzx f0, 0, r3
; CHECK-BE-P7-NEXT: addis r3, r2, .LCPI3_0 at toc@ha
; CHECK-BE-P7-NEXT: addi r3, r3, .LCPI3_0 at toc@l
-; CHECK-BE-P7-NEXT: lxvw4x v3, 0, r3
-; CHECK-BE-P7-NEXT: addi r3, r1, -32
; CHECK-BE-P7-NEXT: lxvw4x v4, 0, r3
-; CHECK-BE-P7-NEXT: vperm v2, v2, v4, v3
+; CHECK-BE-P7-NEXT: xxspltw v3, vs0, 1
+; CHECK-BE-P7-NEXT: vperm v2, v2, v3, v4
; CHECK-BE-P7-NEXT: blr
;
; CHECK-BE-P8-LABEL: test4:
diff --git a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
index 11cc8abd2c7fa3..31d0960e19f4ef 100644
--- a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
@@ -2045,31 +2045,25 @@ define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
;
; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64:
; CHECK-AIX-32-P8: # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3
-; CHECK-AIX-32-P8-NEXT: lwz r3, 4(r4)
-; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT: li r5, 4
+; CHECK-AIX-32-P8-NEXT: lfiwzx f1, 0, r4
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r3
; CHECK-AIX-32-P8-NEXT: lwz r3, L..C9(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lfiwzx f0, r4, r5
; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3
-; CHECK-AIX-32-P8-NEXT: xxmrghw v3, vs1, vs0
-; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-32-P8-NEXT: xxspltw vs1, vs1, 1
+; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P8-NEXT: xxmrghw v2, vs1, vs0
+; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4
; CHECK-AIX-32-P8-NEXT: blr
;
; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64:
; CHECK-AIX-32-P9: # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r3
-; CHECK-AIX-32-P9-NEXT: lwz r3, 4(r4)
-; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4)
-; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1)
-; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1)
+; CHECK-AIX-32-P9-NEXT: li r3, 4
+; CHECK-AIX-32-P9-NEXT: lxvwsx vs2, 0, r4
+; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, r4, r3
; CHECK-AIX-32-P9-NEXT: lwz r3, L..C5(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT: lxv vs2, -32(r1)
; CHECK-AIX-32-P9-NEXT: xxmrghw v2, vs2, vs1
; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3)
; CHECK-AIX-32-P9-NEXT: xxperm v2, vs0, vs1
diff --git a/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
index 8bb71e073e8146..56c8c128ba9f40 100644
--- a/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
@@ -1685,43 +1685,29 @@ define <2 x i64> @test_v2i64_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
;
; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64:
; CHECK-AIX-32-P8: # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT: lwz r5, 4(r3)
-; CHECK-AIX-32-P8-NEXT: stw r5, -16(r1)
-; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT: lwz r3, 4(r4)
-; CHECK-AIX-32-P8-NEXT: stw r3, -48(r1)
-; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT: stw r3, -64(r1)
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -48
+; CHECK-AIX-32-P8-NEXT: li r5, 4
+; CHECK-AIX-32-P8-NEXT: lfiwzx f1, 0, r3
+; CHECK-AIX-32-P8-NEXT: lfiwzx f3, 0, r4
+; CHECK-AIX-32-P8-NEXT: lfiwzx f0, r3, r5
+; CHECK-AIX-32-P8-NEXT: lfiwzx f2, r4, r5
+; CHECK-AIX-32-P8-NEXT: xxspltw vs1, vs1, 1
+; CHECK-AIX-32-P8-NEXT: xxspltw vs3, vs3, 1
+; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P8-NEXT: xxspltw vs2, vs2, 1
; CHECK-AIX-32-P8-NEXT: xxmrghw v2, vs1, vs0
-; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -64
-; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs3, vs2
; CHECK-AIX-32-P8-NEXT: xxmrghd v3, v2, vs0
; CHECK-AIX-32-P8-NEXT: vaddudm v2, v3, v2
; CHECK-AIX-32-P8-NEXT: blr
;
; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64:
; CHECK-AIX-32-P9: # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT: lwz r5, 4(r3)
-; CHECK-AIX-32-P9-NEXT: stw r5, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
-; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT: lwz r3, 4(r4)
-; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1)
-; CHECK-AIX-32-P9-NEXT: stw r3, -48(r1)
-; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4)
+; CHECK-AIX-32-P9-NEXT: li r5, 4
+; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3
+; CHECK-AIX-32-P9-NEXT: lxvwsx vs0, r3, r5
; CHECK-AIX-32-P9-NEXT: xxmrghw v2, vs1, vs0
-; CHECK-AIX-32-P9-NEXT: lxv vs0, -48(r1)
-; CHECK-AIX-32-P9-NEXT: stw r3, -64(r1)
-; CHECK-AIX-32-P9-NEXT: lxv vs1, -64(r1)
+; CHECK-AIX-32-P9-NEXT: lxvwsx vs0, r4, r5
+; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r4
; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0
; CHECK-AIX-32-P9-NEXT: xxmrghd v3, v2, vs0
; CHECK-AIX-32-P9-NEXT: vaddudm v2, v3, v2
diff --git a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
index 4ca55d276647bf..c8e7b20e4b8c37 100644
--- a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
@@ -743,25 +743,21 @@ define void @test_v8i16_v4i32(ptr %a) {
; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32:
; CHECK-AIX-32-P8: # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1)
-; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
-; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r4
-; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: sth r4, -16(r1)
+; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3
; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0
; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3
; CHECK-AIX-32-P8-NEXT: blr
;
; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32:
; CHECK-AIX-32-P9: # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3)
-; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1)
-; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT: lxv vs0, -32(r1)
-; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1)
+; CHECK-AIX-32-P9-NEXT: sth r4, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3
; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs1
; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT: blr
@@ -842,25 +838,21 @@ define void @test_v8i16_v2i64(ptr %a) {
; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64:
; CHECK-AIX-32-P8: # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1)
-; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
-; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r4
-; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: sth r4, -16(r1)
+; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3
; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0
; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3
; CHECK-AIX-32-P8-NEXT: blr
;
; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64:
; CHECK-AIX-32-P9: # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3)
-; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1)
-; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT: lxv vs0, -32(r1)
-; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1)
+; CHECK-AIX-32-P9-NEXT: sth r4, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3
; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs1
; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT: blr
@@ -1030,25 +1022,21 @@ define void @test_v4i32_v8i16(ptr %a) {
; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16:
; CHECK-AIX-32-P8: # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1)
-; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
-; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r4
-; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: sth r4, -16(r1)
+; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3
; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs1
; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3
; CHECK-AIX-32-P8-NEXT: blr
;
; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16:
; CHECK-AIX-32-P9: # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3)
-; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1)
-; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT: lxv vs0, -32(r1)
-; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1)
+; CHECK-AIX-32-P9-NEXT: sth r4, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3
; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0
; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT: blr
@@ -1125,26 +1113,18 @@ define void @test_v4i32_v2i64(ptr %a) {
;
; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64:
; CHECK-AIX-32-P8: # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT: lwz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1)
-; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3
+; CHECK-AIX-32-P8-NEXT: lfiwzx f1, 0, r3
+; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P8-NEXT: xxspltw vs1, vs1, 1
; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0
; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3
; CHECK-AIX-32-P8-NEXT: blr
;
; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64:
; CHECK-AIX-32-P9: # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT: lwz r4, 0(r3)
-; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT: stw r4, -16(r1)
-; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxvwsx vs0, 0, r3
+; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3
; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0
; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT: blr
@@ -1212,14 +1192,11 @@ define void @test_v2i64_v2i64(ptr %a) {
;
; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64:
; CHECK-AIX-32-P8: # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT: lwz r4, 4(r3)
-; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1)
-; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT: li r4, 4
+; CHECK-AIX-32-P8-NEXT: lfiwzx f1, 0, r3
+; CHECK-AIX-32-P8-NEXT: lfiwzx f0, r3, r4
+; CHECK-AIX-32-P8-NEXT: xxspltw vs1, vs1, 1
+; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1
; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0
; CHECK-AIX-32-P8-NEXT: lfiwzx f1, 0, r3
; CHECK-AIX-32-P8-NEXT: xxspltw vs1, vs1, 1
@@ -1229,12 +1206,9 @@ define void @test_v2i64_v2i64(ptr %a) {
;
; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64:
; CHECK-AIX-32-P9: # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT: lwz r4, 4(r3)
-; CHECK-AIX-32-P9-NEXT: stw r4, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
-; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT: li r4, 4
+; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3
+; CHECK-AIX-32-P9-NEXT: lxvwsx vs0, r3, r4
; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0
; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3
; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0
@@ -1308,26 +1282,18 @@ define void @test_v2i64_v4i32(ptr %a) {
;
; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32:
; CHECK-AIX-32-P8: # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT: lwz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1)
-; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3
+; CHECK-AIX-32-P8-NEXT: lfiwzx f1, 0, r3
+; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P8-NEXT: xxspltw vs1, vs1, 1
; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0
; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3
; CHECK-AIX-32-P8-NEXT: blr
;
; CHECK-AIX-32-P9-LABEL: test_v2i64_v4i32:
; CHECK-AIX-32-P9: # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT: lwz r4, 0(r3)
-; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1)
-; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxvwsx vs0, 0, r3
+; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3
; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0
; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT: blr
@@ -1407,25 +1373,21 @@ define void @test_v2i64_v8i16(ptr %a) {
; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16:
; CHECK-AIX-32-P8: # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1)
-; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
-; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r4
-; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: sth r4, -16(r1)
+; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3
; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs1
; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3
; CHECK-AIX-32-P8-NEXT: blr
;
; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16:
; CHECK-AIX-32-P9: # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3)
-; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1)
-; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT: lxv vs0, -32(r1)
-; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1)
+; CHECK-AIX-32-P9-NEXT: sth r4, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3
; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0
; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
index 201bc5be545068..e1aa531db449e5 100644
--- a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
@@ -654,17 +654,14 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
;
; CHECK-AIX-32-P8-LABEL: test_v2i64_none:
; CHECK-AIX-32-P8: # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT: lwz r4, 4(r3)
+; CHECK-AIX-32-P8-NEXT: li r4, 4
+; CHECK-AIX-32-P8-NEXT: lfiwzx f1, 0, r3
; CHECK-AIX-32-P8-NEXT: xxlxor v4, v4, v4
-; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1)
-; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT: lfiwzx f0, r3, r4
; CHECK-AIX-32-P8-NEXT: lwz r3, L..C6(r2) # %const.0
; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: xxspltw vs1, vs1, 1
+; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1
; CHECK-AIX-32-P8-NEXT: xxmrghw v2, vs1, vs0
; CHECK-AIX-32-P8-NEXT: vperm v2, v4, v2, v3
; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3
@@ -672,14 +669,11 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
;
; CHECK-AIX-32-P9-LABEL: test_v2i64_none:
; CHECK-AIX-32-P9: # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT: lwz r4, 4(r3)
+; CHECK-AIX-32-P9-NEXT: li r4, 4
+; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3
; CHECK-AIX-32-P9-NEXT: xxlxor vs2, vs2, vs2
-; CHECK-AIX-32-P9-NEXT: stw r4, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
-; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxvwsx vs0, r3, r4
; CHECK-AIX-32-P9-NEXT: lwz r3, L..C5(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1)
; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0
; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3)
; CHECK-AIX-32-P9-NEXT: xxperm vs0, vs2, vs1
@@ -847,24 +841,20 @@ define <16 x i8> @test_v8i16_v4i32(ptr %a, ptr %b) local_unnamed_addr {
; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32:
; CHECK-AIX-32-P8: # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT: sth r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r3
-; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1)
; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r4
; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-32-P8-NEXT: xxspltw v2, vs0, 1
+; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2
; CHECK-AIX-32-P8-NEXT: blr
;
; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32:
; CHECK-AIX-32-P9: # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT: sth r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4)
-; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1)
-; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxvwsx v3, 0, r4
; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3
; CHECK-AIX-32-P9-NEXT: blr
entry:
@@ -937,24 +927,20 @@ define <16 x i8> @test_v8i16_v2i64(ptr %a, ptr %b) local_unnamed_addr {
; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64:
; CHECK-AIX-32-P8: # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT: sth r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r3
-; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1)
; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r4
; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-32-P8-NEXT: xxspltw v2, vs0, 1
+; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2
; CHECK-AIX-32-P8-NEXT: blr
;
; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64:
; CHECK-AIX-32-P9: # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT: sth r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4)
-; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1)
-; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxvwsx v3, 0, r4
; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3
; CHECK-AIX-32-P9-NEXT: blr
entry:
@@ -1149,24 +1135,20 @@ define <16 x i8> @test_v4i32_v8i16(ptr %a, ptr %b) local_unnamed_addr {
; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16:
; CHECK-AIX-32-P8: # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT: sth r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r3
-; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1)
; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r4
; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-32-P8-NEXT: xxspltw v2, vs0, 1
+; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3
; CHECK-AIX-32-P8-NEXT: blr
;
; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16:
; CHECK-AIX-32-P9: # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT: sth r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4)
-; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1)
-; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxvwsx v3, 0, r4
; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2
; CHECK-AIX-32-P9-NEXT: blr
entry:
@@ -1519,24 +1501,20 @@ define <16 x i8> @test_v2i64_v8i16(ptr %a, ptr %b) local_unnamed_addr {
; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16:
; CHECK-AIX-32-P8: # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT: sth r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r3
-; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1)
; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r4
; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-32-P8-NEXT: xxspltw v2, vs0, 1
+; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3
; CHECK-AIX-32-P8-NEXT: blr
;
; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16:
; CHECK-AIX-32-P9: # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT: sth r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4)
-; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1)
-; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxvwsx v3, 0, r4
; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2
; CHECK-AIX-32-P9-NEXT: blr
entry:
More information about the llvm-commits
mailing list