[llvm] [PowerPC] Use lxvp/stxvp for v256i1 types (PR #184447)
Lei Huang via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 3 21:12:37 PST 2026
https://github.com/lei137 updated https://github.com/llvm/llvm-project/pull/184447
>From 5d37cf4cf0cb679289ba1e45ee1090c952ef8af8 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Tue, 3 Mar 2026 15:45:59 -0500
Subject: [PATCH 1/3] [PowerPC] Use lxvp/stxvp for v256i1 types
Added patterns to use paired vector instructions (lxvp/lxvpx/stxvp/stxvpx)
for v256i1 operations instead of splitting into two separate vector operations.
Assisted by AI.
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 39 +++--
llvm/lib/Target/PowerPC/PPCInstrFuture.td | 8 +
.../test/CodeGen/PowerPC/dmf-outer-product.ll | 156 ++++++------------
llvm/test/CodeGen/PowerPC/dmr-copy.ll | 20 +--
llvm/test/CodeGen/PowerPC/dmr-enable.ll | 72 +++-----
llvm/test/CodeGen/PowerPC/dmr-spill.ll | 9 +-
.../CodeGen/PowerPC/mma-acc-copy-hints.ll | 20 ++-
llvm/test/CodeGen/PowerPC/mma-acc-memops.ll | 32 ++--
.../test/CodeGen/PowerPC/mma-outer-product.ll | 60 +++----
.../CodeGen/PowerPC/vsx-ldst-with-length.ll | 24 +--
10 files changed, 175 insertions(+), 265 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 84d26448a7f4f..3b2b134c64149 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12107,6 +12107,14 @@ SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
0);
}
+static bool isPCRelNode(SDValue N) {
+ return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
+ isValidPCRelNode<ConstantPoolSDNode>(N) ||
+ isValidPCRelNode<GlobalAddressSDNode>(N) ||
+ isValidPCRelNode<JumpTableSDNode>(N) ||
+ isValidPCRelNode<BlockAddressSDNode>(N));
+}
+
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -12122,12 +12130,20 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
return Op;
// Type v256i1 is used for pairs and v512i1 is used for accumulators.
- // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
- // 2 or 4 vsx registers.
assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
"Type unsupported without MMA");
assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
"Type unsupported without paired vector support");
+
+ // For v256i1 on ISA Future, let the load go through to instruction selection
+ // where it will be matched to lxvp by the instruction patterns, unless it's
+ // a PC-relative load which should use plxv instead.
+ if (VT == MVT::v256i1 && Subtarget.isISAFuture() &&
+ !isPCRelNode(LN->getBasePtr()))
+ return Op;
+
+ // For other cases, create 2 or 4 v16i8 loads to load the pair or accumulator
+ // value in 2 or 4 vsx registers.
Align Alignment = LN->getAlign();
SmallVector<SDValue, 4> Loads;
SmallVector<SDValue, 4> LoadChains;
@@ -12290,12 +12306,20 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
return Op;
// Type v256i1 is used for pairs and v512i1 is used for accumulators.
- // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
- // underlying registers individually.
assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
"Type unsupported without MMA");
assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
"Type unsupported without paired vector support");
+
+ // For v256i1 on ISA Future, let the store go through to instruction selection
+ // where it will be matched to stxvp by the instruction patterns, unless it's
+ // a PC-relative store which should use pstxv instead.
+ if (StoreVT == MVT::v256i1 && Subtarget.isISAFuture() &&
+ !isPCRelNode(SN->getBasePtr()))
+ return Op;
+
+ // For other cases, create 2 or 4 v16i8 stores to store the pair or
+ // accumulator underlying registers individually.
Align Alignment = SN->getAlign();
SmallVector<SDValue, 4> Stores;
unsigned NumVecs = 2;
@@ -20047,13 +20071,6 @@ static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
}
}
-static bool isPCRelNode(SDValue N) {
- return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
- isValidPCRelNode<ConstantPoolSDNode>(N) ||
- isValidPCRelNode<GlobalAddressSDNode>(N) ||
- isValidPCRelNode<JumpTableSDNode>(N) ||
- isValidPCRelNode<BlockAddressSDNode>(N));
-}
/// computeMOFlags - Given a node N and it's Parent (a MemSDNode), compute
/// the address flags of the load/store instruction that is to be matched.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
index 717454f78e2a4..2b17bfc4b584b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
@@ -596,6 +596,14 @@ def : Pat<(int_ppc_vsx_stxvprl v256i1:$XTp, addr:$RA, i64:$RB), (STXVPRL $XTp,
$RA, $RB)>;
def : Pat<(int_ppc_vsx_stxvprll v256i1:$XTp, addr:$RA, i64:$RB), (STXVPRLL $XTp,
$RA, $RB)>;
+
+// Regular load/store patterns for v256i1 (for ISA Future)
+let Predicates = [PairedVectorMemops, IsISAFuture] in {
+ def : Pat<(v256i1 (load xoaddr:$src)), (LXVPX xoaddr:$src)>;
+ def : Pat<(v256i1 (load iaddrX16:$src)), (LXVP iaddrX16:$src)>;
+ def : Pat<(store v256i1:$XSp, xoaddr:$dst), (STXVPX $XSp, xoaddr:$dst)>;
+ def : Pat<(store v256i1:$XSp, iaddrX16:$dst), (STXVP $XSp, iaddrX16:$dst)>;
+}
let Predicates = [HasVSX, IsISAFuture] in {
def : Pat<(v4i32 (rotl v4i32:$vA, v4i32:$vB)), (v4i32 (XVRLW v4i32:$vA,
v4i32:$vB))>;
diff --git a/llvm/test/CodeGen/PowerPC/dmf-outer-product.ll b/llvm/test/CodeGen/PowerPC/dmf-outer-product.ll
index ebdc4390bac28..0e7837d5ee659 100644
--- a/llvm/test/CodeGen/PowerPC/dmf-outer-product.ll
+++ b/llvm/test/CodeGen/PowerPC/dmf-outer-product.ll
@@ -11,9 +11,8 @@ declare <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1>, <16 x i8>)
define void @test_dmxvi8gerx4(ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-LABEL: test_dmxvi8gerx4:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lxv v2, 16(r3)
+; CHECK-NEXT: lxvpx vsp34, 0, r3
; CHECK-NEXT: lxv vs0, 0(r4)
-; CHECK-NEXT: lxv v3, 0(r3)
; CHECK-NEXT: dmxvi8gerx4 dmr0, vsp34, vs0
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT: stxvp vsp34, 96(r5)
@@ -25,9 +24,8 @@ define void @test_dmxvi8gerx4(ptr %vpp, ptr %vcp, ptr %resp) {
;
; CHECK-BE-LABEL: test_dmxvi8gerx4:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv v2, 0(r3)
+; CHECK-BE-NEXT: lxvpx vsp34, 0, r3
; CHECK-BE-NEXT: lxv vs0, 0(r4)
-; CHECK-BE-NEXT: lxv v3, 16(r3)
; CHECK-BE-NEXT: dmxvi8gerx4 dmr0, vsp34, vs0
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
@@ -53,8 +51,7 @@ define void @test_dmxvi8gerx4pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-NEXT: lxvp vsp36, 32(r3)
; CHECK-NEXT: lxvp vsp32, 64(r3)
; CHECK-NEXT: lxvp vsp38, 96(r3)
-; CHECK-NEXT: lxv v8, 16(r4)
-; CHECK-NEXT: lxv v9, 0(r4)
+; CHECK-NEXT: lxvpx vsp40, 0, r4
; CHECK-NEXT: lxv vs0, 0(r5)
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -73,8 +70,7 @@ define void @test_dmxvi8gerx4pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT: lxv v8, 0(r4)
-; CHECK-BE-NEXT: lxv v9, 16(r4)
+; CHECK-BE-NEXT: lxvpx vsp40, 0, r4
; CHECK-BE-NEXT: lxv vs0, 0(r5)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -104,8 +100,7 @@ define void @test_dmxvi8gerx4spp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-NEXT: lxvp vsp36, 32(r3)
; CHECK-NEXT: lxvp vsp32, 64(r3)
; CHECK-NEXT: lxvp vsp38, 96(r3)
-; CHECK-NEXT: lxv v8, 16(r4)
-; CHECK-NEXT: lxv v9, 0(r4)
+; CHECK-NEXT: lxvpx vsp40, 0, r4
; CHECK-NEXT: lxv vs0, 0(r5)
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -124,8 +119,7 @@ define void @test_dmxvi8gerx4spp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT: lxv v8, 0(r4)
-; CHECK-BE-NEXT: lxv v9, 16(r4)
+; CHECK-BE-NEXT: lxvpx vsp40, 0, r4
; CHECK-BE-NEXT: lxv vs0, 0(r5)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -155,8 +149,7 @@ define void @test_pmdmxvi8gerx4pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-NEXT: lxvp vsp36, 32(r3)
; CHECK-NEXT: lxvp vsp32, 64(r3)
; CHECK-NEXT: lxvp vsp38, 96(r3)
-; CHECK-NEXT: lxv v8, 16(r4)
-; CHECK-NEXT: lxv v9, 0(r4)
+; CHECK-NEXT: lxvpx vsp40, 0, r4
; CHECK-NEXT: lxv vs0, 0(r5)
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -175,8 +168,7 @@ define void @test_pmdmxvi8gerx4pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT: lxv v8, 0(r4)
-; CHECK-BE-NEXT: lxv v9, 16(r4)
+; CHECK-BE-NEXT: lxvpx vsp40, 0, r4
; CHECK-BE-NEXT: lxv vs0, 0(r5)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -202,9 +194,8 @@ declare <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4(<256 x i1>, <16 x i8>, i32, i32,
define void @test_pmdmxvi8gerx4(ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-LABEL: test_pmdmxvi8gerx4:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lxv v2, 16(r3)
+; CHECK-NEXT: lxvpx vsp34, 0, r3
; CHECK-NEXT: lxv vs0, 0(r4)
-; CHECK-NEXT: lxv v3, 0(r3)
; CHECK-NEXT: pmdmxvi8gerx4 dmr0, vsp34, vs0, 55, 5, 10
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT: stxvp vsp34, 96(r5)
@@ -216,9 +207,8 @@ define void @test_pmdmxvi8gerx4(ptr %vpp, ptr %vcp, ptr %resp) {
;
; CHECK-BE-LABEL: test_pmdmxvi8gerx4:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv v2, 0(r3)
+; CHECK-BE-NEXT: lxvpx vsp34, 0, r3
; CHECK-BE-NEXT: lxv vs0, 0(r4)
-; CHECK-BE-NEXT: lxv v3, 16(r3)
; CHECK-BE-NEXT: pmdmxvi8gerx4 dmr0, vsp34, vs0, 55, 5, 10
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
@@ -244,8 +234,7 @@ define dso_local void @test_pmdmxvi8gerx4spp(ptr %vop, ptr %vpp, ptr %vcp, ptr %
; CHECK-NEXT: lxvp vsp36, 32(r3)
; CHECK-NEXT: lxvp vsp32, 64(r3)
; CHECK-NEXT: lxvp vsp38, 96(r3)
-; CHECK-NEXT: lxv v8, 16(r4)
-; CHECK-NEXT: lxv v9, 0(r4)
+; CHECK-NEXT: lxvpx vsp40, 0, r4
; CHECK-NEXT: lxv vs0, 0(r5)
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -264,8 +253,7 @@ define dso_local void @test_pmdmxvi8gerx4spp(ptr %vop, ptr %vpp, ptr %vcp, ptr %
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT: lxv v8, 0(r4)
-; CHECK-BE-NEXT: lxv v9, 16(r4)
+; CHECK-BE-NEXT: lxvpx vsp40, 0, r4
; CHECK-BE-NEXT: lxv vs0, 0(r5)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -291,9 +279,8 @@ declare <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2(<256 x i1>, <16 x i8>)
define void @test_dmxvbf16gerx2(ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-LABEL: test_dmxvbf16gerx2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lxv v2, 16(r3)
+; CHECK-NEXT: lxvpx vsp34, 0, r3
; CHECK-NEXT: lxv vs0, 0(r4)
-; CHECK-NEXT: lxv v3, 0(r3)
; CHECK-NEXT: dmxvbf16gerx2 dmr0, vsp34, vs0
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT: stxvp vsp34, 96(r5)
@@ -305,9 +292,8 @@ define void @test_dmxvbf16gerx2(ptr %vpp, ptr %vcp, ptr %resp) {
;
; CHECK-BE-LABEL: test_dmxvbf16gerx2:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv v2, 0(r3)
+; CHECK-BE-NEXT: lxvpx vsp34, 0, r3
; CHECK-BE-NEXT: lxv vs0, 0(r4)
-; CHECK-BE-NEXT: lxv v3, 16(r3)
; CHECK-BE-NEXT: dmxvbf16gerx2 dmr0, vsp34, vs0
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
@@ -333,8 +319,7 @@ define void @test_dmxvbf16gerx2pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-NEXT: lxvp vsp36, 32(r3)
; CHECK-NEXT: lxvp vsp32, 64(r3)
; CHECK-NEXT: lxvp vsp38, 96(r3)
-; CHECK-NEXT: lxv v8, 16(r4)
-; CHECK-NEXT: lxv v9, 0(r4)
+; CHECK-NEXT: lxvpx vsp40, 0, r4
; CHECK-NEXT: lxv vs0, 0(r5)
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -353,8 +338,7 @@ define void @test_dmxvbf16gerx2pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT: lxv v8, 0(r4)
-; CHECK-BE-NEXT: lxv v9, 16(r4)
+; CHECK-BE-NEXT: lxvpx vsp40, 0, r4
; CHECK-BE-NEXT: lxv vs0, 0(r5)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -384,8 +368,7 @@ define void @test_dmxvbf16gerx2pn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-NEXT: lxvp vsp36, 32(r3)
; CHECK-NEXT: lxvp vsp32, 64(r3)
; CHECK-NEXT: lxvp vsp38, 96(r3)
-; CHECK-NEXT: lxv v8, 16(r4)
-; CHECK-NEXT: lxv v9, 0(r4)
+; CHECK-NEXT: lxvpx vsp40, 0, r4
; CHECK-NEXT: lxv vs0, 0(r5)
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -404,8 +387,7 @@ define void @test_dmxvbf16gerx2pn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT: lxv v8, 0(r4)
-; CHECK-BE-NEXT: lxv v9, 16(r4)
+; CHECK-BE-NEXT: lxvpx vsp40, 0, r4
; CHECK-BE-NEXT: lxv vs0, 0(r5)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -435,8 +417,7 @@ define void @test_dmxvbf16gerx2np(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-NEXT: lxvp vsp36, 32(r3)
; CHECK-NEXT: lxvp vsp32, 64(r3)
; CHECK-NEXT: lxvp vsp38, 96(r3)
-; CHECK-NEXT: lxv v8, 16(r4)
-; CHECK-NEXT: lxv v9, 0(r4)
+; CHECK-NEXT: lxvpx vsp40, 0, r4
; CHECK-NEXT: lxv vs0, 0(r5)
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -455,8 +436,7 @@ define void @test_dmxvbf16gerx2np(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT: lxv v8, 0(r4)
-; CHECK-BE-NEXT: lxv v9, 16(r4)
+; CHECK-BE-NEXT: lxvpx vsp40, 0, r4
; CHECK-BE-NEXT: lxv vs0, 0(r5)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -486,8 +466,7 @@ define void @test_dmxvbf16gerx2nn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-NEXT: lxvp vsp36, 32(r3)
; CHECK-NEXT: lxvp vsp32, 64(r3)
; CHECK-NEXT: lxvp vsp38, 96(r3)
-; CHECK-NEXT: lxv v8, 16(r4)
-; CHECK-NEXT: lxv v9, 0(r4)
+; CHECK-NEXT: lxvpx vsp40, 0, r4
; CHECK-NEXT: lxv vs0, 0(r5)
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -506,8 +485,7 @@ define void @test_dmxvbf16gerx2nn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT: lxv v8, 0(r4)
-; CHECK-BE-NEXT: lxv v9, 16(r4)
+; CHECK-BE-NEXT: lxvpx vsp40, 0, r4
; CHECK-BE-NEXT: lxv vs0, 0(r5)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -533,9 +511,8 @@ declare <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2(<256 x i1>, <16 x i8>, i32, i3
define void @test_pmdmxvbf16gerx2(ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-LABEL: test_pmdmxvbf16gerx2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lxv v2, 16(r3)
+; CHECK-NEXT: lxvpx vsp34, 0, r3
; CHECK-NEXT: lxv vs0, 0(r4)
-; CHECK-NEXT: lxv v3, 0(r3)
; CHECK-NEXT: pmdmxvbf16gerx2 dmr0, vsp34, vs0, 33, 5, 2
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT: stxvp vsp34, 96(r5)
@@ -547,9 +524,8 @@ define void @test_pmdmxvbf16gerx2(ptr %vpp, ptr %vcp, ptr %resp) {
;
; CHECK-BE-LABEL: test_pmdmxvbf16gerx2:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv v2, 0(r3)
+; CHECK-BE-NEXT: lxvpx vsp34, 0, r3
; CHECK-BE-NEXT: lxv vs0, 0(r4)
-; CHECK-BE-NEXT: lxv v3, 16(r3)
; CHECK-BE-NEXT: pmdmxvbf16gerx2 dmr0, vsp34, vs0, 33, 5, 2
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
@@ -575,8 +551,7 @@ define void @test_pmdmxvbf16gerx2pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-NEXT: lxvp vsp36, 32(r3)
; CHECK-NEXT: lxvp vsp32, 64(r3)
; CHECK-NEXT: lxvp vsp38, 96(r3)
-; CHECK-NEXT: lxv v8, 16(r4)
-; CHECK-NEXT: lxv v9, 0(r4)
+; CHECK-NEXT: lxvpx vsp40, 0, r4
; CHECK-NEXT: lxv vs0, 0(r5)
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -595,8 +570,7 @@ define void @test_pmdmxvbf16gerx2pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT: lxv v8, 0(r4)
-; CHECK-BE-NEXT: lxv v9, 16(r4)
+; CHECK-BE-NEXT: lxvpx vsp40, 0, r4
; CHECK-BE-NEXT: lxv vs0, 0(r5)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -626,8 +600,7 @@ define void @test_pmdmxvbf16gerx2pn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-NEXT: lxvp vsp36, 32(r3)
; CHECK-NEXT: lxvp vsp32, 64(r3)
; CHECK-NEXT: lxvp vsp38, 96(r3)
-; CHECK-NEXT: lxv v8, 16(r4)
-; CHECK-NEXT: lxv v9, 0(r4)
+; CHECK-NEXT: lxvpx vsp40, 0, r4
; CHECK-NEXT: lxv vs0, 0(r5)
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -646,8 +619,7 @@ define void @test_pmdmxvbf16gerx2pn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT: lxv v8, 0(r4)
-; CHECK-BE-NEXT: lxv v9, 16(r4)
+; CHECK-BE-NEXT: lxvpx vsp40, 0, r4
; CHECK-BE-NEXT: lxv vs0, 0(r5)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -677,8 +649,7 @@ define void @test_pmdmxvbf16gerx2np(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-NEXT: lxvp vsp36, 32(r3)
; CHECK-NEXT: lxvp vsp32, 64(r3)
; CHECK-NEXT: lxvp vsp38, 96(r3)
-; CHECK-NEXT: lxv v8, 16(r4)
-; CHECK-NEXT: lxv v9, 0(r4)
+; CHECK-NEXT: lxvpx vsp40, 0, r4
; CHECK-NEXT: lxv vs0, 0(r5)
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -697,8 +668,7 @@ define void @test_pmdmxvbf16gerx2np(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT: lxv v8, 0(r4)
-; CHECK-BE-NEXT: lxv v9, 16(r4)
+; CHECK-BE-NEXT: lxvpx vsp40, 0, r4
; CHECK-BE-NEXT: lxv vs0, 0(r5)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -728,8 +698,7 @@ define void @test_pmdmxvbf16gerx2nn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-NEXT: lxvp vsp36, 32(r3)
; CHECK-NEXT: lxvp vsp32, 64(r3)
; CHECK-NEXT: lxvp vsp38, 96(r3)
-; CHECK-NEXT: lxv v8, 16(r4)
-; CHECK-NEXT: lxv v9, 0(r4)
+; CHECK-NEXT: lxvpx vsp40, 0, r4
; CHECK-NEXT: lxv vs0, 0(r5)
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -748,8 +717,7 @@ define void @test_pmdmxvbf16gerx2nn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT: lxv v8, 0(r4)
-; CHECK-BE-NEXT: lxv v9, 16(r4)
+; CHECK-BE-NEXT: lxvpx vsp40, 0, r4
; CHECK-BE-NEXT: lxv vs0, 0(r5)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -774,9 +742,8 @@ declare <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2(<256 x i1>, <16 x i8>)
define void @test_dmxvf16gerx2(ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-LABEL: test_dmxvf16gerx2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lxv v2, 16(r3)
+; CHECK-NEXT: lxvpx vsp34, 0, r3
; CHECK-NEXT: lxv vs0, 0(r4)
-; CHECK-NEXT: lxv v3, 0(r3)
; CHECK-NEXT: dmxvf16gerx2 dmr0, vsp34, vs0
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT: stxvp vsp34, 96(r5)
@@ -788,9 +755,8 @@ define void @test_dmxvf16gerx2(ptr %vpp, ptr %vcp, ptr %resp) {
;
; CHECK-BE-LABEL: test_dmxvf16gerx2:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv v2, 0(r3)
+; CHECK-BE-NEXT: lxvpx vsp34, 0, r3
; CHECK-BE-NEXT: lxv vs0, 0(r4)
-; CHECK-BE-NEXT: lxv v3, 16(r3)
; CHECK-BE-NEXT: dmxvf16gerx2 dmr0, vsp34, vs0
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
@@ -816,8 +782,7 @@ define void @test_dmxvf16gerx2pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-NEXT: lxvp vsp36, 32(r3)
; CHECK-NEXT: lxvp vsp32, 64(r3)
; CHECK-NEXT: lxvp vsp38, 96(r3)
-; CHECK-NEXT: lxv v8, 16(r4)
-; CHECK-NEXT: lxv v9, 0(r4)
+; CHECK-NEXT: lxvpx vsp40, 0, r4
; CHECK-NEXT: lxv vs0, 0(r5)
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -836,8 +801,7 @@ define void @test_dmxvf16gerx2pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT: lxv v8, 0(r4)
-; CHECK-BE-NEXT: lxv v9, 16(r4)
+; CHECK-BE-NEXT: lxvpx vsp40, 0, r4
; CHECK-BE-NEXT: lxv vs0, 0(r5)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -867,8 +831,7 @@ define void @test_dmxvf16gerx2pn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-NEXT: lxvp vsp36, 32(r3)
; CHECK-NEXT: lxvp vsp32, 64(r3)
; CHECK-NEXT: lxvp vsp38, 96(r3)
-; CHECK-NEXT: lxv v8, 16(r4)
-; CHECK-NEXT: lxv v9, 0(r4)
+; CHECK-NEXT: lxvpx vsp40, 0, r4
; CHECK-NEXT: lxv vs0, 0(r5)
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -887,8 +850,7 @@ define void @test_dmxvf16gerx2pn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT: lxv v8, 0(r4)
-; CHECK-BE-NEXT: lxv v9, 16(r4)
+; CHECK-BE-NEXT: lxvpx vsp40, 0, r4
; CHECK-BE-NEXT: lxv vs0, 0(r5)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -918,8 +880,7 @@ define void @test_dmxvf16gerx2np(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-NEXT: lxvp vsp36, 32(r3)
; CHECK-NEXT: lxvp vsp32, 64(r3)
; CHECK-NEXT: lxvp vsp38, 96(r3)
-; CHECK-NEXT: lxv v8, 16(r4)
-; CHECK-NEXT: lxv v9, 0(r4)
+; CHECK-NEXT: lxvpx vsp40, 0, r4
; CHECK-NEXT: lxv vs0, 0(r5)
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -938,8 +899,7 @@ define void @test_dmxvf16gerx2np(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT: lxv v8, 0(r4)
-; CHECK-BE-NEXT: lxv v9, 16(r4)
+; CHECK-BE-NEXT: lxvpx vsp40, 0, r4
; CHECK-BE-NEXT: lxv vs0, 0(r5)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -969,8 +929,7 @@ define void @test_dmxvf16gerx2nn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-NEXT: lxvp vsp36, 32(r3)
; CHECK-NEXT: lxvp vsp32, 64(r3)
; CHECK-NEXT: lxvp vsp38, 96(r3)
-; CHECK-NEXT: lxv v8, 16(r4)
-; CHECK-NEXT: lxv v9, 0(r4)
+; CHECK-NEXT: lxvpx vsp40, 0, r4
; CHECK-NEXT: lxv vs0, 0(r5)
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -989,8 +948,7 @@ define void @test_dmxvf16gerx2nn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT: lxv v8, 0(r4)
-; CHECK-BE-NEXT: lxv v9, 16(r4)
+; CHECK-BE-NEXT: lxvpx vsp40, 0, r4
; CHECK-BE-NEXT: lxv vs0, 0(r5)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -1016,9 +974,8 @@ declare <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2(<256 x i1>, <16 x i8>, i32, i32
define void @test_pmdmxvf16gerx2(ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-LABEL: test_pmdmxvf16gerx2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lxv v2, 16(r3)
+; CHECK-NEXT: lxvpx vsp34, 0, r3
; CHECK-NEXT: lxv vs0, 0(r4)
-; CHECK-NEXT: lxv v3, 0(r3)
; CHECK-NEXT: pmdmxvf16gerx2 dmr0, vsp34, vs0, 33, 5, 2
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT: stxvp vsp34, 96(r5)
@@ -1030,9 +987,8 @@ define void @test_pmdmxvf16gerx2(ptr %vpp, ptr %vcp, ptr %resp) {
;
; CHECK-BE-LABEL: test_pmdmxvf16gerx2:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv v2, 0(r3)
+; CHECK-BE-NEXT: lxvpx vsp34, 0, r3
; CHECK-BE-NEXT: lxv vs0, 0(r4)
-; CHECK-BE-NEXT: lxv v3, 16(r3)
; CHECK-BE-NEXT: pmdmxvf16gerx2 dmr0, vsp34, vs0, 33, 5, 2
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
@@ -1058,8 +1014,7 @@ define void @test_pmdmxvf16gerx2pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-NEXT: lxvp vsp36, 32(r3)
; CHECK-NEXT: lxvp vsp32, 64(r3)
; CHECK-NEXT: lxvp vsp38, 96(r3)
-; CHECK-NEXT: lxv v8, 16(r4)
-; CHECK-NEXT: lxv v9, 0(r4)
+; CHECK-NEXT: lxvpx vsp40, 0, r4
; CHECK-NEXT: lxv vs0, 0(r5)
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -1078,8 +1033,7 @@ define void @test_pmdmxvf16gerx2pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT: lxv v8, 0(r4)
-; CHECK-BE-NEXT: lxv v9, 16(r4)
+; CHECK-BE-NEXT: lxvpx vsp40, 0, r4
; CHECK-BE-NEXT: lxv vs0, 0(r5)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -1109,8 +1063,7 @@ define void @test_pmdmxvf16gerx2pn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-NEXT: lxvp vsp36, 32(r3)
; CHECK-NEXT: lxvp vsp32, 64(r3)
; CHECK-NEXT: lxvp vsp38, 96(r3)
-; CHECK-NEXT: lxv v8, 16(r4)
-; CHECK-NEXT: lxv v9, 0(r4)
+; CHECK-NEXT: lxvpx vsp40, 0, r4
; CHECK-NEXT: lxv vs0, 0(r5)
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -1129,8 +1082,7 @@ define void @test_pmdmxvf16gerx2pn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT: lxv v8, 0(r4)
-; CHECK-BE-NEXT: lxv v9, 16(r4)
+; CHECK-BE-NEXT: lxvpx vsp40, 0, r4
; CHECK-BE-NEXT: lxv vs0, 0(r5)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -1160,8 +1112,7 @@ define void @test_pmdmxvf16gerx2np(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-NEXT: lxvp vsp36, 32(r3)
; CHECK-NEXT: lxvp vsp32, 64(r3)
; CHECK-NEXT: lxvp vsp38, 96(r3)
-; CHECK-NEXT: lxv v8, 16(r4)
-; CHECK-NEXT: lxv v9, 0(r4)
+; CHECK-NEXT: lxvpx vsp40, 0, r4
; CHECK-NEXT: lxv vs0, 0(r5)
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -1180,8 +1131,7 @@ define void @test_pmdmxvf16gerx2np(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT: lxv v8, 0(r4)
-; CHECK-BE-NEXT: lxv v9, 16(r4)
+; CHECK-BE-NEXT: lxvpx vsp40, 0, r4
; CHECK-BE-NEXT: lxv vs0, 0(r5)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -1211,8 +1161,7 @@ define void @test_pmdmxvf16gerx2nn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-NEXT: lxvp vsp36, 32(r3)
; CHECK-NEXT: lxvp vsp32, 64(r3)
; CHECK-NEXT: lxvp vsp38, 96(r3)
-; CHECK-NEXT: lxv v8, 16(r4)
-; CHECK-NEXT: lxv v9, 0(r4)
+; CHECK-NEXT: lxvpx vsp40, 0, r4
; CHECK-NEXT: lxv vs0, 0(r5)
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -1231,8 +1180,7 @@ define void @test_pmdmxvf16gerx2nn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT: lxv v8, 0(r4)
-; CHECK-BE-NEXT: lxv v9, 16(r4)
+; CHECK-BE-NEXT: lxvpx vsp40, 0, r4
; CHECK-BE-NEXT: lxv vs0, 0(r5)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
diff --git a/llvm/test/CodeGen/PowerPC/dmr-copy.ll b/llvm/test/CodeGen/PowerPC/dmr-copy.ll
index d5a24309f94d5..69aaba58e4c15 100644
--- a/llvm/test/CodeGen/PowerPC/dmr-copy.ll
+++ b/llvm/test/CodeGen/PowerPC/dmr-copy.ll
@@ -37,12 +37,10 @@ define void @test_wacc_copy(ptr noundef %vdmrp, ptr noundef %vpp, <16 x i8> noun
; CHECK-NEXT: stxvp vsp34, 160(r31)
; CHECK-NEXT: stxvp vsp36, 128(r31)
; CHECK-NEXT: ld r3, 352(r31)
-; CHECK-NEXT: lxv v2, 16(r3)
-; CHECK-NEXT: lxv v3, 0(r3)
-; CHECK-NEXT: stxv v2, 112(r31)
-; CHECK-NEXT: stxv v3, 96(r31)
-; CHECK-NEXT: lxv v2, 112(r31)
-; CHECK-NEXT: lxv v3, 96(r31)
+; CHECK-NEXT: lxvpx vsp34, 0, r3
+; CHECK-NEXT: addi r3, r31, 96
+; CHECK-NEXT: stxvpx vsp34, 0, r3
+; CHECK-NEXT: lxvpx vsp34, 0, r3
; CHECK-NEXT: lxv vs0, 336(r31)
; CHECK-NEXT: dmxvi8gerx4 dmr0, vsp34, vs0
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -96,12 +94,10 @@ define void @test_wacc_copy(ptr noundef %vdmrp, ptr noundef %vpp, <16 x i8> noun
; CHECK-BE-NEXT: stxvp vsp36, 160(r31)
; CHECK-BE-NEXT: stxvp vsp34, 128(r31)
; CHECK-BE-NEXT: ld r3, 352(r31)
-; CHECK-BE-NEXT: lxv v2, 0(r3)
-; CHECK-BE-NEXT: lxv v3, 16(r3)
-; CHECK-BE-NEXT: stxv v3, 112(r31)
-; CHECK-BE-NEXT: stxv v2, 96(r31)
-; CHECK-BE-NEXT: lxv v2, 96(r31)
-; CHECK-BE-NEXT: lxv v3, 112(r31)
+; CHECK-BE-NEXT: lxvpx vsp34, 0, r3
+; CHECK-BE-NEXT: addi r3, r31, 96
+; CHECK-BE-NEXT: stxvpx vsp34, 0, r3
+; CHECK-BE-NEXT: lxvpx vsp34, 0, r3
; CHECK-BE-NEXT: lxv vs0, 336(r31)
; CHECK-BE-NEXT: dmxvi8gerx4 dmr0, vsp34, vs0
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
diff --git a/llvm/test/CodeGen/PowerPC/dmr-enable.ll b/llvm/test/CodeGen/PowerPC/dmr-enable.ll
index fe76ddc19e817..347301ee719b0 100644
--- a/llvm/test/CodeGen/PowerPC/dmr-enable.ll
+++ b/llvm/test/CodeGen/PowerPC/dmr-enable.ll
@@ -134,22 +134,18 @@ define void @text512(ptr %vp1, ptr %rp1, ptr %rp2, ptr %rp3, ptr %rp4) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: dmsetdmrz dmr0
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
-; CHECK-NEXT: stxv v2, 16(r4)
-; CHECK-NEXT: stxv v3, 0(r4)
+; CHECK-NEXT: stxvpx vsp34, 0, r4
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
-; CHECK-NEXT: stxv v2, 16(r6)
-; CHECK-NEXT: stxv v3, 0(r6)
+; CHECK-NEXT: stxvpx vsp34, 0, r6
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: text512:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: dmsetdmrz dmr0
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
-; CHECK-BE-NEXT: stxv v3, 16(r4)
-; CHECK-BE-NEXT: stxv v2, 0(r4)
+; CHECK-BE-NEXT: stxvpx vsp34, 0, r4
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
-; CHECK-BE-NEXT: stxv v3, 16(r6)
-; CHECK-BE-NEXT: stxv v2, 0(r6)
+; CHECK-BE-NEXT: stxvpx vsp34, 0, r6
; CHECK-BE-NEXT: blr
entry:
%z = call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
@@ -167,34 +163,26 @@ define void @text256(ptr %vp1, ptr %rp1, ptr %rp2, ptr %rp3, ptr %rp4) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: dmsetdmrz dmr0
; CHECK-NEXT: dmxxextfdmr256 vsp34, dmr0, 0
-; CHECK-NEXT: stxv v2, 16(r4)
-; CHECK-NEXT: stxv v3, 0(r4)
+; CHECK-NEXT: stxvpx vsp34, 0, r4
; CHECK-NEXT: dmxxextfdmr256 vsp34, dmr0, 1
-; CHECK-NEXT: stxv v2, 16(r5)
-; CHECK-NEXT: stxv v3, 0(r5)
+; CHECK-NEXT: stxvpx vsp34, 0, r5
; CHECK-NEXT: dmxxextfdmr256 vsp34, dmr0, 2
-; CHECK-NEXT: stxv v2, 16(r6)
-; CHECK-NEXT: stxv v3, 0(r6)
+; CHECK-NEXT: stxvpx vsp34, 0, r6
; CHECK-NEXT: dmxxextfdmr256 vsp34, dmr0, 3
-; CHECK-NEXT: stxv v2, 16(r7)
-; CHECK-NEXT: stxv v3, 0(r7)
+; CHECK-NEXT: stxvpx vsp34, 0, r7
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: text256:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: dmsetdmrz dmr0
; CHECK-BE-NEXT: dmxxextfdmr256 vsp34, dmr0, 0
-; CHECK-BE-NEXT: stxv v3, 16(r4)
-; CHECK-BE-NEXT: stxv v2, 0(r4)
+; CHECK-BE-NEXT: stxvpx vsp34, 0, r4
; CHECK-BE-NEXT: dmxxextfdmr256 vsp34, dmr0, 1
-; CHECK-BE-NEXT: stxv v3, 16(r5)
-; CHECK-BE-NEXT: stxv v2, 0(r5)
+; CHECK-BE-NEXT: stxvpx vsp34, 0, r5
; CHECK-BE-NEXT: dmxxextfdmr256 vsp34, dmr0, 2
-; CHECK-BE-NEXT: stxv v3, 16(r6)
-; CHECK-BE-NEXT: stxv v2, 0(r6)
+; CHECK-BE-NEXT: stxvpx vsp34, 0, r6
; CHECK-BE-NEXT: dmxxextfdmr256 vsp34, dmr0, 3
-; CHECK-BE-NEXT: stxv v3, 16(r7)
-; CHECK-BE-NEXT: stxv v2, 0(r7)
+; CHECK-BE-NEXT: stxvpx vsp34, 0, r7
; CHECK-BE-NEXT: blr
entry:
%z = call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
@@ -212,10 +200,8 @@ entry:
define void @tins512(ptr %vp1, ptr %vp2, ptr %vp3, ptr %vp4, ptr %rp1, ptr %rp2) {
; CHECK-LABEL: tins512:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lxv v2, 16(r3)
-; CHECK-NEXT: lxv v3, 0(r3)
-; CHECK-NEXT: lxv v4, 16(r4)
-; CHECK-NEXT: lxv v5, 0(r4)
+; CHECK-NEXT: lxvpx vsp34, 0, r3
+; CHECK-NEXT: lxvpx vsp36, 0, r4
; CHECK-NEXT: dmsetdmrz dmr0
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -224,10 +210,8 @@ define void @tins512(ptr %vp1, ptr %vp2, ptr %vp3, ptr %vp4, ptr %rp1, ptr %rp2)
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-NEXT: stxvp vsp34, 32(r7)
; CHECK-NEXT: stxvp vsp36, 0(r7)
-; CHECK-NEXT: lxv v2, 16(r5)
-; CHECK-NEXT: lxv v4, 16(r6)
-; CHECK-NEXT: lxv v3, 0(r5)
-; CHECK-NEXT: lxv v5, 0(r6)
+; CHECK-NEXT: lxvpx vsp34, 0, r5
+; CHECK-NEXT: lxvpx vsp36, 0, r6
; CHECK-NEXT: dmxxextfdmr512 vsp32, vsp38, wacc0, 0
; CHECK-NEXT: stxvp vsp32, 96(r8)
; CHECK-NEXT: stxvp vsp38, 64(r8)
@@ -239,10 +223,8 @@ define void @tins512(ptr %vp1, ptr %vp2, ptr %vp3, ptr %vp4, ptr %rp1, ptr %rp2)
;
; CHECK-BE-LABEL: tins512:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv v2, 0(r3)
-; CHECK-BE-NEXT: lxv v4, 0(r4)
-; CHECK-BE-NEXT: lxv v3, 16(r3)
-; CHECK-BE-NEXT: lxv v5, 16(r4)
+; CHECK-BE-NEXT: lxvpx vsp34, 0, r3
+; CHECK-BE-NEXT: lxvpx vsp36, 0, r4
; CHECK-BE-NEXT: dmsetdmrz dmr0
; CHECK-BE-NEXT: dmxxextfdmr512 vsp32, vsp38, wacc_hi0, 1
; CHECK-BE-NEXT: stxvp vsp38, 96(r7)
@@ -251,10 +233,8 @@ define void @tins512(ptr %vp1, ptr %vp2, ptr %vp3, ptr %vp4, ptr %rp1, ptr %rp2)
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT: stxvp vsp36, 32(r7)
; CHECK-BE-NEXT: stxvp vsp34, 0(r7)
-; CHECK-BE-NEXT: lxv v2, 0(r5)
-; CHECK-BE-NEXT: lxv v4, 0(r6)
-; CHECK-BE-NEXT: lxv v3, 16(r5)
-; CHECK-BE-NEXT: lxv v5, 16(r6)
+; CHECK-BE-NEXT: lxvpx vsp34, 0, r5
+; CHECK-BE-NEXT: lxvpx vsp36, 0, r6
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT: stxvp vsp36, 96(r8)
@@ -279,8 +259,7 @@ entry:
define void @tins256(ptr %vp1, ptr %vp2, ptr %vp3, ptr %vp4, ptr %rp1, ptr %rp2, ptr %rp3, ptr %rp4) {
; CHECK-LABEL: tins256:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lxv v2, 16(r3)
-; CHECK-NEXT: lxv v3, 0(r3)
+; CHECK-NEXT: lxvpx vsp34, 0, r3
; CHECK-NEXT: dmsetdmrz dmr0
; CHECK-NEXT: dmxxinstdmr256 dmr0, vsp34, 0
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -289,8 +268,7 @@ define void @tins256(ptr %vp1, ptr %vp2, ptr %vp3, ptr %vp4, ptr %rp1, ptr %rp2,
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-NEXT: stxvp vsp34, 32(r7)
; CHECK-NEXT: stxvp vsp36, 0(r7)
-; CHECK-NEXT: lxv v2, 16(r4)
-; CHECK-NEXT: lxv v3, 0(r4)
+; CHECK-NEXT: lxvpx vsp34, 0, r4
; CHECK-NEXT: dmxxinstdmr256 dmr0, vsp34, 1
; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc0, 0
; CHECK-NEXT: stxvp vsp36, 96(r8)
@@ -316,8 +294,7 @@ define void @tins256(ptr %vp1, ptr %vp2, ptr %vp3, ptr %vp4, ptr %rp1, ptr %rp2,
;
; CHECK-BE-LABEL: tins256:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv v2, 0(r3)
-; CHECK-BE-NEXT: lxv v3, 16(r3)
+; CHECK-BE-NEXT: lxvpx vsp34, 0, r3
; CHECK-BE-NEXT: dmsetdmrz dmr0
; CHECK-BE-NEXT: dmxxinstdmr256 dmr0, vsp34, 0
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
@@ -326,8 +303,7 @@ define void @tins256(ptr %vp1, ptr %vp2, ptr %vp3, ptr %vp4, ptr %rp1, ptr %rp2,
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT: stxvp vsp36, 32(r7)
; CHECK-BE-NEXT: stxvp vsp34, 0(r7)
-; CHECK-BE-NEXT: lxv v2, 0(r4)
-; CHECK-BE-NEXT: lxv v3, 16(r4)
+; CHECK-BE-NEXT: lxvpx vsp34, 0, r4
; CHECK-BE-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc_hi0, 1
; CHECK-BE-NEXT: stxvp vsp32, 96(r8)
; CHECK-BE-NEXT: stxvp vsp36, 64(r8)
diff --git a/llvm/test/CodeGen/PowerPC/dmr-spill.ll b/llvm/test/CodeGen/PowerPC/dmr-spill.ll
index e1d388354e198..9f922f95d40f5 100644
--- a/llvm/test/CodeGen/PowerPC/dmr-spill.ll
+++ b/llvm/test/CodeGen/PowerPC/dmr-spill.ll
@@ -23,8 +23,7 @@ define void @spillDMRreg(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) nounwind {
; CHECK-NEXT: lxvp vsp36, 32(r3)
; CHECK-NEXT: lxvp vsp32, 64(r3)
; CHECK-NEXT: lxvp vsp38, 96(r3)
-; CHECK-NEXT: lxv v8, 16(r4)
-; CHECK-NEXT: lxv v9, 0(r4)
+; CHECK-NEXT: lxvpx vsp40, 0, r4
; CHECK-NEXT: lxv vs0, 0(r5)
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -65,8 +64,7 @@ define void @spillDMRreg(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) nounwind {
; AIX-NEXT: lxvp vsp36, 64(r3)
; AIX-NEXT: lxvp vsp32, 32(r3)
; AIX-NEXT: lxvp vsp38, 0(r3)
-; AIX-NEXT: lxv v8, 0(r4)
-; AIX-NEXT: lxv v9, 16(r4)
+; AIX-NEXT: lxvpx vsp40, 0, r4
; AIX-NEXT: lxv vs0, 0(r5)
; AIX-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; AIX-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -108,8 +106,7 @@ define void @spillDMRreg(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) nounwind {
; AIX32-NEXT: lxvp vsp36, 64(r3)
; AIX32-NEXT: lxvp vsp32, 32(r3)
; AIX32-NEXT: lxvp vsp38, 0(r3)
-; AIX32-NEXT: lxv v8, 0(r4)
-; AIX32-NEXT: lxv v9, 16(r4)
+; AIX32-NEXT: lxvpx vsp40, 0, r4
; AIX32-NEXT: lxv vs0, 0(r5)
; AIX32-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; AIX32-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll b/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll
index 94121f09e36be..4c029c0b90dd9 100644
--- a/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll
@@ -111,17 +111,18 @@ define void @testMultiply(ptr nocapture noundef readonly %a, ptr nocapture nound
; CHECK-LE-WACC-NEXT: stxv v31, -48(r30) # 16-byte Folded Spill
; CHECK-LE-WACC-NEXT: lxv v31, 0(r3)
; CHECK-LE-WACC-NEXT: lxv v30, 0(r4)
-; CHECK-LE-WACC-NEXT: addi r3, r1, 32
+; CHECK-LE-WACC-NEXT: std r28, -32(r30) # 8-byte Folded Spill
; CHECK-LE-WACC-NEXT: std r29, -24(r30) # 8-byte Folded Spill
+; CHECK-LE-WACC-NEXT: addi r28, r1, 32
+; CHECK-LE-WACC-NEXT: mr r3, r28
; CHECK-LE-WACC-NEXT: vmr v2, v31
; CHECK-LE-WACC-NEXT: vmr v3, v30
; CHECK-LE-WACC-NEXT: mr r29, r5
; CHECK-LE-WACC-NEXT: bl _Z15buildVectorPairPu13__vector_pairDv16_hS0_@notoc
; CHECK-LE-WACC-NEXT: dmxxsetaccz wacc0
; CHECK-LE-WACC-NEXT: xvf32gerpp wacc0, v31, v30
-; CHECK-LE-WACC-NEXT: lxv vs0, 48(r1)
-; CHECK-LE-WACC-NEXT: lxv vs1, 32(r1)
-; CHECK-LE-WACC-NEXT: xvf32gerpp wacc0, vs1, vs0
+; CHECK-LE-WACC-NEXT: lxvpx vsp34, 0, r28
+; CHECK-LE-WACC-NEXT: xvf32gerpp wacc0, v3, v2
; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0
; CHECK-LE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -132,6 +133,7 @@ define void @testMultiply(ptr nocapture noundef readonly %a, ptr nocapture nound
; CHECK-LE-WACC-NEXT: lxv v31, -48(r30) # 16-byte Folded Reload
; CHECK-LE-WACC-NEXT: lxv v30, -64(r30) # 16-byte Folded Reload
; CHECK-LE-WACC-NEXT: ld r29, -24(r30) # 8-byte Folded Reload
+; CHECK-LE-WACC-NEXT: ld r28, -32(r30) # 8-byte Folded Reload
; CHECK-LE-WACC-NEXT: mr r1, r30
; CHECK-LE-WACC-NEXT: ld r0, 16(r1)
; CHECK-LE-WACC-NEXT: ld r30, -16(r1)
@@ -151,8 +153,10 @@ define void @testMultiply(ptr nocapture noundef readonly %a, ptr nocapture nound
; CHECK-BE-WACC-NEXT: stxv v31, -48(r30) # 16-byte Folded Spill
; CHECK-BE-WACC-NEXT: lxv v31, 0(r3)
; CHECK-BE-WACC-NEXT: lxv v30, 0(r4)
-; CHECK-BE-WACC-NEXT: addi r3, r1, 128
+; CHECK-BE-WACC-NEXT: std r28, -32(r30) # 8-byte Folded Spill
; CHECK-BE-WACC-NEXT: std r29, -24(r30) # 8-byte Folded Spill
+; CHECK-BE-WACC-NEXT: addi r28, r1, 128
+; CHECK-BE-WACC-NEXT: mr r3, r28
; CHECK-BE-WACC-NEXT: vmr v2, v31
; CHECK-BE-WACC-NEXT: vmr v3, v30
; CHECK-BE-WACC-NEXT: mr r29, r5
@@ -160,9 +164,8 @@ define void @testMultiply(ptr nocapture noundef readonly %a, ptr nocapture nound
; CHECK-BE-WACC-NEXT: nop
; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0
; CHECK-BE-WACC-NEXT: xvf32gerpp wacc0, v31, v30
-; CHECK-BE-WACC-NEXT: lxv vs0, 128(r1)
-; CHECK-BE-WACC-NEXT: lxv vs1, 144(r1)
-; CHECK-BE-WACC-NEXT: xvf32gerpp wacc0, vs0, vs1
+; CHECK-BE-WACC-NEXT: lxvpx vsp34, 0, r28
+; CHECK-BE-WACC-NEXT: xvf32gerpp wacc0, v2, v3
; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-WACC-NEXT: vmr v1, v2
; CHECK-BE-WACC-NEXT: vmr v7, v4
@@ -177,6 +180,7 @@ define void @testMultiply(ptr nocapture noundef readonly %a, ptr nocapture nound
; CHECK-BE-WACC-NEXT: lxv v31, -48(r30) # 16-byte Folded Reload
; CHECK-BE-WACC-NEXT: lxv v30, -64(r30) # 16-byte Folded Reload
; CHECK-BE-WACC-NEXT: ld r29, -24(r30) # 8-byte Folded Reload
+; CHECK-BE-WACC-NEXT: ld r28, -32(r30) # 8-byte Folded Reload
; CHECK-BE-WACC-NEXT: mr r1, r30
; CHECK-BE-WACC-NEXT: ld r0, 16(r1)
; CHECK-BE-WACC-NEXT: ld r30, -16(r1)
diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
index bc5d5bed36e9b..516a926bf031f 100644
--- a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
@@ -510,10 +510,10 @@ define dso_local void @testLdStPair(i64 %SrcIdx, i64 %DstIdx) {
; BE-PAIRED-WACC: # %bb.0: # %entry
; BE-PAIRED-WACC-NEXT: addis r3, r2, g@toc@ha
; BE-PAIRED-WACC-NEXT: addi r3, r3, g@toc@l
-; BE-PAIRED-WACC-NEXT: lxv vs0, 48(r3)
-; BE-PAIRED-WACC-NEXT: lxv vs1, 32(r3)
-; BE-PAIRED-WACC-NEXT: stxv vs0, 80(r3)
-; BE-PAIRED-WACC-NEXT: stxv vs1, 64(r3)
+; BE-PAIRED-WACC-NEXT: li r4, 32
+; BE-PAIRED-WACC-NEXT: lxvpx vsp34, r3, r4
+; BE-PAIRED-WACC-NEXT: li r4, 64
+; BE-PAIRED-WACC-NEXT: stxvpx vsp34, r3, r4
; BE-PAIRED-WACC-NEXT: blr
;
; LE-PWR9-LABEL: testLdStPair:
@@ -589,13 +589,9 @@ define dso_local void @testXLdStPair(i64 %SrcIdx, i64 %DstIdx) {
; LE-PAIRED-WACC: # %bb.0: # %entry
; LE-PAIRED-WACC-NEXT: sldi r3, r3, 5
; LE-PAIRED-WACC-NEXT: paddi r5, 0, g@PCREL, 1
-; LE-PAIRED-WACC-NEXT: add r6, r5, r3
-; LE-PAIRED-WACC-NEXT: lxvx vs0, r5, r3
-; LE-PAIRED-WACC-NEXT: lxv vs1, 16(r6)
+; LE-PAIRED-WACC-NEXT: lxvpx vsp34, r5, r3
; LE-PAIRED-WACC-NEXT: sldi r3, r4, 5
-; LE-PAIRED-WACC-NEXT: add r4, r5, r3
-; LE-PAIRED-WACC-NEXT: stxvx vs0, r5, r3
-; LE-PAIRED-WACC-NEXT: stxv vs1, 16(r4)
+; LE-PAIRED-WACC-NEXT: stxvpx vsp34, r5, r3
; LE-PAIRED-WACC-NEXT: blr
;
; BE-PAIRED-LABEL: testXLdStPair:
@@ -617,13 +613,9 @@ define dso_local void @testXLdStPair(i64 %SrcIdx, i64 %DstIdx) {
; BE-PAIRED-WACC-NEXT: addis r5, r2, g@toc@ha
; BE-PAIRED-WACC-NEXT: sldi r3, r3, 5
; BE-PAIRED-WACC-NEXT: addi r5, r5, g@toc@l
-; BE-PAIRED-WACC-NEXT: add r6, r5, r3
-; BE-PAIRED-WACC-NEXT: lxvx vs0, r5, r3
-; BE-PAIRED-WACC-NEXT: lxv vs1, 16(r6)
+; BE-PAIRED-WACC-NEXT: lxvpx vsp34, r5, r3
; BE-PAIRED-WACC-NEXT: sldi r3, r4, 5
-; BE-PAIRED-WACC-NEXT: add r4, r5, r3
-; BE-PAIRED-WACC-NEXT: stxvx vs0, r5, r3
-; BE-PAIRED-WACC-NEXT: stxv vs1, 16(r4)
+; BE-PAIRED-WACC-NEXT: stxvpx vsp34, r5, r3
; BE-PAIRED-WACC-NEXT: blr
;
; LE-PWR9-LABEL: testXLdStPair:
@@ -722,10 +714,10 @@ define dso_local void @testUnalignedLdStPair() {
; BE-PAIRED-WACC: # %bb.0: # %entry
; BE-PAIRED-WACC-NEXT: addis r3, r2, g@toc@ha
; BE-PAIRED-WACC-NEXT: addi r3, r3, g@toc@l
-; BE-PAIRED-WACC-NEXT: plxv vs0, 27(r3), 0
-; BE-PAIRED-WACC-NEXT: plxv vs1, 11(r3), 0
-; BE-PAIRED-WACC-NEXT: pstxv vs0, 35(r3), 0
-; BE-PAIRED-WACC-NEXT: pstxv vs1, 19(r3), 0
+; BE-PAIRED-WACC-NEXT: li r4, 11
+; BE-PAIRED-WACC-NEXT: lxvpx vsp34, r3, r4
+; BE-PAIRED-WACC-NEXT: li r4, 19
+; BE-PAIRED-WACC-NEXT: stxvpx vsp34, r3, r4
; BE-PAIRED-WACC-NEXT: blr
;
; LE-PWR9-LABEL: testUnalignedLdStPair:
diff --git a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll
index 35f35706b8690..940438b6f8da5 100644
--- a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll
@@ -2280,8 +2280,7 @@ define void @test33(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
;
; CHECK-WACC-LABEL: test33:
; CHECK-WACC: # %bb.0: # %entry
-; CHECK-WACC-NEXT: lxv v4, 16(r4)
-; CHECK-WACC-NEXT: lxv v5, 0(r4)
+; CHECK-WACC-NEXT: lxvpx vsp36, 0, r4
; CHECK-WACC-NEXT: xvf64ger wacc0, vsp36, v2
; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-WACC-NEXT: stxv v4, 48(r7)
@@ -2292,8 +2291,7 @@ define void @test33(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
;
; CHECK-BE-WACC-LABEL: test33:
; CHECK-BE-WACC: # %bb.0: # %entry
-; CHECK-BE-WACC-NEXT: lxv v4, 0(r4)
-; CHECK-BE-WACC-NEXT: lxv v5, 16(r4)
+; CHECK-BE-WACC-NEXT: lxvpx vsp36, 0, r4
; CHECK-BE-WACC-NEXT: xvf64ger wacc0, vsp36, v2
; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
@@ -2352,8 +2350,7 @@ define void @test34(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-WACC-NEXT: lxv v1, 32(r3)
; CHECK-WACC-NEXT: lxv v4, 16(r3)
; CHECK-WACC-NEXT: lxv v0, 48(r3)
-; CHECK-WACC-NEXT: lxv v6, 16(r4)
-; CHECK-WACC-NEXT: lxv v7, 0(r4)
+; CHECK-WACC-NEXT: lxvpx vsp38, 0, r4
; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
; CHECK-WACC-NEXT: xvf64gerpp wacc0, vsp38, v2
; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2369,8 +2366,7 @@ define void @test34(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
-; CHECK-BE-WACC-NEXT: lxv v6, 0(r4)
-; CHECK-BE-WACC-NEXT: lxv v7, 16(r4)
+; CHECK-BE-WACC-NEXT: lxvpx vsp38, 0, r4
; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
; CHECK-BE-WACC-NEXT: xvf64gerpp wacc0, vsp38, v2
; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2431,8 +2427,7 @@ define void @test35(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-WACC-NEXT: lxv v1, 32(r3)
; CHECK-WACC-NEXT: lxv v4, 16(r3)
; CHECK-WACC-NEXT: lxv v0, 48(r3)
-; CHECK-WACC-NEXT: lxv v6, 16(r4)
-; CHECK-WACC-NEXT: lxv v7, 0(r4)
+; CHECK-WACC-NEXT: lxvpx vsp38, 0, r4
; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
; CHECK-WACC-NEXT: xvf64gerpn wacc0, vsp38, v2
; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2448,8 +2443,7 @@ define void @test35(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
-; CHECK-BE-WACC-NEXT: lxv v6, 0(r4)
-; CHECK-BE-WACC-NEXT: lxv v7, 16(r4)
+; CHECK-BE-WACC-NEXT: lxvpx vsp38, 0, r4
; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
; CHECK-BE-WACC-NEXT: xvf64gerpn wacc0, vsp38, v2
; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2510,8 +2504,7 @@ define void @test36(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-WACC-NEXT: lxv v1, 32(r3)
; CHECK-WACC-NEXT: lxv v4, 16(r3)
; CHECK-WACC-NEXT: lxv v0, 48(r3)
-; CHECK-WACC-NEXT: lxv v6, 16(r4)
-; CHECK-WACC-NEXT: lxv v7, 0(r4)
+; CHECK-WACC-NEXT: lxvpx vsp38, 0, r4
; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
; CHECK-WACC-NEXT: xvf64gernp wacc0, vsp38, v2
; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2527,8 +2520,7 @@ define void @test36(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
-; CHECK-BE-WACC-NEXT: lxv v6, 0(r4)
-; CHECK-BE-WACC-NEXT: lxv v7, 16(r4)
+; CHECK-BE-WACC-NEXT: lxvpx vsp38, 0, r4
; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
; CHECK-BE-WACC-NEXT: xvf64gernp wacc0, vsp38, v2
; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2589,8 +2581,7 @@ define void @test37(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-WACC-NEXT: lxv v1, 32(r3)
; CHECK-WACC-NEXT: lxv v4, 16(r3)
; CHECK-WACC-NEXT: lxv v0, 48(r3)
-; CHECK-WACC-NEXT: lxv v6, 16(r4)
-; CHECK-WACC-NEXT: lxv v7, 0(r4)
+; CHECK-WACC-NEXT: lxvpx vsp38, 0, r4
; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
; CHECK-WACC-NEXT: xvf64gernn wacc0, vsp38, v2
; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2606,8 +2597,7 @@ define void @test37(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
-; CHECK-BE-WACC-NEXT: lxv v6, 0(r4)
-; CHECK-BE-WACC-NEXT: lxv v7, 16(r4)
+; CHECK-BE-WACC-NEXT: lxvpx vsp38, 0, r4
; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
; CHECK-BE-WACC-NEXT: xvf64gernn wacc0, vsp38, v2
; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2654,8 +2644,7 @@ define void @test38(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
;
; CHECK-WACC-LABEL: test38:
; CHECK-WACC: # %bb.0: # %entry
-; CHECK-WACC-NEXT: lxv v4, 16(r4)
-; CHECK-WACC-NEXT: lxv v5, 0(r4)
+; CHECK-WACC-NEXT: lxvpx vsp36, 0, r4
; CHECK-WACC-NEXT: pmxvf64ger wacc0, vsp36, v2, 0, 0
; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-WACC-NEXT: stxv v4, 48(r7)
@@ -2666,8 +2655,7 @@ define void @test38(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
;
; CHECK-BE-WACC-LABEL: test38:
; CHECK-BE-WACC: # %bb.0: # %entry
-; CHECK-BE-WACC-NEXT: lxv v4, 0(r4)
-; CHECK-BE-WACC-NEXT: lxv v5, 16(r4)
+; CHECK-BE-WACC-NEXT: lxvpx vsp36, 0, r4
; CHECK-BE-WACC-NEXT: pmxvf64ger wacc0, vsp36, v2, 0, 0
; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-WACC-NEXT: stxv v5, 48(r7)
@@ -2726,8 +2714,7 @@ define void @test39(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-WACC-NEXT: lxv v1, 32(r3)
; CHECK-WACC-NEXT: lxv v4, 16(r3)
; CHECK-WACC-NEXT: lxv v0, 48(r3)
-; CHECK-WACC-NEXT: lxv v6, 16(r4)
-; CHECK-WACC-NEXT: lxv v7, 0(r4)
+; CHECK-WACC-NEXT: lxvpx vsp38, 0, r4
; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
; CHECK-WACC-NEXT: pmxvf64gerpp wacc0, vsp38, v2, 0, 0
; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2743,8 +2730,7 @@ define void @test39(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
-; CHECK-BE-WACC-NEXT: lxv v6, 0(r4)
-; CHECK-BE-WACC-NEXT: lxv v7, 16(r4)
+; CHECK-BE-WACC-NEXT: lxvpx vsp38, 0, r4
; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
; CHECK-BE-WACC-NEXT: pmxvf64gerpp wacc0, vsp38, v2, 0, 0
; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2805,8 +2791,7 @@ define void @test40(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-WACC-NEXT: lxv v1, 32(r3)
; CHECK-WACC-NEXT: lxv v4, 16(r3)
; CHECK-WACC-NEXT: lxv v0, 48(r3)
-; CHECK-WACC-NEXT: lxv v6, 16(r4)
-; CHECK-WACC-NEXT: lxv v7, 0(r4)
+; CHECK-WACC-NEXT: lxvpx vsp38, 0, r4
; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
; CHECK-WACC-NEXT: pmxvf64gerpn wacc0, vsp38, v2, 0, 0
; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2822,8 +2807,7 @@ define void @test40(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
-; CHECK-BE-WACC-NEXT: lxv v6, 0(r4)
-; CHECK-BE-WACC-NEXT: lxv v7, 16(r4)
+; CHECK-BE-WACC-NEXT: lxvpx vsp38, 0, r4
; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
; CHECK-BE-WACC-NEXT: pmxvf64gerpn wacc0, vsp38, v2, 0, 0
; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2884,8 +2868,7 @@ define void @test41(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-WACC-NEXT: lxv v1, 32(r3)
; CHECK-WACC-NEXT: lxv v4, 16(r3)
; CHECK-WACC-NEXT: lxv v0, 48(r3)
-; CHECK-WACC-NEXT: lxv v6, 16(r4)
-; CHECK-WACC-NEXT: lxv v7, 0(r4)
+; CHECK-WACC-NEXT: lxvpx vsp38, 0, r4
; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
; CHECK-WACC-NEXT: pmxvf64gernp wacc0, vsp38, v2, 0, 0
; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2901,8 +2884,7 @@ define void @test41(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
-; CHECK-BE-WACC-NEXT: lxv v6, 0(r4)
-; CHECK-BE-WACC-NEXT: lxv v7, 16(r4)
+; CHECK-BE-WACC-NEXT: lxvpx vsp38, 0, r4
; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
; CHECK-BE-WACC-NEXT: pmxvf64gernp wacc0, vsp38, v2, 0, 0
; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2963,8 +2945,7 @@ define void @test42(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-WACC-NEXT: lxv v1, 32(r3)
; CHECK-WACC-NEXT: lxv v4, 16(r3)
; CHECK-WACC-NEXT: lxv v0, 48(r3)
-; CHECK-WACC-NEXT: lxv v6, 16(r4)
-; CHECK-WACC-NEXT: lxv v7, 0(r4)
+; CHECK-WACC-NEXT: lxvpx vsp38, 0, r4
; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
; CHECK-WACC-NEXT: pmxvf64gernn wacc0, vsp38, v2, 0, 0
; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2980,8 +2961,7 @@ define void @test42(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-BE-WACC-NEXT: lxv v1, 16(r3)
; CHECK-BE-WACC-NEXT: lxv v4, 32(r3)
; CHECK-BE-WACC-NEXT: lxv v0, 0(r3)
-; CHECK-BE-WACC-NEXT: lxv v6, 0(r4)
-; CHECK-BE-WACC-NEXT: lxv v7, 16(r4)
+; CHECK-BE-WACC-NEXT: lxvpx vsp38, 0, r4
; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0
; CHECK-BE-WACC-NEXT: pmxvf64gernn wacc0, vsp38, v2, 0, 0
; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
diff --git a/llvm/test/CodeGen/PowerPC/vsx-ldst-with-length.ll b/llvm/test/CodeGen/PowerPC/vsx-ldst-with-length.ll
index e7bc8fbca3202..0483c60a72412 100644
--- a/llvm/test/CodeGen/PowerPC/vsx-ldst-with-length.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-ldst-with-length.ll
@@ -78,15 +78,13 @@ define <256 x i1> @testLXVPRL(ptr %vpp, i64 %b) {
; CHECK-LABEL: testLXVPRL:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvprl vsp34, r4, r5
-; CHECK-NEXT: stxv v2, 16(r3)
-; CHECK-NEXT: stxv v3, 0(r3)
+; CHECK-NEXT: stxvpx vsp34, 0, r3
; CHECK-NEXT: blr
;
; AIX-LABEL: testLXVPRL:
; AIX: # %bb.0: # %entry
; AIX-NEXT: lxvprl vsp34, r4, r5
-; AIX-NEXT: stxv v3, 16(r3)
-; AIX-NEXT: stxv v2, 0(r3)
+; AIX-NEXT: stxvpx vsp34, 0, r3
; AIX-NEXT: blr
entry:
%0 = tail call <256 x i1> @llvm.ppc.vsx.lxvprl(ptr %vpp, i64 %b)
@@ -98,15 +96,13 @@ define <256 x i1> @testLXVPRLL(ptr %vpp, i64 %b) {
; CHECK-LABEL: testLXVPRLL:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvprll vsp34, r4, r5
-; CHECK-NEXT: stxv v2, 16(r3)
-; CHECK-NEXT: stxv v3, 0(r3)
+; CHECK-NEXT: stxvpx vsp34, 0, r3
; CHECK-NEXT: blr
;
; AIX-LABEL: testLXVPRLL:
; AIX: # %bb.0: # %entry
; AIX-NEXT: lxvprll vsp34, r4, r5
-; AIX-NEXT: stxv v3, 16(r3)
-; AIX-NEXT: stxv v2, 0(r3)
+; AIX-NEXT: stxvpx vsp34, 0, r3
; AIX-NEXT: blr
entry:
%0 = tail call <256 x i1> @llvm.ppc.vsx.lxvprll(ptr %vpp, i64 %b)
@@ -117,15 +113,13 @@ declare <256 x i1> @llvm.ppc.vsx.lxvprll(ptr, i64)
define void @testSTXVPRL(ptr %v, ptr %vp, i64 %len) {
; CHECK-LABEL: testSTXVPRL:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lxv v2, 16(r3)
-; CHECK-NEXT: lxv v3, 0(r3)
+; CHECK-NEXT: lxvpx vsp34, 0, r3
; CHECK-NEXT: stxvprl vsp34, r4, r5
; CHECK-NEXT: blr
;
; AIX-LABEL: testSTXVPRL:
; AIX: # %bb.0: # %entry
-; AIX-NEXT: lxv v2, 0(r3)
-; AIX-NEXT: lxv v3, 16(r3)
+; AIX-NEXT: lxvpx vsp34, 0, r3
; AIX-NEXT: stxvprl vsp34, r4, r5
; AIX-NEXT: blr
entry:
@@ -138,15 +132,13 @@ declare void @llvm.ppc.vsx.stxvprl(<256 x i1>, ptr, i64)
define void @testSTXVPRLL(ptr %v, ptr %vp, i64 %len) {
; CHECK-LABEL: testSTXVPRLL:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lxv v2, 16(r3)
-; CHECK-NEXT: lxv v3, 0(r3)
+; CHECK-NEXT: lxvpx vsp34, 0, r3
; CHECK-NEXT: stxvprll vsp34, r4, r5
; CHECK-NEXT: blr
;
; AIX-LABEL: testSTXVPRLL:
; AIX: # %bb.0: # %entry
-; AIX-NEXT: lxv v2, 0(r3)
-; AIX-NEXT: lxv v3, 16(r3)
+; AIX-NEXT: lxvpx vsp34, 0, r3
; AIX-NEXT: stxvprll vsp34, r4, r5
; AIX-NEXT: blr
entry:
>From 85a83d9dab98af95d4ecd6a1b0a02989b5e9a232 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Tue, 3 Mar 2026 16:23:37 -0500
Subject: [PATCH 2/3] add pattern matching for prefixed lxvp/stxvp
---
llvm/lib/Target/PowerPC/PPC.td | 12 ++++++++----
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 12 ++++--------
llvm/lib/Target/PowerPC/PPCInstrFuture.td | 2 ++
llvm/test/CodeGen/PowerPC/mma-acc-memops.ll | 16 ++++++++--------
4 files changed, 22 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index c0abbf6f50804..82d27ecfaecec 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -277,10 +277,6 @@ def FeatureISA3_1 : SubtargetFeature<"isa-v31-instructions", "IsISA3_1",
"true",
"Enable instructions in ISA 3.1.",
[FeatureISA3_0]>;
-def FeatureISAFuture : SubtargetFeature<"isa-future-instructions",
- "IsISAFuture", "true",
- "Enable instructions for Future ISA.",
- [FeatureISA3_1]>;
def FeatureP9Altivec : SubtargetFeature<"power9-altivec", "HasP9Altivec", "true",
"Enable POWER9 Altivec instructions",
[FeatureISA3_0, FeatureP8Altivec]>;
@@ -292,6 +288,14 @@ def FeatureP10Vector : SubtargetFeature<"power10-vector", "HasP10Vector",
"true",
"Enable POWER10 vector instructions",
[FeatureISA3_1, FeatureP9Vector]>;
+def FeatureFutureVector : SubtargetFeature<"future-vector", "HasFutureVector",
+ "true",
+ "Enable Future vector instructions",
+ [FeatureP10Vector]>;
+def FeatureISAFuture : SubtargetFeature<"isa-future-instructions",
+ "IsISAFuture", "true",
+ "Enable instructions for Future ISA.",
+ [FeatureISA3_1, FeatureFutureVector]>;
// A separate feature for this even though it is equivalent to P9Vector
// because this is a feature of the implementation rather than the architecture
// and may go away with future CPU's.
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 3b2b134c64149..9781ee9b158c2 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12136,10 +12136,8 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
"Type unsupported without paired vector support");
// For v256i1 on ISA Future, let the load go through to instruction selection
- // where it will be matched to lxvp by the instruction patterns, unless it's
- // a PC-relative load which should use plxv instead.
- if (VT == MVT::v256i1 && Subtarget.isISAFuture() &&
- !isPCRelNode(LN->getBasePtr()))
+ // where it will be matched to lxvp/plxvp by the instruction patterns.
+ if (VT == MVT::v256i1 && Subtarget.isISAFuture())
return Op;
// For other cases, create 2 or 4 v16i8 loads to load the pair or accumulator
@@ -12312,10 +12310,8 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
"Type unsupported without paired vector support");
// For v256i1 on ISA Future, let the store go through to instruction selection
- // where it will be matched to stxvp by the instruction patterns, unless it's
- // a PC-relative store which should use pstxv instead.
- if (StoreVT == MVT::v256i1 && Subtarget.isISAFuture() &&
- !isPCRelNode(SN->getBasePtr()))
+ // where it will be matched to stxvp/pstxvp by the instruction patterns.
+ if (StoreVT == MVT::v256i1 && Subtarget.isISAFuture())
return Op;
// For other cases, create 2 or 4 v16i8 stores to store the pair or
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
index 2b17bfc4b584b..84494b8f8889d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
@@ -601,8 +601,10 @@ def : Pat<(int_ppc_vsx_stxvprll v256i1:$XTp, addr:$RA, i64:$RB), (STXVPRLL $XTp,
let Predicates = [PairedVectorMemops, IsISAFuture] in {
def : Pat<(v256i1 (load xoaddr:$src)), (LXVPX xoaddr:$src)>;
def : Pat<(v256i1 (load iaddrX16:$src)), (LXVP iaddrX16:$src)>;
+ def : Pat<(v256i1 (load PDForm:$src)), (PLXVP memri34:$src)>;
def : Pat<(store v256i1:$XSp, xoaddr:$dst), (STXVPX $XSp, xoaddr:$dst)>;
def : Pat<(store v256i1:$XSp, iaddrX16:$dst), (STXVP $XSp, iaddrX16:$dst)>;
+ def : Pat<(store v256i1:$XSp, PDForm:$dst), (PSTXVP $XSp, memri34:$dst)>;
}
let Predicates = [HasVSX, IsISAFuture] in {
def : Pat<(v4i32 (rotl v4i32:$vA, v4i32:$vB)), (v4i32 (XVRLW v4i32:$vA,
diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
index 516a926bf031f..8a963d53105bc 100644
--- a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
@@ -490,10 +490,10 @@ define dso_local void @testLdStPair(i64 %SrcIdx, i64 %DstIdx) {
;
; LE-PAIRED-WACC-LABEL: testLdStPair:
; LE-PAIRED-WACC: # %bb.0: # %entry
-; LE-PAIRED-WACC-NEXT: plxv vs0, g@PCREL+48(0), 1
-; LE-PAIRED-WACC-NEXT: plxv vs1, g@PCREL+32(0), 1
-; LE-PAIRED-WACC-NEXT: pstxv vs0, g@PCREL+80(0), 1
-; LE-PAIRED-WACC-NEXT: pstxv vs1, g@PCREL+64(0), 1
+; LE-PAIRED-WACC-NEXT: paddi r3, 0, g@PCREL+32, 1
+; LE-PAIRED-WACC-NEXT: lxvpx vsp34, 0, r3
+; LE-PAIRED-WACC-NEXT: paddi r3, 0, g@PCREL+64, 1
+; LE-PAIRED-WACC-NEXT: stxvpx vsp34, 0, r3
; LE-PAIRED-WACC-NEXT: blr
;
; BE-PAIRED-LABEL: testLdStPair:
@@ -694,10 +694,10 @@ define dso_local void @testUnalignedLdStPair() {
;
; LE-PAIRED-WACC-LABEL: testUnalignedLdStPair:
; LE-PAIRED-WACC: # %bb.0: # %entry
-; LE-PAIRED-WACC-NEXT: plxv vs0, g@PCREL+27(0), 1
-; LE-PAIRED-WACC-NEXT: plxv vs1, g@PCREL+11(0), 1
-; LE-PAIRED-WACC-NEXT: pstxv vs0, g@PCREL+35(0), 1
-; LE-PAIRED-WACC-NEXT: pstxv vs1, g@PCREL+19(0), 1
+; LE-PAIRED-WACC-NEXT: paddi r3, 0, g@PCREL+11, 1
+; LE-PAIRED-WACC-NEXT: lxvpx vsp34, 0, r3
+; LE-PAIRED-WACC-NEXT: paddi r3, 0, g@PCREL+19, 1
+; LE-PAIRED-WACC-NEXT: stxvpx vsp34, 0, r3
; LE-PAIRED-WACC-NEXT: blr
;
; BE-PAIRED-LABEL: testUnalignedLdStPair:
>From 4c870787203f2e567ff8231bf66431e75d7f82be Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Wed, 4 Mar 2026 00:12:24 -0500
Subject: [PATCH 3/3] order matters
---
llvm/lib/Target/PowerPC/PPCInstrFuture.td | 5 +++--
llvm/test/CodeGen/PowerPC/vec_int_to_double_shuffle.ll | 8 ++++----
2 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
index 84494b8f8889d..f1097beaa8397 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
@@ -599,13 +599,14 @@ def : Pat<(int_ppc_vsx_stxvprll v256i1:$XTp, addr:$RA, i64:$RB), (STXVPRLL $XTp,
// Regular load/store patterns for v256i1 (for ISA Future)
let Predicates = [PairedVectorMemops, IsISAFuture] in {
- def : Pat<(v256i1 (load xoaddr:$src)), (LXVPX xoaddr:$src)>;
def : Pat<(v256i1 (load iaddrX16:$src)), (LXVP iaddrX16:$src)>;
def : Pat<(v256i1 (load PDForm:$src)), (PLXVP memri34:$src)>;
- def : Pat<(store v256i1:$XSp, xoaddr:$dst), (STXVPX $XSp, xoaddr:$dst)>;
+ def : Pat<(v256i1 (load xoaddr:$src)), (LXVPX xoaddr:$src)>;
def : Pat<(store v256i1:$XSp, iaddrX16:$dst), (STXVP $XSp, iaddrX16:$dst)>;
def : Pat<(store v256i1:$XSp, PDForm:$dst), (PSTXVP $XSp, memri34:$dst)>;
+ def : Pat<(store v256i1:$XSp, xoaddr:$dst), (STXVPX $XSp, xoaddr:$dst)>;
}
+
let Predicates = [HasVSX, IsISAFuture] in {
def : Pat<(v4i32 (rotl v4i32:$vA, v4i32:$vB)), (v4i32 (XVRLW v4i32:$vA,
v4i32:$vB))>;
diff --git a/llvm/test/CodeGen/PowerPC/vec_int_to_double_shuffle.ll b/llvm/test/CodeGen/PowerPC/vec_int_to_double_shuffle.ll
index 9bba60435c62e..5588d07f267c6 100644
--- a/llvm/test/CodeGen/PowerPC/vec_int_to_double_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_int_to_double_shuffle.ll
@@ -12,8 +12,8 @@ define <2 x double> @foo(<4 x i32> %s) {
;
; CHECK-BE-LABEL: foo:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: xxsldwi 0, 34, 34, 1
-; CHECK-BE-NEXT: xvcvsxwdp 34, 0
+; CHECK-BE-NEXT: xxsldwi 34, 34, 34, 1
+; CHECK-BE-NEXT: xvcvsxwdp 34, 34
; CHECK-BE-NEXT: blr
entry:
%0 = shufflevector <4 x i32> %s, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
@@ -29,8 +29,8 @@ define <2 x double> @bar(<4 x i32> %s) {
;
; CHECK-BE-LABEL: bar:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: xxsldwi 0, 34, 34, 1
-; CHECK-BE-NEXT: xvcvuxwdp 34, 0
+; CHECK-BE-NEXT: xxsldwi 34, 34, 34, 1
+; CHECK-BE-NEXT: xvcvuxwdp 34, 34
; CHECK-BE-NEXT: blr
entry:
%0 = shufflevector <4 x i32> %s, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
More information about the llvm-commits mailing list