[llvm] [PowerPC] Use lxvp/stxvp for v256i1 types (PR #184447)

Lei Huang via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 3 21:12:37 PST 2026


https://github.com/lei137 updated https://github.com/llvm/llvm-project/pull/184447

>From 5d37cf4cf0cb679289ba1e45ee1090c952ef8af8 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Tue, 3 Mar 2026 15:45:59 -0500
Subject: [PATCH 1/3] [PowerPC] Use lxvp/stxvp for v256i1 types

Added patterns to use paired vector instructions (lxvp/lxvpx/stxvp/stxvpx)
for v256i1 operations instead of splitting into two separate vector operations.

Assisted by AI.
---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  39 +++--
 llvm/lib/Target/PowerPC/PPCInstrFuture.td     |   8 +
 .../test/CodeGen/PowerPC/dmf-outer-product.ll | 156 ++++++------------
 llvm/test/CodeGen/PowerPC/dmr-copy.ll         |  20 +--
 llvm/test/CodeGen/PowerPC/dmr-enable.ll       |  72 +++-----
 llvm/test/CodeGen/PowerPC/dmr-spill.ll        |   9 +-
 .../CodeGen/PowerPC/mma-acc-copy-hints.ll     |  20 ++-
 llvm/test/CodeGen/PowerPC/mma-acc-memops.ll   |  32 ++--
 .../test/CodeGen/PowerPC/mma-outer-product.ll |  60 +++----
 .../CodeGen/PowerPC/vsx-ldst-with-length.ll   |  24 +--
 10 files changed, 175 insertions(+), 265 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 84d26448a7f4f..3b2b134c64149 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12107,6 +12107,14 @@ SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
                  0);
 }
 
+static bool isPCRelNode(SDValue N) {
+  return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
+      isValidPCRelNode<ConstantPoolSDNode>(N) ||
+      isValidPCRelNode<GlobalAddressSDNode>(N) ||
+      isValidPCRelNode<JumpTableSDNode>(N) ||
+      isValidPCRelNode<BlockAddressSDNode>(N));
+}
+
 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
                                            SelectionDAG &DAG) const {
   SDLoc dl(Op);
@@ -12122,12 +12130,20 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
     return Op;
 
   // Type v256i1 is used for pairs and v512i1 is used for accumulators.
-  // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
-  // 2 or 4 vsx registers.
   assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
          "Type unsupported without MMA");
   assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
          "Type unsupported without paired vector support");
+
+  // For v256i1 on ISA Future, let the load go through to instruction selection
+  // where it will be matched to lxvp by the instruction patterns, unless it's
+  // a PC-relative load which should use plxv instead.
+  if (VT == MVT::v256i1 && Subtarget.isISAFuture() &&
+      !isPCRelNode(LN->getBasePtr()))
+    return Op;
+
+  // For other cases, create 2 or 4 v16i8 loads to load the pair or accumulator
+  // value in 2 or 4 vsx registers.
   Align Alignment = LN->getAlign();
   SmallVector<SDValue, 4> Loads;
   SmallVector<SDValue, 4> LoadChains;
@@ -12290,12 +12306,20 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
     return Op;
 
   // Type v256i1 is used for pairs and v512i1 is used for accumulators.
-  // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
-  // underlying registers individually.
   assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
          "Type unsupported without MMA");
   assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
          "Type unsupported without paired vector support");
+
+  // For v256i1 on ISA Future, let the store go through to instruction selection
+  // where it will be matched to stxvp by the instruction patterns, unless it's
+  // a PC-relative store which should use pstxv instead.
+  if (StoreVT == MVT::v256i1 && Subtarget.isISAFuture() &&
+      !isPCRelNode(SN->getBasePtr()))
+    return Op;
+
+  // For other cases, create 2 or 4 v16i8 stores to store the pair or
+  // accumulator underlying registers individually.
   Align Alignment = SN->getAlign();
   SmallVector<SDValue, 4> Stores;
   unsigned NumVecs = 2;
@@ -20047,13 +20071,6 @@ static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
   }
 }
 
-static bool isPCRelNode(SDValue N) {
-  return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
-      isValidPCRelNode<ConstantPoolSDNode>(N) ||
-      isValidPCRelNode<GlobalAddressSDNode>(N) ||
-      isValidPCRelNode<JumpTableSDNode>(N) ||
-      isValidPCRelNode<BlockAddressSDNode>(N));
-}
 
 /// computeMOFlags - Given a node N and it's Parent (a MemSDNode), compute
 /// the address flags of the load/store instruction that is to be matched.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
index 717454f78e2a4..2b17bfc4b584b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
@@ -596,6 +596,14 @@ def : Pat<(int_ppc_vsx_stxvprl v256i1:$XTp, addr:$RA, i64:$RB), (STXVPRL $XTp,
                                                                     $RA, $RB)>;
 def : Pat<(int_ppc_vsx_stxvprll v256i1:$XTp, addr:$RA, i64:$RB), (STXVPRLL $XTp,
                                                                      $RA, $RB)>;
+
+// Regular load/store patterns for v256i1 (for ISA Future)
+let Predicates = [PairedVectorMemops, IsISAFuture] in {
+  def : Pat<(v256i1 (load xoaddr:$src)), (LXVPX xoaddr:$src)>;
+  def : Pat<(v256i1 (load iaddrX16:$src)), (LXVP iaddrX16:$src)>;
+  def : Pat<(store v256i1:$XSp, xoaddr:$dst), (STXVPX $XSp, xoaddr:$dst)>;
+  def : Pat<(store v256i1:$XSp, iaddrX16:$dst), (STXVP $XSp, iaddrX16:$dst)>;
+}
 let Predicates = [HasVSX, IsISAFuture] in {
   def : Pat<(v4i32 (rotl v4i32:$vA, v4i32:$vB)), (v4i32 (XVRLW v4i32:$vA,
                                                      v4i32:$vB))>;
diff --git a/llvm/test/CodeGen/PowerPC/dmf-outer-product.ll b/llvm/test/CodeGen/PowerPC/dmf-outer-product.ll
index ebdc4390bac28..0e7837d5ee659 100644
--- a/llvm/test/CodeGen/PowerPC/dmf-outer-product.ll
+++ b/llvm/test/CodeGen/PowerPC/dmf-outer-product.ll
@@ -11,9 +11,8 @@ declare <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1>, <16 x i8>)
 define void @test_dmxvi8gerx4(ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-LABEL: test_dmxvi8gerx4:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxv v2, 16(r3)
+; CHECK-NEXT:    lxvpx vsp34, 0, r3
 ; CHECK-NEXT:    lxv vs0, 0(r4)
-; CHECK-NEXT:    lxv v3, 0(r3)
 ; CHECK-NEXT:    dmxvi8gerx4 dmr0, vsp34, vs0
 ; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
 ; CHECK-NEXT:    stxvp vsp34, 96(r5)
@@ -25,9 +24,8 @@ define void @test_dmxvi8gerx4(ptr %vpp, ptr %vcp, ptr %resp) {
 ;
 ; CHECK-BE-LABEL: test_dmxvi8gerx4:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
+; CHECK-BE-NEXT:    lxvpx vsp34, 0, r3
 ; CHECK-BE-NEXT:    lxv vs0, 0(r4)
-; CHECK-BE-NEXT:    lxv v3, 16(r3)
 ; CHECK-BE-NEXT:    dmxvi8gerx4 dmr0, vsp34, vs0
 ; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
 ; CHECK-BE-NEXT:    stxvp vsp36, 96(r5)
@@ -53,8 +51,7 @@ define void @test_dmxvi8gerx4pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-NEXT:    lxvp vsp36, 32(r3)
 ; CHECK-NEXT:    lxvp vsp32, 64(r3)
 ; CHECK-NEXT:    lxvp vsp38, 96(r3)
-; CHECK-NEXT:    lxv v8, 16(r4)
-; CHECK-NEXT:    lxv v9, 0(r4)
+; CHECK-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-NEXT:    lxv vs0, 0(r5)
 ; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -73,8 +70,7 @@ define void @test_dmxvi8gerx4pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
 ; CHECK-BE-NEXT:    lxvp vsp32, 32(r3)
 ; CHECK-BE-NEXT:    lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT:    lxv v8, 0(r4)
-; CHECK-BE-NEXT:    lxv v9, 16(r4)
+; CHECK-BE-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-BE-NEXT:    lxv vs0, 0(r5)
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -104,8 +100,7 @@ define void @test_dmxvi8gerx4spp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-NEXT:    lxvp vsp36, 32(r3)
 ; CHECK-NEXT:    lxvp vsp32, 64(r3)
 ; CHECK-NEXT:    lxvp vsp38, 96(r3)
-; CHECK-NEXT:    lxv v8, 16(r4)
-; CHECK-NEXT:    lxv v9, 0(r4)
+; CHECK-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-NEXT:    lxv vs0, 0(r5)
 ; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -124,8 +119,7 @@ define void @test_dmxvi8gerx4spp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
 ; CHECK-BE-NEXT:    lxvp vsp32, 32(r3)
 ; CHECK-BE-NEXT:    lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT:    lxv v8, 0(r4)
-; CHECK-BE-NEXT:    lxv v9, 16(r4)
+; CHECK-BE-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-BE-NEXT:    lxv vs0, 0(r5)
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -155,8 +149,7 @@ define void @test_pmdmxvi8gerx4pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-NEXT:    lxvp vsp36, 32(r3)
 ; CHECK-NEXT:    lxvp vsp32, 64(r3)
 ; CHECK-NEXT:    lxvp vsp38, 96(r3)
-; CHECK-NEXT:    lxv v8, 16(r4)
-; CHECK-NEXT:    lxv v9, 0(r4)
+; CHECK-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-NEXT:    lxv vs0, 0(r5)
 ; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -175,8 +168,7 @@ define void @test_pmdmxvi8gerx4pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
 ; CHECK-BE-NEXT:    lxvp vsp32, 32(r3)
 ; CHECK-BE-NEXT:    lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT:    lxv v8, 0(r4)
-; CHECK-BE-NEXT:    lxv v9, 16(r4)
+; CHECK-BE-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-BE-NEXT:    lxv vs0, 0(r5)
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -202,9 +194,8 @@ declare <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4(<256 x i1>, <16 x i8>, i32, i32,
 define void @test_pmdmxvi8gerx4(ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-LABEL: test_pmdmxvi8gerx4:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxv v2, 16(r3)
+; CHECK-NEXT:    lxvpx vsp34, 0, r3
 ; CHECK-NEXT:    lxv vs0, 0(r4)
-; CHECK-NEXT:    lxv v3, 0(r3)
 ; CHECK-NEXT:    pmdmxvi8gerx4 dmr0, vsp34, vs0, 55, 5, 10
 ; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
 ; CHECK-NEXT:    stxvp vsp34, 96(r5)
@@ -216,9 +207,8 @@ define void @test_pmdmxvi8gerx4(ptr %vpp, ptr %vcp, ptr %resp) {
 ;
 ; CHECK-BE-LABEL: test_pmdmxvi8gerx4:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
+; CHECK-BE-NEXT:    lxvpx vsp34, 0, r3
 ; CHECK-BE-NEXT:    lxv vs0, 0(r4)
-; CHECK-BE-NEXT:    lxv v3, 16(r3)
 ; CHECK-BE-NEXT:    pmdmxvi8gerx4 dmr0, vsp34, vs0, 55, 5, 10
 ; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
 ; CHECK-BE-NEXT:    stxvp vsp36, 96(r5)
@@ -244,8 +234,7 @@ define dso_local void @test_pmdmxvi8gerx4spp(ptr %vop, ptr %vpp, ptr %vcp, ptr %
 ; CHECK-NEXT:    lxvp vsp36, 32(r3)
 ; CHECK-NEXT:    lxvp vsp32, 64(r3)
 ; CHECK-NEXT:    lxvp vsp38, 96(r3)
-; CHECK-NEXT:    lxv v8, 16(r4)
-; CHECK-NEXT:    lxv v9, 0(r4)
+; CHECK-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-NEXT:    lxv vs0, 0(r5)
 ; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -264,8 +253,7 @@ define dso_local void @test_pmdmxvi8gerx4spp(ptr %vop, ptr %vpp, ptr %vcp, ptr %
 ; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
 ; CHECK-BE-NEXT:    lxvp vsp32, 32(r3)
 ; CHECK-BE-NEXT:    lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT:    lxv v8, 0(r4)
-; CHECK-BE-NEXT:    lxv v9, 16(r4)
+; CHECK-BE-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-BE-NEXT:    lxv vs0, 0(r5)
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -291,9 +279,8 @@ declare <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2(<256 x i1>, <16 x i8>)
 define void @test_dmxvbf16gerx2(ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-LABEL: test_dmxvbf16gerx2:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxv v2, 16(r3)
+; CHECK-NEXT:    lxvpx vsp34, 0, r3
 ; CHECK-NEXT:    lxv vs0, 0(r4)
-; CHECK-NEXT:    lxv v3, 0(r3)
 ; CHECK-NEXT:    dmxvbf16gerx2 dmr0, vsp34, vs0
 ; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
 ; CHECK-NEXT:    stxvp vsp34, 96(r5)
@@ -305,9 +292,8 @@ define void @test_dmxvbf16gerx2(ptr %vpp, ptr %vcp, ptr %resp) {
 ;
 ; CHECK-BE-LABEL: test_dmxvbf16gerx2:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
+; CHECK-BE-NEXT:    lxvpx vsp34, 0, r3
 ; CHECK-BE-NEXT:    lxv vs0, 0(r4)
-; CHECK-BE-NEXT:    lxv v3, 16(r3)
 ; CHECK-BE-NEXT:    dmxvbf16gerx2 dmr0, vsp34, vs0
 ; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
 ; CHECK-BE-NEXT:    stxvp vsp36, 96(r5)
@@ -333,8 +319,7 @@ define void @test_dmxvbf16gerx2pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-NEXT:    lxvp vsp36, 32(r3)
 ; CHECK-NEXT:    lxvp vsp32, 64(r3)
 ; CHECK-NEXT:    lxvp vsp38, 96(r3)
-; CHECK-NEXT:    lxv v8, 16(r4)
-; CHECK-NEXT:    lxv v9, 0(r4)
+; CHECK-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-NEXT:    lxv vs0, 0(r5)
 ; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -353,8 +338,7 @@ define void @test_dmxvbf16gerx2pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
 ; CHECK-BE-NEXT:    lxvp vsp32, 32(r3)
 ; CHECK-BE-NEXT:    lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT:    lxv v8, 0(r4)
-; CHECK-BE-NEXT:    lxv v9, 16(r4)
+; CHECK-BE-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-BE-NEXT:    lxv vs0, 0(r5)
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -384,8 +368,7 @@ define void @test_dmxvbf16gerx2pn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-NEXT:    lxvp vsp36, 32(r3)
 ; CHECK-NEXT:    lxvp vsp32, 64(r3)
 ; CHECK-NEXT:    lxvp vsp38, 96(r3)
-; CHECK-NEXT:    lxv v8, 16(r4)
-; CHECK-NEXT:    lxv v9, 0(r4)
+; CHECK-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-NEXT:    lxv vs0, 0(r5)
 ; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -404,8 +387,7 @@ define void @test_dmxvbf16gerx2pn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
 ; CHECK-BE-NEXT:    lxvp vsp32, 32(r3)
 ; CHECK-BE-NEXT:    lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT:    lxv v8, 0(r4)
-; CHECK-BE-NEXT:    lxv v9, 16(r4)
+; CHECK-BE-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-BE-NEXT:    lxv vs0, 0(r5)
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -435,8 +417,7 @@ define void @test_dmxvbf16gerx2np(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-NEXT:    lxvp vsp36, 32(r3)
 ; CHECK-NEXT:    lxvp vsp32, 64(r3)
 ; CHECK-NEXT:    lxvp vsp38, 96(r3)
-; CHECK-NEXT:    lxv v8, 16(r4)
-; CHECK-NEXT:    lxv v9, 0(r4)
+; CHECK-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-NEXT:    lxv vs0, 0(r5)
 ; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -455,8 +436,7 @@ define void @test_dmxvbf16gerx2np(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
 ; CHECK-BE-NEXT:    lxvp vsp32, 32(r3)
 ; CHECK-BE-NEXT:    lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT:    lxv v8, 0(r4)
-; CHECK-BE-NEXT:    lxv v9, 16(r4)
+; CHECK-BE-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-BE-NEXT:    lxv vs0, 0(r5)
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -486,8 +466,7 @@ define void @test_dmxvbf16gerx2nn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-NEXT:    lxvp vsp36, 32(r3)
 ; CHECK-NEXT:    lxvp vsp32, 64(r3)
 ; CHECK-NEXT:    lxvp vsp38, 96(r3)
-; CHECK-NEXT:    lxv v8, 16(r4)
-; CHECK-NEXT:    lxv v9, 0(r4)
+; CHECK-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-NEXT:    lxv vs0, 0(r5)
 ; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -506,8 +485,7 @@ define void @test_dmxvbf16gerx2nn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
 ; CHECK-BE-NEXT:    lxvp vsp32, 32(r3)
 ; CHECK-BE-NEXT:    lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT:    lxv v8, 0(r4)
-; CHECK-BE-NEXT:    lxv v9, 16(r4)
+; CHECK-BE-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-BE-NEXT:    lxv vs0, 0(r5)
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -533,9 +511,8 @@ declare <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2(<256 x i1>, <16 x i8>, i32, i3
 define void @test_pmdmxvbf16gerx2(ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-LABEL: test_pmdmxvbf16gerx2:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxv v2, 16(r3)
+; CHECK-NEXT:    lxvpx vsp34, 0, r3
 ; CHECK-NEXT:    lxv vs0, 0(r4)
-; CHECK-NEXT:    lxv v3, 0(r3)
 ; CHECK-NEXT:    pmdmxvbf16gerx2 dmr0, vsp34, vs0, 33, 5, 2
 ; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
 ; CHECK-NEXT:    stxvp vsp34, 96(r5)
@@ -547,9 +524,8 @@ define void @test_pmdmxvbf16gerx2(ptr %vpp, ptr %vcp, ptr %resp) {
 ;
 ; CHECK-BE-LABEL: test_pmdmxvbf16gerx2:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
+; CHECK-BE-NEXT:    lxvpx vsp34, 0, r3
 ; CHECK-BE-NEXT:    lxv vs0, 0(r4)
-; CHECK-BE-NEXT:    lxv v3, 16(r3)
 ; CHECK-BE-NEXT:    pmdmxvbf16gerx2 dmr0, vsp34, vs0, 33, 5, 2
 ; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
 ; CHECK-BE-NEXT:    stxvp vsp36, 96(r5)
@@ -575,8 +551,7 @@ define void @test_pmdmxvbf16gerx2pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-NEXT:    lxvp vsp36, 32(r3)
 ; CHECK-NEXT:    lxvp vsp32, 64(r3)
 ; CHECK-NEXT:    lxvp vsp38, 96(r3)
-; CHECK-NEXT:    lxv v8, 16(r4)
-; CHECK-NEXT:    lxv v9, 0(r4)
+; CHECK-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-NEXT:    lxv vs0, 0(r5)
 ; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -595,8 +570,7 @@ define void @test_pmdmxvbf16gerx2pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
 ; CHECK-BE-NEXT:    lxvp vsp32, 32(r3)
 ; CHECK-BE-NEXT:    lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT:    lxv v8, 0(r4)
-; CHECK-BE-NEXT:    lxv v9, 16(r4)
+; CHECK-BE-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-BE-NEXT:    lxv vs0, 0(r5)
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -626,8 +600,7 @@ define void @test_pmdmxvbf16gerx2pn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-NEXT:    lxvp vsp36, 32(r3)
 ; CHECK-NEXT:    lxvp vsp32, 64(r3)
 ; CHECK-NEXT:    lxvp vsp38, 96(r3)
-; CHECK-NEXT:    lxv v8, 16(r4)
-; CHECK-NEXT:    lxv v9, 0(r4)
+; CHECK-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-NEXT:    lxv vs0, 0(r5)
 ; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -646,8 +619,7 @@ define void @test_pmdmxvbf16gerx2pn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
 ; CHECK-BE-NEXT:    lxvp vsp32, 32(r3)
 ; CHECK-BE-NEXT:    lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT:    lxv v8, 0(r4)
-; CHECK-BE-NEXT:    lxv v9, 16(r4)
+; CHECK-BE-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-BE-NEXT:    lxv vs0, 0(r5)
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -677,8 +649,7 @@ define void @test_pmdmxvbf16gerx2np(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-NEXT:    lxvp vsp36, 32(r3)
 ; CHECK-NEXT:    lxvp vsp32, 64(r3)
 ; CHECK-NEXT:    lxvp vsp38, 96(r3)
-; CHECK-NEXT:    lxv v8, 16(r4)
-; CHECK-NEXT:    lxv v9, 0(r4)
+; CHECK-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-NEXT:    lxv vs0, 0(r5)
 ; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -697,8 +668,7 @@ define void @test_pmdmxvbf16gerx2np(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
 ; CHECK-BE-NEXT:    lxvp vsp32, 32(r3)
 ; CHECK-BE-NEXT:    lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT:    lxv v8, 0(r4)
-; CHECK-BE-NEXT:    lxv v9, 16(r4)
+; CHECK-BE-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-BE-NEXT:    lxv vs0, 0(r5)
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -728,8 +698,7 @@ define void @test_pmdmxvbf16gerx2nn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-NEXT:    lxvp vsp36, 32(r3)
 ; CHECK-NEXT:    lxvp vsp32, 64(r3)
 ; CHECK-NEXT:    lxvp vsp38, 96(r3)
-; CHECK-NEXT:    lxv v8, 16(r4)
-; CHECK-NEXT:    lxv v9, 0(r4)
+; CHECK-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-NEXT:    lxv vs0, 0(r5)
 ; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -748,8 +717,7 @@ define void @test_pmdmxvbf16gerx2nn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
 ; CHECK-BE-NEXT:    lxvp vsp32, 32(r3)
 ; CHECK-BE-NEXT:    lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT:    lxv v8, 0(r4)
-; CHECK-BE-NEXT:    lxv v9, 16(r4)
+; CHECK-BE-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-BE-NEXT:    lxv vs0, 0(r5)
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -774,9 +742,8 @@ declare <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2(<256 x i1>, <16 x i8>)
 define void @test_dmxvf16gerx2(ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-LABEL: test_dmxvf16gerx2:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxv v2, 16(r3)
+; CHECK-NEXT:    lxvpx vsp34, 0, r3
 ; CHECK-NEXT:    lxv vs0, 0(r4)
-; CHECK-NEXT:    lxv v3, 0(r3)
 ; CHECK-NEXT:    dmxvf16gerx2 dmr0, vsp34, vs0
 ; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
 ; CHECK-NEXT:    stxvp vsp34, 96(r5)
@@ -788,9 +755,8 @@ define void @test_dmxvf16gerx2(ptr %vpp, ptr %vcp, ptr %resp) {
 ;
 ; CHECK-BE-LABEL: test_dmxvf16gerx2:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
+; CHECK-BE-NEXT:    lxvpx vsp34, 0, r3
 ; CHECK-BE-NEXT:    lxv vs0, 0(r4)
-; CHECK-BE-NEXT:    lxv v3, 16(r3)
 ; CHECK-BE-NEXT:    dmxvf16gerx2 dmr0, vsp34, vs0
 ; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
 ; CHECK-BE-NEXT:    stxvp vsp36, 96(r5)
@@ -816,8 +782,7 @@ define void @test_dmxvf16gerx2pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-NEXT:    lxvp vsp36, 32(r3)
 ; CHECK-NEXT:    lxvp vsp32, 64(r3)
 ; CHECK-NEXT:    lxvp vsp38, 96(r3)
-; CHECK-NEXT:    lxv v8, 16(r4)
-; CHECK-NEXT:    lxv v9, 0(r4)
+; CHECK-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-NEXT:    lxv vs0, 0(r5)
 ; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -836,8 +801,7 @@ define void @test_dmxvf16gerx2pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
 ; CHECK-BE-NEXT:    lxvp vsp32, 32(r3)
 ; CHECK-BE-NEXT:    lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT:    lxv v8, 0(r4)
-; CHECK-BE-NEXT:    lxv v9, 16(r4)
+; CHECK-BE-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-BE-NEXT:    lxv vs0, 0(r5)
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -867,8 +831,7 @@ define void @test_dmxvf16gerx2pn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-NEXT:    lxvp vsp36, 32(r3)
 ; CHECK-NEXT:    lxvp vsp32, 64(r3)
 ; CHECK-NEXT:    lxvp vsp38, 96(r3)
-; CHECK-NEXT:    lxv v8, 16(r4)
-; CHECK-NEXT:    lxv v9, 0(r4)
+; CHECK-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-NEXT:    lxv vs0, 0(r5)
 ; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -887,8 +850,7 @@ define void @test_dmxvf16gerx2pn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
 ; CHECK-BE-NEXT:    lxvp vsp32, 32(r3)
 ; CHECK-BE-NEXT:    lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT:    lxv v8, 0(r4)
-; CHECK-BE-NEXT:    lxv v9, 16(r4)
+; CHECK-BE-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-BE-NEXT:    lxv vs0, 0(r5)
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -918,8 +880,7 @@ define void @test_dmxvf16gerx2np(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-NEXT:    lxvp vsp36, 32(r3)
 ; CHECK-NEXT:    lxvp vsp32, 64(r3)
 ; CHECK-NEXT:    lxvp vsp38, 96(r3)
-; CHECK-NEXT:    lxv v8, 16(r4)
-; CHECK-NEXT:    lxv v9, 0(r4)
+; CHECK-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-NEXT:    lxv vs0, 0(r5)
 ; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -938,8 +899,7 @@ define void @test_dmxvf16gerx2np(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
 ; CHECK-BE-NEXT:    lxvp vsp32, 32(r3)
 ; CHECK-BE-NEXT:    lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT:    lxv v8, 0(r4)
-; CHECK-BE-NEXT:    lxv v9, 16(r4)
+; CHECK-BE-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-BE-NEXT:    lxv vs0, 0(r5)
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -969,8 +929,7 @@ define void @test_dmxvf16gerx2nn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-NEXT:    lxvp vsp36, 32(r3)
 ; CHECK-NEXT:    lxvp vsp32, 64(r3)
 ; CHECK-NEXT:    lxvp vsp38, 96(r3)
-; CHECK-NEXT:    lxv v8, 16(r4)
-; CHECK-NEXT:    lxv v9, 0(r4)
+; CHECK-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-NEXT:    lxv vs0, 0(r5)
 ; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -989,8 +948,7 @@ define void @test_dmxvf16gerx2nn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
 ; CHECK-BE-NEXT:    lxvp vsp32, 32(r3)
 ; CHECK-BE-NEXT:    lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT:    lxv v8, 0(r4)
-; CHECK-BE-NEXT:    lxv v9, 16(r4)
+; CHECK-BE-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-BE-NEXT:    lxv vs0, 0(r5)
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -1016,9 +974,8 @@ declare <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2(<256 x i1>, <16 x i8>, i32, i32
 define void @test_pmdmxvf16gerx2(ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-LABEL: test_pmdmxvf16gerx2:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxv v2, 16(r3)
+; CHECK-NEXT:    lxvpx vsp34, 0, r3
 ; CHECK-NEXT:    lxv vs0, 0(r4)
-; CHECK-NEXT:    lxv v3, 0(r3)
 ; CHECK-NEXT:    pmdmxvf16gerx2 dmr0, vsp34, vs0, 33, 5, 2
 ; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
 ; CHECK-NEXT:    stxvp vsp34, 96(r5)
@@ -1030,9 +987,8 @@ define void @test_pmdmxvf16gerx2(ptr %vpp, ptr %vcp, ptr %resp) {
 ;
 ; CHECK-BE-LABEL: test_pmdmxvf16gerx2:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
+; CHECK-BE-NEXT:    lxvpx vsp34, 0, r3
 ; CHECK-BE-NEXT:    lxv vs0, 0(r4)
-; CHECK-BE-NEXT:    lxv v3, 16(r3)
 ; CHECK-BE-NEXT:    pmdmxvf16gerx2 dmr0, vsp34, vs0, 33, 5, 2
 ; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
 ; CHECK-BE-NEXT:    stxvp vsp36, 96(r5)
@@ -1058,8 +1014,7 @@ define void @test_pmdmxvf16gerx2pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-NEXT:    lxvp vsp36, 32(r3)
 ; CHECK-NEXT:    lxvp vsp32, 64(r3)
 ; CHECK-NEXT:    lxvp vsp38, 96(r3)
-; CHECK-NEXT:    lxv v8, 16(r4)
-; CHECK-NEXT:    lxv v9, 0(r4)
+; CHECK-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-NEXT:    lxv vs0, 0(r5)
 ; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -1078,8 +1033,7 @@ define void @test_pmdmxvf16gerx2pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
 ; CHECK-BE-NEXT:    lxvp vsp32, 32(r3)
 ; CHECK-BE-NEXT:    lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT:    lxv v8, 0(r4)
-; CHECK-BE-NEXT:    lxv v9, 16(r4)
+; CHECK-BE-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-BE-NEXT:    lxv vs0, 0(r5)
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -1109,8 +1063,7 @@ define void @test_pmdmxvf16gerx2pn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-NEXT:    lxvp vsp36, 32(r3)
 ; CHECK-NEXT:    lxvp vsp32, 64(r3)
 ; CHECK-NEXT:    lxvp vsp38, 96(r3)
-; CHECK-NEXT:    lxv v8, 16(r4)
-; CHECK-NEXT:    lxv v9, 0(r4)
+; CHECK-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-NEXT:    lxv vs0, 0(r5)
 ; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -1129,8 +1082,7 @@ define void @test_pmdmxvf16gerx2pn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
 ; CHECK-BE-NEXT:    lxvp vsp32, 32(r3)
 ; CHECK-BE-NEXT:    lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT:    lxv v8, 0(r4)
-; CHECK-BE-NEXT:    lxv v9, 16(r4)
+; CHECK-BE-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-BE-NEXT:    lxv vs0, 0(r5)
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -1160,8 +1112,7 @@ define void @test_pmdmxvf16gerx2np(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-NEXT:    lxvp vsp36, 32(r3)
 ; CHECK-NEXT:    lxvp vsp32, 64(r3)
 ; CHECK-NEXT:    lxvp vsp38, 96(r3)
-; CHECK-NEXT:    lxv v8, 16(r4)
-; CHECK-NEXT:    lxv v9, 0(r4)
+; CHECK-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-NEXT:    lxv vs0, 0(r5)
 ; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -1180,8 +1131,7 @@ define void @test_pmdmxvf16gerx2np(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
 ; CHECK-BE-NEXT:    lxvp vsp32, 32(r3)
 ; CHECK-BE-NEXT:    lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT:    lxv v8, 0(r4)
-; CHECK-BE-NEXT:    lxv v9, 16(r4)
+; CHECK-BE-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-BE-NEXT:    lxv vs0, 0(r5)
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -1211,8 +1161,7 @@ define void @test_pmdmxvf16gerx2nn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-NEXT:    lxvp vsp36, 32(r3)
 ; CHECK-NEXT:    lxvp vsp32, 64(r3)
 ; CHECK-NEXT:    lxvp vsp38, 96(r3)
-; CHECK-NEXT:    lxv v8, 16(r4)
-; CHECK-NEXT:    lxv v9, 0(r4)
+; CHECK-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-NEXT:    lxv vs0, 0(r5)
 ; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -1231,8 +1180,7 @@ define void @test_pmdmxvf16gerx2nn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
 ; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
 ; CHECK-BE-NEXT:    lxvp vsp32, 32(r3)
 ; CHECK-BE-NEXT:    lxvp vsp38, 0(r3)
-; CHECK-BE-NEXT:    lxv v8, 0(r4)
-; CHECK-BE-NEXT:    lxv v9, 16(r4)
+; CHECK-BE-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-BE-NEXT:    lxv vs0, 0(r5)
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
diff --git a/llvm/test/CodeGen/PowerPC/dmr-copy.ll b/llvm/test/CodeGen/PowerPC/dmr-copy.ll
index d5a24309f94d5..69aaba58e4c15 100644
--- a/llvm/test/CodeGen/PowerPC/dmr-copy.ll
+++ b/llvm/test/CodeGen/PowerPC/dmr-copy.ll
@@ -37,12 +37,10 @@ define void @test_wacc_copy(ptr noundef %vdmrp, ptr noundef %vpp, <16 x i8> noun
 ; CHECK-NEXT:    stxvp vsp34, 160(r31)
 ; CHECK-NEXT:    stxvp vsp36, 128(r31)
 ; CHECK-NEXT:    ld r3, 352(r31)
-; CHECK-NEXT:    lxv v2, 16(r3)
-; CHECK-NEXT:    lxv v3, 0(r3)
-; CHECK-NEXT:    stxv v2, 112(r31)
-; CHECK-NEXT:    stxv v3, 96(r31)
-; CHECK-NEXT:    lxv v2, 112(r31)
-; CHECK-NEXT:    lxv v3, 96(r31)
+; CHECK-NEXT:    lxvpx vsp34, 0, r3
+; CHECK-NEXT:    addi r3, r31, 96
+; CHECK-NEXT:    stxvpx vsp34, 0, r3
+; CHECK-NEXT:    lxvpx vsp34, 0, r3
 ; CHECK-NEXT:    lxv vs0, 336(r31)
 ; CHECK-NEXT:    dmxvi8gerx4 dmr0, vsp34, vs0
 ; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -96,12 +94,10 @@ define void @test_wacc_copy(ptr noundef %vdmrp, ptr noundef %vpp, <16 x i8> noun
 ; CHECK-BE-NEXT:    stxvp vsp36, 160(r31)
 ; CHECK-BE-NEXT:    stxvp vsp34, 128(r31)
 ; CHECK-BE-NEXT:    ld r3, 352(r31)
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
-; CHECK-BE-NEXT:    lxv v3, 16(r3)
-; CHECK-BE-NEXT:    stxv v3, 112(r31)
-; CHECK-BE-NEXT:    stxv v2, 96(r31)
-; CHECK-BE-NEXT:    lxv v2, 96(r31)
-; CHECK-BE-NEXT:    lxv v3, 112(r31)
+; CHECK-BE-NEXT:    lxvpx vsp34, 0, r3
+; CHECK-BE-NEXT:    addi r3, r31, 96
+; CHECK-BE-NEXT:    stxvpx vsp34, 0, r3
+; CHECK-BE-NEXT:    lxvpx vsp34, 0, r3
 ; CHECK-BE-NEXT:    lxv vs0, 336(r31)
 ; CHECK-BE-NEXT:    dmxvi8gerx4 dmr0, vsp34, vs0
 ; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
diff --git a/llvm/test/CodeGen/PowerPC/dmr-enable.ll b/llvm/test/CodeGen/PowerPC/dmr-enable.ll
index fe76ddc19e817..347301ee719b0 100644
--- a/llvm/test/CodeGen/PowerPC/dmr-enable.ll
+++ b/llvm/test/CodeGen/PowerPC/dmr-enable.ll
@@ -134,22 +134,18 @@ define void @text512(ptr %vp1, ptr %rp1, ptr %rp2, ptr %rp3, ptr %rp4)  {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    dmsetdmrz dmr0
 ; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
-; CHECK-NEXT:    stxv v2, 16(r4)
-; CHECK-NEXT:    stxv v3, 0(r4)
+; CHECK-NEXT:    stxvpx vsp34, 0, r4
 ; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
-; CHECK-NEXT:    stxv v2, 16(r6)
-; CHECK-NEXT:    stxv v3, 0(r6)
+; CHECK-NEXT:    stxvpx vsp34, 0, r6
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: text512:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    dmsetdmrz dmr0
 ; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
-; CHECK-BE-NEXT:    stxv v3, 16(r4)
-; CHECK-BE-NEXT:    stxv v2, 0(r4)
+; CHECK-BE-NEXT:    stxvpx vsp34, 0, r4
 ; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
-; CHECK-BE-NEXT:    stxv v3, 16(r6)
-; CHECK-BE-NEXT:    stxv v2, 0(r6)
+; CHECK-BE-NEXT:    stxvpx vsp34, 0, r6
 ; CHECK-BE-NEXT:    blr
 entry:
   %z = call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
@@ -167,34 +163,26 @@ define void @text256(ptr %vp1, ptr %rp1, ptr %rp2, ptr %rp3, ptr %rp4)  {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    dmsetdmrz dmr0
 ; CHECK-NEXT:    dmxxextfdmr256 vsp34, dmr0, 0
-; CHECK-NEXT:    stxv v2, 16(r4)
-; CHECK-NEXT:    stxv v3, 0(r4)
+; CHECK-NEXT:    stxvpx vsp34, 0, r4
 ; CHECK-NEXT:    dmxxextfdmr256 vsp34, dmr0, 1
-; CHECK-NEXT:    stxv v2, 16(r5)
-; CHECK-NEXT:    stxv v3, 0(r5)
+; CHECK-NEXT:    stxvpx vsp34, 0, r5
 ; CHECK-NEXT:    dmxxextfdmr256 vsp34, dmr0, 2
-; CHECK-NEXT:    stxv v2, 16(r6)
-; CHECK-NEXT:    stxv v3, 0(r6)
+; CHECK-NEXT:    stxvpx vsp34, 0, r6
 ; CHECK-NEXT:    dmxxextfdmr256 vsp34, dmr0, 3
-; CHECK-NEXT:    stxv v2, 16(r7)
-; CHECK-NEXT:    stxv v3, 0(r7)
+; CHECK-NEXT:    stxvpx vsp34, 0, r7
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: text256:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    dmsetdmrz dmr0
 ; CHECK-BE-NEXT:    dmxxextfdmr256 vsp34, dmr0, 0
-; CHECK-BE-NEXT:    stxv v3, 16(r4)
-; CHECK-BE-NEXT:    stxv v2, 0(r4)
+; CHECK-BE-NEXT:    stxvpx vsp34, 0, r4
 ; CHECK-BE-NEXT:    dmxxextfdmr256 vsp34, dmr0, 1
-; CHECK-BE-NEXT:    stxv v3, 16(r5)
-; CHECK-BE-NEXT:    stxv v2, 0(r5)
+; CHECK-BE-NEXT:    stxvpx vsp34, 0, r5
 ; CHECK-BE-NEXT:    dmxxextfdmr256 vsp34, dmr0, 2
-; CHECK-BE-NEXT:    stxv v3, 16(r6)
-; CHECK-BE-NEXT:    stxv v2, 0(r6)
+; CHECK-BE-NEXT:    stxvpx vsp34, 0, r6
 ; CHECK-BE-NEXT:    dmxxextfdmr256 vsp34, dmr0, 3
-; CHECK-BE-NEXT:    stxv v3, 16(r7)
-; CHECK-BE-NEXT:    stxv v2, 0(r7)
+; CHECK-BE-NEXT:    stxvpx vsp34, 0, r7
 ; CHECK-BE-NEXT:    blr
 entry:
   %z = call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
@@ -212,10 +200,8 @@ entry:
 define void @tins512(ptr %vp1, ptr %vp2, ptr %vp3, ptr %vp4, ptr %rp1, ptr %rp2)  {
 ; CHECK-LABEL: tins512:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxv v2, 16(r3)
-; CHECK-NEXT:    lxv v3, 0(r3)
-; CHECK-NEXT:    lxv v4, 16(r4)
-; CHECK-NEXT:    lxv v5, 0(r4)
+; CHECK-NEXT:    lxvpx vsp34, 0, r3
+; CHECK-NEXT:    lxvpx vsp36, 0, r4
 ; CHECK-NEXT:    dmsetdmrz dmr0
 ; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp34, vsp36, 0
 ; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -224,10 +210,8 @@ define void @tins512(ptr %vp1, ptr %vp2, ptr %vp3, ptr %vp4, ptr %rp1, ptr %rp2)
 ; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
 ; CHECK-NEXT:    stxvp vsp34, 32(r7)
 ; CHECK-NEXT:    stxvp vsp36, 0(r7)
-; CHECK-NEXT:    lxv v2, 16(r5)
-; CHECK-NEXT:    lxv v4, 16(r6)
-; CHECK-NEXT:    lxv v3, 0(r5)
-; CHECK-NEXT:    lxv v5, 0(r6)
+; CHECK-NEXT:    lxvpx vsp34, 0, r5
+; CHECK-NEXT:    lxvpx vsp36, 0, r6
 ; CHECK-NEXT:    dmxxextfdmr512 vsp32, vsp38, wacc0, 0
 ; CHECK-NEXT:    stxvp vsp32, 96(r8)
 ; CHECK-NEXT:    stxvp vsp38, 64(r8)
@@ -239,10 +223,8 @@ define void @tins512(ptr %vp1, ptr %vp2, ptr %vp3, ptr %vp4, ptr %rp1, ptr %rp2)
 ;
 ; CHECK-BE-LABEL: tins512:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
-; CHECK-BE-NEXT:    lxv v4, 0(r4)
-; CHECK-BE-NEXT:    lxv v3, 16(r3)
-; CHECK-BE-NEXT:    lxv v5, 16(r4)
+; CHECK-BE-NEXT:    lxvpx vsp34, 0, r3
+; CHECK-BE-NEXT:    lxvpx vsp36, 0, r4
 ; CHECK-BE-NEXT:    dmsetdmrz dmr0
 ; CHECK-BE-NEXT:    dmxxextfdmr512 vsp32, vsp38, wacc_hi0, 1
 ; CHECK-BE-NEXT:    stxvp vsp38, 96(r7)
@@ -251,10 +233,8 @@ define void @tins512(ptr %vp1, ptr %vp2, ptr %vp3, ptr %vp4, ptr %rp1, ptr %rp2)
 ; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
 ; CHECK-BE-NEXT:    stxvp vsp36, 32(r7)
 ; CHECK-BE-NEXT:    stxvp vsp34, 0(r7)
-; CHECK-BE-NEXT:    lxv v2, 0(r5)
-; CHECK-BE-NEXT:    lxv v4, 0(r6)
-; CHECK-BE-NEXT:    lxv v3, 16(r5)
-; CHECK-BE-NEXT:    lxv v5, 16(r6)
+; CHECK-BE-NEXT:    lxvpx vsp34, 0, r5
+; CHECK-BE-NEXT:    lxvpx vsp36, 0, r6
 ; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1
 ; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
 ; CHECK-BE-NEXT:    stxvp vsp36, 96(r8)
@@ -279,8 +259,7 @@ entry:
 define void @tins256(ptr %vp1, ptr %vp2, ptr %vp3, ptr %vp4, ptr %rp1, ptr %rp2, ptr %rp3, ptr %rp4)  {
 ; CHECK-LABEL: tins256:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxv v2, 16(r3)
-; CHECK-NEXT:    lxv v3, 0(r3)
+; CHECK-NEXT:    lxvpx vsp34, 0, r3
 ; CHECK-NEXT:    dmsetdmrz dmr0
 ; CHECK-NEXT:    dmxxinstdmr256 dmr0, vsp34, 0
 ; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -289,8 +268,7 @@ define void @tins256(ptr %vp1, ptr %vp2, ptr %vp3, ptr %vp4, ptr %rp1, ptr %rp2,
 ; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
 ; CHECK-NEXT:    stxvp vsp34, 32(r7)
 ; CHECK-NEXT:    stxvp vsp36, 0(r7)
-; CHECK-NEXT:    lxv v2, 16(r4)
-; CHECK-NEXT:    lxv v3, 0(r4)
+; CHECK-NEXT:    lxvpx vsp34, 0, r4
 ; CHECK-NEXT:    dmxxinstdmr256 dmr0, vsp34, 1
 ; CHECK-NEXT:    dmxxextfdmr512 vsp36, vsp32, wacc0, 0
 ; CHECK-NEXT:    stxvp vsp36, 96(r8)
@@ -316,8 +294,7 @@ define void @tins256(ptr %vp1, ptr %vp2, ptr %vp3, ptr %vp4, ptr %rp1, ptr %rp2,
 ;
 ; CHECK-BE-LABEL: tins256:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
-; CHECK-BE-NEXT:    lxv v3, 16(r3)
+; CHECK-BE-NEXT:    lxvpx vsp34, 0, r3
 ; CHECK-BE-NEXT:    dmsetdmrz dmr0
 ; CHECK-BE-NEXT:    dmxxinstdmr256 dmr0, vsp34, 0
 ; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
@@ -326,8 +303,7 @@ define void @tins256(ptr %vp1, ptr %vp2, ptr %vp3, ptr %vp4, ptr %rp1, ptr %rp2,
 ; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
 ; CHECK-BE-NEXT:    stxvp vsp36, 32(r7)
 ; CHECK-BE-NEXT:    stxvp vsp34, 0(r7)
-; CHECK-BE-NEXT:    lxv v2, 0(r4)
-; CHECK-BE-NEXT:    lxv v3, 16(r4)
+; CHECK-BE-NEXT:    lxvpx vsp34, 0, r4
 ; CHECK-BE-NEXT:    dmxxextfdmr512 vsp36, vsp32, wacc_hi0, 1
 ; CHECK-BE-NEXT:    stxvp vsp32, 96(r8)
 ; CHECK-BE-NEXT:    stxvp vsp36, 64(r8)
diff --git a/llvm/test/CodeGen/PowerPC/dmr-spill.ll b/llvm/test/CodeGen/PowerPC/dmr-spill.ll
index e1d388354e198..9f922f95d40f5 100644
--- a/llvm/test/CodeGen/PowerPC/dmr-spill.ll
+++ b/llvm/test/CodeGen/PowerPC/dmr-spill.ll
@@ -23,8 +23,7 @@ define void @spillDMRreg(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) nounwind {
 ; CHECK-NEXT:    lxvp vsp36, 32(r3)
 ; CHECK-NEXT:    lxvp vsp32, 64(r3)
 ; CHECK-NEXT:    lxvp vsp38, 96(r3)
-; CHECK-NEXT:    lxv v8, 16(r4)
-; CHECK-NEXT:    lxv v9, 0(r4)
+; CHECK-NEXT:    lxvpx vsp40, 0, r4
 ; CHECK-NEXT:    lxv vs0, 0(r5)
 ; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -65,8 +64,7 @@ define void @spillDMRreg(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) nounwind {
 ; AIX-NEXT:    lxvp vsp36, 64(r3)
 ; AIX-NEXT:    lxvp vsp32, 32(r3)
 ; AIX-NEXT:    lxvp vsp38, 0(r3)
-; AIX-NEXT:    lxv v8, 0(r4)
-; AIX-NEXT:    lxv v9, 16(r4)
+; AIX-NEXT:    lxvpx vsp40, 0, r4
 ; AIX-NEXT:    lxv vs0, 0(r5)
 ; AIX-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; AIX-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
@@ -108,8 +106,7 @@ define void @spillDMRreg(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) nounwind {
 ; AIX32-NEXT:    lxvp vsp36, 64(r3)
 ; AIX32-NEXT:    lxvp vsp32, 32(r3)
 ; AIX32-NEXT:    lxvp vsp38, 0(r3)
-; AIX32-NEXT:    lxv v8, 0(r4)
-; AIX32-NEXT:    lxv v9, 16(r4)
+; AIX32-NEXT:    lxvpx vsp40, 0, r4
 ; AIX32-NEXT:    lxv vs0, 0(r5)
 ; AIX32-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
 ; AIX32-NEXT:    dmxxinstdmr512 wacc0, vsp38, vsp32, 0
diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll b/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll
index 94121f09e36be..4c029c0b90dd9 100644
--- a/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll
@@ -111,17 +111,18 @@ define void @testMultiply(ptr nocapture noundef readonly %a, ptr nocapture nound
 ; CHECK-LE-WACC-NEXT:    stxv v31, -48(r30) # 16-byte Folded Spill
 ; CHECK-LE-WACC-NEXT:    lxv v31, 0(r3)
 ; CHECK-LE-WACC-NEXT:    lxv v30, 0(r4)
-; CHECK-LE-WACC-NEXT:    addi r3, r1, 32
+; CHECK-LE-WACC-NEXT:    std r28, -32(r30) # 8-byte Folded Spill
 ; CHECK-LE-WACC-NEXT:    std r29, -24(r30) # 8-byte Folded Spill
+; CHECK-LE-WACC-NEXT:    addi r28, r1, 32
+; CHECK-LE-WACC-NEXT:    mr r3, r28
 ; CHECK-LE-WACC-NEXT:    vmr v2, v31
 ; CHECK-LE-WACC-NEXT:    vmr v3, v30
 ; CHECK-LE-WACC-NEXT:    mr r29, r5
 ; CHECK-LE-WACC-NEXT:    bl _Z15buildVectorPairPu13__vector_pairDv16_hS0_ at notoc
 ; CHECK-LE-WACC-NEXT:    dmxxsetaccz wacc0
 ; CHECK-LE-WACC-NEXT:    xvf32gerpp wacc0, v31, v30
-; CHECK-LE-WACC-NEXT:    lxv vs0, 48(r1)
-; CHECK-LE-WACC-NEXT:    lxv vs1, 32(r1)
-; CHECK-LE-WACC-NEXT:    xvf32gerpp wacc0, vs1, vs0
+; CHECK-LE-WACC-NEXT:    lxvpx vsp34, 0, r28
+; CHECK-LE-WACC-NEXT:    xvf32gerpp wacc0, v3, v2
 ; CHECK-LE-WACC-NEXT:    dmxxextfdmr512 vsp36, vsp34, wacc0, 0
 ; CHECK-LE-WACC-NEXT:    dmxxinstdmr512 wacc0, vsp36, vsp34, 0
 ; CHECK-LE-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -132,6 +133,7 @@ define void @testMultiply(ptr nocapture noundef readonly %a, ptr nocapture nound
 ; CHECK-LE-WACC-NEXT:    lxv v31, -48(r30) # 16-byte Folded Reload
 ; CHECK-LE-WACC-NEXT:    lxv v30, -64(r30) # 16-byte Folded Reload
 ; CHECK-LE-WACC-NEXT:    ld r29, -24(r30) # 8-byte Folded Reload
+; CHECK-LE-WACC-NEXT:    ld r28, -32(r30) # 8-byte Folded Reload
 ; CHECK-LE-WACC-NEXT:    mr r1, r30
 ; CHECK-LE-WACC-NEXT:    ld r0, 16(r1)
 ; CHECK-LE-WACC-NEXT:    ld r30, -16(r1)
@@ -151,8 +153,10 @@ define void @testMultiply(ptr nocapture noundef readonly %a, ptr nocapture nound
 ; CHECK-BE-WACC-NEXT:    stxv v31, -48(r30) # 16-byte Folded Spill
 ; CHECK-BE-WACC-NEXT:    lxv v31, 0(r3)
 ; CHECK-BE-WACC-NEXT:    lxv v30, 0(r4)
-; CHECK-BE-WACC-NEXT:    addi r3, r1, 128
+; CHECK-BE-WACC-NEXT:    std r28, -32(r30) # 8-byte Folded Spill
 ; CHECK-BE-WACC-NEXT:    std r29, -24(r30) # 8-byte Folded Spill
+; CHECK-BE-WACC-NEXT:    addi r28, r1, 128
+; CHECK-BE-WACC-NEXT:    mr r3, r28
 ; CHECK-BE-WACC-NEXT:    vmr v2, v31
 ; CHECK-BE-WACC-NEXT:    vmr v3, v30
 ; CHECK-BE-WACC-NEXT:    mr r29, r5
@@ -160,9 +164,8 @@ define void @testMultiply(ptr nocapture noundef readonly %a, ptr nocapture nound
 ; CHECK-BE-WACC-NEXT:    nop
 ; CHECK-BE-WACC-NEXT:    dmxxsetaccz wacc0
 ; CHECK-BE-WACC-NEXT:    xvf32gerpp wacc0, v31, v30
-; CHECK-BE-WACC-NEXT:    lxv vs0, 128(r1)
-; CHECK-BE-WACC-NEXT:    lxv vs1, 144(r1)
-; CHECK-BE-WACC-NEXT:    xvf32gerpp wacc0, vs0, vs1
+; CHECK-BE-WACC-NEXT:    lxvpx vsp34, 0, r28
+; CHECK-BE-WACC-NEXT:    xvf32gerpp wacc0, v2, v3
 ; CHECK-BE-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
 ; CHECK-BE-WACC-NEXT:    vmr v1, v2
 ; CHECK-BE-WACC-NEXT:    vmr v7, v4
@@ -177,6 +180,7 @@ define void @testMultiply(ptr nocapture noundef readonly %a, ptr nocapture nound
 ; CHECK-BE-WACC-NEXT:    lxv v31, -48(r30) # 16-byte Folded Reload
 ; CHECK-BE-WACC-NEXT:    lxv v30, -64(r30) # 16-byte Folded Reload
 ; CHECK-BE-WACC-NEXT:    ld r29, -24(r30) # 8-byte Folded Reload
+; CHECK-BE-WACC-NEXT:    ld r28, -32(r30) # 8-byte Folded Reload
 ; CHECK-BE-WACC-NEXT:    mr r1, r30
 ; CHECK-BE-WACC-NEXT:    ld r0, 16(r1)
 ; CHECK-BE-WACC-NEXT:    ld r30, -16(r1)
diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
index bc5d5bed36e9b..516a926bf031f 100644
--- a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
@@ -510,10 +510,10 @@ define dso_local void @testLdStPair(i64 %SrcIdx, i64 %DstIdx) {
 ; BE-PAIRED-WACC:       # %bb.0: # %entry
 ; BE-PAIRED-WACC-NEXT:    addis r3, r2, g at toc@ha
 ; BE-PAIRED-WACC-NEXT:    addi r3, r3, g at toc@l
-; BE-PAIRED-WACC-NEXT:    lxv vs0, 48(r3)
-; BE-PAIRED-WACC-NEXT:    lxv vs1, 32(r3)
-; BE-PAIRED-WACC-NEXT:    stxv vs0, 80(r3)
-; BE-PAIRED-WACC-NEXT:    stxv vs1, 64(r3)
+; BE-PAIRED-WACC-NEXT:    li r4, 32
+; BE-PAIRED-WACC-NEXT:    lxvpx vsp34, r3, r4
+; BE-PAIRED-WACC-NEXT:    li r4, 64
+; BE-PAIRED-WACC-NEXT:    stxvpx vsp34, r3, r4
 ; BE-PAIRED-WACC-NEXT:    blr
 ;
 ; LE-PWR9-LABEL: testLdStPair:
@@ -589,13 +589,9 @@ define dso_local void @testXLdStPair(i64 %SrcIdx, i64 %DstIdx) {
 ; LE-PAIRED-WACC:       # %bb.0: # %entry
 ; LE-PAIRED-WACC-NEXT:    sldi r3, r3, 5
 ; LE-PAIRED-WACC-NEXT:    paddi r5, 0, g at PCREL, 1
-; LE-PAIRED-WACC-NEXT:    add r6, r5, r3
-; LE-PAIRED-WACC-NEXT:    lxvx vs0, r5, r3
-; LE-PAIRED-WACC-NEXT:    lxv vs1, 16(r6)
+; LE-PAIRED-WACC-NEXT:    lxvpx vsp34, r5, r3
 ; LE-PAIRED-WACC-NEXT:    sldi r3, r4, 5
-; LE-PAIRED-WACC-NEXT:    add r4, r5, r3
-; LE-PAIRED-WACC-NEXT:    stxvx vs0, r5, r3
-; LE-PAIRED-WACC-NEXT:    stxv vs1, 16(r4)
+; LE-PAIRED-WACC-NEXT:    stxvpx vsp34, r5, r3
 ; LE-PAIRED-WACC-NEXT:    blr
 ;
 ; BE-PAIRED-LABEL: testXLdStPair:
@@ -617,13 +613,9 @@ define dso_local void @testXLdStPair(i64 %SrcIdx, i64 %DstIdx) {
 ; BE-PAIRED-WACC-NEXT:    addis r5, r2, g at toc@ha
 ; BE-PAIRED-WACC-NEXT:    sldi r3, r3, 5
 ; BE-PAIRED-WACC-NEXT:    addi r5, r5, g at toc@l
-; BE-PAIRED-WACC-NEXT:    add r6, r5, r3
-; BE-PAIRED-WACC-NEXT:    lxvx vs0, r5, r3
-; BE-PAIRED-WACC-NEXT:    lxv vs1, 16(r6)
+; BE-PAIRED-WACC-NEXT:    lxvpx vsp34, r5, r3
 ; BE-PAIRED-WACC-NEXT:    sldi r3, r4, 5
-; BE-PAIRED-WACC-NEXT:    add r4, r5, r3
-; BE-PAIRED-WACC-NEXT:    stxvx vs0, r5, r3
-; BE-PAIRED-WACC-NEXT:    stxv vs1, 16(r4)
+; BE-PAIRED-WACC-NEXT:    stxvpx vsp34, r5, r3
 ; BE-PAIRED-WACC-NEXT:    blr
 ;
 ; LE-PWR9-LABEL: testXLdStPair:
@@ -722,10 +714,10 @@ define dso_local void @testUnalignedLdStPair() {
 ; BE-PAIRED-WACC:       # %bb.0: # %entry
 ; BE-PAIRED-WACC-NEXT:    addis r3, r2, g at toc@ha
 ; BE-PAIRED-WACC-NEXT:    addi r3, r3, g at toc@l
-; BE-PAIRED-WACC-NEXT:    plxv vs0, 27(r3), 0
-; BE-PAIRED-WACC-NEXT:    plxv vs1, 11(r3), 0
-; BE-PAIRED-WACC-NEXT:    pstxv vs0, 35(r3), 0
-; BE-PAIRED-WACC-NEXT:    pstxv vs1, 19(r3), 0
+; BE-PAIRED-WACC-NEXT:    li r4, 11
+; BE-PAIRED-WACC-NEXT:    lxvpx vsp34, r3, r4
+; BE-PAIRED-WACC-NEXT:    li r4, 19
+; BE-PAIRED-WACC-NEXT:    stxvpx vsp34, r3, r4
 ; BE-PAIRED-WACC-NEXT:    blr
 ;
 ; LE-PWR9-LABEL: testUnalignedLdStPair:
diff --git a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll
index 35f35706b8690..940438b6f8da5 100644
--- a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll
@@ -2280,8 +2280,7 @@ define void @test33(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ;
 ; CHECK-WACC-LABEL: test33:
 ; CHECK-WACC:       # %bb.0: # %entry
-; CHECK-WACC-NEXT:    lxv v4, 16(r4)
-; CHECK-WACC-NEXT:    lxv v5, 0(r4)
+; CHECK-WACC-NEXT:    lxvpx vsp36, 0, r4
 ; CHECK-WACC-NEXT:    xvf64ger wacc0, vsp36, v2
 ; CHECK-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
 ; CHECK-WACC-NEXT:    stxv v4, 48(r7)
@@ -2292,8 +2291,7 @@ define void @test33(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ;
 ; CHECK-BE-WACC-LABEL: test33:
 ; CHECK-BE-WACC:       # %bb.0: # %entry
-; CHECK-BE-WACC-NEXT:    lxv v4, 0(r4)
-; CHECK-BE-WACC-NEXT:    lxv v5, 16(r4)
+; CHECK-BE-WACC-NEXT:    lxvpx vsp36, 0, r4
 ; CHECK-BE-WACC-NEXT:    xvf64ger wacc0, vsp36, v2
 ; CHECK-BE-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
 ; CHECK-BE-WACC-NEXT:    stxv v5, 48(r7)
@@ -2352,8 +2350,7 @@ define void @test34(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ; CHECK-WACC-NEXT:    lxv v1, 32(r3)
 ; CHECK-WACC-NEXT:    lxv v4, 16(r3)
 ; CHECK-WACC-NEXT:    lxv v0, 48(r3)
-; CHECK-WACC-NEXT:    lxv v6, 16(r4)
-; CHECK-WACC-NEXT:    lxv v7, 0(r4)
+; CHECK-WACC-NEXT:    lxvpx vsp38, 0, r4
 ; CHECK-WACC-NEXT:    dmxxinstdmr512 wacc0, vsp32, vsp36, 0
 ; CHECK-WACC-NEXT:    xvf64gerpp wacc0, vsp38, v2
 ; CHECK-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2369,8 +2366,7 @@ define void @test34(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ; CHECK-BE-WACC-NEXT:    lxv v1, 16(r3)
 ; CHECK-BE-WACC-NEXT:    lxv v4, 32(r3)
 ; CHECK-BE-WACC-NEXT:    lxv v0, 0(r3)
-; CHECK-BE-WACC-NEXT:    lxv v6, 0(r4)
-; CHECK-BE-WACC-NEXT:    lxv v7, 16(r4)
+; CHECK-BE-WACC-NEXT:    lxvpx vsp38, 0, r4
 ; CHECK-BE-WACC-NEXT:    dmxxinstdmr512 wacc0, vsp32, vsp36, 0
 ; CHECK-BE-WACC-NEXT:    xvf64gerpp wacc0, vsp38, v2
 ; CHECK-BE-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2431,8 +2427,7 @@ define void @test35(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ; CHECK-WACC-NEXT:    lxv v1, 32(r3)
 ; CHECK-WACC-NEXT:    lxv v4, 16(r3)
 ; CHECK-WACC-NEXT:    lxv v0, 48(r3)
-; CHECK-WACC-NEXT:    lxv v6, 16(r4)
-; CHECK-WACC-NEXT:    lxv v7, 0(r4)
+; CHECK-WACC-NEXT:    lxvpx vsp38, 0, r4
 ; CHECK-WACC-NEXT:    dmxxinstdmr512 wacc0, vsp32, vsp36, 0
 ; CHECK-WACC-NEXT:    xvf64gerpn wacc0, vsp38, v2
 ; CHECK-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2448,8 +2443,7 @@ define void @test35(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ; CHECK-BE-WACC-NEXT:    lxv v1, 16(r3)
 ; CHECK-BE-WACC-NEXT:    lxv v4, 32(r3)
 ; CHECK-BE-WACC-NEXT:    lxv v0, 0(r3)
-; CHECK-BE-WACC-NEXT:    lxv v6, 0(r4)
-; CHECK-BE-WACC-NEXT:    lxv v7, 16(r4)
+; CHECK-BE-WACC-NEXT:    lxvpx vsp38, 0, r4
 ; CHECK-BE-WACC-NEXT:    dmxxinstdmr512 wacc0, vsp32, vsp36, 0
 ; CHECK-BE-WACC-NEXT:    xvf64gerpn wacc0, vsp38, v2
 ; CHECK-BE-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2510,8 +2504,7 @@ define void @test36(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ; CHECK-WACC-NEXT:    lxv v1, 32(r3)
 ; CHECK-WACC-NEXT:    lxv v4, 16(r3)
 ; CHECK-WACC-NEXT:    lxv v0, 48(r3)
-; CHECK-WACC-NEXT:    lxv v6, 16(r4)
-; CHECK-WACC-NEXT:    lxv v7, 0(r4)
+; CHECK-WACC-NEXT:    lxvpx vsp38, 0, r4
 ; CHECK-WACC-NEXT:    dmxxinstdmr512 wacc0, vsp32, vsp36, 0
 ; CHECK-WACC-NEXT:    xvf64gernp wacc0, vsp38, v2
 ; CHECK-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2527,8 +2520,7 @@ define void @test36(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ; CHECK-BE-WACC-NEXT:    lxv v1, 16(r3)
 ; CHECK-BE-WACC-NEXT:    lxv v4, 32(r3)
 ; CHECK-BE-WACC-NEXT:    lxv v0, 0(r3)
-; CHECK-BE-WACC-NEXT:    lxv v6, 0(r4)
-; CHECK-BE-WACC-NEXT:    lxv v7, 16(r4)
+; CHECK-BE-WACC-NEXT:    lxvpx vsp38, 0, r4
 ; CHECK-BE-WACC-NEXT:    dmxxinstdmr512 wacc0, vsp32, vsp36, 0
 ; CHECK-BE-WACC-NEXT:    xvf64gernp wacc0, vsp38, v2
 ; CHECK-BE-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2589,8 +2581,7 @@ define void @test37(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ; CHECK-WACC-NEXT:    lxv v1, 32(r3)
 ; CHECK-WACC-NEXT:    lxv v4, 16(r3)
 ; CHECK-WACC-NEXT:    lxv v0, 48(r3)
-; CHECK-WACC-NEXT:    lxv v6, 16(r4)
-; CHECK-WACC-NEXT:    lxv v7, 0(r4)
+; CHECK-WACC-NEXT:    lxvpx vsp38, 0, r4
 ; CHECK-WACC-NEXT:    dmxxinstdmr512 wacc0, vsp32, vsp36, 0
 ; CHECK-WACC-NEXT:    xvf64gernn wacc0, vsp38, v2
 ; CHECK-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2606,8 +2597,7 @@ define void @test37(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ; CHECK-BE-WACC-NEXT:    lxv v1, 16(r3)
 ; CHECK-BE-WACC-NEXT:    lxv v4, 32(r3)
 ; CHECK-BE-WACC-NEXT:    lxv v0, 0(r3)
-; CHECK-BE-WACC-NEXT:    lxv v6, 0(r4)
-; CHECK-BE-WACC-NEXT:    lxv v7, 16(r4)
+; CHECK-BE-WACC-NEXT:    lxvpx vsp38, 0, r4
 ; CHECK-BE-WACC-NEXT:    dmxxinstdmr512 wacc0, vsp32, vsp36, 0
 ; CHECK-BE-WACC-NEXT:    xvf64gernn wacc0, vsp38, v2
 ; CHECK-BE-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2654,8 +2644,7 @@ define void @test38(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ;
 ; CHECK-WACC-LABEL: test38:
 ; CHECK-WACC:       # %bb.0: # %entry
-; CHECK-WACC-NEXT:    lxv v4, 16(r4)
-; CHECK-WACC-NEXT:    lxv v5, 0(r4)
+; CHECK-WACC-NEXT:    lxvpx vsp36, 0, r4
 ; CHECK-WACC-NEXT:    pmxvf64ger wacc0, vsp36, v2, 0, 0
 ; CHECK-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
 ; CHECK-WACC-NEXT:    stxv v4, 48(r7)
@@ -2666,8 +2655,7 @@ define void @test38(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ;
 ; CHECK-BE-WACC-LABEL: test38:
 ; CHECK-BE-WACC:       # %bb.0: # %entry
-; CHECK-BE-WACC-NEXT:    lxv v4, 0(r4)
-; CHECK-BE-WACC-NEXT:    lxv v5, 16(r4)
+; CHECK-BE-WACC-NEXT:    lxvpx vsp36, 0, r4
 ; CHECK-BE-WACC-NEXT:    pmxvf64ger wacc0, vsp36, v2, 0, 0
 ; CHECK-BE-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
 ; CHECK-BE-WACC-NEXT:    stxv v5, 48(r7)
@@ -2726,8 +2714,7 @@ define void @test39(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ; CHECK-WACC-NEXT:    lxv v1, 32(r3)
 ; CHECK-WACC-NEXT:    lxv v4, 16(r3)
 ; CHECK-WACC-NEXT:    lxv v0, 48(r3)
-; CHECK-WACC-NEXT:    lxv v6, 16(r4)
-; CHECK-WACC-NEXT:    lxv v7, 0(r4)
+; CHECK-WACC-NEXT:    lxvpx vsp38, 0, r4
 ; CHECK-WACC-NEXT:    dmxxinstdmr512 wacc0, vsp32, vsp36, 0
 ; CHECK-WACC-NEXT:    pmxvf64gerpp wacc0, vsp38, v2, 0, 0
 ; CHECK-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2743,8 +2730,7 @@ define void @test39(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ; CHECK-BE-WACC-NEXT:    lxv v1, 16(r3)
 ; CHECK-BE-WACC-NEXT:    lxv v4, 32(r3)
 ; CHECK-BE-WACC-NEXT:    lxv v0, 0(r3)
-; CHECK-BE-WACC-NEXT:    lxv v6, 0(r4)
-; CHECK-BE-WACC-NEXT:    lxv v7, 16(r4)
+; CHECK-BE-WACC-NEXT:    lxvpx vsp38, 0, r4
 ; CHECK-BE-WACC-NEXT:    dmxxinstdmr512 wacc0, vsp32, vsp36, 0
 ; CHECK-BE-WACC-NEXT:    pmxvf64gerpp wacc0, vsp38, v2, 0, 0
 ; CHECK-BE-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2805,8 +2791,7 @@ define void @test40(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ; CHECK-WACC-NEXT:    lxv v1, 32(r3)
 ; CHECK-WACC-NEXT:    lxv v4, 16(r3)
 ; CHECK-WACC-NEXT:    lxv v0, 48(r3)
-; CHECK-WACC-NEXT:    lxv v6, 16(r4)
-; CHECK-WACC-NEXT:    lxv v7, 0(r4)
+; CHECK-WACC-NEXT:    lxvpx vsp38, 0, r4
 ; CHECK-WACC-NEXT:    dmxxinstdmr512 wacc0, vsp32, vsp36, 0
 ; CHECK-WACC-NEXT:    pmxvf64gerpn wacc0, vsp38, v2, 0, 0
 ; CHECK-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2822,8 +2807,7 @@ define void @test40(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ; CHECK-BE-WACC-NEXT:    lxv v1, 16(r3)
 ; CHECK-BE-WACC-NEXT:    lxv v4, 32(r3)
 ; CHECK-BE-WACC-NEXT:    lxv v0, 0(r3)
-; CHECK-BE-WACC-NEXT:    lxv v6, 0(r4)
-; CHECK-BE-WACC-NEXT:    lxv v7, 16(r4)
+; CHECK-BE-WACC-NEXT:    lxvpx vsp38, 0, r4
 ; CHECK-BE-WACC-NEXT:    dmxxinstdmr512 wacc0, vsp32, vsp36, 0
 ; CHECK-BE-WACC-NEXT:    pmxvf64gerpn wacc0, vsp38, v2, 0, 0
 ; CHECK-BE-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2884,8 +2868,7 @@ define void @test41(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ; CHECK-WACC-NEXT:    lxv v1, 32(r3)
 ; CHECK-WACC-NEXT:    lxv v4, 16(r3)
 ; CHECK-WACC-NEXT:    lxv v0, 48(r3)
-; CHECK-WACC-NEXT:    lxv v6, 16(r4)
-; CHECK-WACC-NEXT:    lxv v7, 0(r4)
+; CHECK-WACC-NEXT:    lxvpx vsp38, 0, r4
 ; CHECK-WACC-NEXT:    dmxxinstdmr512 wacc0, vsp32, vsp36, 0
 ; CHECK-WACC-NEXT:    pmxvf64gernp wacc0, vsp38, v2, 0, 0
 ; CHECK-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2901,8 +2884,7 @@ define void @test41(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ; CHECK-BE-WACC-NEXT:    lxv v1, 16(r3)
 ; CHECK-BE-WACC-NEXT:    lxv v4, 32(r3)
 ; CHECK-BE-WACC-NEXT:    lxv v0, 0(r3)
-; CHECK-BE-WACC-NEXT:    lxv v6, 0(r4)
-; CHECK-BE-WACC-NEXT:    lxv v7, 16(r4)
+; CHECK-BE-WACC-NEXT:    lxvpx vsp38, 0, r4
 ; CHECK-BE-WACC-NEXT:    dmxxinstdmr512 wacc0, vsp32, vsp36, 0
 ; CHECK-BE-WACC-NEXT:    pmxvf64gernp wacc0, vsp38, v2, 0, 0
 ; CHECK-BE-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2963,8 +2945,7 @@ define void @test42(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ; CHECK-WACC-NEXT:    lxv v1, 32(r3)
 ; CHECK-WACC-NEXT:    lxv v4, 16(r3)
 ; CHECK-WACC-NEXT:    lxv v0, 48(r3)
-; CHECK-WACC-NEXT:    lxv v6, 16(r4)
-; CHECK-WACC-NEXT:    lxv v7, 0(r4)
+; CHECK-WACC-NEXT:    lxvpx vsp38, 0, r4
 ; CHECK-WACC-NEXT:    dmxxinstdmr512 wacc0, vsp32, vsp36, 0
 ; CHECK-WACC-NEXT:    pmxvf64gernn wacc0, vsp38, v2, 0, 0
 ; CHECK-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
@@ -2980,8 +2961,7 @@ define void @test42(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ; CHECK-BE-WACC-NEXT:    lxv v1, 16(r3)
 ; CHECK-BE-WACC-NEXT:    lxv v4, 32(r3)
 ; CHECK-BE-WACC-NEXT:    lxv v0, 0(r3)
-; CHECK-BE-WACC-NEXT:    lxv v6, 0(r4)
-; CHECK-BE-WACC-NEXT:    lxv v7, 16(r4)
+; CHECK-BE-WACC-NEXT:    lxvpx vsp38, 0, r4
 ; CHECK-BE-WACC-NEXT:    dmxxinstdmr512 wacc0, vsp32, vsp36, 0
 ; CHECK-BE-WACC-NEXT:    pmxvf64gernn wacc0, vsp38, v2, 0, 0
 ; CHECK-BE-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
diff --git a/llvm/test/CodeGen/PowerPC/vsx-ldst-with-length.ll b/llvm/test/CodeGen/PowerPC/vsx-ldst-with-length.ll
index e7bc8fbca3202..0483c60a72412 100644
--- a/llvm/test/CodeGen/PowerPC/vsx-ldst-with-length.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-ldst-with-length.ll
@@ -78,15 +78,13 @@ define <256 x i1> @testLXVPRL(ptr %vpp, i64 %b) {
 ; CHECK-LABEL: testLXVPRL:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lxvprl vsp34, r4, r5
-; CHECK-NEXT:    stxv v2, 16(r3)
-; CHECK-NEXT:    stxv v3, 0(r3)
+; CHECK-NEXT:    stxvpx vsp34, 0, r3
 ; CHECK-NEXT:    blr
 ;
 ; AIX-LABEL: testLXVPRL:
 ; AIX:       # %bb.0: # %entry
 ; AIX-NEXT:    lxvprl vsp34, r4, r5
-; AIX-NEXT:    stxv v3, 16(r3)
-; AIX-NEXT:    stxv v2, 0(r3)
+; AIX-NEXT:    stxvpx vsp34, 0, r3
 ; AIX-NEXT:    blr
 entry:
   %0 = tail call <256 x i1> @llvm.ppc.vsx.lxvprl(ptr %vpp, i64 %b)
@@ -98,15 +96,13 @@ define <256 x i1> @testLXVPRLL(ptr %vpp, i64 %b) {
 ; CHECK-LABEL: testLXVPRLL:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lxvprll vsp34, r4, r5
-; CHECK-NEXT:    stxv v2, 16(r3)
-; CHECK-NEXT:    stxv v3, 0(r3)
+; CHECK-NEXT:    stxvpx vsp34, 0, r3
 ; CHECK-NEXT:    blr
 ;
 ; AIX-LABEL: testLXVPRLL:
 ; AIX:       # %bb.0: # %entry
 ; AIX-NEXT:    lxvprll vsp34, r4, r5
-; AIX-NEXT:    stxv v3, 16(r3)
-; AIX-NEXT:    stxv v2, 0(r3)
+; AIX-NEXT:    stxvpx vsp34, 0, r3
 ; AIX-NEXT:    blr
 entry:
   %0 = tail call <256 x i1> @llvm.ppc.vsx.lxvprll(ptr %vpp, i64 %b)
@@ -117,15 +113,13 @@ declare <256 x i1> @llvm.ppc.vsx.lxvprll(ptr, i64)
 define void @testSTXVPRL(ptr %v, ptr %vp, i64 %len) {
 ; CHECK-LABEL: testSTXVPRL:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxv v2, 16(r3)
-; CHECK-NEXT:    lxv v3, 0(r3)
+; CHECK-NEXT:    lxvpx vsp34, 0, r3
 ; CHECK-NEXT:    stxvprl vsp34, r4, r5
 ; CHECK-NEXT:    blr
 ;
 ; AIX-LABEL: testSTXVPRL:
 ; AIX:       # %bb.0: # %entry
-; AIX-NEXT:    lxv v2, 0(r3)
-; AIX-NEXT:    lxv v3, 16(r3)
+; AIX-NEXT:    lxvpx vsp34, 0, r3
 ; AIX-NEXT:    stxvprl vsp34, r4, r5
 ; AIX-NEXT:    blr
 entry:
@@ -138,15 +132,13 @@ declare void @llvm.ppc.vsx.stxvprl(<256 x i1>, ptr, i64)
 define void @testSTXVPRLL(ptr %v, ptr %vp, i64 %len) {
 ; CHECK-LABEL: testSTXVPRLL:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxv v2, 16(r3)
-; CHECK-NEXT:    lxv v3, 0(r3)
+; CHECK-NEXT:    lxvpx vsp34, 0, r3
 ; CHECK-NEXT:    stxvprll vsp34, r4, r5
 ; CHECK-NEXT:    blr
 ;
 ; AIX-LABEL: testSTXVPRLL:
 ; AIX:       # %bb.0: # %entry
-; AIX-NEXT:    lxv v2, 0(r3)
-; AIX-NEXT:    lxv v3, 16(r3)
+; AIX-NEXT:    lxvpx vsp34, 0, r3
 ; AIX-NEXT:    stxvprll vsp34, r4, r5
 ; AIX-NEXT:    blr
 entry:

>From 85a83d9dab98af95d4ecd6a1b0a02989b5e9a232 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Tue, 3 Mar 2026 16:23:37 -0500
Subject: [PATCH 2/3] Add pattern matching for the prefixed forms plxvp/pstxvp

---
 llvm/lib/Target/PowerPC/PPC.td              | 12 ++++++++----
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 12 ++++--------
 llvm/lib/Target/PowerPC/PPCInstrFuture.td   |  2 ++
 llvm/test/CodeGen/PowerPC/mma-acc-memops.ll | 16 ++++++++--------
 4 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index c0abbf6f50804..82d27ecfaecec 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -277,10 +277,6 @@ def FeatureISA3_1 : SubtargetFeature<"isa-v31-instructions", "IsISA3_1",
                                      "true",
                                      "Enable instructions in ISA 3.1.",
                                      [FeatureISA3_0]>;
-def FeatureISAFuture : SubtargetFeature<"isa-future-instructions",
-                                        "IsISAFuture", "true",
-                                        "Enable instructions for Future ISA.",
-                                        [FeatureISA3_1]>;
 def FeatureP9Altivec : SubtargetFeature<"power9-altivec", "HasP9Altivec", "true",
                                         "Enable POWER9 Altivec instructions",
                                         [FeatureISA3_0, FeatureP8Altivec]>;
@@ -292,6 +288,14 @@ def FeatureP10Vector  : SubtargetFeature<"power10-vector", "HasP10Vector",
                                          "true",
                                          "Enable POWER10 vector instructions",
                                          [FeatureISA3_1, FeatureP9Vector]>;
+def FeatureFutureVector : SubtargetFeature<"future-vector", "HasFutureVector",
+                                           "true",
+                                           "Enable Future vector instructions",
+                                           [FeatureP10Vector]>;
+def FeatureISAFuture : SubtargetFeature<"isa-future-instructions",
+                                        "IsISAFuture", "true",
+                                        "Enable instructions for Future ISA.",
+                                        [FeatureISA3_1, FeatureFutureVector]>;
 // A separate feature for this even though it is equivalent to P9Vector
 // because this is a feature of the implementation rather than the architecture
 // and may go away with future CPU's.
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 3b2b134c64149..9781ee9b158c2 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12136,10 +12136,8 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
          "Type unsupported without paired vector support");
 
   // For v256i1 on ISA Future, let the load go through to instruction selection
-  // where it will be matched to lxvp by the instruction patterns, unless it's
-  // a PC-relative load which should use plxv instead.
-  if (VT == MVT::v256i1 && Subtarget.isISAFuture() &&
-      !isPCRelNode(LN->getBasePtr()))
+  // where it will be matched to lxvp/plxvp by the instruction patterns.
+  if (VT == MVT::v256i1 && Subtarget.isISAFuture())
     return Op;
 
   // For other cases, create 2 or 4 v16i8 loads to load the pair or accumulator
@@ -12312,10 +12310,8 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
          "Type unsupported without paired vector support");
 
   // For v256i1 on ISA Future, let the store go through to instruction selection
-  // where it will be matched to stxvp by the instruction patterns, unless it's
-  // a PC-relative store which should use pstxv instead.
-  if (StoreVT == MVT::v256i1 && Subtarget.isISAFuture() &&
-      !isPCRelNode(SN->getBasePtr()))
+  // where it will be matched to stxvp/pstxvp by the instruction patterns.
+  if (StoreVT == MVT::v256i1 && Subtarget.isISAFuture())
     return Op;
 
   // For other cases, create 2 or 4 v16i8 stores to store the pair or
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
index 2b17bfc4b584b..84494b8f8889d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
@@ -601,8 +601,10 @@ def : Pat<(int_ppc_vsx_stxvprll v256i1:$XTp, addr:$RA, i64:$RB), (STXVPRLL $XTp,
 let Predicates = [PairedVectorMemops, IsISAFuture] in {
   def : Pat<(v256i1 (load xoaddr:$src)), (LXVPX xoaddr:$src)>;
   def : Pat<(v256i1 (load iaddrX16:$src)), (LXVP iaddrX16:$src)>;
+  def : Pat<(v256i1 (load PDForm:$src)), (PLXVP memri34:$src)>;
   def : Pat<(store v256i1:$XSp, xoaddr:$dst), (STXVPX $XSp, xoaddr:$dst)>;
   def : Pat<(store v256i1:$XSp, iaddrX16:$dst), (STXVP $XSp, iaddrX16:$dst)>;
+  def : Pat<(store v256i1:$XSp, PDForm:$dst), (PSTXVP $XSp, memri34:$dst)>;
 }
 let Predicates = [HasVSX, IsISAFuture] in {
   def : Pat<(v4i32 (rotl v4i32:$vA, v4i32:$vB)), (v4i32 (XVRLW v4i32:$vA,
diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
index 516a926bf031f..8a963d53105bc 100644
--- a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
@@ -490,10 +490,10 @@ define dso_local void @testLdStPair(i64 %SrcIdx, i64 %DstIdx) {
 ;
 ; LE-PAIRED-WACC-LABEL: testLdStPair:
 ; LE-PAIRED-WACC:       # %bb.0: # %entry
-; LE-PAIRED-WACC-NEXT:    plxv vs0, g at PCREL+48(0), 1
-; LE-PAIRED-WACC-NEXT:    plxv vs1, g at PCREL+32(0), 1
-; LE-PAIRED-WACC-NEXT:    pstxv vs0, g at PCREL+80(0), 1
-; LE-PAIRED-WACC-NEXT:    pstxv vs1, g at PCREL+64(0), 1
+; LE-PAIRED-WACC-NEXT:    paddi r3, 0, g at PCREL+32, 1
+; LE-PAIRED-WACC-NEXT:    lxvpx vsp34, 0, r3
+; LE-PAIRED-WACC-NEXT:    paddi r3, 0, g at PCREL+64, 1
+; LE-PAIRED-WACC-NEXT:    stxvpx vsp34, 0, r3
 ; LE-PAIRED-WACC-NEXT:    blr
 ;
 ; BE-PAIRED-LABEL: testLdStPair:
@@ -694,10 +694,10 @@ define dso_local void @testUnalignedLdStPair() {
 ;
 ; LE-PAIRED-WACC-LABEL: testUnalignedLdStPair:
 ; LE-PAIRED-WACC:       # %bb.0: # %entry
-; LE-PAIRED-WACC-NEXT:    plxv vs0, g at PCREL+27(0), 1
-; LE-PAIRED-WACC-NEXT:    plxv vs1, g at PCREL+11(0), 1
-; LE-PAIRED-WACC-NEXT:    pstxv vs0, g at PCREL+35(0), 1
-; LE-PAIRED-WACC-NEXT:    pstxv vs1, g at PCREL+19(0), 1
+; LE-PAIRED-WACC-NEXT:    paddi r3, 0, g at PCREL+11, 1
+; LE-PAIRED-WACC-NEXT:    lxvpx vsp34, 0, r3
+; LE-PAIRED-WACC-NEXT:    paddi r3, 0, g at PCREL+19, 1
+; LE-PAIRED-WACC-NEXT:    stxvpx vsp34, 0, r3
 ; LE-PAIRED-WACC-NEXT:    blr
 ;
 ; BE-PAIRED-LABEL: testUnalignedLdStPair:

>From 4c870787203f2e567ff8231bf66431e75d7f82be Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Wed, 4 Mar 2026 00:12:24 -0500
Subject: [PATCH 3/3] Order matters: place the v256i1 xoaddr (LXVPX/STXVPX) patterns after the iaddrX16 and PDForm patterns

---
 llvm/lib/Target/PowerPC/PPCInstrFuture.td              | 5 +++--
 llvm/test/CodeGen/PowerPC/vec_int_to_double_shuffle.ll | 8 ++++----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
index 84494b8f8889d..f1097beaa8397 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
@@ -599,13 +599,14 @@ def : Pat<(int_ppc_vsx_stxvprll v256i1:$XTp, addr:$RA, i64:$RB), (STXVPRLL $XTp,
 
 // Regular load/store patterns for v256i1 (for ISA Future)
 let Predicates = [PairedVectorMemops, IsISAFuture] in {
-  def : Pat<(v256i1 (load xoaddr:$src)), (LXVPX xoaddr:$src)>;
   def : Pat<(v256i1 (load iaddrX16:$src)), (LXVP iaddrX16:$src)>;
   def : Pat<(v256i1 (load PDForm:$src)), (PLXVP memri34:$src)>;
-  def : Pat<(store v256i1:$XSp, xoaddr:$dst), (STXVPX $XSp, xoaddr:$dst)>;
+  def : Pat<(v256i1 (load xoaddr:$src)), (LXVPX xoaddr:$src)>;
   def : Pat<(store v256i1:$XSp, iaddrX16:$dst), (STXVP $XSp, iaddrX16:$dst)>;
   def : Pat<(store v256i1:$XSp, PDForm:$dst), (PSTXVP $XSp, memri34:$dst)>;
+  def : Pat<(store v256i1:$XSp, xoaddr:$dst), (STXVPX $XSp, xoaddr:$dst)>;
 }
+
 let Predicates = [HasVSX, IsISAFuture] in {
   def : Pat<(v4i32 (rotl v4i32:$vA, v4i32:$vB)), (v4i32 (XVRLW v4i32:$vA,
                                                      v4i32:$vB))>;
diff --git a/llvm/test/CodeGen/PowerPC/vec_int_to_double_shuffle.ll b/llvm/test/CodeGen/PowerPC/vec_int_to_double_shuffle.ll
index 9bba60435c62e..5588d07f267c6 100644
--- a/llvm/test/CodeGen/PowerPC/vec_int_to_double_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_int_to_double_shuffle.ll
@@ -12,8 +12,8 @@ define <2 x double> @foo(<4 x i32> %s) {
 ;
 ; CHECK-BE-LABEL: foo:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsldwi 0, 34, 34, 1
-; CHECK-BE-NEXT:    xvcvsxwdp 34, 0
+; CHECK-BE-NEXT:    xxsldwi 34, 34, 34, 1
+; CHECK-BE-NEXT:    xvcvsxwdp 34, 34
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = shufflevector <4 x i32> %s, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
@@ -29,8 +29,8 @@ define <2 x double> @bar(<4 x i32> %s) {
 ;
 ; CHECK-BE-LABEL: bar:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsldwi 0, 34, 34, 1
-; CHECK-BE-NEXT:    xvcvuxwdp 34, 0
+; CHECK-BE-NEXT:    xxsldwi 34, 34, 34, 1
+; CHECK-BE-NEXT:    xvcvuxwdp 34, 34
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = shufflevector <4 x i32> %s, <4 x i32> undef, <2 x i32> <i32 1, i32 3>



More information about the llvm-commits mailing list