[llvm-branch-commits] [llvm] c289297 - [PowerPC] Rename the vector pair intrinsics and builtins to replace the _mma_ prefix by _vsx_

Albion Fung via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Dec 17 10:24:33 PST 2020


Author: Baptiste Saleil
Date: 2020-12-17T13:19:27-05:00
New Revision: c2892978e919bf66535729c70fba73c4c3224548

URL: https://github.com/llvm/llvm-project/commit/c2892978e919bf66535729c70fba73c4c3224548
DIFF: https://github.com/llvm/llvm-project/commit/c2892978e919bf66535729c70fba73c4c3224548.diff

LOG: [PowerPC] Rename the vector pair intrinsics and builtins to replace the _mma_ prefix by _vsx_

On PPC, the vector pair instructions are independent of MMA.
This patch renames the vector pair LLVM intrinsics and Clang builtins, replacing the _mma_ prefix with _vsx_ in their names.
We also move the vector pair type/intrinsic/builtin tests to their own files.

Differential Revision: https://reviews.llvm.org/D91974
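
Example (an illustrative sketch, not part of the commit): after this change, code
that only needs the paired-vector loads/stores and (dis)assembly uses the
__builtin_vsx_* spellings, which lower to the llvm.ppc.vsx.* intrinsics. The
function and variable names below are made up; the renamed tests listed under
"Added" show the authoritative usage.

    void pair_example(const __vector_pair *src, const __vector_pair *dst,
                      vector unsigned char vc, unsigned char *out) {
      __vector_pair vp = __builtin_vsx_lxvp(0LL, src);  // was __builtin_mma_lxvp
      __builtin_vsx_stxvp(vp, 0LL, dst);                // was __builtin_mma_stxvp
      __vector_pair tmp;
      __builtin_vsx_assemble_pair(&tmp, vc, vc);        // was __builtin_mma_assemble_pair
      __builtin_vsx_disassemble_pair(out, &tmp);        // was __builtin_mma_disassemble_pair
    }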

Added: 
    clang/test/CodeGen/builtins-ppc-pair-mma.c
    clang/test/Sema/ppc-pair-mma-types.c
    clang/test/SemaCXX/ppc-pair-mma-types.cpp
    llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll

Modified: 
    clang/include/clang/Basic/BuiltinsPPC.def
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/lib/Sema/SemaChecking.cpp
    llvm/include/llvm/IR/IntrinsicsPowerPC.td
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCInstrPrefix.td
    llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
    llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
    llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll
    llvm/test/CodeGen/PowerPC/loop-p10-pair-prepare.ll
    llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
    llvm/test/CodeGen/PowerPC/mma-outer-product.ll
    llvm/test/CodeGen/PowerPC/mma-phi-accs.ll
    llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll

Removed: 
    clang/test/CodeGen/builtins-ppc-mma.c
    clang/test/Sema/ppc-mma-types.c
    clang/test/SemaCXX/ppc-mma-types.cpp
    llvm/test/CodeGen/PowerPC/paired-vector-intrinsics-without-mma.ll


################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index 8975d126b897..39c66f5daeb1 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -7,8 +7,9 @@
 //===----------------------------------------------------------------------===//
 //
 // This file defines the PowerPC-specific builtin function database.  Users of
-// this file must define the BUILTIN macro or the MMA_BUILTIN macro to make use
-// of this information.
+// this file must define the BUILTIN macro or the CUSTOM_BUILTIN macro to
+// make use of this information. The latter is used for builtins requiring
+// custom code generation and checking.
 //
 //===----------------------------------------------------------------------===//
 
@@ -18,9 +19,9 @@
 // The format of this database matches clang/Basic/Builtins.def except for the
 // MMA builtins that are using their own format documented below.
 
-#if defined(BUILTIN) && !defined(MMA_BUILTIN)
-#   define MMA_BUILTIN(ID, TYPES, ACCUMULATE) BUILTIN(__builtin_mma_##ID, "i.", "t")
-#elif defined(MMA_BUILTIN) && !defined(BUILTIN)
+#if defined(BUILTIN) && !defined(CUSTOM_BUILTIN)
+#   define CUSTOM_BUILTIN(ID, TYPES, ACCUMULATE) BUILTIN(__builtin_##ID, "i.", "t")
+#elif defined(CUSTOM_BUILTIN) && !defined(BUILTIN)
 #   define BUILTIN(ID, TYPES, ATTRS)
 #endif
 
@@ -659,94 +660,94 @@ BUILTIN(__builtin_setflm, "dd", "")
 // Cache built-ins
 BUILTIN(__builtin_dcbf, "vvC*", "")
 
-// MMA built-ins
-// All MMA built-ins are declared here using the MMA_BUILTIN macro. Because
-// these built-ins rely on target-dependent types and to avoid pervasive change,
-// they are type checked manually in Sema using custom type descriptors.
-// The first argument of the MMA_BUILTIN macro is the name of the built-in, the
-// second argument specifies the type of the function (result value, then each
-// argument) as follows:
+// Built-ins requiring custom code generation.
+// Because these built-ins rely on target-dependent types and to avoid pervasive
+// change, they are type checked manually in Sema using custom type descriptors.
+// The first argument of the CUSTOM_BUILTIN macro is the name of the built-in
+// with its prefix, the second argument specifies the type of the function
+// (result value, then each argument) as follows:
 //  i -> Unsigned integer followed by the greatest possible value for that
 //       argument or 0 if no constraint on the value.
 //       (e.g. i15 for a 4-bits value)
-//  v -> void
 //  V -> Vector type used with MMA builtins (vector unsigned char)
-//  W -> MMA vector type followed by the size of the vector type.
+//  W -> PPC Vector type followed by the size of the vector type.
 //       (e.g. W512 for __vector_quad)
+//  any other descriptor -> Fall back to generic type descriptor decoding.
 // The 'C' suffix can be used as a suffix to specify the const type.
 // The '*' suffix can be used as a suffix to specify a pointer to a type.
 // The third argument is set to true if the builtin accumulates its result into
 // its given accumulator.
 
-MMA_BUILTIN(assemble_acc, "vW512*VVVV", false)
-MMA_BUILTIN(disassemble_acc, "vv*W512*", false)
-MMA_BUILTIN(assemble_pair, "vW256*VV", false)
-MMA_BUILTIN(disassemble_pair, "vv*W256*", false)
-MMA_BUILTIN(xxmtacc, "vW512*", true)
-MMA_BUILTIN(xxmfacc, "vW512*", true)
-MMA_BUILTIN(xxsetaccz, "vW512*", false)
-MMA_BUILTIN(xvi4ger8, "vW512*VV", false)
-MMA_BUILTIN(xvi8ger4, "vW512*VV", false)
-MMA_BUILTIN(xvi16ger2, "vW512*VV", false)
-MMA_BUILTIN(xvi16ger2s, "vW512*VV", false)
-MMA_BUILTIN(xvf16ger2, "vW512*VV", false)
-MMA_BUILTIN(xvf32ger, "vW512*VV", false)
-MMA_BUILTIN(xvf64ger, "vW512*W256V", false)
-MMA_BUILTIN(pmxvi4ger8, "vW512*VVi15i15i255", false)
-MMA_BUILTIN(pmxvi8ger4, "vW512*VVi15i15i15", false)
-MMA_BUILTIN(pmxvi16ger2, "vW512*VVi15i15i3", false)
-MMA_BUILTIN(pmxvi16ger2s, "vW512*VVi15i15i3", false)
-MMA_BUILTIN(pmxvf16ger2, "vW512*VVi15i15i3", false)
-MMA_BUILTIN(pmxvf32ger, "vW512*VVi15i15", false)
-MMA_BUILTIN(pmxvf64ger, "vW512*W256Vi15i3", false)
-MMA_BUILTIN(xvi4ger8pp, "vW512*VV", true)
-MMA_BUILTIN(xvi8ger4pp, "vW512*VV", true)
-MMA_BUILTIN(xvi8ger4spp, "vW512*VV", true)
-MMA_BUILTIN(xvi16ger2pp, "vW512*VV", true)
-MMA_BUILTIN(xvi16ger2spp, "vW512*VV", true)
-MMA_BUILTIN(pmxvi4ger8pp, "vW512*VVi15i15i255", true)
-MMA_BUILTIN(pmxvi8ger4pp, "vW512*VVi15i15i15", true)
-MMA_BUILTIN(pmxvi8ger4spp, "vW512*VVi15i15i15", true)
-MMA_BUILTIN(pmxvi16ger2pp, "vW512*VVi15i15i3", true)
-MMA_BUILTIN(pmxvi16ger2spp, "vW512*VVi15i15i3", true)
-MMA_BUILTIN(xvf16ger2pp, "vW512*VV", true)
-MMA_BUILTIN(xvf16ger2pn, "vW512*VV", true)
-MMA_BUILTIN(xvf16ger2np, "vW512*VV", true)
-MMA_BUILTIN(xvf16ger2nn, "vW512*VV", true)
-MMA_BUILTIN(pmxvf16ger2pp, "vW512*VVi15i15i3", true)
-MMA_BUILTIN(pmxvf16ger2pn, "vW512*VVi15i15i3", true)
-MMA_BUILTIN(pmxvf16ger2np, "vW512*VVi15i15i3", true)
-MMA_BUILTIN(pmxvf16ger2nn, "vW512*VVi15i15i3", true)
-MMA_BUILTIN(xvf32gerpp, "vW512*VV", true)
-MMA_BUILTIN(xvf32gerpn, "vW512*VV", true)
-MMA_BUILTIN(xvf32gernp, "vW512*VV", true)
-MMA_BUILTIN(xvf32gernn, "vW512*VV", true)
-MMA_BUILTIN(pmxvf32gerpp, "vW512*VVi15i15", true)
-MMA_BUILTIN(pmxvf32gerpn, "vW512*VVi15i15", true)
-MMA_BUILTIN(pmxvf32gernp, "vW512*VVi15i15", true)
-MMA_BUILTIN(pmxvf32gernn, "vW512*VVi15i15", true)
-MMA_BUILTIN(xvf64gerpp, "vW512*W256V", true)
-MMA_BUILTIN(xvf64gerpn, "vW512*W256V", true)
-MMA_BUILTIN(xvf64gernp, "vW512*W256V", true)
-MMA_BUILTIN(xvf64gernn, "vW512*W256V", true)
-MMA_BUILTIN(pmxvf64gerpp, "vW512*W256Vi15i3", true)
-MMA_BUILTIN(pmxvf64gerpn, "vW512*W256Vi15i3", true)
-MMA_BUILTIN(pmxvf64gernp, "vW512*W256Vi15i3", true)
-MMA_BUILTIN(pmxvf64gernn, "vW512*W256Vi15i3", true)
-MMA_BUILTIN(xvbf16ger2, "vW512*VV", false)
-MMA_BUILTIN(pmxvbf16ger2, "vW512*VVi15i15i3", false)
-MMA_BUILTIN(xvbf16ger2pp, "vW512*VV", true)
-MMA_BUILTIN(xvbf16ger2pn, "vW512*VV", true)
-MMA_BUILTIN(xvbf16ger2np, "vW512*VV", true)
-MMA_BUILTIN(xvbf16ger2nn, "vW512*VV", true)
-MMA_BUILTIN(pmxvbf16ger2pp, "vW512*VVi15i15i3", true)
-MMA_BUILTIN(pmxvbf16ger2pn, "vW512*VVi15i15i3", true)
-MMA_BUILTIN(pmxvbf16ger2np, "vW512*VVi15i15i3", true)
-MMA_BUILTIN(pmxvbf16ger2nn, "vW512*VVi15i15i3", true)
-MMA_BUILTIN(lxvp, "W256SLLiW256C*", false)
-MMA_BUILTIN(stxvp, "vW256SLLiW256C*", false)
+CUSTOM_BUILTIN(vsx_lxvp, "W256SLLiW256C*", false)
+CUSTOM_BUILTIN(vsx_stxvp, "vW256SLLiW256C*", false)
+CUSTOM_BUILTIN(vsx_assemble_pair, "vW256*VV", false)
+CUSTOM_BUILTIN(vsx_disassemble_pair, "vv*W256*", false)
+
+CUSTOM_BUILTIN(mma_assemble_acc, "vW512*VVVV", false)
+CUSTOM_BUILTIN(mma_disassemble_acc, "vv*W512*", false)
+CUSTOM_BUILTIN(mma_xxmtacc, "vW512*", true)
+CUSTOM_BUILTIN(mma_xxmfacc, "vW512*", true)
+CUSTOM_BUILTIN(mma_xxsetaccz, "vW512*", false)
+CUSTOM_BUILTIN(mma_xvi4ger8, "vW512*VV", false)
+CUSTOM_BUILTIN(mma_xvi8ger4, "vW512*VV", false)
+CUSTOM_BUILTIN(mma_xvi16ger2, "vW512*VV", false)
+CUSTOM_BUILTIN(mma_xvi16ger2s, "vW512*VV", false)
+CUSTOM_BUILTIN(mma_xvf16ger2, "vW512*VV", false)
+CUSTOM_BUILTIN(mma_xvf32ger, "vW512*VV", false)
+CUSTOM_BUILTIN(mma_xvf64ger, "vW512*W256V", false)
+CUSTOM_BUILTIN(mma_pmxvi4ger8, "vW512*VVi15i15i255", false)
+CUSTOM_BUILTIN(mma_pmxvi8ger4, "vW512*VVi15i15i15", false)
+CUSTOM_BUILTIN(mma_pmxvi16ger2, "vW512*VVi15i15i3", false)
+CUSTOM_BUILTIN(mma_pmxvi16ger2s, "vW512*VVi15i15i3", false)
+CUSTOM_BUILTIN(mma_pmxvf16ger2, "vW512*VVi15i15i3", false)
+CUSTOM_BUILTIN(mma_pmxvf32ger, "vW512*VVi15i15", false)
+CUSTOM_BUILTIN(mma_pmxvf64ger, "vW512*W256Vi15i3", false)
+CUSTOM_BUILTIN(mma_xvi4ger8pp, "vW512*VV", true)
+CUSTOM_BUILTIN(mma_xvi8ger4pp, "vW512*VV", true)
+CUSTOM_BUILTIN(mma_xvi8ger4spp, "vW512*VV", true)
+CUSTOM_BUILTIN(mma_xvi16ger2pp, "vW512*VV", true)
+CUSTOM_BUILTIN(mma_xvi16ger2spp, "vW512*VV", true)
+CUSTOM_BUILTIN(mma_pmxvi4ger8pp, "vW512*VVi15i15i255", true)
+CUSTOM_BUILTIN(mma_pmxvi8ger4pp, "vW512*VVi15i15i15", true)
+CUSTOM_BUILTIN(mma_pmxvi8ger4spp, "vW512*VVi15i15i15", true)
+CUSTOM_BUILTIN(mma_pmxvi16ger2pp, "vW512*VVi15i15i3", true)
+CUSTOM_BUILTIN(mma_pmxvi16ger2spp, "vW512*VVi15i15i3", true)
+CUSTOM_BUILTIN(mma_xvf16ger2pp, "vW512*VV", true)
+CUSTOM_BUILTIN(mma_xvf16ger2pn, "vW512*VV", true)
+CUSTOM_BUILTIN(mma_xvf16ger2np, "vW512*VV", true)
+CUSTOM_BUILTIN(mma_xvf16ger2nn, "vW512*VV", true)
+CUSTOM_BUILTIN(mma_pmxvf16ger2pp, "vW512*VVi15i15i3", true)
+CUSTOM_BUILTIN(mma_pmxvf16ger2pn, "vW512*VVi15i15i3", true)
+CUSTOM_BUILTIN(mma_pmxvf16ger2np, "vW512*VVi15i15i3", true)
+CUSTOM_BUILTIN(mma_pmxvf16ger2nn, "vW512*VVi15i15i3", true)
+CUSTOM_BUILTIN(mma_xvf32gerpp, "vW512*VV", true)
+CUSTOM_BUILTIN(mma_xvf32gerpn, "vW512*VV", true)
+CUSTOM_BUILTIN(mma_xvf32gernp, "vW512*VV", true)
+CUSTOM_BUILTIN(mma_xvf32gernn, "vW512*VV", true)
+CUSTOM_BUILTIN(mma_pmxvf32gerpp, "vW512*VVi15i15", true)
+CUSTOM_BUILTIN(mma_pmxvf32gerpn, "vW512*VVi15i15", true)
+CUSTOM_BUILTIN(mma_pmxvf32gernp, "vW512*VVi15i15", true)
+CUSTOM_BUILTIN(mma_pmxvf32gernn, "vW512*VVi15i15", true)
+CUSTOM_BUILTIN(mma_xvf64gerpp, "vW512*W256V", true)
+CUSTOM_BUILTIN(mma_xvf64gerpn, "vW512*W256V", true)
+CUSTOM_BUILTIN(mma_xvf64gernp, "vW512*W256V", true)
+CUSTOM_BUILTIN(mma_xvf64gernn, "vW512*W256V", true)
+CUSTOM_BUILTIN(mma_pmxvf64gerpp, "vW512*W256Vi15i3", true)
+CUSTOM_BUILTIN(mma_pmxvf64gerpn, "vW512*W256Vi15i3", true)
+CUSTOM_BUILTIN(mma_pmxvf64gernp, "vW512*W256Vi15i3", true)
+CUSTOM_BUILTIN(mma_pmxvf64gernn, "vW512*W256Vi15i3", true)
+CUSTOM_BUILTIN(mma_xvbf16ger2, "vW512*VV", false)
+CUSTOM_BUILTIN(mma_pmxvbf16ger2, "vW512*VVi15i15i3", false)
+CUSTOM_BUILTIN(mma_xvbf16ger2pp, "vW512*VV", true)
+CUSTOM_BUILTIN(mma_xvbf16ger2pn, "vW512*VV", true)
+CUSTOM_BUILTIN(mma_xvbf16ger2np, "vW512*VV", true)
+CUSTOM_BUILTIN(mma_xvbf16ger2nn, "vW512*VV", true)
+CUSTOM_BUILTIN(mma_pmxvbf16ger2pp, "vW512*VVi15i15i3", true)
+CUSTOM_BUILTIN(mma_pmxvbf16ger2pn, "vW512*VVi15i15i3", true)
+CUSTOM_BUILTIN(mma_pmxvbf16ger2np, "vW512*VVi15i15i3", true)
+CUSTOM_BUILTIN(mma_pmxvbf16ger2nn, "vW512*VVi15i15i3", true)
 
 // FIXME: Obviously incomplete.
 
 #undef BUILTIN
-#undef MMA_BUILTIN
+#undef CUSTOM_BUILTIN
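
As a reading aid (not part of the patch), the descriptor strings above decode to
roughly the following C prototypes, using the letter meanings documented in the
file's comment (W256 = __vector_pair, W512 = __vector_quad, V = vector unsigned
char, i15 = an integer constant with maximum value 15, SLLi falling back to the
generic decoding for signed long long, trailing C and * marking const and
pointer types). Parameter names are made up for illustration.

    // CUSTOM_BUILTIN(vsx_lxvp, "W256SLLiW256C*", false)
    __vector_pair __builtin_vsx_lxvp(signed long long offset, const __vector_pair *p);

    // CUSTOM_BUILTIN(vsx_assemble_pair, "vW256*VV", false)
    void __builtin_vsx_assemble_pair(__vector_pair *dst, vector unsigned char a,
                                     vector unsigned char b);

    // CUSTOM_BUILTIN(mma_pmxvf32ger, "vW512*VVi15i15", false)
    void __builtin_mma_pmxvf32ger(__vector_quad *acc, vector unsigned char a,
                                  vector unsigned char b, unsigned int xmsk,
                                  unsigned int ymsk);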

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 74f6c9fee2c8..40bb5f5f0689 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -14861,8 +14861,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
   // use custom code generation to expand a builtin call with a pointer to a
   // load (if the corresponding instruction accumulates its result) followed by
   // the call to the intrinsic and a store of the result.
-#define MMA_BUILTIN(Name, Types, Accumulate) \
-  case PPC::BI__builtin_mma_##Name:
+#define CUSTOM_BUILTIN(Name, Types, Accumulate) \
+  case PPC::BI__builtin_##Name:
 #include "clang/Basic/BuiltinsPPC.def"
   {
     // The first argument of these two builtins is a pointer used to store their
@@ -14870,9 +14870,9 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
     // return values. So, here we emit code extracting these values from the
     // intrinsic results and storing them using that pointer.
     if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
-        BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
+        BuiltinID == PPC::BI__builtin_vsx_disassemble_pair) {
       unsigned NumVecs = 2;
-      auto Intrinsic = Intrinsic::ppc_mma_disassemble_pair;
+      auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
       if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
         NumVecs = 4;
         Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
@@ -14893,16 +14893,16 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
     }
     bool Accumulate;
     switch (BuiltinID) {
-  #define MMA_BUILTIN(Name, Types, Acc) \
-    case PPC::BI__builtin_mma_##Name: \
-      ID = Intrinsic::ppc_mma_##Name; \
+  #define CUSTOM_BUILTIN(Name, Types, Acc) \
+    case PPC::BI__builtin_##Name: \
+      ID = Intrinsic::ppc_##Name; \
       Accumulate = Acc; \
       break;
   #include "clang/Basic/BuiltinsPPC.def"
     }
-    if (BuiltinID == PPC::BI__builtin_mma_lxvp ||
-        BuiltinID == PPC::BI__builtin_mma_stxvp) {
-      if (BuiltinID == PPC::BI__builtin_mma_lxvp) {
+    if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
+        BuiltinID == PPC::BI__builtin_vsx_stxvp) {
+      if (BuiltinID == PPC::BI__builtin_vsx_lxvp) {
         Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
         Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
       } else {
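
For illustration (a sketch, not part of the patch): for an accumulating builtin
such as __builtin_mma_xvf32gerpp, the custom expansion described in the comments
above emits roughly a load of the <512 x i1> accumulator through the pointer
argument, the intrinsic call, and a store of the result. The helper name here is
made up; the CHECK lines in the renamed builtins-ppc-pair-mma.c test below are
authoritative.

    void accumulate(__vector_quad *acc, vector unsigned char a, vector unsigned char b) {
      // Expands to approximately:
      //   %0 = load <512 x i1>, <512 x i1>* %acc
      //   %1 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %a, <16 x i8> %b)
      //   store <512 x i1> %1, <512 x i1>* %acc
      __builtin_mma_xvf32gerpp(acc, a, b);
    }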

diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 5fef59bed5af..6f5aefb10664 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3301,8 +3301,8 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
      return SemaBuiltinConstantArgRange(TheCall, 2, 0, 7);
   case PPC::BI__builtin_vsx_xxpermx:
      return SemaBuiltinConstantArgRange(TheCall, 3, 0, 7);
-#define MMA_BUILTIN(Name, Types, Acc) \
-  case PPC::BI__builtin_mma_##Name: \
+#define CUSTOM_BUILTIN(Name, Types, Acc) \
+  case PPC::BI__builtin_##Name: \
     return SemaBuiltinPPCMMACall(TheCall, Types);
 #include "clang/Basic/BuiltinsPPC.def"
   }

diff --git a/clang/test/CodeGen/builtins-ppc-mma.c b/clang/test/CodeGen/builtins-ppc-pair-mma.c
similarity index 97%
rename from clang/test/CodeGen/builtins-ppc-mma.c
rename to clang/test/CodeGen/builtins-ppc-pair-mma.c
index 88ca36aa6714..324614ecac20 100644
--- a/clang/test/CodeGen/builtins-ppc-mma.c
+++ b/clang/test/CodeGen/builtins-ppc-pair-mma.c
@@ -44,7 +44,7 @@ void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
 
 // CHECK-LABEL: @test3(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <256 x i1>*
 // CHECK-NEXT:    store <256 x i1> [[TMP0]], <256 x i1>* [[TMP1]], align 32, !tbaa !6
 // CHECK-NEXT:    ret void
@@ -53,7 +53,7 @@ void test3(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
   __vector_quad vq = *((__vector_quad *)vqp);
   __vector_pair vp = *((__vector_pair *)vpp);
   __vector_pair res;
-  __builtin_mma_assemble_pair(&res, vc, vc);
+  __builtin_vsx_assemble_pair(&res, vc, vc);
   *((__vector_pair *)resp) = res;
 }
 
@@ -61,7 +61,7 @@ void test3(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
 // CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> [[TMP1]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[TMP1]])
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <16 x i8>*
 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 0
 // CHECK-NEXT:    store <16 x i8> [[TMP4]], <16 x i8>* [[TMP3]], align 16
@@ -72,7 +72,7 @@ void test3(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
 // CHECK-NEXT:    ret void
 //
 void test4(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
-  __builtin_mma_disassemble_pair(resp, (__vector_pair*)vpp);
+  __builtin_vsx_disassemble_pair(resp, (__vector_pair*)vpp);
 }
 
 // CHECK-LABEL: @test5(
@@ -1040,104 +1040,104 @@ void test65(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
 // CHECK-LABEL: @test66(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP0]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP0]])
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
-// CHECK-NEXT:    tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP1]], i8* [[TMP2]])
+// CHECK-NEXT:    tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], i8* [[TMP2]])
 // CHECK-NEXT:    ret void
 //
 void test66(const __vector_pair *vpp, const __vector_pair *vp2) {
-  __vector_pair vp = __builtin_mma_lxvp(0LL, vpp);
-  __builtin_mma_stxvp(vp, 0LL, vp2);
+  __vector_pair vp = __builtin_vsx_lxvp(0LL, vpp);
+  __builtin_vsx_stxvp(vp, 0LL, vp2);
 }
 
 // CHECK-LABEL: @test67(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 [[OFFSET:%.*]]
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP1]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 [[OFFSET]]
-// CHECK-NEXT:    tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
+// CHECK-NEXT:    tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
 // CHECK-NEXT:    ret void
 //
 void test67(const __vector_pair *vpp, signed long long offset, const __vector_pair *vp2) {
-  __vector_pair vp = __builtin_mma_lxvp(offset, vpp);
-  __builtin_mma_stxvp(vp, offset, vp2);
+  __vector_pair vp = __builtin_vsx_lxvp(offset, vpp);
+  __builtin_vsx_stxvp(vp, offset, vp2);
 }
 
 // CHECK-LABEL: @test68(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 18
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP1]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 18
-// CHECK-NEXT:    tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
+// CHECK-NEXT:    tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
 // CHECK-NEXT:    ret void
 //
 void test68(const __vector_pair *vpp, const __vector_pair *vp2) {
-  __vector_pair vp = __builtin_mma_lxvp(18LL, vpp);
-  __builtin_mma_stxvp(vp, 18LL, vp2);
+  __vector_pair vp = __builtin_vsx_lxvp(18LL, vpp);
+  __builtin_vsx_stxvp(vp, 18LL, vp2);
 }
 
 // CHECK-LABEL: @test69(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP1]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 1
-// CHECK-NEXT:    tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
+// CHECK-NEXT:    tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
 // CHECK-NEXT:    ret void
 //
 void test69(const __vector_pair *vpp, const __vector_pair *vp2) {
-  __vector_pair vp = __builtin_mma_lxvp(1LL, vpp);
-  __builtin_mma_stxvp(vp, 1LL, vp2);
+  __vector_pair vp = __builtin_vsx_lxvp(1LL, vpp);
+  __builtin_vsx_stxvp(vp, 1LL, vp2);
 }
 
 // CHECK-LABEL: @test70(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 42
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP1]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 42
-// CHECK-NEXT:    tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
+// CHECK-NEXT:    tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
 // CHECK-NEXT:    ret void
 //
 void test70(const __vector_pair *vpp, const __vector_pair *vp2) {
-  __vector_pair vp = __builtin_mma_lxvp(42LL, vpp);
-  __builtin_mma_stxvp(vp, 42LL, vp2);
+  __vector_pair vp = __builtin_vsx_lxvp(42LL, vpp);
+  __builtin_vsx_stxvp(vp, 42LL, vp2);
 }
 
 // CHECK-LABEL: @test71(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <256 x i1>, <256 x i1>* [[VPP:%.*]], i64 128
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <256 x i1>* [[TMP0]] to i8*
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP1]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <256 x i1>, <256 x i1>* [[VP2:%.*]], i64 128
 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <256 x i1>* [[TMP3]] to i8*
-// CHECK-NEXT:    tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
+// CHECK-NEXT:    tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
 // CHECK-NEXT:    ret void
 //
 void test71(const __vector_pair *vpp, const __vector_pair *vp2) {
-  __vector_pair vp = __builtin_mma_lxvp(32768LL, vpp);
-  __builtin_mma_stxvp(vp, 32768LL, vp2);
+  __vector_pair vp = __builtin_vsx_lxvp(32768LL, vpp);
+  __builtin_vsx_stxvp(vp, 32768LL, vp2);
 }
 
 // CHECK-LABEL: @test72(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 32799
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP1]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 32799
-// CHECK-NEXT:    tail call void @llvm.ppc.mma.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
+// CHECK-NEXT:    tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
 // CHECK-NEXT:    ret void
 //
 void test72(const __vector_pair *vpp, const __vector_pair *vp2) {
-  __vector_pair vp = __builtin_mma_lxvp(32799LL, vpp);
-  __builtin_mma_stxvp(vp, 32799LL, vp2);
+  __vector_pair vp = __builtin_vsx_lxvp(32799LL, vpp);
+  __builtin_vsx_stxvp(vp, 32799LL, vp2);
 }
 
 // CHECK-LABEL: @test73(
@@ -1146,7 +1146,7 @@ void test72(const __vector_pair *vpp, const __vector_pair *vp2) {
 // CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, [[TBAA2:!tbaa !.*]]
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i64 8
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP3]])
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP3]])
 // CHECK-NEXT:    [[TMP5:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP1]], <256 x i1> [[TMP4]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
 // CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
 // CHECK-NEXT:    store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, [[TBAA2]]
@@ -1154,7 +1154,7 @@ void test72(const __vector_pair *vpp, const __vector_pair *vp2) {
 //
 void test73(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
   __vector_quad vq = *((__vector_quad *)vqp);
-  __vector_pair vp = __builtin_mma_lxvp(8LL, vpp);
+  __vector_pair vp = __builtin_vsx_lxvp(8LL, vpp);
   __builtin_mma_pmxvf64gernn(&vq, vp, vc, 0, 0);
   *((__vector_quad *)resp) = vq;
 }
@@ -1164,7 +1164,7 @@ void test73(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char v
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
 // CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, [[TBAA2]]
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP2]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP2]])
 // CHECK-NEXT:    [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]])
 // CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
 // CHECK-NEXT:    store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, [[TBAA2]]
@@ -1172,7 +1172,7 @@ void test73(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char v
 //
 void test74(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
   __vector_quad vq = *((__vector_quad *)vqp);
-  __vector_pair vp = __builtin_mma_lxvp(0LL, vpp);
+  __vector_pair vp = __builtin_vsx_lxvp(0LL, vpp);
   __builtin_mma_xvf64gernp(&vq, vp, vc);
   *((__vector_quad *)resp) = vq;
 }
@@ -1183,7 +1183,7 @@ void test74(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char v
 // CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, [[TBAA2:!tbaa !.*]]
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[OFFS:%.*]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* [[TMP3]])
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP3]])
 // CHECK-NEXT:    [[TMP5:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP4]], <16 x i8> [[VC:%.*]])
 // CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
 // CHECK-NEXT:    store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, [[TBAA2]]
@@ -1191,7 +1191,7 @@ void test74(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char v
 //
 void test75(unsigned char *vqp, signed long long offs, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
   __vector_quad vq = *((__vector_quad *)vqp);
-  __vector_pair vp = __builtin_mma_lxvp(offs, vpp);
+  __vector_pair vp = __builtin_vsx_lxvp(offs, vpp);
   __builtin_mma_xvf64gernp(&vq, vp, vc);
   *((__vector_quad *)resp) = vq;
 }

diff --git a/clang/test/Sema/ppc-mma-types.c b/clang/test/Sema/ppc-pair-mma-types.c
similarity index 96%
rename from clang/test/Sema/ppc-mma-types.c
rename to clang/test/Sema/ppc-pair-mma-types.c
index 840e34845f58..dc12ff657ee4 100644
--- a/clang/test/Sema/ppc-mma-types.c
+++ b/clang/test/Sema/ppc-pair-mma-types.c
@@ -246,7 +246,7 @@ void testVPLocal(int *ptr, vector unsigned char vc) {
   __vector_pair *vpp = (__vector_pair *)ptr;
   __vector_pair vp1 = *vpp;
   __vector_pair vp2;
-  __builtin_mma_assemble_pair(&vp2, vc, vc);
+  __builtin_vsx_assemble_pair(&vp2, vc, vc);
   __vector_pair vp3;
   __vector_quad vq;
   __builtin_mma_xvf64ger(&vq, vp3, vc);
@@ -320,16 +320,16 @@ void testVPOperators4(int v, void *ptr) {
 }
 
 void testBuiltinTypes1(const __vector_pair *vpp, const __vector_pair *vp2, float f) {
-  __vector_pair vp = __builtin_mma_lxvp(f, vpp); // expected-error {{passing 'float' to parameter of incompatible type 'long long'}}
-  __builtin_mma_stxvp(vp, 32799, vp2);           // expected-error {{passing 'int' to parameter of incompatible type 'long long'}}
+  __vector_pair vp = __builtin_vsx_lxvp(f, vpp); // expected-error {{passing 'float' to parameter of incompatible type 'long long'}}
+  __builtin_vsx_stxvp(vp, 32799, vp2);           // expected-error {{passing 'int' to parameter of incompatible type 'long long'}}
 }
 
 void testBuiltinTypes2(__vector_pair *vpp, const __vector_pair *vp2, unsigned char c) {
-  __vector_pair vp = __builtin_mma_lxvp(6LL, vpp); // expected-error {{passing '__vector_pair *' to parameter of incompatible type 'const __vector_pair *'}}
-  __builtin_mma_stxvp(vp, c, vp2);                 // expected-error {{passing 'unsigned char' to parameter of incompatible type 'long long'}}
+  __vector_pair vp = __builtin_vsx_lxvp(6LL, vpp); // expected-error {{passing '__vector_pair *' to parameter of incompatible type 'const __vector_pair *'}}
+  __builtin_vsx_stxvp(vp, c, vp2);                 // expected-error {{passing 'unsigned char' to parameter of incompatible type 'long long'}}
 }
 
 void testBuiltinTypes3(vector int v, __vector_pair *vp2, signed long long ll, unsigned short s) {
-  __vector_pair vp = __builtin_mma_lxvp(ll, v); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type 'const __vector_pair *'}}
-  __builtin_mma_stxvp(vp, ll, s);               // expected-error {{passing 'unsigned short' to parameter of incompatible type 'const __vector_pair *'}}
+  __vector_pair vp = __builtin_vsx_lxvp(ll, v); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type 'const __vector_pair *'}}
+  __builtin_vsx_stxvp(vp, ll, s);               // expected-error {{passing 'unsigned short' to parameter of incompatible type 'const __vector_pair *'}}
 }

diff --git a/clang/test/SemaCXX/ppc-mma-types.cpp b/clang/test/SemaCXX/ppc-pair-mma-types.cpp
similarity index 99%
rename from clang/test/SemaCXX/ppc-mma-types.cpp
rename to clang/test/SemaCXX/ppc-pair-mma-types.cpp
index 86487586e9b2..c32406fdc94d 100644
--- a/clang/test/SemaCXX/ppc-mma-types.cpp
+++ b/clang/test/SemaCXX/ppc-pair-mma-types.cpp
@@ -367,7 +367,7 @@ void TestVPLambda() {
     __vector_pair *vpp = (__vector_pair *)ptr;
     return *vpp; // expected-error {{invalid use of PPC MMA type}}
   };
-  auto f3 = [](vector unsigned char vc) { __vector_pair vp; __builtin_mma_assemble_pair(&vp, vc, vc); return vp; }; // expected-error {{invalid use of PPC MMA type}}
+  auto f3 = [](vector unsigned char vc) { __vector_pair vp; __builtin_vsx_assemble_pair(&vp, vc, vc); return vp; }; // expected-error {{invalid use of PPC MMA type}}
 }
 
 // cast

diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index d559c000fd93..8a6ad8e8085c 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1132,12 +1132,8 @@ def int_ppc_vsx_lxvl :
 def int_ppc_vsx_lxvll :
       Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem,
       IntrArgMemOnly]>;
-def int_ppc_vsx_stxvl :
-      Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i64_ty],
-      [IntrWriteMem, IntrArgMemOnly]>;
-def int_ppc_vsx_stxvll :
-      Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i64_ty],
-      [IntrWriteMem, IntrArgMemOnly]>;
+def int_ppc_vsx_lxvp :
+      Intrinsic<[llvm_v256i1_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
 
 // Vector store.
 def int_ppc_vsx_stxvw4x : Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty],
@@ -1148,6 +1144,15 @@ def int_ppc_vsx_stxvw4x_be : Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty],
                                        [IntrWriteMem, IntrArgMemOnly]>;
 def int_ppc_vsx_stxvd2x_be : Intrinsic<[], [llvm_v2f64_ty, llvm_ptr_ty],
                                        [IntrWriteMem, IntrArgMemOnly]>;
+def int_ppc_vsx_stxvl :
+      Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i64_ty],
+      [IntrWriteMem, IntrArgMemOnly]>;
+def int_ppc_vsx_stxvll :
+      Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i64_ty],
+      [IntrWriteMem, IntrArgMemOnly]>;
+def int_ppc_vsx_stxvp :
+      Intrinsic<[], [llvm_v256i1_ty, llvm_ptr_ty], [IntrWriteMem,
+      IntrArgMemOnly]>;
 // Vector and scalar maximum.
 def int_ppc_vsx_xvmaxdp : PowerPC_VSX_Vec_DDD_Intrinsic<"xvmaxdp">;
 def int_ppc_vsx_xvmaxsp : PowerPC_VSX_Vec_FFF_Intrinsic<"xvmaxsp">;
@@ -1406,6 +1411,14 @@ def int_ppc_setrnd : GCCBuiltin<"__builtin_setrnd">,
 }
 
 let TargetPrefix = "ppc" in {
+  def int_ppc_vsx_assemble_pair :
+        Intrinsic<[llvm_v256i1_ty],
+                  [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+
+  def int_ppc_vsx_disassemble_pair :
+        Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty],
+                  [llvm_v256i1_ty], [IntrNoMem]>;
+
   def int_ppc_mma_assemble_acc :
         Intrinsic<[llvm_v512i1_ty],
                   [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
@@ -1415,14 +1428,6 @@ let TargetPrefix = "ppc" in {
         Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
                   [llvm_v512i1_ty], [IntrNoMem]>;
 
-  def int_ppc_mma_assemble_pair :
-        Intrinsic<[llvm_v256i1_ty],
-                  [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-
-  def int_ppc_mma_disassemble_pair :
-        Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty],
-                  [llvm_v256i1_ty], [IntrNoMem]>;
-
   def int_ppc_mma_xxmtacc :
         Intrinsic<[llvm_v512i1_ty], [llvm_v512i1_ty], [IntrNoMem]>;
 
@@ -1432,14 +1437,6 @@ let TargetPrefix = "ppc" in {
   def int_ppc_mma_xxsetaccz :
         Intrinsic<[llvm_v512i1_ty], [], [IntrNoMem]>;
 
-  def int_ppc_mma_lxvp :
-        Intrinsic<[llvm_v256i1_ty], [llvm_ptr_ty],
-                  [IntrReadMem, IntrArgMemOnly]>;
-
-  def int_ppc_mma_stxvp :
-        Intrinsic<[], [llvm_v256i1_ty, llvm_ptr_ty],
-                  [IntrWriteMem, IntrArgMemOnly]>;
-
   // MMA Reduced-Precision: Outer Product Intrinsic Definitions.
   defm int_ppc_mma_xvi4ger8 :
         PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index b9283f694030..3d9a54305cc6 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10614,7 +10614,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     return DAG.getRegister(PPC::R2, MVT::i32);
 
   case Intrinsic::ppc_mma_disassemble_acc:
-  case Intrinsic::ppc_mma_disassemble_pair: {
+  case Intrinsic::ppc_vsx_disassemble_pair: {
     int NumVecs = 2;
     SDValue WideVec = Op.getOperand(1);
     if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {

diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index a4ade0f82292..54e9adae40d7 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1617,7 +1617,7 @@ let Predicates = [MMA] in {
 let Predicates = [PairedVectorMemops] in {
   def : Pat<(v256i1 (PPCPairBuild v4i32:$vs1, v4i32:$vs0)),
             Concats.VecsToVecPair0>;
-  def : Pat<(v256i1 (int_ppc_mma_assemble_pair v16i8:$vs1, v16i8:$vs0)),
+  def : Pat<(v256i1 (int_ppc_vsx_assemble_pair v16i8:$vs1, v16i8:$vs0)),
             Concats.VecsToVecPair0>;
   def : Pat<(v4i32 (PPCPairExtractVsx vsrpevenrc:$v, (i64 0))),
             (v4i32 (EXTRACT_SUBREG $v, sub_vsx0))>;
@@ -1659,18 +1659,18 @@ let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops, PrefixInstrs] i
 
 let Predicates = [PairedVectorMemops] in {
   // Intrinsics for Paired Vector Loads.
-  def : Pat<(v256i1 (int_ppc_mma_lxvp iaddrX16:$src)), (LXVP memrix16:$src)>;
-  def : Pat<(v256i1 (int_ppc_mma_lxvp xaddrX16:$src)), (LXVPX xaddrX16:$src)>;
+  def : Pat<(v256i1 (int_ppc_vsx_lxvp iaddrX16:$src)), (LXVP memrix16:$src)>;
+  def : Pat<(v256i1 (int_ppc_vsx_lxvp xaddrX16:$src)), (LXVPX xaddrX16:$src)>;
   let Predicates = [PairedVectorMemops, PrefixInstrs] in {
-    def : Pat<(v256i1 (int_ppc_mma_lxvp iaddrX34:$src)), (PLXVP memri34:$src)>;
+    def : Pat<(v256i1 (int_ppc_vsx_lxvp iaddrX34:$src)), (PLXVP memri34:$src)>;
   }
   // Intrinsics for Paired Vector Stores.
-  def : Pat<(int_ppc_mma_stxvp v256i1:$XSp, iaddrX16:$dst),
+  def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, iaddrX16:$dst),
             (STXVP $XSp, memrix16:$dst)>;
-  def : Pat<(int_ppc_mma_stxvp v256i1:$XSp, xaddrX16:$dst),
+  def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, xaddrX16:$dst),
             (STXVPX $XSp, xaddrX16:$dst)>;
   let Predicates = [PairedVectorMemops, PrefixInstrs] in {
-    def : Pat<(int_ppc_mma_stxvp v256i1:$XSp, iaddrX34:$dst),
+    def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, iaddrX34:$dst),
               (PSTXVP $XSp, memri34:$dst)>;
   }
 }

diff --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
index 9c83f0a92482..c24240909797 100644
--- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
+++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
@@ -276,9 +276,9 @@ static Value *GetPointerOperand(Value *MemI) {
     return SMemI->getPointerOperand();
   } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(MemI)) {
     if (IMemI->getIntrinsicID() == Intrinsic::prefetch ||
-        IMemI->getIntrinsicID() == Intrinsic::ppc_mma_lxvp)
+        IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp)
       return IMemI->getArgOperand(0);
-    if (IMemI->getIntrinsicID() == Intrinsic::ppc_mma_stxvp)
+    if (IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp)
       return IMemI->getArgOperand(1);
   }
 
@@ -347,10 +347,10 @@ SmallVector<Bucket, 16> PPCLoopInstrFormPrep::collectCandidates(
         PtrValue = SMemI->getPointerOperand();
       } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(&J)) {
         if (IMemI->getIntrinsicID() == Intrinsic::prefetch ||
-            IMemI->getIntrinsicID() == Intrinsic::ppc_mma_lxvp) {
+            IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) {
           MemI = IMemI;
           PtrValue = IMemI->getArgOperand(0);
-        } else if (IMemI->getIntrinsicID() == Intrinsic::ppc_mma_stxvp) {
+        } else if (IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp) {
           MemI = IMemI;
           PtrValue = IMemI->getArgOperand(1);
         } else continue;
@@ -834,8 +834,8 @@ bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
       return false;
     // There are no update forms for P10 lxvp/stxvp intrinsic.
     auto *II = dyn_cast<IntrinsicInst>(I);
-    if (II && ((II->getIntrinsicID() == Intrinsic::ppc_mma_lxvp) ||
-               II->getIntrinsicID() == Intrinsic::ppc_mma_stxvp))
+    if (II && ((II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) ||
+               II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp))
       return false;
     // See getPreIndexedAddressParts, the displacement for LDU/STDU has to
     // be 4's multiple (DS-form). For i64 loads/stores when the displacement
@@ -877,8 +877,8 @@ bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
     // Check if it is a P10 lxvp/stxvp intrinsic.
     auto *II = dyn_cast<IntrinsicInst>(I);
     if (II)
-      return II->getIntrinsicID() == Intrinsic::ppc_mma_lxvp ||
-             II->getIntrinsicID() == Intrinsic::ppc_mma_stxvp;
+      return II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp ||
+             II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp;
     // Check if it is a P9 vector load/store.
     return ST && ST->hasP9Vector() &&
            (PtrValue->getType()->getPointerElementType()->isVectorTy());

diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index be4f3354ede4..011056c21b13 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1224,7 +1224,7 @@ bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
   case Intrinsic::ppc_vsx_lxvw4x_be:
   case Intrinsic::ppc_vsx_lxvl:
   case Intrinsic::ppc_vsx_lxvll:
-  case Intrinsic::ppc_mma_lxvp: {
+  case Intrinsic::ppc_vsx_lxvp: {
     Info.PtrVal = Inst->getArgOperand(0);
     Info.ReadMem = true;
     Info.WriteMem = false;
@@ -1241,7 +1241,7 @@ bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
   case Intrinsic::ppc_vsx_stxvw4x_be:
   case Intrinsic::ppc_vsx_stxvl:
   case Intrinsic::ppc_vsx_stxvll:
-  case Intrinsic::ppc_mma_stxvp: {
+  case Intrinsic::ppc_vsx_stxvp: {
     Info.PtrVal = Inst->getArgOperand(1);
     Info.ReadMem = false;
     Info.WriteMem = true;

diff --git a/llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll b/llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll
index a9041d8d9782..7365ded9922a 100644
--- a/llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll
+++ b/llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll
@@ -8,8 +8,8 @@
 ; This test checks that LSR properly recognizes lxvp/stxvp as load/store
 ; intrinsics to avoid generating x-form instructions instead of d-forms.
 
-declare <256 x i1> @llvm.ppc.mma.lxvp(i8*)
-declare void @llvm.ppc.mma.stxvp(<256 x i1>, i8*)
+declare <256 x i1> @llvm.ppc.vsx.lxvp(i8*)
+declare void @llvm.ppc.vsx.stxvp(<256 x i1>, i8*)
 define void @foo(i32 zeroext %n, <256 x i1>* %ptr, <256 x i1>* %ptr2) {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0: # %entry
@@ -78,24 +78,24 @@ for.cond.cleanup:
 for.body:
   %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
   %2 = getelementptr i8, i8* %0, i64 %indvars.iv
-  %3 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %2)
+  %3 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %2)
   %add2 = add nuw nsw i64 %indvars.iv, 32
   %4 = getelementptr i8, i8* %0, i64 %add2
-  %5 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %4)
+  %5 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %4)
   %add4 = add nuw nsw i64 %indvars.iv, 64
   %6 = getelementptr i8, i8* %0, i64 %add4
-  %7 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %6)
+  %7 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %6)
   %add6 = add nuw nsw i64 %indvars.iv, 96
   %8 = getelementptr i8, i8* %0, i64 %add6
-  %9 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %8)
+  %9 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %8)
   %10 = getelementptr i8, i8* %1, i64 %indvars.iv
-  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %3, i8* %10)
+  tail call void @llvm.ppc.vsx.stxvp(<256 x i1> %3, i8* %10)
   %11 = getelementptr i8, i8* %1, i64 %add2
-  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %5, i8* %11)
+  tail call void @llvm.ppc.vsx.stxvp(<256 x i1> %5, i8* %11)
   %12 = getelementptr i8, i8* %1, i64 %add4
-  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %7, i8* %12)
+  tail call void @llvm.ppc.vsx.stxvp(<256 x i1> %7, i8* %12)
   %13 = getelementptr i8, i8* %1, i64 %add6
-  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %9, i8* %13)
+  tail call void @llvm.ppc.vsx.stxvp(<256 x i1> %9, i8* %13)
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body

diff --git a/llvm/test/CodeGen/PowerPC/loop-p10-pair-prepare.ll b/llvm/test/CodeGen/PowerPC/loop-p10-pair-prepare.ll
index 816a28a61241..723df50f9f32 100644
--- a/llvm/test/CodeGen/PowerPC/loop-p10-pair-prepare.ll
+++ b/llvm/test/CodeGen/PowerPC/loop-p10-pair-prepare.ll
@@ -81,13 +81,13 @@ _loop_1_do_:                                      ; preds = %_loop_1_do_.lr.ph,
   %x_ix_dim_0_6 = getelementptr %_elem_type_of_x, %_elem_type_of_x* %x_rvo_based_addr_5, i64 %i.08
   %x_ix_dim_0_ = bitcast %_elem_type_of_x* %x_ix_dim_0_6 to i8*
   %0 = getelementptr i8, i8* %x_ix_dim_0_, i64 1
-  %1 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %0)
-  %2 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %1)
+  %1 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %0)
+  %2 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %1)
   %.fca.0.extract1 = extractvalue { <16 x i8>, <16 x i8> } %2, 0
   %.fca.1.extract2 = extractvalue { <16 x i8>, <16 x i8> } %2, 1
   %3 = getelementptr i8, i8* %x_ix_dim_0_, i64 33
-  %4 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %3)
-  %5 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %4)
+  %4 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %3)
+  %5 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %4)
   %.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %5, 0
   %.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %5, 1
   %6 = bitcast <16 x i8> %.fca.0.extract1 to <2 x double>
@@ -110,5 +110,5 @@ _return_bb:                                       ; preds = %_loop_1_loopHeader_
   ret void
 }
 
-declare <256 x i1> @llvm.ppc.mma.lxvp(i8*)
-declare { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1>)
+declare <256 x i1> @llvm.ppc.vsx.lxvp(i8*)
+declare { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1>)

diff --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
index cbc7bd9cd0d7..fdc0257e4c5a 100644
--- a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
@@ -40,28 +40,6 @@ entry:
   ret void
 }
 
-; assemble_pair
-declare <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8>, <16 x i8>)
-define void @ass_pair(<256 x i1>* %ptr, <16 x i8> %vc) {
-; CHECK-LABEL: ass_pair:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmr v3, v2
-; CHECK-NEXT:    stxv v2, 16(r3)
-; CHECK-NEXT:    stxv v3, 0(r3)
-; CHECK-NEXT:    blr
-;
-; CHECK-BE-LABEL: ass_pair:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    vmr v3, v2
-; CHECK-BE-NEXT:    stxv v2, 16(r3)
-; CHECK-BE-NEXT:    stxv v2, 0(r3)
-; CHECK-BE-NEXT:    blr
-entry:
-  %0 = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> %vc, <16 x i8> %vc)
-  store <256 x i1> %0, <256 x i1>* %ptr, align 32
-  ret void
-}
-
 ; xxmtacc
 declare <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1>)
 define void @int_xxmtacc(<512 x i1>* %ptr, <16 x i8> %vc) {
@@ -202,51 +180,23 @@ entry:
   ret void
 }
 
-; disassemble_pair
-declare { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1>)
-define void @disass_pair(<256 x i1>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3) {
-; CHECK-LABEL: disass_pair:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxv vs1, 0(r3)
-; CHECK-NEXT:    lxv vs0, 16(r3)
-; CHECK-NEXT:    stxv vs1, 0(r4)
-; CHECK-NEXT:    stxv vs0, 0(r5)
-; CHECK-NEXT:    blr
-;
-; CHECK-BE-LABEL: disass_pair:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs1, 16(r3)
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    stxv vs0, 0(r4)
-; CHECK-BE-NEXT:    stxv vs1, 0(r5)
-; CHECK-BE-NEXT:    blr
-entry:
-  %0 = load <256 x i1>, <256 x i1>* %ptr1, align 32
-  %1 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %0)
-  %2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0
-  %3 = extractvalue { <16 x i8>, <16 x i8> } %1, 1
-  store <16 x i8> %2, <16 x i8>* %ptr2, align 16
-  store <16 x i8> %3, <16 x i8>* %ptr3, align 16
-  ret void
-}
-
 declare <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1>, <16 x i8>, <16 x i8>)
 define void @testBranch(<512 x i1>* %ptr, <16 x i8> %vc, i32 %val) {
 ; CHECK-LABEL: testBranch:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    cmplwi r7, 0
-; CHECK-NEXT:    beq cr0, .LBB7_2
+; CHECK-NEXT:    beq cr0, .LBB5_2
 ; CHECK-NEXT:  # %bb.1: # %if.then
 ; CHECK-NEXT:    xxsetaccz acc0
-; CHECK-NEXT:    b .LBB7_3
-; CHECK-NEXT:  .LBB7_2: # %if.else
+; CHECK-NEXT:    b .LBB5_3
+; CHECK-NEXT:  .LBB5_2: # %if.else
 ; CHECK-NEXT:    lxv vs1, 32(r3)
 ; CHECK-NEXT:    lxv vs0, 48(r3)
 ; CHECK-NEXT:    lxv vs3, 0(r3)
 ; CHECK-NEXT:    lxv vs2, 16(r3)
 ; CHECK-NEXT:    xxmtacc acc0
 ; CHECK-NEXT:    xvi4ger8pp acc0, v2, v2
-; CHECK-NEXT:  .LBB7_3: # %if.end
+; CHECK-NEXT:  .LBB5_3: # %if.end
 ; CHECK-NEXT:    xxmfacc acc0
 ; CHECK-NEXT:    stxv vs0, 48(r3)
 ; CHECK-NEXT:    stxv vs1, 32(r3)
@@ -257,18 +207,18 @@ define void @testBranch(<512 x i1>* %ptr, <16 x i8> %vc, i32 %val) {
 ; CHECK-BE-LABEL: testBranch:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    cmplwi r7, 0
-; CHECK-BE-NEXT:    beq cr0, .LBB7_2
+; CHECK-BE-NEXT:    beq cr0, .LBB5_2
 ; CHECK-BE-NEXT:  # %bb.1: # %if.then
 ; CHECK-BE-NEXT:    xxsetaccz acc0
-; CHECK-BE-NEXT:    b .LBB7_3
-; CHECK-BE-NEXT:  .LBB7_2: # %if.else
+; CHECK-BE-NEXT:    b .LBB5_3
+; CHECK-BE-NEXT:  .LBB5_2: # %if.else
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs3, 48(r3)
 ; CHECK-BE-NEXT:    lxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    xxmtacc acc0
 ; CHECK-BE-NEXT:    xvi4ger8pp acc0, v2, v2
-; CHECK-BE-NEXT:  .LBB7_3: # %if.end
+; CHECK-BE-NEXT:  .LBB5_3: # %if.end
 ; CHECK-BE-NEXT:    xxmfacc acc0
 ; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    stxv vs0, 0(r3)
@@ -447,7 +397,7 @@ define void @testcse4(<512 x i1>* %res, i32 %lim, <16 x i8>* %vc) {
 ; CHECK-NEXT:    mtctr r4
 ; CHECK-NEXT:    li r4, 0
 ; CHECK-NEXT:    .p2align 4
-; CHECK-NEXT:  .LBB11_2: # %for.body
+; CHECK-NEXT:  .LBB9_2: # %for.body
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    rldic r7, r6, 4, 28
 ; CHECK-NEXT:    addi r6, r6, 6
@@ -482,7 +432,7 @@ define void @testcse4(<512 x i1>* %res, i32 %lim, <16 x i8>* %vc) {
 ; CHECK-NEXT:    stxv vs1, 160(r7)
 ; CHECK-NEXT:    stxv vs2, 144(r7)
 ; CHECK-NEXT:    stxv vs3, 128(r7)
-; CHECK-NEXT:    bdnz .LBB11_2
+; CHECK-NEXT:    bdnz .LBB9_2
 ; CHECK-NEXT:  # %bb.3: # %for.cond.cleanup
 ; CHECK-NEXT:    blr
 ;
@@ -496,7 +446,7 @@ define void @testcse4(<512 x i1>* %res, i32 %lim, <16 x i8>* %vc) {
 ; CHECK-BE-NEXT:    mtctr r4
 ; CHECK-BE-NEXT:    li r4, 0
 ; CHECK-BE-NEXT:    .p2align 4
-; CHECK-BE-NEXT:  .LBB11_2: # %for.body
+; CHECK-BE-NEXT:  .LBB9_2: # %for.body
 ; CHECK-BE-NEXT:    #
 ; CHECK-BE-NEXT:    rldic r7, r6, 4, 28
 ; CHECK-BE-NEXT:    addi r6, r6, 6
@@ -531,7 +481,7 @@ define void @testcse4(<512 x i1>* %res, i32 %lim, <16 x i8>* %vc) {
 ; CHECK-BE-NEXT:    stxv vs0, 128(r7)
 ; CHECK-BE-NEXT:    stxv vs3, 176(r7)
 ; CHECK-BE-NEXT:    stxv vs2, 160(r7)
-; CHECK-BE-NEXT:    bdnz .LBB11_2
+; CHECK-BE-NEXT:    bdnz .LBB9_2
 ; CHECK-BE-NEXT:  # %bb.3: # %for.cond.cleanup
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -674,189 +624,12 @@ entry:
   ret void
 }
 
-declare <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32)
-declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>)
-
-; Function Attrs: nounwind
-define void @test_ldst_1(<256 x i1>* %vpp, <256 x i1>* %vp2) {
-; CHECK-LABEL: test_ldst_1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxvp vsp0, 0(r3)
-; CHECK-NEXT:    stxvp vsp0, 0(r4)
-; CHECK-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_ldst_1:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxvp vsp0, 0(r3)
-; CHECK-BE-NEXT:    stxvp vsp0, 0(r4)
-; CHECK-BE-NEXT:    blr
-entry:
-  %0 = bitcast <256 x i1>* %vpp to i8*
-  %1 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %0)
-  %2 = bitcast <256 x i1>* %vp2 to i8*
-  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %1, i8* %2)
-  ret void
-}
-
-; Function Attrs: argmemonly nounwind readonly
-declare <256 x i1> @llvm.ppc.mma.lxvp(i8*)
-
-; Function Attrs: argmemonly nounwind writeonly
-declare void @llvm.ppc.mma.stxvp(<256 x i1>, i8*)
-
-; Function Attrs: nounwind
-define void @test_ldst_2(<256 x i1>* %vpp, i64 %offset, <256 x i1>* %vp2)  {
-; CHECK-LABEL: test_ldst_2:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxvpx vsp0, r3, r4
-; CHECK-NEXT:    stxvpx vsp0, r5, r4
-; CHECK-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_ldst_2:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxvpx vsp0, r3, r4
-; CHECK-BE-NEXT:    stxvpx vsp0, r5, r4
-; CHECK-BE-NEXT:    blr
-entry:
-  %0 = bitcast <256 x i1>* %vpp to i8*
-  %1 = getelementptr i8, i8* %0, i64 %offset
-  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
-  %3 = bitcast <256 x i1>* %vp2 to i8*
-  %4 = getelementptr i8, i8* %3, i64 %offset
-  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
-  ret void
-}
-
-; Function Attrs: nounwind
-define void @test_ldst_3(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
-; CHECK-LABEL: test_ldst_3:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    li r5, 18
-; CHECK-NEXT:    lxvpx vsp0, r3, r5
-; CHECK-NEXT:    stxvpx vsp0, r4, r5
-; CHECK-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_ldst_3:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    li r5, 18
-; CHECK-BE-NEXT:    lxvpx vsp0, r3, r5
-; CHECK-BE-NEXT:    stxvpx vsp0, r4, r5
-; CHECK-BE-NEXT:    blr
-entry:
-  %0 = bitcast <256 x i1>* %vpp to i8*
-  %1 = getelementptr i8, i8* %0, i64 18
-  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
-  %3 = bitcast <256 x i1>* %vp2 to i8*
-  %4 = getelementptr i8, i8* %3, i64 18
-  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
-  ret void
-}
-
-; Function Attrs: nounwind
-define void @test_ldst_4(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
-; CHECK-LABEL: test_ldst_4:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    li r5, 1
-; CHECK-NEXT:    lxvpx vsp0, r3, r5
-; CHECK-NEXT:    stxvpx vsp0, r4, r5
-; CHECK-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_ldst_4:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    li r5, 1
-; CHECK-BE-NEXT:    lxvpx vsp0, r3, r5
-; CHECK-BE-NEXT:    stxvpx vsp0, r4, r5
-; CHECK-BE-NEXT:    blr
-entry:
-  %0 = bitcast <256 x i1>* %vpp to i8*
-  %1 = getelementptr i8, i8* %0, i64 1
-  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
-  %3 = bitcast <256 x i1>* %vp2 to i8*
-  %4 = getelementptr i8, i8* %3, i64 1
-  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
-  ret void
-}
-
-; Function Attrs: nounwind
-define void @test_ldst_5(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
-; CHECK-LABEL: test_ldst_5:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    li r5, 42
-; CHECK-NEXT:    lxvpx vsp0, r3, r5
-; CHECK-NEXT:    stxvpx vsp0, r4, r5
-; CHECK-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_ldst_5:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    li r5, 42
-; CHECK-BE-NEXT:    lxvpx vsp0, r3, r5
-; CHECK-BE-NEXT:    stxvpx vsp0, r4, r5
-; CHECK-BE-NEXT:    blr
-entry:
-  %0 = bitcast <256 x i1>* %vpp to i8*
-  %1 = getelementptr i8, i8* %0, i64 42
-  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
-  %3 = bitcast <256 x i1>* %vp2 to i8*
-  %4 = getelementptr i8, i8* %3, i64 42
-  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
-  ret void
-}
-
-; Function Attrs: nounwind
-define void @test_ldst_6(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
-; CHECK-LABEL: test_ldst_6:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxvp vsp0, 4096(r3)
-; CHECK-NEXT:    stxvp vsp0, 4096(r4)
-; CHECK-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_ldst_6:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxvp vsp0, 4096(r3)
-; CHECK-BE-NEXT:    stxvp vsp0, 4096(r4)
-; CHECK-BE-NEXT:    blr
-entry:
-  %0 = getelementptr <256 x i1>, <256 x i1>* %vpp, i64 128
-  %1 = bitcast <256 x i1>* %0 to i8*
-  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
-  %3 = getelementptr <256 x i1>, <256 x i1>* %vp2, i64 128
-  %4 = bitcast <256 x i1>* %3 to i8*
-  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
-  ret void
-}
-
-; Function Attrs: nounwind
-define void @test_ldst_7(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
-; FIXME: A prefixed load (plxvp) is expected here as the offset in this
-; test case is a constant that fits within 34-bits.
-; CHECK-LABEL: test_ldst_7:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    li r5, 0
-; CHECK-NEXT:    ori r5, r5, 32799
-; CHECK-NEXT:    lxvpx vsp0, r3, r5
-; CHECK-NEXT:    stxvpx vsp0, r4, r5
-; CHECK-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_ldst_7:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    li r5, 0
-; CHECK-BE-NEXT:    ori r5, r5, 32799
-; CHECK-BE-NEXT:    lxvpx vsp0, r3, r5
-; CHECK-BE-NEXT:    stxvpx vsp0, r4, r5
-; CHECK-BE-NEXT:    blr
-entry:
-  %0 = bitcast <256 x i1>* %vpp to i8*
-  %1 = getelementptr i8, i8* %0, i64 32799
-  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
-  %3 = bitcast <256 x i1>* %vp2 to i8*
-  %4 = getelementptr i8, i8* %3, i64 32799
-  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
-  ret void
-}
+declare <256 x i1> @llvm.ppc.vsx.lxvp(i8*)
+declare void @llvm.ppc.vsx.stxvp(<256 x i1>, i8*)
 
 ; Function Attrs: nofree nounwind
-define void @test_ldst_8(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp)  {
-; CHECK-LABEL: test_ldst_8:
+define void @test_ldst_1(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp)  {
+; CHECK-LABEL: test_ldst_1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lxv vs1, 32(r3)
 ; CHECK-NEXT:    lxv vs0, 48(r3)
@@ -873,7 +646,7 @@ define void @test_ldst_8(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8
 ; CHECK-NEXT:    stxv vs3, 0(r7)
 ; CHECK-NEXT:    blr
 ;
-; CHECK-BE-LABEL: test_ldst_8:
+; CHECK-BE-LABEL: test_ldst_1:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
@@ -894,7 +667,7 @@ entry:
   %1 = load <512 x i1>, <512 x i1>* %0, align 64
   %2 = bitcast <256 x i1>* %vpp to i8*
   %3 = getelementptr i8, i8* %2, i64 8
-  %4 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %3)
+  %4 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %3)
   %5 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %1, <256 x i1> %4, <16 x i8> %vc, i32 0, i32 0)
   %6 = bitcast i8* %resp to <512 x i1>*
   store <512 x i1> %5, <512 x i1>* %6, align 64
@@ -902,8 +675,8 @@ entry:
 }
 
 ; Function Attrs: nofree nounwind
-define void @test_ldst_9(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp)  {
-; CHECK-LABEL: test_ldst_9:
+define void @test_ldst_2(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp)  {
+; CHECK-LABEL: test_ldst_2:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lxv vs1, 32(r3)
 ; CHECK-NEXT:    lxv vs0, 48(r3)
@@ -919,7 +692,7 @@ define void @test_ldst_9(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8
 ; CHECK-NEXT:    stxv vs3, 0(r7)
 ; CHECK-NEXT:    blr
 ;
-; CHECK-BE-LABEL: test_ldst_9:
+; CHECK-BE-LABEL: test_ldst_2:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
@@ -938,7 +711,7 @@ entry:
   %0 = bitcast i8* %vqp to <512 x i1>*
   %1 = load <512 x i1>, <512 x i1>* %0, align 64
   %2 = bitcast <256 x i1>* %vpp to i8*
-  %3 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %2)
+  %3 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %2)
   %4 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc)
   %5 = bitcast i8* %resp to <512 x i1>*
   store <512 x i1> %4, <512 x i1>* %5, align 64
@@ -946,8 +719,8 @@ entry:
 }
 
 ; Function Attrs: nofree nounwind
-define void @test_ldst_10(i8* nocapture readonly %vqp, i64 %offs, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp)  {
-; CHECK-LABEL: test_ldst_10:
+define void @test_ldst_3(i8* nocapture readonly %vqp, i64 %offs, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp)  {
+; CHECK-LABEL: test_ldst_3:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lxv vs1, 32(r3)
 ; CHECK-NEXT:    lxv vs0, 48(r3)
@@ -963,7 +736,7 @@ define void @test_ldst_10(i8* nocapture readonly %vqp, i64 %offs, <256 x i1>* %v
 ; CHECK-NEXT:    stxv vs3, 0(r9)
 ; CHECK-NEXT:    blr
 ;
-; CHECK-BE-LABEL: test_ldst_10:
+; CHECK-BE-LABEL: test_ldst_3:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
@@ -982,9 +755,12 @@ entry:
   %0 = bitcast i8* %vqp to <512 x i1>*
   %1 = load <512 x i1>, <512 x i1>* %0, align 64
   %2 = bitcast <256 x i1>* %vpp to i8*
-  %3 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %2)
+  %3 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %2)
   %4 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc)
   %5 = bitcast i8* %resp to <512 x i1>*
   store <512 x i1> %4, <512 x i1>* %5, align 64
   ret void
 }
+
+declare <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32)
+declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>)

diff --git a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll
index cbcc494b7153..9d245c768839 100644
--- a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll
@@ -7,7 +7,7 @@
 ; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
 
 declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
-declare <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8>, <16 x i8>)
+declare <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8>, <16 x i8>)
 define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i8> %vc4, i8* %ptr) {
 ; CHECK-LABEL: intrinsics1:
 ; CHECK:       # %bb.0:
@@ -62,7 +62,7 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
   %2 = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> %1, <16 x i8> %vc1, <16 x i8> %vc2)
   %3 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %2, <16 x i8> %vc1, <16 x i8> %vc3)
   %4 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> %3, <16 x i8> %vc2, <16 x i8> %vc4, i32 0, i32 0)
-  %5 = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> %vc4, <16 x i8> %vc1)
+  %5 = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %vc4, <16 x i8> %vc1)
   %6 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> %4, <256 x i1> %5, <16 x i8> %vc1, i32 0, i32 0)
   %7 = bitcast i8* %ptr to <512 x i1>*
   store <512 x i1> %6, <512 x i1>* %7, align 64
@@ -126,7 +126,7 @@ define void @intrinsics2(<16 x i8>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3, <
   %2 = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> %1, <16 x i8> %vc1, <16 x i8> %vc2)
   %3 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> %2, <16 x i8> %vc1, <16 x i8> %vc3)
   %4 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> %3, <16 x i8> %vc2, <16 x i8> %vc4, i32 0, i32 0)
-  %5 = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> %vc4, <16 x i8> %vc1)
+  %5 = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %vc4, <16 x i8> %vc1)
   %6 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %4, <256 x i1> %5, <16 x i8> %vc1, i32 0, i32 0)
   %7 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %6)
   %8 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %7, 0

diff --git a/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll b/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll
index 45a0c68df520..d875fe4b7c4a 100644
--- a/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll
@@ -6,7 +6,7 @@
 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
 ; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
 
-declare <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8>, <16 x i8>)
+declare <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8>, <16 x i8>)
 declare <512 x i1> @llvm.ppc.mma.xxsetaccz()
 declare <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1>, <256 x i1>, <16 x i8>)
 declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>)
@@ -68,7 +68,7 @@ entry:
   %0 = load <16 x i8>, <16 x i8>* %Src, align 16
   %arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %Src, i64 1
   %1 = load <16 x i8>, <16 x i8>* %arrayidx1, align 16
-  %2 = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> %0, <16 x i8> %1)
+  %2 = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %0, <16 x i8> %1)
   %3 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
   %cmp11 = icmp sgt i32 %Len, 2
   br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup
@@ -165,7 +165,7 @@ entry:
   %0 = load <16 x i8>, <16 x i8>* %Src, align 16
   %arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %Src, i64 1
   %1 = load <16 x i8>, <16 x i8>* %arrayidx1, align 16
-  %2 = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> %0, <16 x i8> %1)
+  %2 = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %0, <16 x i8> %1)
   %arrayidx2 = getelementptr inbounds <16 x i8>, <16 x i8>* %Src, i64 2
   %3 = load <16 x i8>, <16 x i8>* %arrayidx2, align 16
   %4 = tail call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> %2, <16 x i8> %3)

diff --git a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
index 93e1eb542989..dd4212569c13 100644
--- a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
+++ b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
@@ -469,38 +469,38 @@ _loop_2_do_:                                      ; preds = %_loop_2_do_.lr.ph,
   %_ix_x_len = shl nuw nsw i64 %indvars.iv, 3
   %x_ix_dim_0_113 = getelementptr inbounds %_elem_type_of_x, %_elem_type_of_x* %x_rvo_based_addr_112, i64 %indvars.iv
   %x_ix_dim_0_ = bitcast %_elem_type_of_x* %x_ix_dim_0_113 to i8*
-  %55 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* nonnull %x_ix_dim_0_)
+  %55 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %x_ix_dim_0_)
   %a_ix_dim_1_ = getelementptr inbounds i8, i8* %a_ix_dim_0_, i64 %_ix_x_len
-  %56 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* nonnull %a_ix_dim_1_)
+  %56 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_)
   %a_ix_dim_1_29 = getelementptr inbounds i8, i8* %a_ix_dim_0_25, i64 %_ix_x_len
-  %57 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* nonnull %a_ix_dim_1_29)
+  %57 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_29)
   %a_ix_dim_1_45 = getelementptr inbounds i8, i8* %a_ix_dim_0_41, i64 %_ix_x_len
-  %58 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* nonnull %a_ix_dim_1_45)
+  %58 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_45)
   %a_ix_dim_1_61 = getelementptr inbounds i8, i8* %a_ix_dim_0_57, i64 %_ix_x_len
-  %59 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* nonnull %a_ix_dim_1_61)
+  %59 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_61)
   %a_ix_dim_1_77 = getelementptr inbounds i8, i8* %a_ix_dim_0_73, i64 %_ix_x_len
-  %60 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* nonnull %a_ix_dim_1_77)
+  %60 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_77)
   %a_ix_dim_1_93 = getelementptr inbounds i8, i8* %a_ix_dim_0_89, i64 %_ix_x_len
-  %61 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* nonnull %a_ix_dim_1_93)
-  %62 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %55)
+  %61 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_93)
+  %62 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %55)
   %.fca.0.extract35 = extractvalue { <16 x i8>, <16 x i8> } %62, 0
   %.fca.1.extract36 = extractvalue { <16 x i8>, <16 x i8> } %62, 1
-  %63 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %56)
+  %63 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %56)
   %.fca.0.extract29 = extractvalue { <16 x i8>, <16 x i8> } %63, 0
   %.fca.1.extract30 = extractvalue { <16 x i8>, <16 x i8> } %63, 1
-  %64 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %57)
+  %64 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %57)
   %.fca.0.extract23 = extractvalue { <16 x i8>, <16 x i8> } %64, 0
   %.fca.1.extract24 = extractvalue { <16 x i8>, <16 x i8> } %64, 1
-  %65 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %58)
+  %65 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %58)
   %.fca.0.extract17 = extractvalue { <16 x i8>, <16 x i8> } %65, 0
   %.fca.1.extract18 = extractvalue { <16 x i8>, <16 x i8> } %65, 1
-  %66 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %59)
+  %66 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %59)
   %.fca.0.extract11 = extractvalue { <16 x i8>, <16 x i8> } %66, 0
   %.fca.1.extract12 = extractvalue { <16 x i8>, <16 x i8> } %66, 1
-  %67 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %60)
+  %67 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %60)
   %.fca.0.extract5 = extractvalue { <16 x i8>, <16 x i8> } %67, 0
   %.fca.1.extract6 = extractvalue { <16 x i8>, <16 x i8> } %67, 1
-  %68 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %61)
+  %68 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %61)
   %.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %68, 0
   %.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %68, 1
   %69 = bitcast <16 x i8> %.fca.0.extract29 to <2 x double>
@@ -518,38 +518,38 @@ _loop_2_do_:                                      ; preds = %_loop_2_do_.lr.ph,
   %81 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %80, <2 x double> %70, <2 x double> %49)
   %82 = getelementptr %_elem_type_of_x, %_elem_type_of_x* %x_ix_dim_0_113, i64 4
   %83 = bitcast %_elem_type_of_x* %82 to i8*
-  %84 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %83)
+  %84 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %83)
   %85 = getelementptr i8, i8* %a_ix_dim_1_, i64 32
-  %86 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %85)
+  %86 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %85)
   %87 = getelementptr i8, i8* %a_ix_dim_1_29, i64 32
-  %88 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %87)
+  %88 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %87)
   %89 = getelementptr i8, i8* %a_ix_dim_1_45, i64 32
-  %90 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %89)
+  %90 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %89)
   %91 = getelementptr i8, i8* %a_ix_dim_1_61, i64 32
-  %92 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %91)
+  %92 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %91)
   %93 = getelementptr i8, i8* %a_ix_dim_1_77, i64 32
-  %94 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %93)
+  %94 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %93)
   %95 = getelementptr i8, i8* %a_ix_dim_1_93, i64 32
-  %96 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %95)
-  %97 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %84)
+  %96 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %95)
+  %97 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %84)
   %.fca.0.extract37 = extractvalue { <16 x i8>, <16 x i8> } %97, 0
   %.fca.1.extract39 = extractvalue { <16 x i8>, <16 x i8> } %97, 1
-  %98 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %86)
+  %98 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %86)
   %.fca.0.extract31 = extractvalue { <16 x i8>, <16 x i8> } %98, 0
   %.fca.1.extract33 = extractvalue { <16 x i8>, <16 x i8> } %98, 1
-  %99 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %88)
+  %99 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %88)
   %.fca.0.extract25 = extractvalue { <16 x i8>, <16 x i8> } %99, 0
   %.fca.1.extract27 = extractvalue { <16 x i8>, <16 x i8> } %99, 1
-  %100 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %90)
+  %100 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %90)
   %.fca.0.extract19 = extractvalue { <16 x i8>, <16 x i8> } %100, 0
   %.fca.1.extract21 = extractvalue { <16 x i8>, <16 x i8> } %100, 1
-  %101 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %92)
+  %101 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %92)
   %.fca.0.extract13 = extractvalue { <16 x i8>, <16 x i8> } %101, 0
   %.fca.1.extract15 = extractvalue { <16 x i8>, <16 x i8> } %101, 1
-  %102 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %94)
+  %102 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %94)
   %.fca.0.extract7 = extractvalue { <16 x i8>, <16 x i8> } %102, 0
   %.fca.1.extract9 = extractvalue { <16 x i8>, <16 x i8> } %102, 1
-  %103 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %96)
+  %103 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %96)
   %.fca.0.extract1 = extractvalue { <16 x i8>, <16 x i8> } %103, 0
   %.fca.1.extract3 = extractvalue { <16 x i8>, <16 x i8> } %103, 1
   %104 = bitcast <16 x i8> %.fca.1.extract30 to <2 x double>
@@ -631,7 +631,7 @@ _return_bb:                                       ; preds = %_loop_1_do_.lr.ph,
   ret void
 }
 
-declare <256 x i1> @llvm.ppc.mma.lxvp(i8*)
-declare { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1>)
+declare <256 x i1> @llvm.ppc.vsx.lxvp(i8*)
+declare { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1>)
 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
 

diff --git a/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics-without-mma.ll b/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics-without-mma.ll
deleted file mode 100644
index f09f8ac780e0..000000000000
--- a/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics-without-mma.ll
+++ /dev/null
@@ -1,59 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O3 \
-; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -mattr=-mma \
-; RUN:   < %s | FileCheck %s
-
-; This test is to check that the paired vector intrinsics are available even
-; when MMA is disabled.
-
-define <16 x i8> @test1(<256 x i1>* %ptr) {
-; CHECK-LABEL: test1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxv v3, 0(r3)
-; CHECK-NEXT:    lxv v2, 16(r3)
-; CHECK-NEXT:    vaddubm v2, v3, v2
-; CHECK-NEXT:    blr
-entry:
-  %0 = load <256 x i1>, <256 x i1>* %ptr, align 32
-  %1 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %0)
-  %2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0
-  %3 = extractvalue { <16 x i8>, <16 x i8> } %1, 1
-  %add = add <16 x i8> %2, %3
-  ret <16 x i8> %add
-}
-
-declare { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1>)
-
-define void @test2(<16 x i8> %v1, <16 x i8> %v2, <256 x i1>* %ptr) {
-; CHECK-LABEL: test2:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmr v4, v3
-; CHECK-NEXT:    vmr v5, v2
-; CHECK-NEXT:    stxv v4, 16(r7)
-; CHECK-NEXT:    stxv v5, 0(r7)
-; CHECK-NEXT:    blr
-entry:
-  %0 = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> %v2, <16 x i8> %v1)
-  store <256 x i1> %0, <256 x i1>* %ptr, align 32
-  ret void
-}
-
-declare <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8>, <16 x i8>)
-
-define void @test3(<256 x i1>* %ptr) {
-; CHECK-LABEL: test3:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxvp vsp0, 0(r3)
-; CHECK-NEXT:    stxvp vsp0, 32(r3)
-; CHECK-NEXT:    blr
-entry:
-  %0 = bitcast <256 x i1>* %ptr to i8*
-  %1 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %0)
-  %add.ptr1 = getelementptr inbounds <256 x i1>, <256 x i1>* %ptr, i64 1
-  %2 = bitcast <256 x i1>* %add.ptr1 to i8*
-  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %1, i8* %2)
-  ret void
-}
-
-declare <256 x i1> @llvm.ppc.mma.lxvp(i8*)
-declare void @llvm.ppc.mma.stxvp(<256 x i1>, i8*)

diff --git a/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll b/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll
new file mode 100644
index 000000000000..8ed63654306c
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll
@@ -0,0 +1,357 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O3 \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:   < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O3 \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -mattr=-mma \
+; RUN:   < %s | FileCheck %s --check-prefix=CHECK-NOMMA
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O3 \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:   < %s | FileCheck %s --check-prefix=CHECK-BE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O3 \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -mattr=-mma \
+; RUN:   < %s | FileCheck %s --check-prefix=CHECK-BE-NOMMA
+
+; This test also checks that the paired vector intrinsics are available even
+; when MMA is disabled.
+
+; assemble_pair
+declare <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8>, <16 x i8>)
+define void @ass_pair(<256 x i1>* %ptr, <16 x i8> %vc) {
+; CHECK-LABEL: ass_pair:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmr v3, v2
+; CHECK-NEXT:    stxv v2, 16(r3)
+; CHECK-NEXT:    stxv v3, 0(r3)
+; CHECK-NEXT:    blr
+;
+; CHECK-NOMMA-LABEL: ass_pair:
+; CHECK-NOMMA:       # %bb.0: # %entry
+; CHECK-NOMMA-NEXT:    vmr v3, v2
+; CHECK-NOMMA-NEXT:    stxv v2, 16(r3)
+; CHECK-NOMMA-NEXT:    stxv v3, 0(r3)
+; CHECK-NOMMA-NEXT:    blr
+;
+; CHECK-BE-LABEL: ass_pair:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vmr v3, v2
+; CHECK-BE-NEXT:    stxv v2, 16(r3)
+; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-BE-NOMMA-LABEL: ass_pair:
+; CHECK-BE-NOMMA:       # %bb.0: # %entry
+; CHECK-BE-NOMMA-NEXT:    vmr v3, v2
+; CHECK-BE-NOMMA-NEXT:    stxv v2, 16(r3)
+; CHECK-BE-NOMMA-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NOMMA-NEXT:    blr
+entry:
+  %0 = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %vc, <16 x i8> %vc)
+  store <256 x i1> %0, <256 x i1>* %ptr, align 32
+  ret void
+}
+
+; disassemble_pair
+declare { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1>)
+define void @disass_pair(<256 x i1>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3) {
+; CHECK-LABEL: disass_pair:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxv vs1, 0(r3)
+; CHECK-NEXT:    lxv vs0, 16(r3)
+; CHECK-NEXT:    stxv vs1, 0(r4)
+; CHECK-NEXT:    stxv vs0, 0(r5)
+; CHECK-NEXT:    blr
+;
+; CHECK-NOMMA-LABEL: disass_pair:
+; CHECK-NOMMA:       # %bb.0: # %entry
+; CHECK-NOMMA-NEXT:    lxv vs1, 0(r3)
+; CHECK-NOMMA-NEXT:    lxv vs0, 16(r3)
+; CHECK-NOMMA-NEXT:    stxv vs1, 0(r4)
+; CHECK-NOMMA-NEXT:    stxv vs0, 0(r5)
+; CHECK-NOMMA-NEXT:    blr
+;
+; CHECK-BE-LABEL: disass_pair:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r4)
+; CHECK-BE-NEXT:    stxv vs1, 0(r5)
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-BE-NOMMA-LABEL: disass_pair:
+; CHECK-BE-NOMMA:       # %bb.0: # %entry
+; CHECK-BE-NOMMA-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NOMMA-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NOMMA-NEXT:    stxv vs0, 0(r4)
+; CHECK-BE-NOMMA-NEXT:    stxv vs1, 0(r5)
+; CHECK-BE-NOMMA-NEXT:    blr
+entry:
+  %0 = load <256 x i1>, <256 x i1>* %ptr1, align 32
+  %1 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %0)
+  %2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0
+  %3 = extractvalue { <16 x i8>, <16 x i8> } %1, 1
+  store <16 x i8> %2, <16 x i8>* %ptr2, align 16
+  store <16 x i8> %3, <16 x i8>* %ptr3, align 16
+  ret void
+}
+
+define void @test_ldst_1(<256 x i1>* %vpp, <256 x i1>* %vp2) {
+; CHECK-LABEL: test_ldst_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxvp vsp0, 0(r3)
+; CHECK-NEXT:    stxvp vsp0, 0(r4)
+; CHECK-NEXT:    blr
+;
+; CHECK-NOMMA-LABEL: test_ldst_1:
+; CHECK-NOMMA:       # %bb.0: # %entry
+; CHECK-NOMMA-NEXT:    lxvp vsp0, 0(r3)
+; CHECK-NOMMA-NEXT:    stxvp vsp0, 0(r4)
+; CHECK-NOMMA-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_ldst_1:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxvp vsp0, 0(r3)
+; CHECK-BE-NEXT:    stxvp vsp0, 0(r4)
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-BE-NOMMA-LABEL: test_ldst_1:
+; CHECK-BE-NOMMA:       # %bb.0: # %entry
+; CHECK-BE-NOMMA-NEXT:    lxvp vsp0, 0(r3)
+; CHECK-BE-NOMMA-NEXT:    stxvp vsp0, 0(r4)
+; CHECK-BE-NOMMA-NEXT:    blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %0)
+  %2 = bitcast <256 x i1>* %vp2 to i8*
+  tail call void @llvm.ppc.vsx.stxvp(<256 x i1> %1, i8* %2)
+  ret void
+}
+
+declare <256 x i1> @llvm.ppc.vsx.lxvp(i8*)
+declare void @llvm.ppc.vsx.stxvp(<256 x i1>, i8*)
+
+define void @test_ldst_2(<256 x i1>* %vpp, i64 %offset, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxvpx vsp0, r3, r4
+; CHECK-NEXT:    stxvpx vsp0, r5, r4
+; CHECK-NEXT:    blr
+;
+; CHECK-NOMMA-LABEL: test_ldst_2:
+; CHECK-NOMMA:       # %bb.0: # %entry
+; CHECK-NOMMA-NEXT:    lxvpx vsp0, r3, r4
+; CHECK-NOMMA-NEXT:    stxvpx vsp0, r5, r4
+; CHECK-NOMMA-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_ldst_2:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxvpx vsp0, r3, r4
+; CHECK-BE-NEXT:    stxvpx vsp0, r5, r4
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-BE-NOMMA-LABEL: test_ldst_2:
+; CHECK-BE-NOMMA:       # %bb.0: # %entry
+; CHECK-BE-NOMMA-NEXT:    lxvpx vsp0, r3, r4
+; CHECK-BE-NOMMA-NEXT:    stxvpx vsp0, r5, r4
+; CHECK-BE-NOMMA-NEXT:    blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 %offset
+  %2 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 %offset
+  tail call void @llvm.ppc.vsx.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+define void @test_ldst_3(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li r5, 18
+; CHECK-NEXT:    lxvpx vsp0, r3, r5
+; CHECK-NEXT:    stxvpx vsp0, r4, r5
+; CHECK-NEXT:    blr
+;
+; CHECK-NOMMA-LABEL: test_ldst_3:
+; CHECK-NOMMA:       # %bb.0: # %entry
+; CHECK-NOMMA-NEXT:    li r5, 18
+; CHECK-NOMMA-NEXT:    lxvpx vsp0, r3, r5
+; CHECK-NOMMA-NEXT:    stxvpx vsp0, r4, r5
+; CHECK-NOMMA-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_ldst_3:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    li r5, 18
+; CHECK-BE-NEXT:    lxvpx vsp0, r3, r5
+; CHECK-BE-NEXT:    stxvpx vsp0, r4, r5
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-BE-NOMMA-LABEL: test_ldst_3:
+; CHECK-BE-NOMMA:       # %bb.0: # %entry
+; CHECK-BE-NOMMA-NEXT:    li r5, 18
+; CHECK-BE-NOMMA-NEXT:    lxvpx vsp0, r3, r5
+; CHECK-BE-NOMMA-NEXT:    stxvpx vsp0, r4, r5
+; CHECK-BE-NOMMA-NEXT:    blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 18
+  %2 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 18
+  tail call void @llvm.ppc.vsx.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+define void @test_ldst_4(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li r5, 1
+; CHECK-NEXT:    lxvpx vsp0, r3, r5
+; CHECK-NEXT:    stxvpx vsp0, r4, r5
+; CHECK-NEXT:    blr
+;
+; CHECK-NOMMA-LABEL: test_ldst_4:
+; CHECK-NOMMA:       # %bb.0: # %entry
+; CHECK-NOMMA-NEXT:    li r5, 1
+; CHECK-NOMMA-NEXT:    lxvpx vsp0, r3, r5
+; CHECK-NOMMA-NEXT:    stxvpx vsp0, r4, r5
+; CHECK-NOMMA-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_ldst_4:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    li r5, 1
+; CHECK-BE-NEXT:    lxvpx vsp0, r3, r5
+; CHECK-BE-NEXT:    stxvpx vsp0, r4, r5
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-BE-NOMMA-LABEL: test_ldst_4:
+; CHECK-BE-NOMMA:       # %bb.0: # %entry
+; CHECK-BE-NOMMA-NEXT:    li r5, 1
+; CHECK-BE-NOMMA-NEXT:    lxvpx vsp0, r3, r5
+; CHECK-BE-NOMMA-NEXT:    stxvpx vsp0, r4, r5
+; CHECK-BE-NOMMA-NEXT:    blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 1
+  %2 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 1
+  tail call void @llvm.ppc.vsx.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+define void @test_ldst_5(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_5:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li r5, 42
+; CHECK-NEXT:    lxvpx vsp0, r3, r5
+; CHECK-NEXT:    stxvpx vsp0, r4, r5
+; CHECK-NEXT:    blr
+;
+; CHECK-NOMMA-LABEL: test_ldst_5:
+; CHECK-NOMMA:       # %bb.0: # %entry
+; CHECK-NOMMA-NEXT:    li r5, 42
+; CHECK-NOMMA-NEXT:    lxvpx vsp0, r3, r5
+; CHECK-NOMMA-NEXT:    stxvpx vsp0, r4, r5
+; CHECK-NOMMA-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_ldst_5:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    li r5, 42
+; CHECK-BE-NEXT:    lxvpx vsp0, r3, r5
+; CHECK-BE-NEXT:    stxvpx vsp0, r4, r5
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-BE-NOMMA-LABEL: test_ldst_5:
+; CHECK-BE-NOMMA:       # %bb.0: # %entry
+; CHECK-BE-NOMMA-NEXT:    li r5, 42
+; CHECK-BE-NOMMA-NEXT:    lxvpx vsp0, r3, r5
+; CHECK-BE-NOMMA-NEXT:    stxvpx vsp0, r4, r5
+; CHECK-BE-NOMMA-NEXT:    blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 42
+  %2 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 42
+  tail call void @llvm.ppc.vsx.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+define void @test_ldst_6(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_6:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxvp vsp0, 4096(r3)
+; CHECK-NEXT:    stxvp vsp0, 4096(r4)
+; CHECK-NEXT:    blr
+;
+; CHECK-NOMMA-LABEL: test_ldst_6:
+; CHECK-NOMMA:       # %bb.0: # %entry
+; CHECK-NOMMA-NEXT:    lxvp vsp0, 4096(r3)
+; CHECK-NOMMA-NEXT:    stxvp vsp0, 4096(r4)
+; CHECK-NOMMA-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_ldst_6:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxvp vsp0, 4096(r3)
+; CHECK-BE-NEXT:    stxvp vsp0, 4096(r4)
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-BE-NOMMA-LABEL: test_ldst_6:
+; CHECK-BE-NOMMA:       # %bb.0: # %entry
+; CHECK-BE-NOMMA-NEXT:    lxvp vsp0, 4096(r3)
+; CHECK-BE-NOMMA-NEXT:    stxvp vsp0, 4096(r4)
+; CHECK-BE-NOMMA-NEXT:    blr
+entry:
+  %0 = getelementptr <256 x i1>, <256 x i1>* %vpp, i64 128
+  %1 = bitcast <256 x i1>* %0 to i8*
+  %2 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %1)
+  %3 = getelementptr <256 x i1>, <256 x i1>* %vp2, i64 128
+  %4 = bitcast <256 x i1>* %3 to i8*
+  tail call void @llvm.ppc.vsx.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+define void @test_ldst_7(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; FIXME: A prefixed load (plxvp) is expected here as the offset in this
+; test case is a constant that fits within 34 bits.
+; CHECK-LABEL: test_ldst_7:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li r5, 0
+; CHECK-NEXT:    ori r5, r5, 32799
+; CHECK-NEXT:    lxvpx vsp0, r3, r5
+; CHECK-NEXT:    stxvpx vsp0, r4, r5
+; CHECK-NEXT:    blr
+;
+; CHECK-NOMMA-LABEL: test_ldst_7:
+; CHECK-NOMMA:       # %bb.0: # %entry
+; CHECK-NOMMA-NEXT:    li r5, 0
+; CHECK-NOMMA-NEXT:    ori r5, r5, 32799
+; CHECK-NOMMA-NEXT:    lxvpx vsp0, r3, r5
+; CHECK-NOMMA-NEXT:    stxvpx vsp0, r4, r5
+; CHECK-NOMMA-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_ldst_7:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    li r5, 0
+; CHECK-BE-NEXT:    ori r5, r5, 32799
+; CHECK-BE-NEXT:    lxvpx vsp0, r3, r5
+; CHECK-BE-NEXT:    stxvpx vsp0, r4, r5
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-BE-NOMMA-LABEL: test_ldst_7:
+; CHECK-BE-NOMMA:       # %bb.0: # %entry
+; CHECK-BE-NOMMA-NEXT:    li r5, 0
+; CHECK-BE-NOMMA-NEXT:    ori r5, r5, 32799
+; CHECK-BE-NOMMA-NEXT:    lxvpx vsp0, r3, r5
+; CHECK-BE-NOMMA-NEXT:    stxvpx vsp0, r4, r5
+; CHECK-BE-NOMMA-NEXT:    blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 32799
+  %2 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 32799
+  tail call void @llvm.ppc.vsx.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
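
For readers who want the C-level view of the vsx-prefixed pair intrinsics exercised by the IR tests above (ass_pair, test_ldst_1), the sketch below shows what a source-level counterpart could look like. The builtin names and signatures (__builtin_vsx_assemble_pair, __builtin_vsx_lxvp, __builtin_vsx_stxvp) and the __vector_pair type are assumptions inferred from the renamed IR intrinsics; they are not part of the diff above, and the exact argument orders shown should be treated as unverified.

// Hypothetical C-level use of the renamed vector pair builtins.
// Assumed to compile with something like: clang -mcpu=pwr10 -c pair-sketch.c
#include <altivec.h>

// Assemble two 16-byte vectors into a 32-byte pair and store it,
// mirroring the IR test ass_pair above.
void assemble_and_store(__vector_pair *dst, vector unsigned char v) {
  __vector_pair p;
  __builtin_vsx_assemble_pair(&p, v, v);  /* assumed builtin name/signature */
  *dst = p;
}

// Copy a pair through the paired load/store builtins, mirroring test_ldst_1.
void copy_pair(const __vector_pair *src, __vector_pair *dst) {
  __vector_pair p = __builtin_vsx_lxvp(0, src);  /* assumed: (offset, pointer) */
  __builtin_vsx_stxvp(p, 0, dst);                /* assumed: (pair, offset, pointer) */
}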


        

