[clang] 25bbff6 - [PowerPC] Provide MMA builtins for compatibility
Ahsan Saghir via cfe-commits
cfe-commits at lists.llvm.org
Fri May 7 07:10:29 PDT 2021
Author: Ahsan Saghir
Date: 2021-05-07T09:10:16-05:00
New Revision: 25bbff632d018d178272a61c0732203d53d3a2e3
URL: https://github.com/llvm/llvm-project/commit/25bbff632d018d178272a61c0732203d53d3a2e3
DIFF: https://github.com/llvm/llvm-project/commit/25bbff632d018d178272a61c0732203d53d3a2e3.diff
LOG: [PowerPC] Provide MMA builtins for compatibility
Vector pair intrinsics and builtins were renamed in
https://reviews.llvm.org/D91974 to replace the _mma_ prefix with _vsx_.
However, some projects still use the _mma_-prefixed versions, so this
patch re-adds those builtins as aliases to provide compatibility.
Fixes Bugzilla: https://bugs.llvm.org/show_bug.cgi?id=50159
Reviewed By: nemanjai, amyk
Differential Revision: https://reviews.llvm.org/D100482
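
For illustration, a minimal sketch of what the restored aliases allow
(assuming a POWER10 target, e.g. -mcpu=power10; this example is not part
of the patch): both spellings now type-check and lower to the same
llvm.ppc.vsx.assemble.pair intrinsic.

void assemble_both(__vector_pair *vp, vector unsigned char vc) {
  __builtin_vsx_assemble_pair(vp, vc, vc); // name introduced in D91974
  __builtin_mma_assemble_pair(vp, vc, vc); // legacy alias restored here
}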
Added:
Modified:
clang/include/clang/Basic/BuiltinsPPC.def
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/Sema/SemaChecking.cpp
clang/test/CodeGen/builtins-ppc-pair-mma.c
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index 7b083a9333e2..f47c9b097ae3 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -20,11 +20,15 @@
// MMA builtins that are using their own format documented below.
#if defined(BUILTIN) && !defined(CUSTOM_BUILTIN)
-# define CUSTOM_BUILTIN(ID, TYPES, ACCUMULATE) BUILTIN(__builtin_##ID, "i.", "t")
+# define CUSTOM_BUILTIN(ID, INTR, TYPES, ACCUMULATE) \
+ BUILTIN(__builtin_##ID, "i.", "t")
#elif defined(CUSTOM_BUILTIN) && !defined(BUILTIN)
# define BUILTIN(ID, TYPES, ATTRS)
#endif
+#define UNALIASED_CUSTOM_BUILTIN(ID, TYPES, ACCUMULATE) \
+ CUSTOM_BUILTIN(ID, ID, TYPES, ACCUMULATE)
+
BUILTIN(__builtin_ppc_get_timebase, "ULLi", "n")
// This is just a placeholder, the types and attributes are wrong.
@@ -674,90 +678,103 @@ BUILTIN(__builtin_dcbf, "vvC*", "")
// Because these built-ins rely on target-dependent types and to avoid pervasive
// change, they are type checked manually in Sema using custom type descriptors.
// The first argument of the CUSTOM_BUILTIN macro is the name of the built-in
-// with its prefix, the second argument specifies the type of the function
+// with its prefix, the second argument is the name of the intrinsic this
+// built-in generates, the third argument specifies the type of the function
// (result value, then each argument) as follows:
// i -> Unsigned integer followed by the greatest possible value for that
// argument or 0 if no constraint on the value.
// (e.g. i15 for a 4-bit value)
-// V -> Vector type used with MMA builtins (vector unsigned char)
+// V -> Vector type used with MMA built-ins (vector unsigned char)
// W -> PPC Vector type followed by the size of the vector type.
// (e.g. W512 for __vector_quad)
// any other descriptor -> Fall back to generic type descriptor decoding.
// The 'C' suffix can be used to specify the const type.
// The '*' suffix can be used to specify a pointer to a type.
-// The third argument is set to true if the builtin accumulates its result into
+// The fourth argument is set to true if the built-in accumulates its result into
// its given accumulator.
-CUSTOM_BUILTIN(vsx_lxvp, "W256SLLiW256C*", false)
-CUSTOM_BUILTIN(vsx_stxvp, "vW256SLLiW256C*", false)
-CUSTOM_BUILTIN(vsx_assemble_pair, "vW256*VV", false)
-CUSTOM_BUILTIN(vsx_disassemble_pair, "vv*W256*", false)
-
-CUSTOM_BUILTIN(mma_assemble_acc, "vW512*VVVV", false)
-CUSTOM_BUILTIN(mma_disassemble_acc, "vv*W512*", false)
-CUSTOM_BUILTIN(mma_xxmtacc, "vW512*", true)
-CUSTOM_BUILTIN(mma_xxmfacc, "vW512*", true)
-CUSTOM_BUILTIN(mma_xxsetaccz, "vW512*", false)
-CUSTOM_BUILTIN(mma_xvi4ger8, "vW512*VV", false)
-CUSTOM_BUILTIN(mma_xvi8ger4, "vW512*VV", false)
-CUSTOM_BUILTIN(mma_xvi16ger2, "vW512*VV", false)
-CUSTOM_BUILTIN(mma_xvi16ger2s, "vW512*VV", false)
-CUSTOM_BUILTIN(mma_xvf16ger2, "vW512*VV", false)
-CUSTOM_BUILTIN(mma_xvf32ger, "vW512*VV", false)
-CUSTOM_BUILTIN(mma_xvf64ger, "vW512*W256V", false)
-CUSTOM_BUILTIN(mma_pmxvi4ger8, "vW512*VVi15i15i255", false)
-CUSTOM_BUILTIN(mma_pmxvi8ger4, "vW512*VVi15i15i15", false)
-CUSTOM_BUILTIN(mma_pmxvi16ger2, "vW512*VVi15i15i3", false)
-CUSTOM_BUILTIN(mma_pmxvi16ger2s, "vW512*VVi15i15i3", false)
-CUSTOM_BUILTIN(mma_pmxvf16ger2, "vW512*VVi15i15i3", false)
-CUSTOM_BUILTIN(mma_pmxvf32ger, "vW512*VVi15i15", false)
-CUSTOM_BUILTIN(mma_pmxvf64ger, "vW512*W256Vi15i3", false)
-CUSTOM_BUILTIN(mma_xvi4ger8pp, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_xvi8ger4pp, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_xvi8ger4spp, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_xvi16ger2pp, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_xvi16ger2spp, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_pmxvi4ger8pp, "vW512*VVi15i15i255", true)
-CUSTOM_BUILTIN(mma_pmxvi8ger4pp, "vW512*VVi15i15i15", true)
-CUSTOM_BUILTIN(mma_pmxvi8ger4spp, "vW512*VVi15i15i15", true)
-CUSTOM_BUILTIN(mma_pmxvi16ger2pp, "vW512*VVi15i15i3", true)
-CUSTOM_BUILTIN(mma_pmxvi16ger2spp, "vW512*VVi15i15i3", true)
-CUSTOM_BUILTIN(mma_xvf16ger2pp, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_xvf16ger2pn, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_xvf16ger2np, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_xvf16ger2nn, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_pmxvf16ger2pp, "vW512*VVi15i15i3", true)
-CUSTOM_BUILTIN(mma_pmxvf16ger2pn, "vW512*VVi15i15i3", true)
-CUSTOM_BUILTIN(mma_pmxvf16ger2np, "vW512*VVi15i15i3", true)
-CUSTOM_BUILTIN(mma_pmxvf16ger2nn, "vW512*VVi15i15i3", true)
-CUSTOM_BUILTIN(mma_xvf32gerpp, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_xvf32gerpn, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_xvf32gernp, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_xvf32gernn, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_pmxvf32gerpp, "vW512*VVi15i15", true)
-CUSTOM_BUILTIN(mma_pmxvf32gerpn, "vW512*VVi15i15", true)
-CUSTOM_BUILTIN(mma_pmxvf32gernp, "vW512*VVi15i15", true)
-CUSTOM_BUILTIN(mma_pmxvf32gernn, "vW512*VVi15i15", true)
-CUSTOM_BUILTIN(mma_xvf64gerpp, "vW512*W256V", true)
-CUSTOM_BUILTIN(mma_xvf64gerpn, "vW512*W256V", true)
-CUSTOM_BUILTIN(mma_xvf64gernp, "vW512*W256V", true)
-CUSTOM_BUILTIN(mma_xvf64gernn, "vW512*W256V", true)
-CUSTOM_BUILTIN(mma_pmxvf64gerpp, "vW512*W256Vi15i3", true)
-CUSTOM_BUILTIN(mma_pmxvf64gerpn, "vW512*W256Vi15i3", true)
-CUSTOM_BUILTIN(mma_pmxvf64gernp, "vW512*W256Vi15i3", true)
-CUSTOM_BUILTIN(mma_pmxvf64gernn, "vW512*W256Vi15i3", true)
-CUSTOM_BUILTIN(mma_xvbf16ger2, "vW512*VV", false)
-CUSTOM_BUILTIN(mma_pmxvbf16ger2, "vW512*VVi15i15i3", false)
-CUSTOM_BUILTIN(mma_xvbf16ger2pp, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_xvbf16ger2pn, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_xvbf16ger2np, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_xvbf16ger2nn, "vW512*VV", true)
-CUSTOM_BUILTIN(mma_pmxvbf16ger2pp, "vW512*VVi15i15i3", true)
-CUSTOM_BUILTIN(mma_pmxvbf16ger2pn, "vW512*VVi15i15i3", true)
-CUSTOM_BUILTIN(mma_pmxvbf16ger2np, "vW512*VVi15i15i3", true)
-CUSTOM_BUILTIN(mma_pmxvbf16ger2nn, "vW512*VVi15i15i3", true)
+// Built-ins provided with the _mma_ prefix for compatibility.
+CUSTOM_BUILTIN(mma_lxvp, vsx_lxvp, "W256SLLiW256C*", false)
+CUSTOM_BUILTIN(mma_stxvp, vsx_stxvp, "vW256SLLiW256C*", false)
+CUSTOM_BUILTIN(mma_assemble_pair, vsx_assemble_pair, "vW256*VV", false)
+CUSTOM_BUILTIN(mma_disassemble_pair, vsx_disassemble_pair, "vv*W256*", false)
+
+// The UNALIASED_CUSTOM_BUILTIN macro is used for built-ins whose name
+// matches that of the intrinsic they generate, i.e. the ID and INTR
+// arguments are the same.
+// This avoids repeating the ID and INTR in each macro invocation.
+
+UNALIASED_CUSTOM_BUILTIN(vsx_lxvp, "W256SLLiW256C*", false)
+UNALIASED_CUSTOM_BUILTIN(vsx_stxvp, "vW256SLLiW256C*", false)
+UNALIASED_CUSTOM_BUILTIN(vsx_assemble_pair, "vW256*VV", false)
+UNALIASED_CUSTOM_BUILTIN(vsx_disassemble_pair, "vv*W256*", false)
+
+UNALIASED_CUSTOM_BUILTIN(mma_assemble_acc, "vW512*VVVV", false)
+UNALIASED_CUSTOM_BUILTIN(mma_disassemble_acc, "vv*W512*", false)
+UNALIASED_CUSTOM_BUILTIN(mma_xxmtacc, "vW512*", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xxmfacc, "vW512*", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xxsetaccz, "vW512*", false)
+UNALIASED_CUSTOM_BUILTIN(mma_xvi4ger8, "vW512*VV", false)
+UNALIASED_CUSTOM_BUILTIN(mma_xvi8ger4, "vW512*VV", false)
+UNALIASED_CUSTOM_BUILTIN(mma_xvi16ger2, "vW512*VV", false)
+UNALIASED_CUSTOM_BUILTIN(mma_xvi16ger2s, "vW512*VV", false)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf16ger2, "vW512*VV", false)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf32ger, "vW512*VV", false)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf64ger, "vW512*W256V", false)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvi4ger8, "vW512*VVi15i15i255", false)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvi8ger4, "vW512*VVi15i15i15", false)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvi16ger2, "vW512*VVi15i15i3", false)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvi16ger2s, "vW512*VVi15i15i3", false)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf16ger2, "vW512*VVi15i15i3", false)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf32ger, "vW512*VVi15i15", false)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf64ger, "vW512*W256Vi15i3", false)
+UNALIASED_CUSTOM_BUILTIN(mma_xvi4ger8pp, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvi8ger4pp, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvi8ger4spp, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvi16ger2pp, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvi16ger2spp, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvi4ger8pp, "vW512*VVi15i15i255", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvi8ger4pp, "vW512*VVi15i15i15", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvi8ger4spp, "vW512*VVi15i15i15", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvi16ger2pp, "vW512*VVi15i15i3", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvi16ger2spp, "vW512*VVi15i15i3", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf16ger2pp, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf16ger2pn, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf16ger2np, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf16ger2nn, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf16ger2pp, "vW512*VVi15i15i3", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf16ger2pn, "vW512*VVi15i15i3", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf16ger2np, "vW512*VVi15i15i3", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf16ger2nn, "vW512*VVi15i15i3", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf32gerpp, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf32gerpn, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf32gernp, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf32gernn, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf32gerpp, "vW512*VVi15i15", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf32gerpn, "vW512*VVi15i15", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf32gernp, "vW512*VVi15i15", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf32gernn, "vW512*VVi15i15", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf64gerpp, "vW512*W256V", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf64gerpn, "vW512*W256V", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf64gernp, "vW512*W256V", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvf64gernn, "vW512*W256V", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf64gerpp, "vW512*W256Vi15i3", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf64gerpn, "vW512*W256Vi15i3", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf64gernp, "vW512*W256Vi15i3", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvf64gernn, "vW512*W256Vi15i3", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvbf16ger2, "vW512*VV", false)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvbf16ger2, "vW512*VVi15i15i3", false)
+UNALIASED_CUSTOM_BUILTIN(mma_xvbf16ger2pp, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvbf16ger2pn, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvbf16ger2np, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_xvbf16ger2nn, "vW512*VV", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvbf16ger2pp, "vW512*VVi15i15i3", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvbf16ger2pn, "vW512*VVi15i15i3", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvbf16ger2np, "vW512*VVi15i15i3", true)
+UNALIASED_CUSTOM_BUILTIN(mma_pmxvbf16ger2nn, "vW512*VVi15i15i3", true)
// FIXME: Obviously incomplete.
#undef BUILTIN
#undef CUSTOM_BUILTIN
+#undef UNALIASED_CUSTOM_BUILTIN
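
To make the descriptor strings above concrete, here is one reading of
"vW512*VV" as used by mma_xvf32ger (an illustration based on the comment
block, not text from the patch):

// v     -> void result
// W512* -> pointer to the 512-bit PPC type, i.e. __vector_quad *
// V     -> vector unsigned char (one per occurrence)
// So the built-in is type-checked as if it were declared:
void __builtin_mma_xvf32ger(__vector_quad *, vector unsigned char,
                            vector unsigned char);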
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index d1d248263d18..1c56089974d3 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -15349,7 +15349,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
// use custom code generation to expand a builtin call with a pointer to a
// load (if the corresponding instruction accumulates its result) followed by
// the call to the intrinsic and a store of the result.
-#define CUSTOM_BUILTIN(Name, Types, Accumulate) \
+#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate) \
case PPC::BI__builtin_##Name:
#include "clang/Basic/BuiltinsPPC.def"
{
@@ -15358,7 +15358,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
// return values. So, here we emit code extracting these values from the
// intrinsic results and storing them using that pointer.
if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
- BuiltinID == PPC::BI__builtin_vsx_disassemble_pair) {
+ BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
+ BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
unsigned NumVecs = 2;
auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
@@ -15381,16 +15382,19 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
}
bool Accumulate;
switch (BuiltinID) {
- #define CUSTOM_BUILTIN(Name, Types, Acc) \
+ #define CUSTOM_BUILTIN(Name, Intr, Types, Acc) \
case PPC::BI__builtin_##Name: \
- ID = Intrinsic::ppc_##Name; \
+ ID = Intrinsic::ppc_##Intr; \
Accumulate = Acc; \
break;
#include "clang/Basic/BuiltinsPPC.def"
}
if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
- BuiltinID == PPC::BI__builtin_vsx_stxvp) {
- if (BuiltinID == PPC::BI__builtin_vsx_lxvp) {
+ BuiltinID == PPC::BI__builtin_vsx_stxvp ||
+ BuiltinID == PPC::BI__builtin_mma_lxvp ||
+ BuiltinID == PPC::BI__builtin_mma_stxvp) {
+ if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
+ BuiltinID == PPC::BI__builtin_mma_lxvp) {
Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
} else {
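
As a usage sketch of the lxvp/stxvp aliasing handled above (assuming a
POWER10 target; the names and zero offsets are illustrative), both the
legacy and current spellings take a byte offset that is folded into a GEP
before the call to llvm.ppc.vsx.lxvp/stxvp:

void copy_pair(const __vector_pair *src, __vector_pair *dst) {
  __vector_pair vp = __builtin_mma_lxvp(0LL, src); // legacy alias
  __builtin_vsx_stxvp(vp, 0LL, dst);               // current name
}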
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 415773e62ad5..fec02b85a03b 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3331,7 +3331,7 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
return SemaBuiltinConstantArgRange(TheCall, 2, 0, 7);
case PPC::BI__builtin_vsx_xxpermx:
return SemaBuiltinConstantArgRange(TheCall, 3, 0, 7);
-#define CUSTOM_BUILTIN(Name, Types, Acc) \
+#define CUSTOM_BUILTIN(Name, Intr, Types, Acc) \
case PPC::BI__builtin_##Name: \
return SemaBuiltinPPCMMACall(TheCall, Types);
#include "clang/Basic/BuiltinsPPC.def"
diff --git a/clang/test/CodeGen/builtins-ppc-pair-mma.c b/clang/test/CodeGen/builtins-ppc-pair-mma.c
index 324614ecac20..472da0393b51 100644
--- a/clang/test/CodeGen/builtins-ppc-pair-mma.c
+++ b/clang/test/CodeGen/builtins-ppc-pair-mma.c
@@ -5,7 +5,7 @@
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], <16 x i8> [[VC]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2:![0-9]+]]
// CHECK-NEXT: ret void
//
void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -46,7 +46,7 @@ void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <256 x i1>*
-// CHECK-NEXT: store <256 x i1> [[TMP0]], <256 x i1>* [[TMP1]], align 32, !tbaa !6
+// CHECK-NEXT: store <256 x i1> [[TMP0]], <256 x i1>* [[TMP1]], align 32, !tbaa [[TBAA6:![0-9]+]]
// CHECK-NEXT: ret void
//
void test3(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -78,10 +78,10 @@ void test4(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
// CHECK-LABEL: @test5(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test5(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -94,10 +94,10 @@ void test5(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
// CHECK-LABEL: @test6(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test6(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -111,7 +111,7 @@ void test6(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test7(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -125,7 +125,7 @@ void test7(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test8(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -139,7 +139,7 @@ void test8(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test9(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -153,7 +153,7 @@ void test9(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test10(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -167,7 +167,7 @@ void test10(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test11(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -181,7 +181,7 @@ void test11(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test12(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -195,7 +195,7 @@ void test12(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test13(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -208,10 +208,10 @@ void test13(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test14(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32, !tbaa !6
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32, !tbaa [[TBAA6]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test14(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -225,7 +225,7 @@ void test14(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test15(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -239,7 +239,7 @@ void test15(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test16(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -253,7 +253,7 @@ void test16(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test17(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -267,7 +267,7 @@ void test17(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test18(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -281,7 +281,7 @@ void test18(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test19(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -295,7 +295,7 @@ void test19(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test20(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -308,10 +308,10 @@ void test20(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test21(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32, !tbaa !6
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32, !tbaa [[TBAA6]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test21(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -324,10 +324,10 @@ void test21(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test22(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test22(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -340,10 +340,10 @@ void test22(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test23(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test23(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -356,10 +356,10 @@ void test23(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test24(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test24(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -372,10 +372,10 @@ void test24(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test25(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test25(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -388,10 +388,10 @@ void test25(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test26(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test26(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -404,10 +404,10 @@ void test26(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test27(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test27(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -420,10 +420,10 @@ void test27(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test28(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test28(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -436,10 +436,10 @@ void test28(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test29(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test29(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -452,10 +452,10 @@ void test29(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test30(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test30(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -468,10 +468,10 @@ void test30(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test31(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test31(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -484,10 +484,10 @@ void test31(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test32(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -500,10 +500,10 @@ void test32(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test33(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test33(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -516,10 +516,10 @@ void test33(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test34(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test34(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -532,10 +532,10 @@ void test34(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test35(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test35(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -548,10 +548,10 @@ void test35(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test36(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test36(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -564,10 +564,10 @@ void test36(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test37(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test37(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -580,10 +580,10 @@ void test37(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test38(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test38(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -596,10 +596,10 @@ void test38(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test39(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test39(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -612,10 +612,10 @@ void test39(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test40(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test40(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -628,10 +628,10 @@ void test40(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test41(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test41(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -644,10 +644,10 @@ void test41(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test42(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test42(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -660,10 +660,10 @@ void test42(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test43(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test43(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -676,10 +676,10 @@ void test43(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test44(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test44(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -692,10 +692,10 @@ void test44(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test45(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test45(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -708,10 +708,10 @@ void test45(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test46(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test46(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -724,10 +724,10 @@ void test46(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test47(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test47(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -740,12 +740,12 @@ void test47(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test48(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
-// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
+// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]])
// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test48(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -758,12 +758,12 @@ void test48(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test49(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
-// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
+// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]])
// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test49(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -776,12 +776,12 @@ void test49(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test50(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
-// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
+// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]])
// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test50(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -794,12 +794,12 @@ void test50(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test51(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
-// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
+// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]])
// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test51(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -812,12 +812,12 @@ void test51(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test52(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
-// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
+// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test52(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -830,12 +830,12 @@ void test52(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test53(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
-// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
+// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test53(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -848,12 +848,12 @@ void test53(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test54(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
-// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
+// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test54(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -866,12 +866,12 @@ void test54(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test55(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
-// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
+// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test55(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -885,7 +885,7 @@ void test55(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test56(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -899,7 +899,7 @@ void test56(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test57(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -912,10 +912,10 @@ void test57(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test58(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test58(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -928,10 +928,10 @@ void test58(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test59(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test59(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -944,10 +944,10 @@ void test59(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test60(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2np(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test60(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -960,10 +960,10 @@ void test60(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test61(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2nn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test61(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -976,10 +976,10 @@ void test61(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test62(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test62(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -992,10 +992,10 @@ void test62(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test63(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test63(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -1008,10 +1008,10 @@ void test63(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2np(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test64(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -1024,10 +1024,10 @@ void test64(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
// CHECK-LABEL: @test65(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2nn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
+// CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test65(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -1143,13 +1143,13 @@ void test72(const __vector_pair *vpp, const __vector_pair *vp2) {
// CHECK-LABEL: @test73(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, [[TBAA2:!tbaa !.*]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i64 8
// CHECK-NEXT: [[TMP4:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP3]])
// CHECK-NEXT: [[TMP5:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP1]], <256 x i1> [[TMP4]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
// CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, [[TBAA2]]
+// CHECK-NEXT: store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test73(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -1162,12 +1162,12 @@ void test73(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char v
// CHECK-LABEL: @test74(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
// CHECK-NEXT: [[TMP3:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]])
// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, [[TBAA2]]
+// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test74(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -1180,13 +1180,13 @@ void test74(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char v
// CHECK-LABEL: @test75(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
-// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, [[TBAA2:!tbaa !.*]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[OFFS:%.*]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP3]])
// CHECK-NEXT: [[TMP5:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP4]], <16 x i8> [[VC:%.*]])
// CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
-// CHECK-NEXT: store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, [[TBAA2]]
+// CHECK-NEXT: store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void test75(unsigned char *vqp, signed long long offs, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -1195,3 +1195,195 @@ void test75(unsigned char *vqp, signed long long offs, const __vector_pair *vpp,
__builtin_mma_xvf64gernp(&vq, vp, vc);
*((__vector_quad *)resp) = vq;
}
+
+// CHECK-LABEL: @test76(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <256 x i1>*
+// CHECK-NEXT: store <256 x i1> [[TMP0]], <256 x i1>* [[TMP1]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT: ret void
+//
+void test76(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __vector_quad vq = *((__vector_quad *)vqp);
+ __vector_pair vp = *((__vector_pair *)vpp);
+ __vector_pair res;
+ __builtin_mma_assemble_pair(&res, vc, vc);
+ *((__vector_pair *)resp) = res;
+}
+
+// CHECK-LABEL: @test77(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32
+// CHECK-NEXT: [[TMP2:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <16 x i8>*
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 0
+// CHECK-NEXT: store <16 x i8> [[TMP4]], <16 x i8>* [[TMP3]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 1
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 16
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <16 x i8>*
+// CHECK-NEXT: store <16 x i8> [[TMP5]], <16 x i8>* [[TMP7]], align 16
+// CHECK-NEXT: ret void
+//
+void test77(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __builtin_mma_disassemble_pair(resp, (__vector_pair*)vpp);
+}
+
+// CHECK-LABEL: @test78(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
+// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], i8* [[TMP2]])
+// CHECK-NEXT: ret void
+//
+void test78(const __vector_pair *vpp, const __vector_pair *vp2) {
+ __vector_pair vp = __builtin_mma_lxvp(0LL, vpp);
+ __builtin_mma_stxvp(vp, 0LL, vp2);
+}
+
+// CHECK-LABEL: @test79(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 [[OFFSET:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 [[OFFSET]]
+// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
+// CHECK-NEXT: ret void
+//
+void test79(const __vector_pair *vpp, signed long long offset, const __vector_pair *vp2) {
+ __vector_pair vp = __builtin_mma_lxvp(offset, vpp);
+ __builtin_mma_stxvp(vp, offset, vp2);
+}
+
+// CHECK-LABEL: @test80(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 18
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 18
+// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
+// CHECK-NEXT: ret void
+//
+void test80(const __vector_pair *vpp, const __vector_pair *vp2) {
+ __vector_pair vp = __builtin_mma_lxvp(18LL, vpp);
+ __builtin_mma_stxvp(vp, 18LL, vp2);
+}
+
+// CHECK-LABEL: @test81(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 1
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 1
+// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
+// CHECK-NEXT: ret void
+//
+void test81(const __vector_pair *vpp, const __vector_pair *vp2) {
+ __vector_pair vp = __builtin_mma_lxvp(1LL, vpp);
+ __builtin_mma_stxvp(vp, 1LL, vp2);
+}
+
+// CHECK-LABEL: @test82(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 42
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 42
+// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
+// CHECK-NEXT: ret void
+//
+void test82(const __vector_pair *vpp, const __vector_pair *vp2) {
+ __vector_pair vp = __builtin_mma_lxvp(42LL, vpp);
+ __builtin_mma_stxvp(vp, 42LL, vp2);
+}
+
+// CHECK-LABEL: @test83(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr <256 x i1>, <256 x i1>* [[VPP:%.*]], i64 128
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <256 x i1>* [[TMP0]] to i8*
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr <256 x i1>, <256 x i1>* [[VP2:%.*]], i64 128
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <256 x i1>* [[TMP3]] to i8*
+// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
+// CHECK-NEXT: ret void
+//
+void test83(const __vector_pair *vpp, const __vector_pair *vp2) {
+ __vector_pair vp = __builtin_mma_lxvp(32768LL, vpp);
+ __builtin_mma_stxvp(vp, 32768LL, vp2);
+}
+
+// CHECK-LABEL: @test84(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 32799
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 32799
+// CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
+// CHECK-NEXT: ret void
+//
+void test84(const __vector_pair *vpp, const __vector_pair *vp2) {
+ __vector_pair vp = __builtin_mma_lxvp(32799LL, vpp);
+ __builtin_mma_stxvp(vp, 32799LL, vp2);
+}
+
+// CHECK-LABEL: @test85(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i64 8
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP3]])
+// CHECK-NEXT: [[TMP5:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP1]], <256 x i1> [[TMP4]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
+// CHECK-NEXT: store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+void test85(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
+ __vector_quad vq = *((__vector_quad *)vqp);
+ __vector_pair vp = __builtin_mma_lxvp(8LL, vpp);
+ __builtin_mma_pmxvf64gernn(&vq, vp, vc, 0, 0);
+ *((__vector_quad *)resp) = vq;
+}
+
+// CHECK-LABEL: @test86(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]])
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
+// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+void test86(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
+ __vector_quad vq = *((__vector_quad *)vqp);
+ __vector_pair vp = __builtin_mma_lxvp(0LL, vpp);
+ __builtin_mma_xvf64gernp(&vq, vp, vc);
+ *((__vector_quad *)resp) = vq;
+}
+
+// CHECK-LABEL: @test87(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
+// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[OFFS:%.*]]
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP3]])
+// CHECK-NEXT: [[TMP5:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP4]], <16 x i8> [[VC:%.*]])
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
+// CHECK-NEXT: store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+void test87(unsigned char *vqp, signed long long offs, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
+ __vector_quad vq = *((__vector_quad *)vqp);
+ __vector_pair vp = __builtin_mma_lxvp(offs, vpp);
+ __builtin_mma_xvf64gernp(&vq, vp, vc);
+ *((__vector_quad *)resp) = vq;
+}
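
As the new tests above show, source that still uses the _mma_-prefixed
vector-pair spellings keeps compiling once these compatibility built-ins
are in place. A minimal user-side sketch mirroring test86 (the function
name and the -mcpu=pwr10 -mmma flags here are illustrative assumptions,
not part of the commit):

#include <altivec.h>

void accumulate(unsigned char *vqp, const __vector_pair *vpp,
                vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  // Legacy spelling: per the CHECK lines for test86, this lowers to
  // llvm.ppc.vsx.lxvp, the same intrinsic as the renamed built-in.
  __vector_pair vp = __builtin_mma_lxvp(0LL, vpp);
  __builtin_mma_xvf64gernp(&vq, vp, vc);
  *((__vector_quad *)resp) = vq;
}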