[clang] [llvm] [Clang][PowerPC] Add DMF crypto builtins for extended mnemonics (PR #185961)
Maryam Moghadas via cfe-commits
cfe-commits at lists.llvm.org
Thu Mar 19 09:40:50 PDT 2026
https://github.com/maryammo updated https://github.com/llvm/llvm-project/pull/185961
>From f4bd1e17350954aea1327b885033abfa6b3cecb3 Mon Sep 17 00:00:00 2001
From: Maryam Moghadas <maryammo at ca.ibm.com>
Date: Wed, 11 Mar 2026 19:39:45 +0000
Subject: [PATCH 1/2] [Clang][PowerPC] Add DMF crypto builtins for extended mnemonics
---
clang/include/clang/Basic/BuiltinsPPC.def | 30 +-
clang/lib/CodeGen/TargetBuiltins/PPC.cpp | 77 ++-
clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c | 420 ++++++++++---
.../CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c | 32 +-
llvm/include/llvm/IR/IntrinsicsPowerPC.td | 6 +-
llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td | 6 +-
llvm/test/CodeGen/PowerPC/dmrp-spill.ll | 6 +-
llvm/test/CodeGen/PowerPC/mmaplus-crypto.ll | 554 +++++++++++++++++-
8 files changed, 1028 insertions(+), 103 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index 75d7d92c4f9d4..baa7e057a777d 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -1125,11 +1125,35 @@ UNALIASED_CUSTOM_BUILTIN(mma_disassemble_dmr, "vv*W1024*", false,
UNALIASED_CUSTOM_BUILTIN(mma_build_dmr, "vW1024*VVVVVVVV", false,
"mma,isa-future-instructions")
-UNALIASED_CUSTOM_BUILTIN(mma_dmsha2hash, "vW1024*W1024*Ii", true,
+UNALIASED_CUSTOM_BUILTIN(dmsha2hash, "vW1024*W1024*Ii", true,
"mma,isa-future-instructions")
-UNALIASED_CUSTOM_BUILTIN(mma_dmsha3hash, "vW2048*Ii", true,
+UNALIASED_CUSTOM_BUILTIN(dmsha3hash, "vW2048*Ii", true,
"mma,isa-future-instructions")
-UNALIASED_CUSTOM_BUILTIN(mma_dmxxshapad, "vW1024*VIiIiIi", true,
+UNALIASED_CUSTOM_BUILTIN(dmxxshapad, "vW1024*VIiIiIi", true,
+ "mma,isa-future-instructions")
+CUSTOM_BUILTIN(dmsha256hash, dmsha2hash, "vW1024*W1024*", true,
+ "mma,isa-future-instructions")
+CUSTOM_BUILTIN(dmsha512hash, dmsha2hash, "vW1024*W1024*", true,
+ "mma,isa-future-instructions")
+CUSTOM_BUILTIN(dmsha3dw, dmsha3hash, "vW2048*", true,
+ "mma,isa-future-instructions")
+CUSTOM_BUILTIN(dmcryshash, dmsha3hash, "vW2048*", true,
+ "mma,isa-future-instructions")
+CUSTOM_BUILTIN(dmxxsha3512pad, dmxxshapad, "vW1024*VIi", true,
+ "mma,isa-future-instructions")
+CUSTOM_BUILTIN(dmxxsha3384pad, dmxxshapad, "vW1024*VIi", true,
+ "mma,isa-future-instructions")
+CUSTOM_BUILTIN(dmxxsha3256pad, dmxxshapad, "vW1024*VIi", true,
+ "mma,isa-future-instructions")
+CUSTOM_BUILTIN(dmxxsha3224pad, dmxxshapad, "vW1024*VIi", true,
+ "mma,isa-future-instructions")
+CUSTOM_BUILTIN(dmxxshake256pad, dmxxshapad, "vW1024*VIi", true,
+ "mma,isa-future-instructions")
+CUSTOM_BUILTIN(dmxxshake128pad, dmxxshapad, "vW1024*VIi", true,
+ "mma,isa-future-instructions")
+CUSTOM_BUILTIN(dmxxsha384512pad, dmxxshapad, "vW1024*V", true,
+ "mma,isa-future-instructions")
+CUSTOM_BUILTIN(dmxxsha224256pad, dmxxshapad, "vW1024*V", true,
"mma,isa-future-instructions")
// MMA builtins with positive/negative multiply/accumulate.
diff --git a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
index 6568959351a5d..e915c1bd4d27a 100644
--- a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
@@ -1154,10 +1154,85 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
if (BuiltinID == PPC::BI__builtin_mma_dmmr ||
BuiltinID == PPC::BI__builtin_mma_dmxor ||
BuiltinID == PPC::BI__builtin_mma_disassemble_dmr ||
- BuiltinID == PPC::BI__builtin_mma_dmsha2hash) {
+ BuiltinID == PPC::BI__builtin_dmsha2hash) {
Address Addr = EmitPointerWithAlignment(E->getArg(1));
Ops[1] = Builder.CreateLoad(Addr);
}
+ if (BuiltinID == PPC::BI__builtin_dmsha256hash ||
+ BuiltinID == PPC::BI__builtin_dmsha512hash) {
+ Address Addr = EmitPointerWithAlignment(E->getArg(1));
+ Ops[1] = Builder.CreateLoad(Addr);
+ int Imm = (BuiltinID == PPC::BI__builtin_dmsha256hash) ? 0 : 1;
+ Ops.push_back(llvm::ConstantInt::get(Int32Ty, Imm));
+ }
+ if (BuiltinID == PPC::BI__builtin_dmsha3dw ||
+ BuiltinID == PPC::BI__builtin_dmcryshash) {
+ int Imm = (BuiltinID == PPC::BI__builtin_dmsha3dw) ? 0 : 12;
+ Ops.push_back(llvm::ConstantInt::get(Int32Ty, Imm));
+ }
+ if (BuiltinID == PPC::BI__builtin_dmxxsha3512pad ||
+ BuiltinID == PPC::BI__builtin_dmxxsha3384pad ||
+ BuiltinID == PPC::BI__builtin_dmxxsha3256pad ||
+ BuiltinID == PPC::BI__builtin_dmxxsha3224pad ||
+ BuiltinID == PPC::BI__builtin_dmxxshake256pad ||
+ BuiltinID == PPC::BI__builtin_dmxxshake128pad ||
+ BuiltinID == PPC::BI__builtin_dmxxsha384512pad ||
+ BuiltinID == PPC::BI__builtin_dmxxsha224256pad) {
+ int ID, BL;
+ bool hasE;
+ switch (BuiltinID) {
+ case PPC::BI__builtin_dmxxsha3512pad:
+ ID = 0;
+ BL = 0;
+ hasE = true;
+ break;
+ case PPC::BI__builtin_dmxxsha3384pad:
+ ID = 0;
+ BL = 1;
+ hasE = true;
+ break;
+ case PPC::BI__builtin_dmxxsha3256pad:
+ ID = 0;
+ BL = 2;
+ hasE = true;
+ break;
+ case PPC::BI__builtin_dmxxsha3224pad:
+ ID = 0;
+ BL = 3;
+ hasE = true;
+ break;
+ case PPC::BI__builtin_dmxxshake256pad:
+ ID = 1;
+ BL = 0;
+ hasE = true;
+ break;
+ case PPC::BI__builtin_dmxxshake128pad:
+ ID = 1;
+ BL = 1;
+ hasE = true;
+ break;
+ case PPC::BI__builtin_dmxxsha384512pad:
+ ID = 2;
+ BL = 0;
+ hasE = false;
+ break;
+ case PPC::BI__builtin_dmxxsha224256pad:
+ ID = 3;
+ BL = 0;
+ hasE = false;
+ break;
+ }
+ if (hasE) {
+ Value *E_val = Ops[2];
+ Ops[2] = ConstantInt::get(Int32Ty, ID);
+ Ops.push_back(E_val);
+ Ops.push_back(ConstantInt::get(Int32Ty, BL));
+ } else {
+ Ops.push_back(ConstantInt::get(Int32Ty, ID));
+ Ops.push_back(ConstantInt::get(Int32Ty, 0));
+ Ops.push_back(ConstantInt::get(Int32Ty, 0));
+ }
+ }
if (BuiltinID == PPC::BI__builtin_mma_disassemble_dmr)
return Builder.CreateAlignedStore(Ops[1], Ops[0], MaybeAlign());
for (unsigned i=1; i<Ops.size(); i++)
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c b/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c
index 585d8bac57181..c9274988b2e5d 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c
@@ -8,17 +8,17 @@
// CHECK-LABEL: define dso_local void @test_dmxvi8gerx4(
// CHECK-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6:![0-9]+]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA5:![0-9]+]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8:![0-9]+]]
+// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7:![0-9]+]]
// CHECK-NEXT: ret void
//
// AIX-LABEL: define void @test_dmxvi8gerx4(
// AIX-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// AIX-NEXT: [[ENTRY:.*:]]
-// AIX-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6:![0-9]+]]
+// AIX-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA5:![0-9]+]]
// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC]])
-// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8:![0-9]+]]
+// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7:![0-9]+]]
// AIX-NEXT: ret void
//
void test_dmxvi8gerx4(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -31,17 +31,17 @@ void test_dmxvi8gerx4(unsigned char *vdmrp, unsigned char *vpp, vector unsigned
// CHECK-LABEL: define dso_local void @test_pmdmxvi8gerx4(
// CHECK-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA5]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
-// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]]
+// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
// CHECK-NEXT: ret void
//
// AIX-LABEL: define void @test_pmdmxvi8gerx4(
// AIX-SAME: ptr noundef readnone captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
// AIX-NEXT: [[ENTRY:.*:]]
-// AIX-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]]
+// AIX-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA5]]
// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4(<256 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
-// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]]
+// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
// AIX-NEXT: ret void
//
void test_pmdmxvi8gerx4(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -54,19 +54,19 @@ void test_pmdmxvi8gerx4(unsigned char *vdmrp, unsigned char *vpp, vector unsigne
// CHECK-LABEL: define dso_local void @test_dmxvi8gerx4pp(
// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA5]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]]
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
// CHECK-NEXT: ret void
//
// AIX-LABEL: define void @test_dmxvi8gerx4pp(
// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
// AIX-NEXT: [[ENTRY:.*:]]
-// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]]
-// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]]
+// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA5]]
// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]])
-// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]]
+// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
// AIX-NEXT: ret void
//
void test_dmxvi8gerx4pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -79,19 +79,19 @@ void test_dmxvi8gerx4pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigne
// CHECK-LABEL: define dso_local void @test_pmdmxvi8gerx4pp(
// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA5]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
-// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]]
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
// CHECK-NEXT: ret void
//
// AIX-LABEL: define void @test_pmdmxvi8gerx4pp(
// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
// AIX-NEXT: [[ENTRY:.*:]]
-// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]]
-// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]]
+// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA5]]
// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
-// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]]
+// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
// AIX-NEXT: ret void
//
void test_pmdmxvi8gerx4pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -104,19 +104,19 @@ void test_pmdmxvi8gerx4pp(unsigned char *vdmrp, unsigned char *vpp, vector unsig
// CHECK-LABEL: define dso_local void @test_dmxvi8gerx4spp(
// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA5]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]])
-// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]]
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
// CHECK-NEXT: ret void
//
// AIX-LABEL: define void @test_dmxvi8gerx4spp(
// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
// AIX-NEXT: [[ENTRY:.*:]]
-// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]]
-// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]]
+// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA5]]
// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]])
-// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]]
+// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
// AIX-NEXT: ret void
//
void test_dmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -129,19 +129,19 @@ void test_dmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, vector unsign
// CHECK-LABEL: define dso_local void @test_pmdmxvi8gerx4spp(
// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA5]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
-// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]]
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
// CHECK-NEXT: ret void
//
// AIX-LABEL: define void @test_pmdmxvi8gerx4spp(
// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], ptr noundef readonly captures(none) [[VPP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
// AIX-NEXT: [[ENTRY:.*:]]
-// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]]
-// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA6]]
+// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP]], align 32, !tbaa [[__VECTOR_PAIR_TBAA5]]
// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4spp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
-// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]]
+// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
// AIX-NEXT: ret void
//
void test_pmdmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
@@ -185,7 +185,7 @@ void test_dmf_basic(char *p, char *res1, char *res2) {
// CHECK-LABEL: define dso_local void @test_dmf_basic2(
// CHECK-SAME: ptr noundef readonly captures(none) [[P1:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RES1:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RES2:%.*]], ptr noundef readonly captures(none) [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[V]], align 16, !tbaa [[CHAR_TBAA10:![0-9]+]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[V]], align 16, !tbaa [[CHAR_TBAA9:![0-9]+]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]])
// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RES2]], align 128
// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[P1]], align 128
@@ -195,7 +195,7 @@ void test_dmf_basic(char *p, char *res1, char *res2) {
// AIX-LABEL: define void @test_dmf_basic2(
// AIX-SAME: ptr noundef readonly captures(none) [[P1:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RES1:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RES2:%.*]], ptr noundef readonly captures(none) [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
// AIX-NEXT: [[ENTRY:.*:]]
-// AIX-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[V]], align 16, !tbaa [[CHAR_TBAA10:![0-9]+]]
+// AIX-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[V]], align 16, !tbaa [[CHAR_TBAA9:![0-9]+]]
// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]])
// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RES2]], align 128
// AIX-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[P1]], align 128
@@ -212,89 +212,359 @@ void test_dmf_basic2(char *p1, char *res1, char *res2,
// CHECK-LABEL: define dso_local void @test_dmsha2hash(
// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP1:%.*]], ptr noundef readonly captures(none) [[VDMRP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP1]], align 128, !tbaa [[__DMR1024_TBAA8]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <1024 x i1>, ptr [[VDMRP2]], align 128, !tbaa [[__DMR1024_TBAA8]]
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmsha2hash(<1024 x i1> [[TMP0]], <1024 x i1> [[TMP1]], i32 1)
-// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP1]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <1024 x i1>, ptr [[VDMRP2]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.dmsha2hash(<1024 x i1> [[TMP0]], <1024 x i1> [[TMP1]], i32 1)
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
// CHECK-NEXT: ret void
//
// AIX-LABEL: define void @test_dmsha2hash(
// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP1:%.*]], ptr noundef readonly captures(none) [[VDMRP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
// AIX-NEXT: [[ENTRY:.*:]]
-// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP1]], align 128, !tbaa [[__DMR1024_TBAA8]]
-// AIX-NEXT: [[TMP1:%.*]] = load <1024 x i1>, ptr [[VDMRP2]], align 128, !tbaa [[__DMR1024_TBAA8]]
-// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmsha2hash(<1024 x i1> [[TMP0]], <1024 x i1> [[TMP1]], i32 1)
-// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]]
+// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP1]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: [[TMP1:%.*]] = load <1024 x i1>, ptr [[VDMRP2]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.dmsha2hash(<1024 x i1> [[TMP0]], <1024 x i1> [[TMP1]], i32 1)
+// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
// AIX-NEXT: ret void
//
void test_dmsha2hash(unsigned char *vdmrp1, unsigned char *vdmrp2, unsigned char *resp) {
__dmr1024 vdmr1 = *((__dmr1024 *)vdmrp1);
__dmr1024 vdmr2 = *((__dmr1024 *)vdmrp2);
- __builtin_mma_dmsha2hash(&vdmr1, &vdmr2, 1);
+ __builtin_dmsha2hash(&vdmr1, &vdmr2, 1);
*((__dmr1024 *)resp) = vdmr1;
}
// CHECK-LABEL: define dso_local void @test_dmsha3hash(
// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRPP:%.*]], ptr noundef writeonly captures(none) initializes((0, 256)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <2048 x i1>, ptr [[VDMRPP]], align 256, !tbaa [[__DMR2048_TBAA11:![0-9]+]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1> [[TMP0]], i32 4)
-// CHECK-NEXT: store <2048 x i1> [[TMP1]], ptr [[RESP]], align 256, !tbaa [[__DMR2048_TBAA11]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2048 x i1>, ptr [[VDMRPP]], align 256, !tbaa [[__DMR2048_TBAA10:![0-9]+]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2048 x i1> @llvm.ppc.dmsha3hash(<2048 x i1> [[TMP0]], i32 4)
+// CHECK-NEXT: store <2048 x i1> [[TMP1]], ptr [[RESP]], align 256, !tbaa [[__DMR2048_TBAA10]]
// CHECK-NEXT: ret void
//
// AIX-LABEL: define void @test_dmsha3hash(
// AIX-SAME: ptr noundef readonly captures(none) [[VDMRPP:%.*]], ptr noundef writeonly captures(none) initializes((0, 256)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
// AIX-NEXT: [[ENTRY:.*:]]
-// AIX-NEXT: [[TMP0:%.*]] = load <2048 x i1>, ptr [[VDMRPP]], align 256, !tbaa [[__DMR2048_TBAA11:![0-9]+]]
-// AIX-NEXT: [[TMP1:%.*]] = tail call <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1> [[TMP0]], i32 4)
-// AIX-NEXT: store <2048 x i1> [[TMP1]], ptr [[RESP]], align 256, !tbaa [[__DMR2048_TBAA11]]
+// AIX-NEXT: [[TMP0:%.*]] = load <2048 x i1>, ptr [[VDMRPP]], align 256, !tbaa [[__DMR2048_TBAA10:![0-9]+]]
+// AIX-NEXT: [[TMP1:%.*]] = tail call <2048 x i1> @llvm.ppc.dmsha3hash(<2048 x i1> [[TMP0]], i32 4)
+// AIX-NEXT: store <2048 x i1> [[TMP1]], ptr [[RESP]], align 256, !tbaa [[__DMR2048_TBAA10]]
// AIX-NEXT: ret void
//
void test_dmsha3hash(unsigned char *vdmrpp, unsigned char *resp) {
__dmr2048 vdmrp = *((__dmr2048 *)vdmrpp);
- __builtin_mma_dmsha3hash(&vdmrp, 4);
+ __builtin_dmsha3hash(&vdmrp, 4);
*((__dmr2048 *)resp) = vdmrp;
}
// CHECK-LABEL: define dso_local void @test_dmxxshapad(
// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 2, i32 1, i32 5)
-// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 2, i32 1, i32 3)
+// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
// CHECK-NEXT: ret void
//
// AIX-LABEL: define void @test_dmxxshapad(
// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
// AIX-NEXT: [[ENTRY:.*:]]
-// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA8]]
-// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 2, i32 1, i32 5)
-// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA8]]
+// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 2, i32 1, i32 3)
+// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
// AIX-NEXT: ret void
//
void test_dmxxshapad(unsigned char *vdmrp, vector unsigned char vc, unsigned char *resp) {
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
- __builtin_mma_dmxxshapad(&vdmr, vc, 2, 1, 5);
+ __builtin_dmxxshapad(&vdmr, vc, 2, 1, 3);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: define dso_local void @test_dmsha256hash(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP1:%.*]], ptr noundef readonly captures(none) [[VDMRP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP1]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <1024 x i1>, ptr [[VDMRP2]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.dmsha2hash(<1024 x i1> [[TMP0]], <1024 x i1> [[TMP1]], i32 0)
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: ret void
+//
+// AIX-LABEL: define void @test_dmsha256hash(
+// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP1:%.*]], ptr noundef readonly captures(none) [[VDMRP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AIX-NEXT: [[ENTRY:.*:]]
+// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP1]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: [[TMP1:%.*]] = load <1024 x i1>, ptr [[VDMRP2]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.dmsha2hash(<1024 x i1> [[TMP0]], <1024 x i1> [[TMP1]], i32 0)
+// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: ret void
+//
+void test_dmsha256hash(unsigned char *vdmrp1, unsigned char *vdmrp2, unsigned char *resp) {
+ __dmr1024 vdmr1 = *((__dmr1024 *)vdmrp1);
+ __dmr1024 vdmr2 = *((__dmr1024 *)vdmrp2);
+ __builtin_dmsha256hash(&vdmr1, &vdmr2);
+ *((__dmr1024 *)resp) = vdmr1;
+}
+
+// CHECK-LABEL: define dso_local void @test_dmsha512hash(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP1:%.*]], ptr noundef readonly captures(none) [[VDMRP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP1]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <1024 x i1>, ptr [[VDMRP2]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.dmsha2hash(<1024 x i1> [[TMP0]], <1024 x i1> [[TMP1]], i32 1)
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: ret void
+//
+// AIX-LABEL: define void @test_dmsha512hash(
+// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP1:%.*]], ptr noundef readonly captures(none) [[VDMRP2:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AIX-NEXT: [[ENTRY:.*:]]
+// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP1]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: [[TMP1:%.*]] = load <1024 x i1>, ptr [[VDMRP2]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.dmsha2hash(<1024 x i1> [[TMP0]], <1024 x i1> [[TMP1]], i32 1)
+// AIX-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: ret void
+//
+void test_dmsha512hash(unsigned char *vdmrp1, unsigned char *vdmrp2, unsigned char *resp) {
+ __dmr1024 vdmr1 = *((__dmr1024 *)vdmrp1);
+ __dmr1024 vdmr2 = *((__dmr1024 *)vdmrp2);
+ __builtin_dmsha512hash(&vdmr1, &vdmr2);
+ *((__dmr1024 *)resp) = vdmr1;
+}
+
+// CHECK-LABEL: define dso_local void @test_dmsha3dw(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRPP:%.*]], ptr noundef writeonly captures(none) initializes((0, 256)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2048 x i1>, ptr [[VDMRPP]], align 256, !tbaa [[__DMR2048_TBAA10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2048 x i1> @llvm.ppc.dmsha3hash(<2048 x i1> [[TMP0]], i32 0)
+// CHECK-NEXT: store <2048 x i1> [[TMP1]], ptr [[RESP]], align 256, !tbaa [[__DMR2048_TBAA10]]
+// CHECK-NEXT: ret void
+//
+// AIX-LABEL: define void @test_dmsha3dw(
+// AIX-SAME: ptr noundef readonly captures(none) [[VDMRPP:%.*]], ptr noundef writeonly captures(none) initializes((0, 256)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AIX-NEXT: [[ENTRY:.*:]]
+// AIX-NEXT: [[TMP0:%.*]] = load <2048 x i1>, ptr [[VDMRPP]], align 256, !tbaa [[__DMR2048_TBAA10]]
+// AIX-NEXT: [[TMP1:%.*]] = tail call <2048 x i1> @llvm.ppc.dmsha3hash(<2048 x i1> [[TMP0]], i32 0)
+// AIX-NEXT: store <2048 x i1> [[TMP1]], ptr [[RESP]], align 256, !tbaa [[__DMR2048_TBAA10]]
+// AIX-NEXT: ret void
+//
+void test_dmsha3dw(unsigned char *vdmrpp, unsigned char *resp) {
+ __dmr2048 vdmrp = *((__dmr2048 *)vdmrpp);
+ __builtin_dmsha3dw(&vdmrp);
+ *((__dmr2048 *)resp) = vdmrp;
+}
+
+// CHECK-LABEL: define dso_local void @test_dmcryshash(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRPP:%.*]], ptr noundef writeonly captures(none) initializes((0, 256)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2048 x i1>, ptr [[VDMRPP]], align 256, !tbaa [[__DMR2048_TBAA10]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2048 x i1> @llvm.ppc.dmsha3hash(<2048 x i1> [[TMP0]], i32 12)
+// CHECK-NEXT: store <2048 x i1> [[TMP1]], ptr [[RESP]], align 256, !tbaa [[__DMR2048_TBAA10]]
+// CHECK-NEXT: ret void
+//
+// AIX-LABEL: define void @test_dmcryshash(
+// AIX-SAME: ptr noundef readonly captures(none) [[VDMRPP:%.*]], ptr noundef writeonly captures(none) initializes((0, 256)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AIX-NEXT: [[ENTRY:.*:]]
+// AIX-NEXT: [[TMP0:%.*]] = load <2048 x i1>, ptr [[VDMRPP]], align 256, !tbaa [[__DMR2048_TBAA10]]
+// AIX-NEXT: [[TMP1:%.*]] = tail call <2048 x i1> @llvm.ppc.dmsha3hash(<2048 x i1> [[TMP0]], i32 12)
+// AIX-NEXT: store <2048 x i1> [[TMP1]], ptr [[RESP]], align 256, !tbaa [[__DMR2048_TBAA10]]
+// AIX-NEXT: ret void
+//
+void test_dmcryshash(unsigned char *vdmrpp, unsigned char *resp) {
+ __dmr2048 vdmrp = *((__dmr2048 *)vdmrpp);
+ __builtin_dmcryshash(&vdmrp);
+ *((__dmr2048 *)resp) = vdmrp;
+}
+
+// CHECK-LABEL: define dso_local void @test_dmxxsha3512pad(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: ret void
+//
+// AIX-LABEL: define void @test_dmxxsha3512pad(
+// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AIX-NEXT: [[ENTRY:.*:]]
+// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
+// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: ret void
+//
+void test_dmxxsha3512pad(unsigned char *vdmrp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __builtin_dmxxsha3512pad(&vdmr, vc, 0);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: define dso_local void @test_dmxxsha3384pad(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 1, i32 1)
+// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: ret void
+//
+// AIX-LABEL: define void @test_dmxxsha3384pad(
+// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AIX-NEXT: [[ENTRY:.*:]]
+// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 1, i32 1)
+// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: ret void
+//
+void test_dmxxsha3384pad(unsigned char *vdmrp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __builtin_dmxxsha3384pad(&vdmr, vc, 1);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: define dso_local void @test_dmxxsha3256pad(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 0, i32 2)
+// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: ret void
+//
+// AIX-LABEL: define void @test_dmxxsha3256pad(
+// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AIX-NEXT: [[ENTRY:.*:]]
+// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 0, i32 2)
+// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: ret void
+//
+void test_dmxxsha3256pad(unsigned char *vdmrp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __builtin_dmxxsha3256pad(&vdmr, vc, 0);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: define dso_local void @test_dmxxsha3224pad(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 1, i32 3)
+// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: ret void
+//
+// AIX-LABEL: define void @test_dmxxsha3224pad(
+// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AIX-NEXT: [[ENTRY:.*:]]
+// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 0, i32 1, i32 3)
+// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: ret void
+//
+void test_dmxxsha3224pad(unsigned char *vdmrp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __builtin_dmxxsha3224pad(&vdmr, vc, 1);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: define dso_local void @test_dmxxshake256pad(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 1, i32 0, i32 0)
+// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: ret void
+//
+// AIX-LABEL: define void @test_dmxxshake256pad(
+// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AIX-NEXT: [[ENTRY:.*:]]
+// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 1, i32 0, i32 0)
+// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: ret void
+//
+void test_dmxxshake256pad(unsigned char *vdmrp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __builtin_dmxxshake256pad(&vdmr, vc, 0);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: define dso_local void @test_dmxxshake128pad(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 1, i32 1, i32 1)
+// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: ret void
+//
+// AIX-LABEL: define void @test_dmxxshake128pad(
+// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AIX-NEXT: [[ENTRY:.*:]]
+// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 1, i32 1, i32 1)
+// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: ret void
+//
+void test_dmxxshake128pad(unsigned char *vdmrp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __builtin_dmxxshake128pad(&vdmr, vc, 1);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: define dso_local void @test_dmxxsha384512pad(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 2, i32 0, i32 0)
+// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: ret void
+//
+// AIX-LABEL: define void @test_dmxxsha384512pad(
+// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AIX-NEXT: [[ENTRY:.*:]]
+// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 2, i32 0, i32 0)
+// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: ret void
+//
+void test_dmxxsha384512pad(unsigned char *vdmrp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __builtin_dmxxsha384512pad(&vdmr, vc);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: define dso_local void @test_dmxxsha224256pad(
+// CHECK-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 3, i32 0, i32 0)
+// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// CHECK-NEXT: ret void
+//
+// AIX-LABEL: define void @test_dmxxsha224256pad(
+// AIX-SAME: ptr noundef readonly captures(none) [[VDMRP:%.*]], <16 x i8> noundef [[VC:%.*]], ptr noundef writeonly captures(none) initializes((0, 128)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AIX-NEXT: [[ENTRY:.*:]]
+// AIX-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> [[TMP0]], <16 x i8> [[VC]], i32 3, i32 0, i32 0)
+// AIX-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP]], align 128, !tbaa [[__DMR1024_TBAA7]]
+// AIX-NEXT: ret void
+//
+void test_dmxxsha224256pad(unsigned char *vdmrp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __builtin_dmxxsha224256pad(&vdmr, vc);
*((__dmr1024 *)resp) = vdmr;
}
//.
-// CHECK: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
-// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
-// CHECK: [[__VECTOR_PAIR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
-// CHECK: [[META7]] = !{!"__vector_pair", [[META4]], i64 0}
-// CHECK: [[__DMR1024_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0}
-// CHECK: [[META9]] = !{!"__dmr1024", [[META4]], i64 0}
-// CHECK: [[CHAR_TBAA10]] = !{[[META4]], [[META4]], i64 0}
-// CHECK: [[__DMR2048_TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0}
-// CHECK: [[META12]] = !{!"__dmr2048", [[META4]], i64 0}
+// CHECK: [[META3:![0-9]+]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[__VECTOR_PAIR_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+// CHECK: [[META6]] = !{!"__vector_pair", [[META3]], i64 0}
+// CHECK: [[__DMR1024_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
+// CHECK: [[META8]] = !{!"__dmr1024", [[META3]], i64 0}
+// CHECK: [[CHAR_TBAA9]] = !{[[META3]], [[META3]], i64 0}
+// CHECK: [[__DMR2048_TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0}
+// CHECK: [[META11]] = !{!"__dmr2048", [[META3]], i64 0}
//.
-// AIX: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
-// AIX: [[META5]] = !{!"Simple C/C++ TBAA"}
-// AIX: [[__VECTOR_PAIR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
-// AIX: [[META7]] = !{!"__vector_pair", [[META4]], i64 0}
-// AIX: [[__DMR1024_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0}
-// AIX: [[META9]] = !{!"__dmr1024", [[META4]], i64 0}
-// AIX: [[CHAR_TBAA10]] = !{[[META4]], [[META4]], i64 0}
-// AIX: [[__DMR2048_TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0}
-// AIX: [[META12]] = !{!"__dmr2048", [[META4]], i64 0}
+// AIX: [[META3:![0-9]+]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+// AIX: [[META4]] = !{!"Simple C/C++ TBAA"}
+// AIX: [[__VECTOR_PAIR_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+// AIX: [[META6]] = !{!"__vector_pair", [[META3]], i64 0}
+// AIX: [[__DMR1024_TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
+// AIX: [[META8]] = !{!"__dmr1024", [[META3]], i64 0}
+// AIX: [[CHAR_TBAA9]] = !{[[META3]], [[META3]], i64 0}
+// AIX: [[__DMR2048_TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0}
+// AIX: [[META11]] = !{!"__dmr2048", [[META3]], i64 0}
//.
diff --git a/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c b/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c
index 66b9d797c65d3..a13ce3e0abc28 100644
--- a/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c
+++ b/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c
@@ -25,9 +25,21 @@ void test_mma(unsigned char *vdmrpp, unsigned char *vdmrp, unsigned char *vpp, v
__builtin_mma_dmxor(&vdmr, (__dmr1024*)vpp);
__builtin_mma_build_dmr(&vdmr, vc, vc, vc, vc, vc, vc, vc, vc);
__builtin_mma_disassemble_dmr(vdmrp, &vdmr);
- __builtin_mma_dmsha2hash(&vdmr, &vdmr, 0);
- __builtin_mma_dmsha3hash(&vdmrpair, 0);
- __builtin_mma_dmxxshapad(&vdmr, vc, 0, 0, 0);
+ __builtin_dmsha2hash(&vdmr, &vdmr, 0);
+ __builtin_dmsha3hash(&vdmrpair, 0);
+ __builtin_dmxxshapad(&vdmr, vc, 0, 0, 0);
+ __builtin_dmsha256hash(&vdmr, &vdmr);
+ __builtin_dmsha512hash(&vdmr, &vdmr);
+ __builtin_dmsha3dw(&vdmrpair);
+ __builtin_dmcryshash(&vdmrpair);
+ __builtin_dmxxsha3512pad(&vdmr, vc, 0);
+ __builtin_dmxxsha3384pad(&vdmr, vc, 0);
+ __builtin_dmxxsha3256pad(&vdmr, vc, 0);
+ __builtin_dmxxsha3224pad(&vdmr, vc, 0);
+ __builtin_dmxxshake256pad(&vdmr, vc, 0);
+ __builtin_dmxxshake128pad(&vdmr, vc, 0);
+ __builtin_dmxxsha384512pad(&vdmr, vc);
+ __builtin_dmxxsha224256pad(&vdmr, vc);
// CHECK: error: '__builtin_mma_dmxvi8gerx4' needs target feature mma,paired-vector-memops
// CHECK: error: '__builtin_mma_pmdmxvi8gerx4' needs target feature mma,paired-vector-memops
@@ -40,9 +52,20 @@ void test_mma(unsigned char *vdmrpp, unsigned char *vdmrp, unsigned char *vpp, v
// ISA_FUTURE: error: '__builtin_mma_dmxor' needs target feature mma,isa-future-instructions
// ISA_FUTURE: error: '__builtin_mma_build_dmr' needs target feature mma,isa-future-instructions
// ISA_FUTURE: error: '__builtin_mma_disassemble_dmr' needs target feature mma,isa-future-instructions
-// CHECK: error: '__builtin_mma_dmsha2hash' needs target feature mma,isa-future-instructions
-// CHECK: error: '__builtin_mma_dmsha3hash' needs target feature mma,isa-future-instructions
-// CHECK: error: '__builtin_mma_dmxxshapad' needs target feature mma,isa-future-instructions
+// CHECK: error: '__builtin_dmsha2hash' needs target feature mma,isa-future-instructions
+// CHECK: error: '__builtin_dmsha3hash' needs target feature mma,isa-future-instructions
+// CHECK: error: '__builtin_dmxxshapad' needs target feature mma,isa-future-instructions
+// CHECK: error: '__builtin_dmsha256hash' needs target feature mma,isa-future-instructions
+// CHECK: error: '__builtin_dmsha512hash' needs target feature mma,isa-future-instructions
+// CHECK: error: '__builtin_dmsha3dw' needs target feature mma,isa-future-instructions
+// CHECK: error: '__builtin_dmxxsha3512pad' needs target feature mma,isa-future-instructions
+// CHECK: error: '__builtin_dmxxsha3384pad' needs target feature mma,isa-future-instructions
+// CHECK: error: '__builtin_dmxxsha3256pad' needs target feature mma,isa-future-instructions
+// CHECK: error: '__builtin_dmxxsha3224pad' needs target feature mma,isa-future-instructions
+// CHECK: error: '__builtin_dmxxshake256pad' needs target feature mma,isa-future-instructions
+// CHECK: error: '__builtin_dmxxshake128pad' needs target feature mma,isa-future-instructions
+// CHECK: error: '__builtin_dmxxsha384512pad' needs target feature mma,isa-future-instructions
+// CHECK: error: '__builtin_dmxxsha224256pad' needs target feature mma,isa-future-instructions
// DMF VSX Vector bfloat16 GER 2x builtins.
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index ec33af88c72d9..1fd74745e84e1 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1860,16 +1860,16 @@ let TargetPrefix = "ppc" in {
defm int_ppc_mma_pmdmxvf16gerx2 :
PowerPC_MMA_DMR_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty]>;
- def int_ppc_mma_dmsha2hash :
+ def int_ppc_dmsha2hash :
DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty,
llvm_v1024i1_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_ppc_mma_dmsha3hash :
+ def int_ppc_dmsha3hash :
DefaultAttrsIntrinsic<[llvm_v2048i1_ty], [llvm_v2048i1_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_ppc_mma_dmxxshapad :
+ def int_ppc_dmxxshapad :
DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty,
llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td
index 1b4b58f724bc8..5df334b271549 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td
@@ -511,14 +511,14 @@ let Predicates = [MMA, IsISAFuture] in {
: XForm_AT3_T1_AB3<
31, 14, 177, (outs dmr:$AT), (ins dmr:$ATi, dmr:$AB, u1imm:$T),
"dmsha2hash $AT, $AB, $T",
- [(set v1024i1:$AT, (int_ppc_mma_dmsha2hash v1024i1:$ATi,
+ [(set v1024i1:$AT, (int_ppc_dmsha2hash v1024i1:$ATi,
v1024i1:$AB, u1imm_timm:$T))]>,
RegConstraint<"$ATi = $AT">;
def DMSHA3HASH
: XForm_ATp2_SR5<31, 15, 177, (outs dmrp:$ATp),
(ins dmrp:$ATpi, u5imm:$SR), "dmsha3hash $ATp, $SR",
[(set v2048i1:$ATp,
- (int_ppc_mma_dmsha3hash v2048i1:$ATpi,
+ (int_ppc_dmsha3hash v2048i1:$ATpi,
u5imm_timm:$SR))]>,
RegConstraint<"$ATpi = $ATp">;
def DMXXSHAPAD
@@ -593,7 +593,7 @@ let Predicates = [MMA, IsISAFuture] in {
(DMXVF16GERX2NN $ATi, $XAp, RCCp.BToVSRC)>;
// Cryptography Intrinsic
- def : Pat<(v1024i1 (int_ppc_mma_dmxxshapad v1024i1:$ATi, v16i8:$XB,
+ def : Pat<(v1024i1 (int_ppc_dmxxshapad v1024i1:$ATi, v16i8:$XB,
u2imm_timm:$ID, u1imm_timm:$E, u2imm_timm:$BL)),
(DMXXSHAPAD $ATi, RCCp.BToVSRC, $ID, $E, $BL)>;
}
diff --git a/llvm/test/CodeGen/PowerPC/dmrp-spill.ll b/llvm/test/CodeGen/PowerPC/dmrp-spill.ll
index 7a26c49b89df5..88afec18f7b1d 100644
--- a/llvm/test/CodeGen/PowerPC/dmrp-spill.ll
+++ b/llvm/test/CodeGen/PowerPC/dmrp-spill.ll
@@ -10,7 +10,7 @@
; RUN: -ppc-vsr-nums-as-vr -mcpu=future < %s | FileCheck %s --check-prefix=AIX32
declare void @dummy_func()
-declare <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1>, i32)
+declare <2048 x i1> @llvm.ppc.dmsha3hash(<2048 x i1>, i32)
define dso_local void @test_dmsha3hash(ptr %vopp, ptr %resp) nounwind {
; CHECK-LABEL: test_dmsha3hash:
@@ -205,9 +205,9 @@ define dso_local void @test_dmsha3hash(ptr %vopp, ptr %resp) nounwind {
; AIX32-NEXT: blr
entry:
%0 = load <2048 x i1>, ptr %vopp, align 64
- %2 = tail call <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1> %0, i32 5)
+ %2 = tail call <2048 x i1> @llvm.ppc.dmsha3hash(<2048 x i1> %0, i32 5)
tail call void @dummy_func()
- %3 = tail call <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1> %0, i32 5)
+ %3 = tail call <2048 x i1> @llvm.ppc.dmsha3hash(<2048 x i1> %0, i32 5)
store <2048 x i1> %2, ptr %resp, align 64
ret void
}
diff --git a/llvm/test/CodeGen/PowerPC/mmaplus-crypto.ll b/llvm/test/CodeGen/PowerPC/mmaplus-crypto.ll
index ab2324a4646b3..157baabb93014 100644
--- a/llvm/test/CodeGen/PowerPC/mmaplus-crypto.ll
+++ b/llvm/test/CodeGen/PowerPC/mmaplus-crypto.ll
@@ -6,10 +6,10 @@
; RUN: -mcpu=future -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
-declare <1024 x i1> @llvm.ppc.mma.dmsha2hash(<1024 x i1>, <1024 x i1>, i32)
+declare <1024 x i1> @llvm.ppc.dmsha2hash(<1024 x i1>, <1024 x i1>, i32)
-define dso_local void @test_dmsha2hash(ptr %vop, ptr %vinp, ptr %resp) {
-; CHECK-LABEL: test_dmsha2hash:
+define dso_local void @test_dmsha256hash(ptr %vop, ptr %vinp, ptr %resp) {
+; CHECK-LABEL: test_dmsha256hash:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvp vsp34, 0(r3)
; CHECK-NEXT: lxvp vsp36, 32(r3)
@@ -32,7 +32,7 @@ define dso_local void @test_dmsha2hash(ptr %vop, ptr %vinp, ptr %resp) {
; CHECK-NEXT: stxvp vsp36, 0(r5)
; CHECK-NEXT: blr
;
-; CHECK-BE-LABEL: test_dmsha2hash:
+; CHECK-BE-LABEL: test_dmsha256hash:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
@@ -57,12 +57,12 @@ define dso_local void @test_dmsha2hash(ptr %vop, ptr %vinp, ptr %resp) {
entry:
%0 = load <1024 x i1>, ptr %vop, align 64
%1 = load <1024 x i1>, ptr %vinp, align 64
- %3 = tail call <1024 x i1> @llvm.ppc.mma.dmsha2hash(<1024 x i1> %0, <1024 x i1> %1, i32 0)
+ %3 = tail call <1024 x i1> @llvm.ppc.dmsha2hash(<1024 x i1> %0, <1024 x i1> %1, i32 0)
store <1024 x i1> %3, ptr %resp, align 64
ret void
}
-declare <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1>, i32)
+declare <2048 x i1> @llvm.ppc.dmsha3hash(<2048 x i1>, i32)
define dso_local void @test_dmsha3hash(ptr %vopp, ptr %resp) {
; CHECK-LABEL: test_dmsha3hash:
@@ -124,12 +124,12 @@ define dso_local void @test_dmsha3hash(ptr %vopp, ptr %resp) {
; CHECK-BE-NEXT: blr
entry:
%0 = load <2048 x i1>, ptr %vopp, align 64
- %2 = tail call <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1> %0, i32 5)
+ %2 = tail call <2048 x i1> @llvm.ppc.dmsha3hash(<2048 x i1> %0, i32 5)
store <2048 x i1> %2, ptr %resp, align 64
ret void
}
-declare <1024 x i1> @llvm.ppc.mma.dmxxshapad(<1024 x i1>, <16 x i8>, i32, i32, i32)
+declare <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1>, <16 x i8>, i32, i32, i32)
define dso_local void @test_dmxxshapad(ptr %vopp, ptr %vcp, ptr %resp) {
; CHECK-LABEL: test_dmxxshapad:
@@ -170,7 +170,543 @@ define dso_local void @test_dmxxshapad(ptr %vopp, ptr %vcp, ptr %resp) {
entry:
%0 = load <1024 x i1>, ptr %vopp, align 64
%1 = load <16 x i8>, ptr %vcp, align 64
- %2 = tail call <1024 x i1> @llvm.ppc.mma.dmxxshapad(<1024 x i1> %0, <16 x i8> %1, i32 2, i32 1, i32 3)
+ %2 = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> %0, <16 x i8> %1, i32 2, i32 1, i32 3)
+ store <1024 x i1> %2, ptr %resp, align 64
+ ret void
+}
+
+define dso_local void @test_dmsha512hash(ptr %vop, ptr %vinp, ptr %resp) { ; llvm.ppc.dmsha2hash with immediate 1 should be printed as dmsha512hash
+; CHECK-LABEL: test_dmsha512hash:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: lxvp vsp32, 64(r3)
+; CHECK-NEXT: lxvp vsp38, 96(r3)
+; CHECK-NEXT: lxvp vsp40, 0(r4)
+; CHECK-NEXT: lxvp vsp42, 32(r4)
+; CHECK-NEXT: lxvp vsp44, 64(r4)
+; CHECK-NEXT: lxvp vsp46, 96(r4)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi1, vsp42, vsp40, 1
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
+; CHECK-NEXT: dmxxinstdmr512 wacc1, vsp46, vsp44, 0
+; CHECK-NEXT: dmsha512hash dmr0, dmr1
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r5)
+; CHECK-NEXT: stxvp vsp36, 64(r5)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r5)
+; CHECK-NEXT: stxvp vsp36, 0(r5)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_dmsha512hash:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
+; CHECK-BE-NEXT: lxvp vsp40, 96(r4)
+; CHECK-BE-NEXT: lxvp vsp42, 64(r4)
+; CHECK-BE-NEXT: lxvp vsp44, 32(r4)
+; CHECK-BE-NEXT: lxvp vsp46, 0(r4)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi1, vsp42, vsp40, 1
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc1, vsp46, vsp44, 0
+; CHECK-BE-NEXT: dmsha512hash dmr0, dmr1
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r5)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r5)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = load <1024 x i1>, ptr %vop, align 64
+ %1 = load <1024 x i1>, ptr %vinp, align 64
+ %3 = tail call <1024 x i1> @llvm.ppc.dmsha2hash(<1024 x i1> %0, <1024 x i1> %1, i32 1) ; immediate 1 is the value under test
+ store <1024 x i1> %3, ptr %resp, align 64
+ ret void
+}
+
+define dso_local void @test_dmsha3dw(ptr %vopp, ptr %resp) { ; llvm.ppc.dmsha3hash with immediate 0 should be printed as dmsha3dw
+; CHECK-LABEL: test_dmsha3dw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: lxvp vsp32, 64(r3)
+; CHECK-NEXT: lxvp vsp38, 96(r3)
+; CHECK-NEXT: lxvp vsp40, 128(r3)
+; CHECK-NEXT: lxvp vsp42, 160(r3)
+; CHECK-NEXT: lxvp vsp44, 192(r3)
+; CHECK-NEXT: lxvp vsp46, 224(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-NEXT: dmxxinstdmr512 wacc1, vsp38, vsp32, 0
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp42, vsp40, 1
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp46, vsp44, 0
+; CHECK-NEXT: dmsha3dw dmrp0
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 224(r4)
+; CHECK-NEXT: stxvp vsp36, 192(r4)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 160(r4)
+; CHECK-NEXT: stxvp vsp36, 128(r4)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; CHECK-NEXT: stxvp vsp34, 96(r4)
+; CHECK-NEXT: stxvp vsp36, 64(r4)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
+; CHECK-NEXT: stxvp vsp34, 32(r4)
+; CHECK-NEXT: stxvp vsp36, 0(r4)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_dmsha3dw:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvp vsp34, 224(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 192(r3)
+; CHECK-BE-NEXT: lxvp vsp32, 160(r3)
+; CHECK-BE-NEXT: lxvp vsp38, 128(r3)
+; CHECK-BE-NEXT: lxvp vsp40, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp42, 64(r3)
+; CHECK-BE-NEXT: lxvp vsp44, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp46, 0(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc1, vsp38, vsp32, 0
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp42, vsp40, 1
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp46, vsp44, 0
+; CHECK-BE-NEXT: dmsha3dw dmrp0
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
+; CHECK-BE-NEXT: stxvp vsp36, 224(r4)
+; CHECK-BE-NEXT: stxvp vsp34, 192(r4)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; CHECK-BE-NEXT: stxvp vsp36, 160(r4)
+; CHECK-BE-NEXT: stxvp vsp34, 128(r4)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r4)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r4)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r4)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r4)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = load <2048 x i1>, ptr %vopp, align 64
+ %2 = tail call <2048 x i1> @llvm.ppc.dmsha3hash(<2048 x i1> %0, i32 0) ; immediate 0 is the value under test
+ store <2048 x i1> %2, ptr %resp, align 64
+ ret void
+}
+
+define dso_local void @test_dmcryshash(ptr %vopp, ptr %resp) { ; llvm.ppc.dmsha3hash with immediate 12 should be printed as dmcryshash
+; CHECK-LABEL: test_dmcryshash:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: lxvp vsp32, 64(r3)
+; CHECK-NEXT: lxvp vsp38, 96(r3)
+; CHECK-NEXT: lxvp vsp40, 128(r3)
+; CHECK-NEXT: lxvp vsp42, 160(r3)
+; CHECK-NEXT: lxvp vsp44, 192(r3)
+; CHECK-NEXT: lxvp vsp46, 224(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-NEXT: dmxxinstdmr512 wacc1, vsp38, vsp32, 0
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp42, vsp40, 1
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp46, vsp44, 0
+; CHECK-NEXT: dmcryshash dmrp0
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 224(r4)
+; CHECK-NEXT: stxvp vsp36, 192(r4)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 160(r4)
+; CHECK-NEXT: stxvp vsp36, 128(r4)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; CHECK-NEXT: stxvp vsp34, 96(r4)
+; CHECK-NEXT: stxvp vsp36, 64(r4)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
+; CHECK-NEXT: stxvp vsp34, 32(r4)
+; CHECK-NEXT: stxvp vsp36, 0(r4)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_dmcryshash:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvp vsp34, 224(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 192(r3)
+; CHECK-BE-NEXT: lxvp vsp32, 160(r3)
+; CHECK-BE-NEXT: lxvp vsp38, 128(r3)
+; CHECK-BE-NEXT: lxvp vsp40, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp42, 64(r3)
+; CHECK-BE-NEXT: lxvp vsp44, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp46, 0(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc1, vsp38, vsp32, 0
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp42, vsp40, 1
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp46, vsp44, 0
+; CHECK-BE-NEXT: dmcryshash dmrp0
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
+; CHECK-BE-NEXT: stxvp vsp36, 224(r4)
+; CHECK-BE-NEXT: stxvp vsp34, 192(r4)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; CHECK-BE-NEXT: stxvp vsp36, 160(r4)
+; CHECK-BE-NEXT: stxvp vsp34, 128(r4)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r4)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r4)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r4)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r4)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = load <2048 x i1>, ptr %vopp, align 64
+ %2 = tail call <2048 x i1> @llvm.ppc.dmsha3hash(<2048 x i1> %0, i32 12) ; immediate 12 is the value under test
+ store <2048 x i1> %2, ptr %resp, align 64
+ ret void
+}
+
+define dso_local void @test_dmxxsha3512pad(ptr %vopp, ptr %vcp, ptr %resp) { ; llvm.ppc.dmxxshapad with immediates (0, 1, 0) should be printed as dmxxsha3512pad
+; CHECK-LABEL: test_dmxxsha3512pad:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: lxvp vsp32, 64(r3)
+; CHECK-NEXT: lxvp vsp38, 96(r3)
+; CHECK-NEXT: lxv vs0, 0(r4)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
+; CHECK-NEXT: dmxxsha3512pad dmr0, vs0, 1
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r5)
+; CHECK-NEXT: stxvp vsp36, 64(r5)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r5)
+; CHECK-NEXT: stxvp vsp36, 0(r5)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_dmxxsha3512pad:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
+; CHECK-BE-NEXT: lxv vs0, 0(r4)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
+; CHECK-BE-NEXT: dmxxsha3512pad dmr0, vs0, 1
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r5)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r5)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = load <1024 x i1>, ptr %vopp, align 64
+ %1 = load <16 x i8>, ptr %vcp, align 64
+ %2 = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> %0, <16 x i8> %1, i32 0, i32 1, i32 0) ; immediates (0, 1, 0) are the combination under test
+ store <1024 x i1> %2, ptr %resp, align 64
+ ret void
+}
+
+define dso_local void @test_dmxxsha3384pad(ptr %vopp, ptr %vcp, ptr %resp) { ; llvm.ppc.dmxxshapad with immediates (0, 1, 1) should be printed as dmxxsha3384pad
+; CHECK-LABEL: test_dmxxsha3384pad:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: lxvp vsp32, 64(r3)
+; CHECK-NEXT: lxvp vsp38, 96(r3)
+; CHECK-NEXT: lxv vs0, 0(r4)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
+; CHECK-NEXT: dmxxsha3384pad dmr0, vs0, 1
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r5)
+; CHECK-NEXT: stxvp vsp36, 64(r5)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r5)
+; CHECK-NEXT: stxvp vsp36, 0(r5)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_dmxxsha3384pad:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
+; CHECK-BE-NEXT: lxv vs0, 0(r4)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
+; CHECK-BE-NEXT: dmxxsha3384pad dmr0, vs0, 1
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r5)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r5)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = load <1024 x i1>, ptr %vopp, align 64
+ %1 = load <16 x i8>, ptr %vcp, align 64
+ %2 = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> %0, <16 x i8> %1, i32 0, i32 1, i32 1) ; immediates (0, 1, 1) are the combination under test
+ store <1024 x i1> %2, ptr %resp, align 64
+ ret void
+}
+
+define dso_local void @test_dmxxsha3256pad(ptr %vopp, ptr %vcp, ptr %resp) { ; llvm.ppc.dmxxshapad with immediates (0, 1, 2) should be printed as dmxxsha3256pad
+; CHECK-LABEL: test_dmxxsha3256pad:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: lxvp vsp32, 64(r3)
+; CHECK-NEXT: lxvp vsp38, 96(r3)
+; CHECK-NEXT: lxv vs0, 0(r4)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
+; CHECK-NEXT: dmxxsha3256pad dmr0, vs0, 1
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r5)
+; CHECK-NEXT: stxvp vsp36, 64(r5)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r5)
+; CHECK-NEXT: stxvp vsp36, 0(r5)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_dmxxsha3256pad:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
+; CHECK-BE-NEXT: lxv vs0, 0(r4)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
+; CHECK-BE-NEXT: dmxxsha3256pad dmr0, vs0, 1
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r5)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r5)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = load <1024 x i1>, ptr %vopp, align 64
+ %1 = load <16 x i8>, ptr %vcp, align 64
+ %2 = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> %0, <16 x i8> %1, i32 0, i32 1, i32 2) ; immediates (0, 1, 2) are the combination under test
+ store <1024 x i1> %2, ptr %resp, align 64
+ ret void
+}
+
+define dso_local void @test_dmxxsha3224pad(ptr %vopp, ptr %vcp, ptr %resp) { ; llvm.ppc.dmxxshapad with immediates (0, 1, 3) should be printed as dmxxsha3224pad
+; CHECK-LABEL: test_dmxxsha3224pad:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: lxvp vsp32, 64(r3)
+; CHECK-NEXT: lxvp vsp38, 96(r3)
+; CHECK-NEXT: lxv vs0, 0(r4)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
+; CHECK-NEXT: dmxxsha3224pad dmr0, vs0, 1
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r5)
+; CHECK-NEXT: stxvp vsp36, 64(r5)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r5)
+; CHECK-NEXT: stxvp vsp36, 0(r5)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_dmxxsha3224pad:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
+; CHECK-BE-NEXT: lxv vs0, 0(r4)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
+; CHECK-BE-NEXT: dmxxsha3224pad dmr0, vs0, 1
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r5)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r5)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = load <1024 x i1>, ptr %vopp, align 64
+ %1 = load <16 x i8>, ptr %vcp, align 64
+ %2 = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> %0, <16 x i8> %1, i32 0, i32 1, i32 3) ; immediates (0, 1, 3) are the combination under test
+ store <1024 x i1> %2, ptr %resp, align 64
+ ret void
+}
+
+define dso_local void @test_dmxxshake256pad(ptr %vopp, ptr %vcp, ptr %resp) { ; llvm.ppc.dmxxshapad with immediates (1, 1, 0) should be printed as dmxxshake256pad
+; CHECK-LABEL: test_dmxxshake256pad:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: lxvp vsp32, 64(r3)
+; CHECK-NEXT: lxvp vsp38, 96(r3)
+; CHECK-NEXT: lxv vs0, 0(r4)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
+; CHECK-NEXT: dmxxshake256pad dmr0, vs0, 1
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r5)
+; CHECK-NEXT: stxvp vsp36, 64(r5)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r5)
+; CHECK-NEXT: stxvp vsp36, 0(r5)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_dmxxshake256pad:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
+; CHECK-BE-NEXT: lxv vs0, 0(r4)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
+; CHECK-BE-NEXT: dmxxshake256pad dmr0, vs0, 1
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r5)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r5)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = load <1024 x i1>, ptr %vopp, align 64
+ %1 = load <16 x i8>, ptr %vcp, align 64
+ %2 = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> %0, <16 x i8> %1, i32 1, i32 1, i32 0) ; immediates (1, 1, 0) are the combination under test
+ store <1024 x i1> %2, ptr %resp, align 64
+ ret void
+}
+
+define dso_local void @test_dmxxshake128pad(ptr %vopp, ptr %vcp, ptr %resp) { ; llvm.ppc.dmxxshapad with immediates (1, 1, 1) should be printed as dmxxshake128pad
+; CHECK-LABEL: test_dmxxshake128pad:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: lxvp vsp32, 64(r3)
+; CHECK-NEXT: lxvp vsp38, 96(r3)
+; CHECK-NEXT: lxv vs0, 0(r4)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
+; CHECK-NEXT: dmxxshake128pad dmr0, vs0, 1
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r5)
+; CHECK-NEXT: stxvp vsp36, 64(r5)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r5)
+; CHECK-NEXT: stxvp vsp36, 0(r5)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_dmxxshake128pad:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
+; CHECK-BE-NEXT: lxv vs0, 0(r4)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
+; CHECK-BE-NEXT: dmxxshake128pad dmr0, vs0, 1
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r5)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r5)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = load <1024 x i1>, ptr %vopp, align 64
+ %1 = load <16 x i8>, ptr %vcp, align 64
+ %2 = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> %0, <16 x i8> %1, i32 1, i32 1, i32 1) ; immediates (1, 1, 1) are the combination under test
+ store <1024 x i1> %2, ptr %resp, align 64
+ ret void
+}
+
+define dso_local void @test_dmxxsha384512pad(ptr %vopp, ptr %vcp, ptr %resp) { ; llvm.ppc.dmxxshapad with immediates (2, 0, 0) should be printed as dmxxsha384512pad with no immediate operand
+; CHECK-LABEL: test_dmxxsha384512pad:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: lxvp vsp32, 64(r3)
+; CHECK-NEXT: lxvp vsp38, 96(r3)
+; CHECK-NEXT: lxv vs0, 0(r4)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
+; CHECK-NEXT: dmxxsha384512pad dmr0, vs0
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r5)
+; CHECK-NEXT: stxvp vsp36, 64(r5)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r5)
+; CHECK-NEXT: stxvp vsp36, 0(r5)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_dmxxsha384512pad:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
+; CHECK-BE-NEXT: lxv vs0, 0(r4)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
+; CHECK-BE-NEXT: dmxxsha384512pad dmr0, vs0
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r5)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r5)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = load <1024 x i1>, ptr %vopp, align 64
+ %1 = load <16 x i8>, ptr %vcp, align 64
+ %2 = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> %0, <16 x i8> %1, i32 2, i32 0, i32 0) ; immediates (2, 0, 0) are the combination under test
+ store <1024 x i1> %2, ptr %resp, align 64
+ ret void
+}
+
+define dso_local void @test_dmxxsha224256pad(ptr %vopp, ptr %vcp, ptr %resp) { ; llvm.ppc.dmxxshapad with immediates (3, 0, 0) should be printed as dmxxsha224256pad with no immediate operand
+; CHECK-LABEL: test_dmxxsha224256pad:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: lxvp vsp32, 64(r3)
+; CHECK-NEXT: lxvp vsp38, 96(r3)
+; CHECK-NEXT: lxv vs0, 0(r4)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
+; CHECK-NEXT: dmxxsha224256pad dmr0, vs0
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r5)
+; CHECK-NEXT: stxvp vsp36, 64(r5)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r5)
+; CHECK-NEXT: stxvp vsp36, 0(r5)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_dmxxsha224256pad:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: lxvp vsp32, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp38, 0(r3)
+; CHECK-BE-NEXT: lxv vs0, 0(r4)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0
+; CHECK-BE-NEXT: dmxxsha224256pad dmr0, vs0
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r5)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r5)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = load <1024 x i1>, ptr %vopp, align 64
+ %1 = load <16 x i8>, ptr %vcp, align 64
+ %2 = tail call <1024 x i1> @llvm.ppc.dmxxshapad(<1024 x i1> %0, <16 x i8> %1, i32 3, i32 0, i32 0) ; immediates (3, 0, 0) are the combination under test
store <1024 x i1> %2, ptr %resp, align 64
ret void
}
>From 053e37c47da8aef891f1050e5c89257171a5afb6 Mon Sep 17 00:00:00 2001
From: Maryam Moghadas <maryammo at ca.ibm.com>
Date: Thu, 19 Mar 2026 16:34:29 +0000
Subject: [PATCH 2/2] Address review comments
---
clang/include/clang/Basic/BuiltinsPPC.def | 18 +++++-----
clang/test/Sema/builtins-ppc-crypto.c | 43 +++++++++++++++++++++++
2 files changed, 52 insertions(+), 9 deletions(-)
create mode 100644 clang/test/Sema/builtins-ppc-crypto.c
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index baa7e057a777d..7996a5a87e648 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -1125,11 +1125,11 @@ UNALIASED_CUSTOM_BUILTIN(mma_disassemble_dmr, "vv*W1024*", false,
UNALIASED_CUSTOM_BUILTIN(mma_build_dmr, "vW1024*VVVVVVVV", false,
"mma,isa-future-instructions")
-UNALIASED_CUSTOM_BUILTIN(dmsha2hash, "vW1024*W1024*Ii", true,
+UNALIASED_CUSTOM_BUILTIN(dmsha2hash, "vW1024*W1024*i1", true, // i1: the trailing immediate is diagnosed unless it is a constant in [0, 1]
"mma,isa-future-instructions")
-UNALIASED_CUSTOM_BUILTIN(dmsha3hash, "vW2048*Ii", true,
+UNALIASED_CUSTOM_BUILTIN(dmsha3hash, "vW2048*i31", true, // i31: the trailing immediate is diagnosed unless it is a constant in [0, 31]
"mma,isa-future-instructions")
-UNALIASED_CUSTOM_BUILTIN(dmxxshapad, "vW1024*VIiIiIi", true,
+UNALIASED_CUSTOM_BUILTIN(dmxxshapad, "vW1024*Vi3i1i3", true, // i3, i1, i3: the three immediates are limited to [0, 3], [0, 1] and [0, 3] respectively
"mma,isa-future-instructions")
CUSTOM_BUILTIN(dmsha256hash, dmsha2hash, "vW1024*W1024*", true,
"mma,isa-future-instructions")
@@ -1139,17 +1139,17 @@ CUSTOM_BUILTIN(dmsha3dw, dmsha3hash, "vW2048*", true,
"mma,isa-future-instructions")
CUSTOM_BUILTIN(dmcryshash, dmsha3hash, "vW2048*", true,
"mma,isa-future-instructions")
-CUSTOM_BUILTIN(dmxxsha3512pad, dmxxshapad, "vW1024*VIi", true,
+CUSTOM_BUILTIN(dmxxsha3512pad, dmxxshapad, "vW1024*Vi1", true, // i1 (here and in the five aliases that follow): the immediate is diagnosed unless it is a constant in [0, 1]
"mma,isa-future-instructions")
-CUSTOM_BUILTIN(dmxxsha3384pad, dmxxshapad, "vW1024*VIi", true,
+CUSTOM_BUILTIN(dmxxsha3384pad, dmxxshapad, "vW1024*Vi1", true,
"mma,isa-future-instructions")
-CUSTOM_BUILTIN(dmxxsha3256pad, dmxxshapad, "vW1024*VIi", true,
+CUSTOM_BUILTIN(dmxxsha3256pad, dmxxshapad, "vW1024*Vi1", true,
"mma,isa-future-instructions")
-CUSTOM_BUILTIN(dmxxsha3224pad, dmxxshapad, "vW1024*VIi", true,
+CUSTOM_BUILTIN(dmxxsha3224pad, dmxxshapad, "vW1024*Vi1", true,
"mma,isa-future-instructions")
-CUSTOM_BUILTIN(dmxxshake256pad, dmxxshapad, "vW1024*VIi", true,
+CUSTOM_BUILTIN(dmxxshake256pad, dmxxshapad, "vW1024*Vi1", true,
"mma,isa-future-instructions")
-CUSTOM_BUILTIN(dmxxshake128pad, dmxxshapad, "vW1024*VIi", true,
+CUSTOM_BUILTIN(dmxxshake128pad, dmxxshapad, "vW1024*Vi1", true,
"mma,isa-future-instructions")
CUSTOM_BUILTIN(dmxxsha384512pad, dmxxshapad, "vW1024*V", true,
"mma,isa-future-instructions")
diff --git a/clang/test/Sema/builtins-ppc-crypto.c b/clang/test/Sema/builtins-ppc-crypto.c
new file mode 100644
index 0000000000000..83543aafcf80e
--- /dev/null
+++ b/clang/test/Sema/builtins-ppc-crypto.c
@@ -0,0 +1,43 @@
+// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -target-cpu future \
+// RUN: -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu future \
+// RUN: -fsyntax-only -verify %s
+
+
+void test_crypto(unsigned char *vdmrpp, unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc) { // Negative Sema test: every builtin call below must be rejected, either for an out-of-range immediate or for a non-constant one.
+ __dmr2048 vdmrpair = *((__dmr2048 *)vdmrpp);
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __vector_pair vp = *((__vector_pair *)vpp); // NOTE(review): vp is not referenced by any call in this function
+ int ia; // deliberately not a constant expression; used for the "must be a constant integer" cases
+
+ __builtin_dmsha2hash(&vdmr, &vdmr, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
+ __builtin_dmsha2hash(&vdmr, &vdmr, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
+ __builtin_dmsha2hash(&vdmr, &vdmr, ia); // expected-error {{argument to '__builtin_dmsha2hash' must be a constant integer}}
+
+ __builtin_dmsha3hash(&vdmrpair, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+ __builtin_dmsha3hash(&vdmrpair, -2); // expected-error {{argument value -2 is outside the valid range [0, 31]}}
+ __builtin_dmsha3hash(&vdmrpair, ia); // expected-error {{argument to '__builtin_dmsha3hash' must be a constant integer}}
+
+ __builtin_dmxxshapad(&vdmr, vc, 4, 0, 3); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+ __builtin_dmxxshapad(&vdmr, vc, 3, 2, 3); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
+ __builtin_dmxxshapad(&vdmr, vc, 3, 1, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
+ __builtin_dmxxshapad(&vdmr, vc, ia, 1, -1); // expected-error {{argument to '__builtin_dmxxshapad' must be a constant integer}}
+
+ __builtin_dmxxsha3512pad(&vdmr, vc, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
+ __builtin_dmxxsha3512pad(&vdmr, vc, ia); // expected-error {{argument to '__builtin_dmxxsha3512pad' must be a constant integer}}
+
+ __builtin_dmxxsha3384pad(&vdmr, vc, 3); // expected-error {{argument value 3 is outside the valid range [0, 1]}}
+ __builtin_dmxxsha3384pad(&vdmr, vc, ia); // expected-error {{argument to '__builtin_dmxxsha3384pad' must be a constant integer}}
+
+ __builtin_dmxxsha3256pad(&vdmr, vc, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
+ __builtin_dmxxsha3256pad(&vdmr, vc, ia); // expected-error {{argument to '__builtin_dmxxsha3256pad' must be a constant integer}}
+
+ __builtin_dmxxsha3224pad(&vdmr, vc, 4); // expected-error {{argument value 4 is outside the valid range [0, 1]}}
+ __builtin_dmxxsha3224pad(&vdmr, vc, ia); // expected-error {{argument to '__builtin_dmxxsha3224pad' must be a constant integer}}
+
+ __builtin_dmxxshake256pad(&vdmr, vc, -2); // expected-error {{argument value -2 is outside the valid range [0, 1]}}
+ __builtin_dmxxshake256pad(&vdmr, vc, ia); // expected-error {{argument to '__builtin_dmxxshake256pad' must be a constant integer}}
+
+ __builtin_dmxxshake128pad(&vdmr, vc, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
+ __builtin_dmxxshake128pad(&vdmr, vc, ia); // expected-error {{argument to '__builtin_dmxxshake128pad' must be a constant integer}}
+}
More information about the cfe-commits
mailing list