[clang] [PowerPC][CLANG] DMF VSX Vector float GER 2x (rank-2 update) (PR #147383)
Lei Huang via cfe-commits
cfe-commits at lists.llvm.org
Thu Jul 17 11:48:43 PDT 2025
https://github.com/lei137 updated https://github.com/llvm/llvm-project/pull/147383
>From 627e01c2fa4fe756c5feff9b069026f43a2afa45 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Fri, 4 Jul 2025 14:53:15 -0500
Subject: [PATCH 1/4] RFC02658: CLANG: DMF VSX Vector bfloat16 GER 2x (rank-2
update)
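
GER 2x here denotes a rank-2 update: each builtin folds two outer
products into the 1024-bit DMR accumulator per invocation.
Schematically, assuming the __vector_pair supplies the two rows a0/a1
and the vector operand supplies b (operand roles are my reading of the
2x naming, not quoted from the ISA text):

  ACC <- +/- (a0 * b^T + a1 * b^T) +/- ACC

with the sign pair selected by the pp/pn/np/nn suffix (positive or
negative multiply, then positive or negative accumulate, as in the
"MMA builtins with positive/negative multiply/accumulate" group).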
---
clang/include/clang/Basic/BuiltinsPPC.def | 20 +++
.../PowerPC/builtins-dmf-vsx-vector-float.c | 161 ++++++++++++++++++
.../CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c | 22 +++
3 files changed, 203 insertions(+)
create mode 100644 clang/test/CodeGen/PowerPC/builtins-dmf-vsx-vector-float.c
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index e7d6741eb695e..12a81c90ec2d0 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -1098,6 +1098,26 @@ UNALIASED_CUSTOM_BUILTIN(mma_dmmr, "vW1024*W1024*", false,
"mma,isa-future-instructions")
UNALIASED_CUSTOM_BUILTIN(mma_dmxor, "vW1024*W1024*", true,
"mma,isa-future-instructions")
+UNALIASED_CUSTOM_MMA_BUILTIN(mma_xvf16ger2, "vW512*VV",
+ "mma,paired-vector-memops")
+UNALIASED_CUSTOM_MMA_BUILTIN(mma_xvf32ger, "vW512*VV",
+ "mma,paired-vector-memops")
+UNALIASED_CUSTOM_MMA_BUILTIN(mma_xvf64ger, "vW512*W256V",
+ "mma,paired-vector-memops")
+UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmxvf16ger2, "vW512*VVi15i15i3",
+ "mma,paired-vector-memops")
+UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmxvf32ger, "vW512*VVi15i15",
+ "mma,paired-vector-memops")
+UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmxvf64ger, "vW512*W256Vi15i3",
+ "mma,paired-vector-memops")
+UNALIASED_CUSTOM_MMA_BUILTIN(mma_xvbf16ger2, "vW512*VV",
+ "mma,paired-vector-memops")
+UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmxvbf16ger2, "vW512*VVi15i15i3",
+ "mma,paired-vector-memops")
+UNALIASED_CUSTOM_MMA_BUILTIN(mma_dmxvbf16gerx2, "vW1024*W256V",
+ "mma,paired-vector-memops")
+UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmdmxvbf16gerx2, "vW1024*W256Vi255i15i3",
+ "mma,paired-vector-memops")
// MMA builtins with positive/negative multiply/accumulate.
UNALIASED_CUSTOM_MMA_BUILTIN(mma_xvf16ger2, "vW512*VV",
diff --git a/clang/test/CodeGen/PowerPC/builtins-dmf-vsx-vector-float.c b/clang/test/CodeGen/PowerPC/builtins-dmf-vsx-vector-float.c
new file mode 100644
index 0000000000000..953815ecc42b6
--- /dev/null
+++ b/clang/test/CodeGen/PowerPC/builtins-dmf-vsx-vector-float.c
@@ -0,0 +1,161 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// Autogenerated checks were then manually edited to use common CHECK lines for AIX and Linux on Power (LoP).
+// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu future \
+// RUN: -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -O3 -triple powerpc64-ibm-aix -target-cpu future \
+// RUN: -emit-llvm %s -o - | FileCheck %s
+
+// CHECK-LABEL: void @test_dmxvbf16gerx2(
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]])
+// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-NEXT: ret void
+//
+void test_dmxvbf16gerx2(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __vector_pair vp = *((__vector_pair *)vpp);
+ __builtin_mma_dmxvbf16gerx2(&vdmr, vp, vc);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: void @test_dmxvbf16gerx2nn(
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2nn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: ret void
+//
+void test_dmxvbf16gerx2nn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __vector_pair vp = *((__vector_pair *)vpp);
+ __builtin_mma_dmxvbf16gerx2nn(&vdmr, vp, vc);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: void @test_dmxvbf16gerx2np(
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2np(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: ret void
+//
+void test_dmxvbf16gerx2np(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __vector_pair vp = *((__vector_pair *)vpp);
+ __builtin_mma_dmxvbf16gerx2np(&vdmr, vp, vc);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: void @test_dmxvbf16gerx2pn(
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2pn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: ret void
+//
+void test_dmxvbf16gerx2pn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __vector_pair vp = *((__vector_pair *)vpp);
+ __builtin_mma_dmxvbf16gerx2pn(&vdmr, vp, vc);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: void @test_dmxvbf16gerx2pp(
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: ret void
+//
+void test_dmxvbf16gerx2pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __vector_pair vp = *((__vector_pair *)vpp);
+ __builtin_mma_dmxvbf16gerx2pp(&vdmr, vp, vc);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: void @test_pmdmxvbf16gerx2(
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: ret void
+//
+void test_pmdmxvbf16gerx2(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __vector_pair vp = *((__vector_pair *)vpp);
+ __builtin_mma_pmdmxvbf16gerx2(&vdmr, vp, vc, 0, 0, 0);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: void @test_pmdmxvbf16gerx2nn(
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2nn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: ret void
+//
+void test_pmdmxvbf16gerx2nn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __vector_pair vp = *((__vector_pair *)vpp);
+ __builtin_mma_pmdmxvbf16gerx2nn(&vdmr, vp, vc, 0, 0, 0);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: void @test_pmdmxvbf16gerx2np(
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2np(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: ret void
+//
+void test_pmdmxvbf16gerx2np(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __vector_pair vp = *((__vector_pair *)vpp);
+ __builtin_mma_pmdmxvbf16gerx2np(&vdmr, vp, vc, 0, 0, 0);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: void @test_pmdmxvbf16gerx2pn(
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2pn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: ret void
+//
+void test_pmdmxvbf16gerx2pn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __vector_pair vp = *((__vector_pair *)vpp);
+ __builtin_mma_pmdmxvbf16gerx2pn(&vdmr, vp, vc, 0, 0, 0);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: void @test_pmdmxvbf16gerx2pp(
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvbf16gerx2pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: ret void
+//
+void test_pmdmxvbf16gerx2pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __vector_pair vp = *((__vector_pair *)vpp);
+ __builtin_mma_pmdmxvbf16gerx2pp(&vdmr, vp, vc, 0, 0, 0);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[META3]] = !{!"__vector_pair", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// CHECK: [[META7]] = !{!"__dmr1024", [[META4]], i64 0}
diff --git a/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c b/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c
index c02274696244a..a74f83f164e13 100644
--- a/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c
+++ b/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c
@@ -15,6 +15,18 @@ void test_mma(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc)
__builtin_mma_dmmr(&vdmr, (__dmr1024*)vpp);
__builtin_mma_dmxor(&vdmr, (__dmr1024*)vpp);
+ // DMF VSX Vector bfloat16 GER 2x builtins.
+ __builtin_mma_dmxvbf16gerx2(&vdmr, vp, vc);
+ __builtin_mma_dmxvbf16gerx2nn(&vdmr, vp, vc);
+ __builtin_mma_dmxvbf16gerx2np(&vdmr, vp, vc);
+ __builtin_mma_dmxvbf16gerx2pn(&vdmr, vp, vc);
+ __builtin_mma_dmxvbf16gerx2pp(&vdmr, vp, vc);
+ __builtin_mma_pmdmxvbf16gerx2(&vdmr, vp, vc, 0, 0, 0);
+ __builtin_mma_pmdmxvbf16gerx2nn(&vdmr, vp, vc, 0, 0, 0);
+ __builtin_mma_pmdmxvbf16gerx2np(&vdmr, vp, vc, 0, 0, 0);
+ __builtin_mma_pmdmxvbf16gerx2pn(&vdmr, vp, vc, 0, 0, 0);
+ __builtin_mma_pmdmxvbf16gerx2pp(&vdmr, vp, vc, 0, 0, 0);
+
// CHECK: error: '__builtin_mma_dmxvi8gerx4' needs target feature mma,paired-vector-memops
// CHECK: error: '__builtin_mma_pmdmxvi8gerx4' needs target feature mma,paired-vector-memops
// CHECK: error: '__builtin_mma_dmxvi8gerx4pp' needs target feature mma,paired-vector-memops
@@ -24,4 +36,14 @@ void test_mma(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc)
// CHECK: error: '__builtin_mma_dmsetdmrz' needs target feature mma,isa-future-instructions
// CHECK: error: '__builtin_mma_dmmr' needs target feature mma,isa-future-instructions
// CHECK: error: '__builtin_mma_dmxor' needs target feature mma,isa-future-instructions
+// CHECK: error: '__builtin_mma_dmxvbf16gerx2' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_dmxvbf16gerx2nn' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_dmxvbf16gerx2np' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_dmxvbf16gerx2pn' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_dmxvbf16gerx2pp' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_pmdmxvbf16gerx2' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_pmdmxvbf16gerx2nn' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_pmdmxvbf16gerx2np' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_pmdmxvbf16gerx2pn' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_pmdmxvbf16gerx2pp' needs target feature mma,paired-vector-memops
}
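
For anyone trying the new bf16 builtins, here is a minimal sketch of
chaining an overwriting GER 2x with an accumulating one; the helper
name is hypothetical, and it reuses the pointer-punning pattern from
the tests above:

void accumulate_bf16(unsigned char *accp, unsigned char *app,
                     unsigned char *bpp, vector unsigned char c0,
                     vector unsigned char c1) {
  __dmr1024 acc = *((__dmr1024 *)accp);       // 1024-bit DMR accumulator
  __vector_pair a = *((__vector_pair *)app);  // first 256-bit vector pair
  __vector_pair b = *((__vector_pair *)bpp);  // second 256-bit vector pair
  __builtin_mma_dmxvbf16gerx2(&acc, a, c0);   // overwrite acc with a x c0
  __builtin_mma_dmxvbf16gerx2pp(&acc, b, c1); // then accumulate b x c1
  *((__dmr1024 *)accp) = acc;                 // store the updated DMR
}

The unsuffixed form writes the accumulator outright; the pp form adds
on top of it, per the positive multiply/positive accumulate naming.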
>From 4c3ef08370a9c79a62be8b144119bfaaf539937e Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Mon, 7 Jul 2025 14:31:16 -0500
Subject: [PATCH 2/4] add dmf vsx vector 16-bit floating point builtin
---
clang/include/clang/Basic/BuiltinsPPC.def | 4 +
.../PowerPC/builtins-dmf-vsx-vector-float.c | 148 ++++++++++++++++++
.../CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c | 44 ++++--
3 files changed, 187 insertions(+), 9 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index 12a81c90ec2d0..290730674ed5e 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -1118,6 +1118,10 @@ UNALIASED_CUSTOM_MMA_BUILTIN(mma_dmxvbf16gerx2, "vW1024*W256V",
"mma,paired-vector-memops")
UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmdmxvbf16gerx2, "vW1024*W256Vi255i15i3",
"mma,paired-vector-memops")
+UNALIASED_CUSTOM_MMA_BUILTIN(mma_dmxvf16gerx2, "vW1024*W256V",
+ "mma,paired-vector-memops")
+UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmdmxvf16gerx2, "vW1024*W256Vi255i15i3",
+ "mma,paired-vector-memops")
// MMA builtins with positive/negative multiply/accumulate.
UNALIASED_CUSTOM_MMA_BUILTIN(mma_xvf16ger2, "vW512*VV",
diff --git a/clang/test/CodeGen/PowerPC/builtins-dmf-vsx-vector-float.c b/clang/test/CodeGen/PowerPC/builtins-dmf-vsx-vector-float.c
index 953815ecc42b6..8fc9a68a5a613 100644
--- a/clang/test/CodeGen/PowerPC/builtins-dmf-vsx-vector-float.c
+++ b/clang/test/CodeGen/PowerPC/builtins-dmf-vsx-vector-float.c
@@ -153,6 +153,154 @@ void test_pmdmxvbf16gerx2pp(unsigned char *vdmrp, unsigned char *vpp, vector uns
*((__dmr1024 *)resp) = vdmr;
}
+// CHECK-LABEL: void @test_dmxvf16gerx2(
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]])
+// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-NEXT: ret void
+//
+void test_dmxvf16gerx2(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __vector_pair vp = *((__vector_pair *)vpp);
+ __builtin_mma_dmxvf16gerx2(&vdmr, vp, vc);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: void @test_dmxvf16gerx2nn(
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2nn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: ret void
+//
+void test_dmxvf16gerx2nn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __vector_pair vp = *((__vector_pair *)vpp);
+ __builtin_mma_dmxvf16gerx2nn(&vdmr, vp, vc);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: void @test_dmxvf16gerx2np(
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2np(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: ret void
+//
+void test_dmxvf16gerx2np(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __vector_pair vp = *((__vector_pair *)vpp);
+ __builtin_mma_dmxvf16gerx2np(&vdmr, vp, vc);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: void @test_dmxvf16gerx2pn(
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2pn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: ret void
+//
+void test_dmxvf16gerx2pn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __vector_pair vp = *((__vector_pair *)vpp);
+ __builtin_mma_dmxvf16gerx2pn(&vdmr, vp, vc);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: void @test_dmxvf16gerx2pp(
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: ret void
+//
+void test_dmxvf16gerx2pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __vector_pair vp = *((__vector_pair *)vpp);
+ __builtin_mma_dmxvf16gerx2pp(&vdmr, vp, vc);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: void @test_pmdmxvf16gerx2(
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: ret void
+//
+void test_pmdmxvf16gerx2(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __vector_pair vp = *((__vector_pair *)vpp);
+ __builtin_mma_pmdmxvf16gerx2(&vdmr, vp, vc, 0, 0, 0);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: void @test_pmdmxvf16gerx2nn(
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2nn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: ret void
+//
+void test_pmdmxvf16gerx2nn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __vector_pair vp = *((__vector_pair *)vpp);
+ __builtin_mma_pmdmxvf16gerx2nn(&vdmr, vp, vc, 0, 0, 0);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: void @test_pmdmxvf16gerx2np(
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2np(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: ret void
+//
+void test_pmdmxvf16gerx2np(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __vector_pair vp = *((__vector_pair *)vpp);
+ __builtin_mma_pmdmxvf16gerx2np(&vdmr, vp, vc, 0, 0, 0);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: void @test_pmdmxvf16gerx2pn(
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2pn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: ret void
+//
+void test_pmdmxvf16gerx2pn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __vector_pair vp = *((__vector_pair *)vpp);
+ __builtin_mma_pmdmxvf16gerx2pn(&vdmr, vp, vc, 0, 0, 0);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
+// CHECK-LABEL: void @test_pmdmxvf16gerx2pp(
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
+// CHECK-NEXT: ret void
+//
+void test_pmdmxvf16gerx2pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
+ __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
+ __vector_pair vp = *((__vector_pair *)vpp);
+ __builtin_mma_pmdmxvf16gerx2pp(&vdmr, vp, vc, 0, 0, 0);
+ *((__dmr1024 *)resp) = vdmr;
+}
+
// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
// CHECK: [[META3]] = !{!"__vector_pair", [[META4:![0-9]+]], i64 0}
// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
diff --git a/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c b/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c
index a74f83f164e13..103b9c4935b50 100644
--- a/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c
+++ b/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c
@@ -15,7 +15,18 @@ void test_mma(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc)
__builtin_mma_dmmr(&vdmr, (__dmr1024*)vpp);
__builtin_mma_dmxor(&vdmr, (__dmr1024*)vpp);
+// CHECK: error: '__builtin_mma_dmxvi8gerx4' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_pmdmxvi8gerx4' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_dmxvi8gerx4pp' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_pmdmxvi8gerx4pp' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_dmxvi8gerx4spp' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_pmdmxvi8gerx4spp' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_dmsetdmrz' needs target feature mma,isa-future-instructions
+// CHECK: error: '__builtin_mma_dmmr' needs target feature mma,isa-future-instructions
+// CHECK: error: '__builtin_mma_dmxor' needs target feature mma,isa-future-instructions
+
// DMF VSX Vector bfloat16 GER 2x builtins.
+
__builtin_mma_dmxvbf16gerx2(&vdmr, vp, vc);
__builtin_mma_dmxvbf16gerx2nn(&vdmr, vp, vc);
__builtin_mma_dmxvbf16gerx2np(&vdmr, vp, vc);
@@ -27,15 +38,6 @@ void test_mma(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc)
__builtin_mma_pmdmxvbf16gerx2pn(&vdmr, vp, vc, 0, 0, 0);
__builtin_mma_pmdmxvbf16gerx2pp(&vdmr, vp, vc, 0, 0, 0);
-// CHECK: error: '__builtin_mma_dmxvi8gerx4' needs target feature mma,paired-vector-memops
-// CHECK: error: '__builtin_mma_pmdmxvi8gerx4' needs target feature mma,paired-vector-memops
-// CHECK: error: '__builtin_mma_dmxvi8gerx4pp' needs target feature mma,paired-vector-memops
-// CHECK: error: '__builtin_mma_pmdmxvi8gerx4pp' needs target feature mma,paired-vector-memops
-// CHECK: error: '__builtin_mma_dmxvi8gerx4spp' needs target feature mma,paired-vector-memops
-// CHECK: error: '__builtin_mma_pmdmxvi8gerx4spp' needs target feature mma,paired-vector-memops
-// CHECK: error: '__builtin_mma_dmsetdmrz' needs target feature mma,isa-future-instructions
-// CHECK: error: '__builtin_mma_dmmr' needs target feature mma,isa-future-instructions
-// CHECK: error: '__builtin_mma_dmxor' needs target feature mma,isa-future-instructions
// CHECK: error: '__builtin_mma_dmxvbf16gerx2' needs target feature mma,paired-vector-memops
// CHECK: error: '__builtin_mma_dmxvbf16gerx2nn' needs target feature mma,paired-vector-memops
// CHECK: error: '__builtin_mma_dmxvbf16gerx2np' needs target feature mma,paired-vector-memops
@@ -46,4 +48,28 @@ void test_mma(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc)
// CHECK: error: '__builtin_mma_pmdmxvbf16gerx2np' needs target feature mma,paired-vector-memops
// CHECK: error: '__builtin_mma_pmdmxvbf16gerx2pn' needs target feature mma,paired-vector-memops
// CHECK: error: '__builtin_mma_pmdmxvbf16gerx2pp' needs target feature mma,paired-vector-memops
+
+  // DMF VSX Vector 16-bit floating-point GER 2x builtins.
+
+ __builtin_mma_dmxvf16gerx2(&vdmr, vp, vc);
+ __builtin_mma_dmxvf16gerx2nn(&vdmr, vp, vc);
+ __builtin_mma_dmxvf16gerx2np(&vdmr, vp, vc);
+ __builtin_mma_dmxvf16gerx2pn(&vdmr, vp, vc);
+ __builtin_mma_dmxvf16gerx2pp(&vdmr, vp, vc);
+ __builtin_mma_pmdmxvf16gerx2(&vdmr, vp, vc, 0, 0, 0);
+ __builtin_mma_pmdmxvf16gerx2nn(&vdmr, vp, vc, 0, 0, 0);
+ __builtin_mma_pmdmxvf16gerx2np(&vdmr, vp, vc, 0, 0, 0);
+ __builtin_mma_pmdmxvf16gerx2pn(&vdmr, vp, vc, 0, 0, 0);
+ __builtin_mma_pmdmxvf16gerx2pp(&vdmr, vp, vc, 0, 0, 0);
+
+// CHECK: error: '__builtin_mma_dmxvf16gerx2' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_dmxvf16gerx2nn' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_dmxvf16gerx2np' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_dmxvf16gerx2pn' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_dmxvf16gerx2pp' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_pmdmxvf16gerx2' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_pmdmxvf16gerx2nn' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_pmdmxvf16gerx2np' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_pmdmxvf16gerx2pn' needs target feature mma,paired-vector-memops
+// CHECK: error: '__builtin_mma_pmdmxvf16gerx2pp' needs target feature mma,paired-vector-memops
}
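
The pmdm* prototype strings ("vW1024*W256Vi255i15i3") bound the three
trailing immediates at 255, 15, and 3. Assuming they act as the X, Y,
and P masks do for the existing pmxvbf16ger2 builtins (an assumption,
not stated in this patch), a sketch enabling all lanes looks like:

void masked_f16(unsigned char *accp, unsigned char *app,
                vector unsigned char c) {
  __dmr1024 acc = *((__dmr1024 *)accp);
  __vector_pair a = *((__vector_pair *)app);
  // The immediates must be integer constants within the ranges the
  // prototype string encodes: i255 -> 0..255, i15 -> 0..15, i3 -> 0..3.
  __builtin_mma_pmdmxvf16gerx2pp(&acc, a, c, 0xFF, 0xF, 0x3);
  *((__dmr1024 *)accp) = acc;
}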
>From 3b18dea62c09d15bfd6301c55973b607f4d69574 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Wed, 9 Jul 2025 14:40:28 -0500
Subject: [PATCH 3/4] fix err in rebase
---
clang/include/clang/Basic/BuiltinsPPC.def | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index 290730674ed5e..420455bc61de9 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -1098,6 +1098,8 @@ UNALIASED_CUSTOM_BUILTIN(mma_dmmr, "vW1024*W1024*", false,
"mma,isa-future-instructions")
UNALIASED_CUSTOM_BUILTIN(mma_dmxor, "vW1024*W1024*", true,
"mma,isa-future-instructions")
+
+// MMA builtins with positive/negative multiply/accumulate.
UNALIASED_CUSTOM_MMA_BUILTIN(mma_xvf16ger2, "vW512*VV",
"mma,paired-vector-memops")
UNALIASED_CUSTOM_MMA_BUILTIN(mma_xvf32ger, "vW512*VV",
@@ -1141,6 +1143,8 @@ UNALIASED_CUSTOM_MMA_BUILTIN(mma_xvbf16ger2, "vW512*VV",
UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmxvbf16ger2, "vW512*VVi15i15i3",
"mma,paired-vector-memops")
+=======
+>>>>>>> 4cfc00bee224 (fix err in rebase)
// FIXME: Obviously incomplete.
#undef BUILTIN
>From 6c3c2d4163a8c3b762214bf2635af31c20c13d0b Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Thu, 17 Jul 2025 18:52:14 +0000
Subject: [PATCH 4/4] fix rebase err
---
clang/include/clang/Basic/BuiltinsPPC.def | 20 --------------------
1 file changed, 20 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index 420455bc61de9..9cae8e1f3aa1a 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -1125,26 +1125,6 @@ UNALIASED_CUSTOM_MMA_BUILTIN(mma_dmxvf16gerx2, "vW1024*W256V",
UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmdmxvf16gerx2, "vW1024*W256Vi255i15i3",
"mma,paired-vector-memops")
-// MMA builtins with positive/negative multiply/accumulate.
-UNALIASED_CUSTOM_MMA_BUILTIN(mma_xvf16ger2, "vW512*VV",
- "mma,paired-vector-memops")
-UNALIASED_CUSTOM_MMA_BUILTIN(mma_xvf32ger, "vW512*VV",
- "mma,paired-vector-memops")
-UNALIASED_CUSTOM_MMA_BUILTIN(mma_xvf64ger, "vW512*W256V",
- "mma,paired-vector-memops")
-UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmxvf16ger2, "vW512*VVi15i15i3",
- "mma,paired-vector-memops")
-UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmxvf32ger, "vW512*VVi15i15",
- "mma,paired-vector-memops")
-UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmxvf64ger, "vW512*W256Vi15i3",
- "mma,paired-vector-memops")
-UNALIASED_CUSTOM_MMA_BUILTIN(mma_xvbf16ger2, "vW512*VV",
- "mma,paired-vector-memops")
-UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmxvbf16ger2, "vW512*VVi15i15i3",
- "mma,paired-vector-memops")
-
-=======
->>>>>>> 4cfc00bee224 (fix err in rebase)
// FIXME: Obviously incomplete.
#undef BUILTIN