[llvm] [PowerPC] Add intrinsics and tests for basic Dense Math enablement instructions (PR #129913)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 5 11:33:04 PST 2025
https://github.com/RolandF77 updated https://github.com/llvm/llvm-project/pull/129913
From cd3606902144d367503db089607664099e4e1918 Mon Sep 17 00:00:00 2001
From: Roland Froese <froese at ca.ibm.com>
Date: Wed, 5 Mar 2025 19:16:12 +0000
Subject: [PATCH 1/2] dense math enablement
---
llvm/include/llvm/IR/IntrinsicsPowerPC.td | 10 ++++++++++
llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td | 9 ++++++---
2 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 6f49ed39d8a09..af66b8206182e 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1644,6 +1644,16 @@ let TargetPrefix = "ppc" in {
def int_ppc_mma_xxsetaccz :
DefaultAttrsIntrinsic<[llvm_v512i1_ty], [], [IntrNoMem]>;
+ def int_ppc_mma_dmsetdmrz :
+ DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [], [IntrNoMem]>;
+
+ def int_ppc_mma_dmmr :
+ DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty], [IntrNoMem]>;
+
+ def int_ppc_mma_dmxor :
+ DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty,
+ llvm_v1024i1_ty], [IntrNoMem]>;
+
// MMA Reduced-Precision: Outer Product Intrinsic Definitions.
defm int_ppc_mma_xvi4ger8 :
PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td
index 4da2969857d55..b7100462cb967 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td
@@ -105,12 +105,15 @@ let Predicates = [IsISAFuture] in {
"dmxxinstfdmr256 $AT, $XBp, $P", []>;
def DMMR : XForm_ATB3<31, 6, 177, (outs dmr:$AT), (ins dmr:$AB),
- "dmmr $AT, $AB", []>;
+ "dmmr $AT, $AB",
+ [(set v1024i1:$AT, (int_ppc_mma_dmmr v1024i1:$AB))]>;
def DMXOR : XForm_ATB3<31, 7, 177, (outs dmr:$AT), (ins dmr:$ATi, dmr:$AB),
- "dmxor $AT, $AB", []>,
+ "dmxor $AT, $AB",
+ [(set v1024i1:$AT, (int_ppc_mma_dmxor v1024i1:$ATi, v1024i1:$AB))]>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def DMSETDMRZ : XForm_AT3<31, 2, 177, (outs dmr:$AT), (ins),
- "dmsetdmrz $AT", NoItinerary, []>;
+ "dmsetdmrz $AT", NoItinerary,
+ [(set v1024i1:$AT, (int_ppc_mma_dmsetdmrz))]>;
}
From f46c4bc142d1db070d1265ad25b091d0a5482ce4 Mon Sep 17 00:00:00 2001
From: Roland Froese <froese at ca.ibm.com>
Date: Wed, 5 Mar 2025 19:50:01 +0000
Subject: [PATCH 2/2] test
---
llvm/test/CodeGen/PowerPC/dmr-enable.ll | 134 ++++++++++++++++++++++++
1 file changed, 134 insertions(+)
create mode 100644 llvm/test/CodeGen/PowerPC/dmr-enable.ll
diff --git a/llvm/test/CodeGen/PowerPC/dmr-enable.ll b/llvm/test/CodeGen/PowerPC/dmr-enable.ll
new file mode 100644
index 0000000000000..e4e86148ca044
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/dmr-enable.ll
@@ -0,0 +1,134 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+
+define void @tdmrz(ptr nocapture readonly %vp1, ptr nocapture %resp) {
+; CHECK-LABEL: tdmrz:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: dmsetdmrz dmr0
+; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-NEXT: stxvp vsp34, 96(r4)
+; CHECK-NEXT: stxvp vsp36, 64(r4)
+; CHECK-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
+; CHECK-NEXT: stxvp vsp34, 32(r4)
+; CHECK-NEXT: stxvp vsp36, 0(r4)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: tdmrz:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: dmsetdmrz dmr0
+; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r4)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r4)
+; CHECK-BE-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r4)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r4)
+; CHECK-BE-NEXT: blr
+entry:
+ %z = call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
+ store <1024 x i1> %z, ptr %resp, align 32
+ ret void
+}
+
+define void @tdmmr(ptr nocapture readonly %vp1, ptr nocapture %resp) {
+; CHECK-LABEL: tdmmr:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r3)
+; CHECK-NEXT: lxvp vsp36, 96(r3)
+; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT: dmmr dmr0, dmr0
+; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-NEXT: stxvp vsp34, 96(r4)
+; CHECK-NEXT: stxvp vsp36, 64(r4)
+; CHECK-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
+; CHECK-NEXT: stxvp vsp34, 32(r4)
+; CHECK-NEXT: stxvp vsp36, 0(r4)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: tdmmr:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT: dmmr dmr0, dmr0
+; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r4)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r4)
+; CHECK-BE-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r4)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r4)
+; CHECK-BE-NEXT: blr
+entry:
+ %l = load <1024 x i1>, ptr %vp1, align 32
+ %c = call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> %l)
+ store <1024 x i1> %c, ptr %resp, align 32
+ ret void
+}
+
+define void @tdmxor(ptr nocapture readonly %vp1, ptr %vp2, ptr nocapture %resp) {
+; CHECK-LABEL: tdmxor:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r3)
+; CHECK-NEXT: lxvp vsp36, 96(r3)
+; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT: lxvp vsp34, 0(r4)
+; CHECK-NEXT: lxvp vsp36, 32(r4)
+; CHECK-NEXT: dmxxinstfdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r4)
+; CHECK-NEXT: lxvp vsp36, 96(r4)
+; CHECK-NEXT: dmxxinstfdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-NEXT: dmxor dmr0, dmr1
+; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-NEXT: stxvp vsp34, 96(r5)
+; CHECK-NEXT: stxvp vsp36, 64(r5)
+; CHECK-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
+; CHECK-NEXT: stxvp vsp34, 32(r5)
+; CHECK-NEXT: stxvp vsp36, 0(r5)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: tdmxor:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT: lxvp vsp34, 96(r4)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r4)
+; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r4)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r4)
+; CHECK-BE-NEXT: dmxxinstfdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-BE-NEXT: dmxor dmr0, dmr1
+; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r5)
+; CHECK-BE-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r5)
+; CHECK-BE-NEXT: blr
+entry:
+ %l = load <1024 x i1>, ptr %vp1, align 32
+ %r = load <1024 x i1>, ptr %vp2, align 32
+ %x = call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> %l, <1024 x i1> %r)
+ store <1024 x i1> %x, ptr %resp, align 32
+ ret void
+}
+
+declare <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
+declare <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1>)
+declare <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1>, <1024 x i1>)
More information about the llvm-commits mailing list