[llvm] 4518780 - [PowerPC] Add intrinsics and tests for basic Dense Math enablement instructions (#129913)

via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 12 09:55:33 PDT 2025


Author: RolandF77
Date: 2025-03-12T12:55:29-04:00
New Revision: 4518780c3cffbb34b24c5edc6a73b9f17826156a

URL: https://github.com/llvm/llvm-project/commit/4518780c3cffbb34b24c5edc6a73b9f17826156a
DIFF: https://github.com/llvm/llvm-project/commit/4518780c3cffbb34b24c5edc6a73b9f17826156a.diff

LOG: [PowerPC] Add intrinsics and tests for basic Dense Math enablement instructions (#129913)

Add intrinsics and tests for the basic Dense Math enablement instructions
dmsetdmrz, dmmr, and dmxor.

Added: 
    llvm/test/CodeGen/PowerPC/dmr-enable.ll

Modified: 
    llvm/include/llvm/IR/IntrinsicsPowerPC.td
    llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 6f49ed39d8a09..af66b8206182e 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1644,6 +1644,16 @@ let TargetPrefix = "ppc" in {
   def int_ppc_mma_xxsetaccz :
       DefaultAttrsIntrinsic<[llvm_v512i1_ty], [], [IntrNoMem]>;
 
+  def int_ppc_mma_dmsetdmrz :
+      DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [], [IntrNoMem]>;
+
+  def int_ppc_mma_dmmr :
+      DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty], [IntrNoMem]>;
+
+  def int_ppc_mma_dmxor :
+      DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty,
+                             llvm_v1024i1_ty], [IntrNoMem]>;
+
   // MMA Reduced-Precision: Outer Product Intrinsic Definitions.
   defm int_ppc_mma_xvi4ger8 :
         PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td
index 4da2969857d55..b7100462cb967 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td
@@ -105,12 +105,15 @@ let Predicates = [IsISAFuture] in {
                                             "dmxxinstfdmr256 $AT, $XBp, $P", []>;
 
   def DMMR : XForm_ATB3<31, 6, 177, (outs dmr:$AT), (ins dmr:$AB),
-                        "dmmr $AT, $AB", []>;
+                        "dmmr $AT, $AB", 
+                        [(set v1024i1:$AT, (int_ppc_mma_dmmr v1024i1:$AB))]>;
 
   def DMXOR : XForm_ATB3<31, 7, 177, (outs dmr:$AT), (ins dmr:$ATi, dmr:$AB),
-                         "dmxor $AT, $AB", []>,
+                         "dmxor $AT, $AB",
+                         [(set v1024i1:$AT, (int_ppc_mma_dmxor v1024i1:$ATi, v1024i1:$AB))]>,
                          RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
 
   def DMSETDMRZ : XForm_AT3<31, 2, 177, (outs dmr:$AT), (ins),
-                            "dmsetdmrz $AT", NoItinerary, []>;
+                            "dmsetdmrz $AT", NoItinerary,
+                            [(set v1024i1:$AT, (int_ppc_mma_dmsetdmrz))]>;
 }

diff  --git a/llvm/test/CodeGen/PowerPC/dmr-enable.ll b/llvm/test/CodeGen/PowerPC/dmr-enable.ll
new file mode 100644
index 0000000000000..31c61190c0729
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/dmr-enable.ll
@@ -0,0 +1,134 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=future -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix \
+; RUN:   -mcpu=future -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+
+define void @tdmrz(ptr nocapture readonly %vp1, ptr nocapture %resp)  {
+; CHECK-LABEL: tdmrz:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    dmsetdmrz dmr0
+; CHECK-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-NEXT:    stxvp vsp34, 96(r4)
+; CHECK-NEXT:    stxvp vsp36, 64(r4)
+; CHECK-NEXT:    dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
+; CHECK-NEXT:    stxvp vsp34, 32(r4)
+; CHECK-NEXT:    stxvp vsp36, 0(r4)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: tdmrz:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    dmsetdmrz dmr0
+; CHECK-BE-NEXT:    dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
+; CHECK-BE-NEXT:    stxvp vsp36, 96(r4)
+; CHECK-BE-NEXT:    stxvp vsp34, 64(r4)
+; CHECK-BE-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-BE-NEXT:    stxvp vsp36, 32(r4)
+; CHECK-BE-NEXT:    stxvp vsp34, 0(r4)
+; CHECK-BE-NEXT:    blr
+entry:
+  %z = call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
+  store <1024 x i1> %z, ptr %resp, align 32
+  ret void
+}
+
+define void @tdmmr(ptr nocapture readonly %vp1, ptr nocapture %resp)  {
+; CHECK-LABEL: tdmmr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxvp vsp34, 0(r3)
+; CHECK-NEXT:    lxvp vsp36, 32(r3)
+; CHECK-NEXT:    dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT:    lxvp vsp34, 64(r3)
+; CHECK-NEXT:    lxvp vsp36, 96(r3)
+; CHECK-NEXT:    dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT:    dmmr dmr0, dmr0
+; CHECK-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-NEXT:    stxvp vsp34, 96(r4)
+; CHECK-NEXT:    stxvp vsp36, 64(r4)
+; CHECK-NEXT:    dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
+; CHECK-NEXT:    stxvp vsp34, 32(r4)
+; CHECK-NEXT:    stxvp vsp36, 0(r4)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: tdmmr:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT:    dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT:    lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT:    lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT:    dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT:    dmmr dmr0, dmr0
+; CHECK-BE-NEXT:    dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
+; CHECK-BE-NEXT:    stxvp vsp36, 96(r4)
+; CHECK-BE-NEXT:    stxvp vsp34, 64(r4)
+; CHECK-BE-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-BE-NEXT:    stxvp vsp36, 32(r4)
+; CHECK-BE-NEXT:    stxvp vsp34, 0(r4)
+; CHECK-BE-NEXT:    blr
+entry:
+  %l = load <1024 x i1>, ptr %vp1, align 32
+  %c = call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> %l)
+  store <1024 x i1> %c, ptr %resp, align 32
+  ret void
+}
+
+define void @tdmxor(ptr nocapture readonly %vp1, ptr %vp2, ptr nocapture %resp)  {
+; CHECK-LABEL: tdmxor:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxvp vsp34, 0(r3)
+; CHECK-NEXT:    lxvp vsp36, 32(r3)
+; CHECK-NEXT:    dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT:    lxvp vsp34, 64(r3)
+; CHECK-NEXT:    lxvp vsp36, 96(r3)
+; CHECK-NEXT:    dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT:    lxvp vsp34, 0(r4)
+; CHECK-NEXT:    lxvp vsp36, 32(r4)
+; CHECK-NEXT:    dmxxinstfdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-NEXT:    lxvp vsp34, 64(r4)
+; CHECK-NEXT:    lxvp vsp36, 96(r4)
+; CHECK-NEXT:    dmxxinstfdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-NEXT:    dmxor dmr0, dmr1
+; CHECK-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-NEXT:    stxvp vsp34, 96(r5)
+; CHECK-NEXT:    stxvp vsp36, 64(r5)
+; CHECK-NEXT:    dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
+; CHECK-NEXT:    stxvp vsp34, 32(r5)
+; CHECK-NEXT:    stxvp vsp36, 0(r5)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: tdmxor:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT:    dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT:    lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT:    lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT:    dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT:    lxvp vsp34, 96(r4)
+; CHECK-BE-NEXT:    lxvp vsp36, 64(r4)
+; CHECK-BE-NEXT:    dmxxinstfdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-BE-NEXT:    lxvp vsp34, 32(r4)
+; CHECK-BE-NEXT:    lxvp vsp36, 0(r4)
+; CHECK-BE-NEXT:    dmxxinstfdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-BE-NEXT:    dmxor dmr0, dmr1
+; CHECK-BE-NEXT:    dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
+; CHECK-BE-NEXT:    stxvp vsp36, 96(r5)
+; CHECK-BE-NEXT:    stxvp vsp34, 64(r5)
+; CHECK-BE-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-BE-NEXT:    stxvp vsp36, 32(r5)
+; CHECK-BE-NEXT:    stxvp vsp34, 0(r5)
+; CHECK-BE-NEXT:    blr
+entry:
+  %l = load <1024 x i1>, ptr %vp1, align 32
+  %r = load <1024 x i1>, ptr %vp2, align 32
+  %x = call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> %l, <1024 x i1> %r)
+  store <1024 x i1> %x, ptr %resp, align 32
+  ret void
+}
+
+declare <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
+declare <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1>)
+declare <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1>, <1024 x i1>)


        


More information about the llvm-commits mailing list