[llvm] a903c7b - [PowerPC] Intrinsics and tests for dmr insert/extract (#135653)

via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 24 08:27:26 PDT 2025


Author: RolandF77
Date: 2025-04-24T11:27:22-04:00
New Revision: a903c7b7f5d1cb8d72c170d494d94dc251fc7204

URL: https://github.com/llvm/llvm-project/commit/a903c7b7f5d1cb8d72c170d494d94dc251fc7204
DIFF: https://github.com/llvm/llvm-project/commit/a903c7b7f5d1cb8d72c170d494d94dc251fc7204.diff

LOG: [PowerPC] Intrinsics and tests for dmr insert/extract (#135653)

Add some intrinsics and LIT tests for PPC dmr insert/extract
instructions.

Added: 
    

Modified: 
    llvm/include/llvm/IR/IntrinsicsPowerPC.td
    llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
    llvm/test/CodeGen/PowerPC/dmr-enable.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index b57102ef68f09..8d91b5fdd04fe 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1677,6 +1677,22 @@ let TargetPrefix = "ppc" in {
       DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty,
                              llvm_v1024i1_ty], [IntrNoMem]>;
 
+  def int_ppc_mma_dmxxextfdmr512 :
+      DefaultAttrsIntrinsic<[llvm_v256i1_ty, llvm_v256i1_ty], [llvm_v1024i1_ty,
+                             llvm_i32_ty], [IntrNoMem]>;
+
+  def int_ppc_mma_dmxxinstdmr512 :
+      DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty, llvm_v256i1_ty,
+                             llvm_v256i1_ty, llvm_i32_ty], [IntrNoMem]>;
+
+  def int_ppc_mma_dmxxextfdmr256 :
+      DefaultAttrsIntrinsic<[llvm_v256i1_ty], [llvm_v1024i1_ty, llvm_i32_ty],
+                            [IntrNoMem]>;
+
+  def int_ppc_mma_dmxxinstdmr256 :
+      DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty, llvm_v256i1_ty,
+                             llvm_i32_ty], [IntrNoMem]>;
+
   // MMA Reduced-Precision: Outer Product Intrinsic Definitions.
   defm int_ppc_mma_xvi4ger8 :
         PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;

diff  --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index d6744014949ce..a088096c92a68 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -294,6 +294,10 @@ static inline bool isVFRegister(unsigned Reg) {
 static inline bool isVRRegister(unsigned Reg) {
   return Reg >= PPC::V0 && Reg <= PPC::V31;
 }
+
+static inline bool isDMRROWpRegister(unsigned Reg) { // DMRROWp0..DMRROWp31?
+  return PPC::DMRROWp0 <= Reg && Reg <= PPC::DMRROWp31;
+}
 } // namespace PPC
 } // namespace llvm
 

diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 1f75425752a78..0800ed5dfce2c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11146,6 +11146,116 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     return DAG.getMergeValues(RetOps, dl);
   }
 
+  case Intrinsic::ppc_mma_dmxxextfdmr512: { // Extract half of a dmr.
+    assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
+    auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+    assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
+           "Specify P of 0 or 1 for lower or upper 512 bits");
+    unsigned HiLo = Idx->getSExtValue();
+    unsigned Opcode;
+    unsigned Subx; // Subregister index of the selected 512-bit half.
+    if (HiLo == 0) {
+      Opcode = PPC::DMXXEXTFDMR512;
+      Subx = PPC::sub_wacc_lo;
+    } else {
+      Opcode = PPC::DMXXEXTFDMR512_HI;
+      Subx = PPC::sub_wacc_hi;
+    }
+    SDValue Subreg( // The v512i1 half of operand 1 named by Subx.
+        DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+                           Op.getOperand(1),
+                           DAG.getTargetConstant(Subx, dl, MVT::i32)),
+        0);
+    EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1}; // Two 256-bit results.
+    return SDValue(DAG.getMachineNode(Opcode, dl, ReturnTypes, Subreg), 0);
+  }
+
+  case Intrinsic::ppc_mma_dmxxextfdmr256: { // Extract one dmr row pair.
+    assert(Subtarget.isISAFuture() && "dmxxextfdmr256 requires ISA Future");
+    auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+    assert(Idx && (Idx->getSExtValue() >= 0 && Idx->getSExtValue() <= 3) &&
+           "Specify a dmr row pair 0-3");
+    unsigned IdxVal = Idx->getSExtValue();
+    unsigned Subx; // Subregister index for row pair IdxVal; set by the switch.
+    switch (IdxVal) {
+    case 0:
+      Subx = PPC::sub_dmrrowp0;
+      break;
+    case 1:
+      Subx = PPC::sub_dmrrowp1;
+      break;
+    case 2:
+      Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
+      break;
+    case 3:
+      Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
+      break;
+    }
+    SDValue Subreg( // The v256i1 row pair of operand 1 named by Subx.
+        DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v256i1,
+                           Op.getOperand(1),
+                           DAG.getTargetConstant(Subx, dl, MVT::i32)),
+        0);
+    SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
+    return SDValue(
+        DAG.getMachineNode(PPC::DMXXEXTFDMR256, dl, MVT::v256i1, {Subreg, P}),
+        0);
+  }
+
+  case Intrinsic::ppc_mma_dmxxinstdmr512: { // Insert into half of a dmr.
+    assert(Subtarget.isISAFuture() && "dmxxinstdmr512 requires ISA Future");
+    auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4));
+    assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
+           "Specify P of 0 or 1 for lower or upper 512 bits");
+    unsigned HiLo = Idx->getSExtValue();
+    unsigned Opcode;
+    unsigned Subx; // Subregister index of the 512-bit half being replaced.
+    if (HiLo == 0) {
+      Opcode = PPC::DMXXINSTDMR512;
+      Subx = PPC::sub_wacc_lo;
+    } else {
+      Opcode = PPC::DMXXINSTDMR512_HI;
+      Subx = PPC::sub_wacc_hi;
+    }
+    SDValue Ops[] = {Op.getOperand(2), Op.getOperand(3)}; // Two v256i1 inputs.
+    SDValue Wacc = SDValue(DAG.getMachineNode(Opcode, dl, MVT::v512i1, Ops), 0);
+    SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
+    return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
+                                      Op.getOperand(1), Wacc, SubReg),
+                   0);
+  }
+
+  case Intrinsic::ppc_mma_dmxxinstdmr256: { // Insert one dmr row pair.
+    assert(Subtarget.isISAFuture() && "dmxxinstdmr256 requires ISA Future");
+    auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3));
+    assert(Idx && (Idx->getSExtValue() >= 0 && Idx->getSExtValue() <= 3) &&
+           "Specify a dmr row pair 0-3");
+    unsigned IdxVal = Idx->getSExtValue();
+    unsigned Subx; // Subregister index for row pair IdxVal; set by the switch.
+    switch (IdxVal) {
+    case 0:
+      Subx = PPC::sub_dmrrowp0;
+      break;
+    case 1:
+      Subx = PPC::sub_dmrrowp1;
+      break;
+    case 2:
+      Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
+      break;
+    case 3:
+      Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
+      break;
+    }
+    SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
+    SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
+    SDValue Ops[] = {Op.getOperand(2), P}; // v256i1 input and row-pair index.
+    SDValue DMRRowp = SDValue(
+        DAG.getMachineNode(PPC::DMXXINSTDMR256, dl, MVT::v256i1, Ops), 0);
+    return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
+                                      Op.getOperand(1), DMRRowp, SubReg),
+                   0);
+  }
+
   case Intrinsic::ppc_mma_xxmfacc:
   case Intrinsic::ppc_mma_xxmtacc: {
     // Allow pre-isa-future subtargets to lower as normal.

diff  --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index 13cee8dd783bb..0a04b7fb8d169 100644
--- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -196,6 +196,12 @@ bool llvm::LowerPPCMachineOperandToMCOperand(const MachineOperand &MO,
     assert(MO.getReg() > PPC::NoRegister &&
            MO.getReg() < PPC::NUM_TARGET_REGS &&
            "Invalid register for this target!");
+    // ISA instructions refer to the containing dmr reg.
+    if (PPC::isDMRROWpRegister(MO.getReg())) {
+      OutMO =
+          MCOperand::createReg(PPC::DMR0 + (MO.getReg() - PPC::DMRROWp0) / 4);
+      return true;
+    }
     // Ignore all implicit register operands.
     if (MO.isImplicit())
       return false;

diff  --git a/llvm/test/CodeGen/PowerPC/dmr-enable.ll b/llvm/test/CodeGen/PowerPC/dmr-enable.ll
index a6c99a751e2c5..1e3014405ac4e 100644
--- a/llvm/test/CodeGen/PowerPC/dmr-enable.ll
+++ b/llvm/test/CodeGen/PowerPC/dmr-enable.ll
@@ -129,6 +129,248 @@ entry:
   ret void
 }
 
+define void @text512(ptr %vp1, ptr %rp1, ptr %rp2, ptr %rp3, ptr %rp4)  {
+; CHECK-LABEL: text512:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    dmsetdmrz dmr0
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT:    stxv v2, 16(r4)
+; CHECK-NEXT:    stxv v3, 0(r4)
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT:    stxv v2, 16(r6)
+; CHECK-NEXT:    stxv v3, 0(r6)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: text512:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    dmsetdmrz dmr0
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT:    stxv v3, 16(r4)
+; CHECK-BE-NEXT:    stxv v2, 0(r4)
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT:    stxv v3, 16(r6)
+; CHECK-BE-NEXT:    stxv v2, 0(r6)
+; CHECK-BE-NEXT:    blr
+entry:
+  %z = call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
+  %x = call { <256 x i1>, <256 x i1> } @llvm.ppc.mma.dmxxextfdmr512(<1024 x i1> %z, i32 0)
+  %p = extractvalue { <256 x i1>, <256 x i1 > } %x, 0
+  store <256 x i1> %p, ptr %rp1, align 16
+  %y = call { <256 x i1>, <256 x i1> } @llvm.ppc.mma.dmxxextfdmr512(<1024 x i1> %z, i32 1)
+  %q = extractvalue { <256 x i1>, <256 x i1 > } %y, 0
+  store <256 x i1> %q, ptr %rp3, align 16
+  ret void
+}
+
+define void @text256(ptr %vp1, ptr %rp1, ptr %rp2, ptr %rp3, ptr %rp4)  {
+; CHECK-LABEL: text256:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    dmsetdmrz dmr0
+; CHECK-NEXT:    dmxxextfdmr256 vsp34, dmr0, 0
+; CHECK-NEXT:    stxv v2, 16(r4)
+; CHECK-NEXT:    stxv v3, 0(r4)
+; CHECK-NEXT:    dmxxextfdmr256 vsp34, dmr0, 1
+; CHECK-NEXT:    stxv v2, 16(r5)
+; CHECK-NEXT:    stxv v3, 0(r5)
+; CHECK-NEXT:    dmxxextfdmr256 vsp34, dmr0, 2
+; CHECK-NEXT:    stxv v2, 16(r6)
+; CHECK-NEXT:    stxv v3, 0(r6)
+; CHECK-NEXT:    dmxxextfdmr256 vsp34, dmr0, 3
+; CHECK-NEXT:    stxv v2, 16(r7)
+; CHECK-NEXT:    stxv v3, 0(r7)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: text256:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    dmsetdmrz dmr0
+; CHECK-BE-NEXT:    dmxxextfdmr256 vsp34, dmr0, 0
+; CHECK-BE-NEXT:    stxv v3, 16(r4)
+; CHECK-BE-NEXT:    stxv v2, 0(r4)
+; CHECK-BE-NEXT:    dmxxextfdmr256 vsp34, dmr0, 1
+; CHECK-BE-NEXT:    stxv v3, 16(r5)
+; CHECK-BE-NEXT:    stxv v2, 0(r5)
+; CHECK-BE-NEXT:    dmxxextfdmr256 vsp34, dmr0, 2
+; CHECK-BE-NEXT:    stxv v3, 16(r6)
+; CHECK-BE-NEXT:    stxv v2, 0(r6)
+; CHECK-BE-NEXT:    dmxxextfdmr256 vsp34, dmr0, 3
+; CHECK-BE-NEXT:    stxv v3, 16(r7)
+; CHECK-BE-NEXT:    stxv v2, 0(r7)
+; CHECK-BE-NEXT:    blr
+entry:
+  %z = call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
+  %x = call <256 x i1> @llvm.ppc.mma.dmxxextfdmr256(<1024 x i1> %z, i32 0)
+  store <256 x i1> %x, ptr %rp1, align 16
+  %q = call <256 x i1> @llvm.ppc.mma.dmxxextfdmr256(<1024 x i1> %z, i32 1)
+  store <256 x i1> %q, ptr %rp2, align 16
+  %w = call <256 x i1> @llvm.ppc.mma.dmxxextfdmr256(<1024 x i1> %z, i32 2)
+  store <256 x i1> %w, ptr %rp3, align 16
+  %y = call <256 x i1> @llvm.ppc.mma.dmxxextfdmr256(<1024 x i1> %z, i32 3)
+  store <256 x i1> %y, ptr %rp4, align 16
+  ret void
+}
+
+define void @tins512(ptr %vp1, ptr %vp2, ptr %vp3, ptr %vp4, ptr %rp1, ptr %rp2)  {
+; CHECK-LABEL: tins512:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxv v2, 16(r3)
+; CHECK-NEXT:    lxv v3, 0(r3)
+; CHECK-NEXT:    lxv v4, 16(r4)
+; CHECK-NEXT:    lxv v5, 0(r4)
+; CHECK-NEXT:    dmsetdmrz dmr0
+; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT:    stxvp vsp34, 96(r7)
+; CHECK-NEXT:    stxvp vsp36, 64(r7)
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT:    stxvp vsp34, 32(r7)
+; CHECK-NEXT:    stxvp vsp36, 0(r7)
+; CHECK-NEXT:    lxv v2, 16(r5)
+; CHECK-NEXT:    lxv v4, 16(r6)
+; CHECK-NEXT:    lxv v3, 0(r5)
+; CHECK-NEXT:    lxv v5, 0(r6)
+; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT:    stxvp vsp34, 96(r8)
+; CHECK-NEXT:    stxvp vsp36, 64(r8)
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT:    stxvp vsp34, 32(r8)
+; CHECK-NEXT:    stxvp vsp36, 0(r8)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: tins512:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv v2, 0(r3)
+; CHECK-BE-NEXT:    lxv v3, 16(r3)
+; CHECK-BE-NEXT:    lxv v4, 0(r4)
+; CHECK-BE-NEXT:    lxv v5, 16(r4)
+; CHECK-BE-NEXT:    dmsetdmrz dmr0
+; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT:    stxvp vsp36, 96(r7)
+; CHECK-BE-NEXT:    stxvp vsp34, 64(r7)
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT:    stxvp vsp36, 32(r7)
+; CHECK-BE-NEXT:    stxvp vsp34, 0(r7)
+; CHECK-BE-NEXT:    lxv v2, 0(r5)
+; CHECK-BE-NEXT:    lxv v4, 0(r6)
+; CHECK-BE-NEXT:    lxv v3, 16(r5)
+; CHECK-BE-NEXT:    lxv v5, 16(r6)
+; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT:    stxvp vsp36, 96(r8)
+; CHECK-BE-NEXT:    stxvp vsp34, 64(r8)
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT:    stxvp vsp36, 32(r8)
+; CHECK-BE-NEXT:    stxvp vsp34, 0(r8)
+; CHECK-BE-NEXT:    blr
+entry:
+  %z = call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
+  %l1 = load <256 x i1>, ptr %vp1, align 16
+  %r1 = load <256 x i1>, ptr %vp2, align 16
+  %a = call <1024 x i1> @llvm.ppc.mma.dmxxinstdmr512(<1024 x i1> %z, <256 x i1> %l1, <256 x i1> %r1, i32 0)
+  store <1024 x i1> %a, ptr %rp1, align 16
+  %l2 = load <256 x i1>, ptr %vp3, align 16
+  %r2 = load <256 x i1>, ptr %vp4, align 16
+  %b = call <1024 x i1> @llvm.ppc.mma.dmxxinstdmr512(<1024 x i1> %a, <256 x i1> %l2, <256 x i1> %r2, i32 1)
+  store <1024 x i1> %b, ptr %rp2, align 16
+  ret void
+}
+
+define void @tins256(ptr %vp1, ptr %vp2, ptr %vp3, ptr %vp4, ptr %rp1, ptr %rp2, ptr %rp3, ptr %rp4)  {
+; CHECK-LABEL: tins256:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxv v2, 16(r3)
+; CHECK-NEXT:    lxv v3, 0(r3)
+; CHECK-NEXT:    dmsetdmrz dmr0
+; CHECK-NEXT:    dmxxinstdmr256 dmr0, vsp34, 0
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT:    stxvp vsp34, 96(r7)
+; CHECK-NEXT:    stxvp vsp36, 64(r7)
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT:    stxvp vsp34, 32(r7)
+; CHECK-NEXT:    stxvp vsp36, 0(r7)
+; CHECK-NEXT:    lxv v2, 16(r4)
+; CHECK-NEXT:    lxv v3, 0(r4)
+; CHECK-NEXT:    dmxxinstdmr256 dmr0, vsp34, 1
+; CHECK-NEXT:    dmxxextfdmr512 vsp36, vsp32, wacc0, 0
+; CHECK-NEXT:    stxvp vsp36, 96(r8)
+; CHECK-NEXT:    stxvp vsp32, 64(r8)
+; CHECK-NEXT:    dmxxextfdmr512 vsp36, vsp32, wacc_hi0, 1
+; CHECK-NEXT:    stxvp vsp36, 32(r8)
+; CHECK-NEXT:    stxvp vsp32, 0(r8)
+; CHECK-NEXT:    dmxxinstdmr256 dmr0, vsp34, 2
+; CHECK-NEXT:    dmxxextfdmr512 vsp36, vsp32, wacc0, 0
+; CHECK-NEXT:    stxvp vsp36, 96(r9)
+; CHECK-NEXT:    stxvp vsp32, 64(r9)
+; CHECK-NEXT:    dmxxextfdmr512 vsp36, vsp32, wacc_hi0, 1
+; CHECK-NEXT:    stxvp vsp36, 32(r9)
+; CHECK-NEXT:    stxvp vsp32, 0(r9)
+; CHECK-NEXT:    dmxxinstdmr256 dmr0, vsp34, 3
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT:    stxvp vsp34, 96(r10)
+; CHECK-NEXT:    stxvp vsp36, 64(r10)
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT:    stxvp vsp34, 32(r10)
+; CHECK-NEXT:    stxvp vsp36, 0(r10)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: tins256:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv v2, 0(r3)
+; CHECK-BE-NEXT:    lxv v3, 16(r3)
+; CHECK-BE-NEXT:    dmsetdmrz dmr0
+; CHECK-BE-NEXT:    dmxxinstdmr256 dmr0, vsp34, 0
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT:    stxvp vsp36, 96(r7)
+; CHECK-BE-NEXT:    stxvp vsp34, 64(r7)
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT:    stxvp vsp36, 32(r7)
+; CHECK-BE-NEXT:    stxvp vsp34, 0(r7)
+; CHECK-BE-NEXT:    lxv v2, 0(r4)
+; CHECK-BE-NEXT:    lxv v3, 16(r4)
+; CHECK-BE-NEXT:    dmxxinstdmr256 dmr0, vsp34, 1
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp36, vsp32, wacc_hi0, 1
+; CHECK-BE-NEXT:    stxvp vsp32, 96(r8)
+; CHECK-BE-NEXT:    stxvp vsp36, 64(r8)
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp36, vsp32, wacc0, 0
+; CHECK-BE-NEXT:    stxvp vsp32, 32(r8)
+; CHECK-BE-NEXT:    stxvp vsp36, 0(r8)
+; CHECK-BE-NEXT:    dmxxinstdmr256 dmr0, vsp34, 2
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp36, vsp32, wacc_hi0, 1
+; CHECK-BE-NEXT:    stxvp vsp32, 96(r9)
+; CHECK-BE-NEXT:    stxvp vsp36, 64(r9)
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp36, vsp32, wacc0, 0
+; CHECK-BE-NEXT:    stxvp vsp32, 32(r9)
+; CHECK-BE-NEXT:    stxvp vsp36, 0(r9)
+; CHECK-BE-NEXT:    dmxxinstdmr256 dmr0, vsp34, 3
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT:    stxvp vsp36, 96(r10)
+; CHECK-BE-NEXT:    stxvp vsp34, 64(r10)
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT:    stxvp vsp36, 32(r10)
+; CHECK-BE-NEXT:    stxvp vsp34, 0(r10)
+; CHECK-BE-NEXT:    blr
+entry:
+  %z = call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
+  %l1 = load <256 x i1>, ptr %vp1, align 16
+  %a = call <1024 x i1> @llvm.ppc.mma.dmxxinstdmr256(<1024 x i1> %z, <256 x i1> %l1, i32 0)
+  store <1024 x i1> %a, ptr %rp1, align 16
+  %l2 = load <256 x i1>, ptr %vp2, align 16
+  %b = call <1024 x i1> @llvm.ppc.mma.dmxxinstdmr256(<1024 x i1> %a, <256 x i1> %l2, i32 1)
+  store <1024 x i1> %b, ptr %rp2, align 16
+  %l3 = load <256 x i1>, ptr %vp3, align 16
+  %c = call <1024 x i1> @llvm.ppc.mma.dmxxinstdmr256(<1024 x i1> %b, <256 x i1> %l2, i32 2)
+  store <1024 x i1> %c, ptr %rp3, align 16
+  %l4 = load <256 x i1>, ptr %vp4, align 16
+  %d = call <1024 x i1> @llvm.ppc.mma.dmxxinstdmr256(<1024 x i1> %c, <256 x i1> %l2, i32 3)
+  store <1024 x i1> %d, ptr %rp4, align 16
+  ret void
+}
+
 declare <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
 declare <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1>)
 declare <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1>, <1024 x i1>)
+declare <1024 x i1> @llvm.ppc.mma.dmxxinstdmr512(<1024 x i1>, <256 x i1>, <256 x i1>, i32)
+declare <1024 x i1> @llvm.ppc.mma.dmxxinstdmr256(<1024 x i1>, <256 x i1>, i32)
+declare { <256 x i1>, <256 x i1> } @llvm.ppc.mma.dmxxextfdmr512(<1024 x i1>, i32)
+declare <256 x i1> @llvm.ppc.mma.dmxxextfdmr256(<1024 x i1>, i32)


        


More information about the llvm-commits mailing list