[llvm] [PowerPC] Intrinsics and tests for dmr insert/extract (PR #135653)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 14 11:08:33 PDT 2025
https://github.com/RolandF77 created https://github.com/llvm/llvm-project/pull/135653
Add intrinsics and LIT tests for the PowerPC DMR insert/extract instructions (dmxxextfdmr512, dmxxextfdmr256, dmxxinstdmr512 and dmxxinstdmr256).
>From 7c4fa80a9e3ced561e9525af069e5e3ef30e6b67 Mon Sep 17 00:00:00 2001
From: Roland Froese <froese at ca.ibm.com>
Date: Fri, 11 Apr 2025 21:06:17 +0000
Subject: [PATCH 1/3] initial implementation
---
llvm/include/llvm/IR/IntrinsicsPowerPC.td | 16 ++++
.../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 4 +
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 96 +++++++++++++++++++
3 files changed, 116 insertions(+)
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index e4d39134a4a25..a1f1a1707013f 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1661,6 +1661,22 @@ let TargetPrefix = "ppc" in {
DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty,
llvm_v1024i1_ty], [IntrNoMem]>;
+ def int_ppc_mma_dmxxextfdmr512 :
+ DefaultAttrsIntrinsic<[llvm_v256i1_ty, llvm_v256i1_ty], [llvm_v1024i1_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
+ def int_ppc_mma_dmxxinstdmr512 :
+ DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty, llvm_v256i1_ty,
+ llvm_v256i1_ty, llvm_i32_ty], [IntrNoMem]>;
+
+ def int_ppc_mma_dmxxextfdmr256 :
+ DefaultAttrsIntrinsic<[llvm_v256i1_ty], [llvm_v1024i1_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_ppc_mma_dmxxinstdmr256 :
+ DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty, llvm_v256i1_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
// MMA Reduced-Precision: Outer Product Intrinsic Definitions.
defm int_ppc_mma_xvi4ger8 :
PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 03a034182ae15..76dbecb45d7a6 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -135,6 +135,10 @@ MCRegister PPC::getRegNumForOperand(const MCInstrDesc &Desc, MCRegister Reg,
if (PPC::isVRRegister(Reg))
return PPC::VSX32 + (Reg - PPC::V0);
break;
+ case PPC::DMRROWpRCRegClassID: {
+ // Reference to dmr reg. There are four dmrrow pairs per dmr.
+ return PPC::DMR0 + ((Reg - PPC::DMRROWp0) / 4);
+ }
// Other RegClass doesn't need mapping
default:
break;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 1f75425752a78..e05854722932a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11146,6 +11146,102 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getMergeValues(RetOps, dl);
}
+ case Intrinsic::ppc_mma_dmxxextfdmr512: {
+ auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
+ "Specify P of 0 or 1 for lower or upper 512 bytes");
+ unsigned HiLo = Idx->getSExtValue();
+ unsigned Opcode;
+ unsigned Subx;
+ if (HiLo == 0) {
+ Opcode = PPC::DMXXEXTFDMR512;
+ Subx = PPC::sub_wacc_lo;
+ } else {
+ Opcode = PPC::DMXXEXTFDMR512_HI;
+ Subx = PPC::sub_wacc_hi;
+ }
+ SDValue Subreg(
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+ Op.getOperand(1),
+ DAG.getTargetConstant(Subx, dl, MVT::i32)),
+ 0);
+ EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
+ return SDValue(DAG.getMachineNode(Opcode, dl, ReturnTypes, Subreg), 0);
+ }
+
+ case Intrinsic::ppc_mma_dmxxextfdmr256: {
+ auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
+ "Specify a dmr row pair 0-3");
+ unsigned IdxVal = Idx->getSExtValue();
+ unsigned Pairx;
+ switch (IdxVal) {
+ case 0: Pairx = PPC::sub_dmrrowp0; break;
+ case 1: Pairx = PPC::sub_dmrrowp1; break;
+ case 2: Pairx = PPC::sub_wacc_hi_then_sub_dmrrowp0; break;
+ case 3: Pairx = PPC::sub_wacc_hi_then_sub_dmrrowp1; break;
+ }
+ SDValue Pair(
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v256i1,
+ Op.getOperand(1),
+ DAG.getTargetConstant(Pairx, dl, MVT::i32)),
+ 0);
+ SDValue C = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
+ return SDValue(
+ DAG.getMachineNode(PPC::DMXXEXTFDMR256, dl, MVT::v256i1, {Pair, C}), 0);
+ }
+
+ case Intrinsic::ppc_mma_dmxxinstdmr512: {
+ auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4));
+ assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
+ "Specify P of 0 or 1 for lower or upper 512 bytes");
+ unsigned HiLo = Idx->getSExtValue();
+ unsigned Opcode;
+ unsigned Subx;
+ if (HiLo == 0) {
+ Opcode = PPC::DMXXINSTDMR512;
+ Subx = PPC::sub_wacc_lo;
+ } else {
+ Opcode = PPC::DMXXINSTDMR512_HI;
+ Subx = PPC::sub_wacc_hi;
+ }
+ SDValue Ops[] = { Op.getOperand(2), Op.getOperand(3) };
+ SDValue WideVec =
+ SDValue(DAG.getMachineNode(Opcode, dl, MVT::v512i1, Ops), 0);
+ SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
+ return
+ SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl,
+ MVT::v1024i1, Op.getOperand(1), WideVec,
+ SubReg),
+ 0);
+ }
+
+ case Intrinsic::ppc_mma_dmxxinstdmr256: {
+ auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3));
+ assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
+ "Specify a dmr row pair 0-3");
+ unsigned IdxVal = Idx->getSExtValue();
+ unsigned Pairx;
+ unsigned Subx;
+ switch (IdxVal) {
+ case 0: Subx = PPC::sub_dmrrowp0; break;
+ case 1: Subx = PPC::sub_dmrrowp1; break;
+ case 2: Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0; break;
+ case 3: Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1; break;
+ }
+ SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
+ SDValue C = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
+ SDValue Ops[] = { Op.getOperand(2), C };
+ SDValue WideVec =
+ SDValue(DAG.getMachineNode(PPC::DMXXINSTDMR256, dl, MVT::v512i1, Ops),
+ 0);
+ return
+ SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl,
+ MVT::v1024i1, Op.getOperand(1), WideVec,
+ SubReg),
+ 0);
+ }
+
case Intrinsic::ppc_mma_xxmfacc:
case Intrinsic::ppc_mma_xxmtacc: {
// Allow pre-isa-future subtargets to lower as normal.
>From 5cf2abd257db7e7af6bd3d7c210111c8059d4ec7 Mon Sep 17 00:00:00 2001
From: Roland Froese <froese at ca.ibm.com>
Date: Mon, 14 Apr 2025 16:39:37 +0000
Subject: [PATCH 2/3] test
---
llvm/test/CodeGen/PowerPC/dmr-enable.ll | 242 ++++++++++++++++++++++++
1 file changed, 242 insertions(+)
diff --git a/llvm/test/CodeGen/PowerPC/dmr-enable.ll b/llvm/test/CodeGen/PowerPC/dmr-enable.ll
index a6c99a751e2c5..303ca60fc62d8 100644
--- a/llvm/test/CodeGen/PowerPC/dmr-enable.ll
+++ b/llvm/test/CodeGen/PowerPC/dmr-enable.ll
@@ -129,6 +129,248 @@ entry:
ret void
}
+define void @text512(ptr %vp1, ptr %rp1, ptr %rp2, ptr %rp3, ptr %rp4) {
+; CHECK-LABEL: text512:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: dmsetdmrz dmr0
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxv v2, 16(r4)
+; CHECK-NEXT: stxv v3, 0(r4)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxv v2, 16(r6)
+; CHECK-NEXT: stxv v3, 0(r6)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: text512:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: dmsetdmrz dmr0
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxv v3, 16(r4)
+; CHECK-BE-NEXT: stxv v2, 0(r4)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxv v3, 16(r6)
+; CHECK-BE-NEXT: stxv v2, 0(r6)
+; CHECK-BE-NEXT: blr
+entry:
+ %z = call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
+ %x = call { <256 x i1>, <256 x i1> } @llvm.ppc.mma.dmxxextfdmr512(<1024 x i1> %z, i32 0)
+ %p = extractvalue { <256 x i1>, <256 x i1 > } %x, 0
+ store <256 x i1> %p, ptr %rp1, align 16
+ %y = call { <256 x i1>, <256 x i1> } @llvm.ppc.mma.dmxxextfdmr512(<1024 x i1> %z, i32 1)
+ %q = extractvalue { <256 x i1>, <256 x i1 > } %y, 0
+ store <256 x i1> %q, ptr %rp3, align 16
+ ret void
+}
+
+define void @text256(ptr %vp1, ptr %rp1, ptr %rp2, ptr %rp3, ptr %rp4) {
+; CHECK-LABEL: text256:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: dmsetdmrz dmr0
+; CHECK-NEXT: dmxxextfdmr256 vsp34, dmrrowp0, 0
+; CHECK-NEXT: stxv v2, 16(r4)
+; CHECK-NEXT: stxv v3, 0(r4)
+; CHECK-NEXT: dmxxextfdmr256 vsp34, dmrrowp1, 1
+; CHECK-NEXT: stxv v2, 16(r5)
+; CHECK-NEXT: stxv v3, 0(r5)
+; CHECK-NEXT: dmxxextfdmr256 vsp34, dmrrowp2, 2
+; CHECK-NEXT: stxv v2, 16(r6)
+; CHECK-NEXT: stxv v3, 0(r6)
+; CHECK-NEXT: dmxxextfdmr256 vsp34, dmrrowp3, 3
+; CHECK-NEXT: stxv v2, 16(r7)
+; CHECK-NEXT: stxv v3, 0(r7)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: text256:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: dmsetdmrz dmr0
+; CHECK-BE-NEXT: dmxxextfdmr256 vsp34, dmrrowp0, 0
+; CHECK-BE-NEXT: stxv v3, 16(r4)
+; CHECK-BE-NEXT: stxv v2, 0(r4)
+; CHECK-BE-NEXT: dmxxextfdmr256 vsp34, dmrrowp1, 1
+; CHECK-BE-NEXT: stxv v3, 16(r5)
+; CHECK-BE-NEXT: stxv v2, 0(r5)
+; CHECK-BE-NEXT: dmxxextfdmr256 vsp34, dmrrowp2, 2
+; CHECK-BE-NEXT: stxv v3, 16(r6)
+; CHECK-BE-NEXT: stxv v2, 0(r6)
+; CHECK-BE-NEXT: dmxxextfdmr256 vsp34, dmrrowp3, 3
+; CHECK-BE-NEXT: stxv v3, 16(r7)
+; CHECK-BE-NEXT: stxv v2, 0(r7)
+; CHECK-BE-NEXT: blr
+entry:
+ %z = call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
+ %x = call <256 x i1> @llvm.ppc.mma.dmxxextfdmr256(<1024 x i1> %z, i32 0)
+ store <256 x i1> %x, ptr %rp1, align 16
+ %q = call <256 x i1> @llvm.ppc.mma.dmxxextfdmr256(<1024 x i1> %z, i32 1)
+ store <256 x i1> %q, ptr %rp2, align 16
+ %w = call <256 x i1> @llvm.ppc.mma.dmxxextfdmr256(<1024 x i1> %z, i32 2)
+ store <256 x i1> %w, ptr %rp3, align 16
+ %y = call <256 x i1> @llvm.ppc.mma.dmxxextfdmr256(<1024 x i1> %z, i32 3)
+ store <256 x i1> %y, ptr %rp4, align 16
+ ret void
+}
+
+define void @tins512(ptr %vp1, ptr %vp2, ptr %vp3, ptr %vp4, ptr %rp1, ptr %rp2) {
+; CHECK-LABEL: tins512:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxv v2, 16(r3)
+; CHECK-NEXT: lxv v3, 0(r3)
+; CHECK-NEXT: lxv v4, 16(r4)
+; CHECK-NEXT: lxv v5, 0(r4)
+; CHECK-NEXT: dmsetdmrz dmr0
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r7)
+; CHECK-NEXT: stxvp vsp36, 64(r7)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r7)
+; CHECK-NEXT: stxvp vsp36, 0(r7)
+; CHECK-NEXT: lxv v2, 16(r5)
+; CHECK-NEXT: lxv v4, 16(r6)
+; CHECK-NEXT: lxv v3, 0(r5)
+; CHECK-NEXT: lxv v5, 0(r6)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r8)
+; CHECK-NEXT: stxvp vsp36, 64(r8)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r8)
+; CHECK-NEXT: stxvp vsp36, 0(r8)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: tins512:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxv v2, 0(r3)
+; CHECK-BE-NEXT: lxv v3, 16(r3)
+; CHECK-BE-NEXT: lxv v4, 0(r4)
+; CHECK-BE-NEXT: lxv v5, 16(r4)
+; CHECK-BE-NEXT: dmsetdmrz dmr0
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r7)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r7)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r7)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r7)
+; CHECK-BE-NEXT: lxv v2, 0(r5)
+; CHECK-BE-NEXT: lxv v4, 0(r6)
+; CHECK-BE-NEXT: lxv v3, 16(r5)
+; CHECK-BE-NEXT: lxv v5, 16(r6)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r8)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r8)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r8)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r8)
+; CHECK-BE-NEXT: blr
+entry:
+ %z = call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
+ %l1 = load <256 x i1>, ptr %vp1, align 16
+ %r1 = load <256 x i1>, ptr %vp2, align 16
+ %a = call <1024 x i1> @llvm.ppc.mma.dmxxinstdmr512(<1024 x i1> %z, <256 x i1> %l1, <256 x i1> %r1, i32 0)
+ store <1024 x i1> %a, ptr %rp1, align 16
+ %l2 = load <256 x i1>, ptr %vp3, align 16
+ %r2 = load <256 x i1>, ptr %vp4, align 16
+ %b = call <1024 x i1> @llvm.ppc.mma.dmxxinstdmr512(<1024 x i1> %a, <256 x i1> %l2, <256 x i1> %r2, i32 1)
+ store <1024 x i1> %b, ptr %rp2, align 16
+ ret void
+}
+
+define void @tins256(ptr %vp1, ptr %vp2, ptr %vp3, ptr %vp4, ptr %rp1, ptr %rp2, ptr %rp3, ptr %rp4) {
+; CHECK-LABEL: tins256:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxv v2, 16(r3)
+; CHECK-NEXT: lxv v3, 0(r3)
+; CHECK-NEXT: dmsetdmrz dmr0
+; CHECK-NEXT: dmxxinstdmr256 dmrrowp0, vsp34, 0
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r7)
+; CHECK-NEXT: stxvp vsp36, 64(r7)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r7)
+; CHECK-NEXT: stxvp vsp36, 0(r7)
+; CHECK-NEXT: lxv v2, 16(r4)
+; CHECK-NEXT: lxv v3, 0(r4)
+; CHECK-NEXT: dmxxinstdmr256 dmrrowp1, vsp34, 1
+; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc0, 0
+; CHECK-NEXT: stxvp vsp36, 96(r8)
+; CHECK-NEXT: stxvp vsp32, 64(r8)
+; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp36, 32(r8)
+; CHECK-NEXT: stxvp vsp32, 0(r8)
+; CHECK-NEXT: dmxxinstdmr256 dmrrowp2, vsp34, 2
+; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc0, 0
+; CHECK-NEXT: stxvp vsp36, 96(r9)
+; CHECK-NEXT: stxvp vsp32, 64(r9)
+; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp36, 32(r9)
+; CHECK-NEXT: stxvp vsp32, 0(r9)
+; CHECK-NEXT: dmxxinstdmr256 dmrrowp3, vsp34, 3
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r10)
+; CHECK-NEXT: stxvp vsp36, 64(r10)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r10)
+; CHECK-NEXT: stxvp vsp36, 0(r10)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: tins256:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxv v2, 0(r3)
+; CHECK-BE-NEXT: lxv v3, 16(r3)
+; CHECK-BE-NEXT: dmsetdmrz dmr0
+; CHECK-BE-NEXT: dmxxinstdmr256 dmrrowp0, vsp34, 0
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r7)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r7)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r7)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r7)
+; CHECK-BE-NEXT: lxv v2, 0(r4)
+; CHECK-BE-NEXT: lxv v3, 16(r4)
+; CHECK-BE-NEXT: dmxxinstdmr256 dmrrowp1, vsp34, 1
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp32, 96(r8)
+; CHECK-BE-NEXT: stxvp vsp36, 64(r8)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp32, 32(r8)
+; CHECK-BE-NEXT: stxvp vsp36, 0(r8)
+; CHECK-BE-NEXT: dmxxinstdmr256 dmrrowp2, vsp34, 2
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp32, 96(r9)
+; CHECK-BE-NEXT: stxvp vsp36, 64(r9)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp32, 32(r9)
+; CHECK-BE-NEXT: stxvp vsp36, 0(r9)
+; CHECK-BE-NEXT: dmxxinstdmr256 dmrrowp3, vsp34, 3
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r10)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r10)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r10)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r10)
+; CHECK-BE-NEXT: blr
+entry:
+ %z = call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
+ %l1 = load <256 x i1>, ptr %vp1, align 16
+ %a = call <1024 x i1> @llvm.ppc.mma.dmxxinstdmr256(<1024 x i1> %z, <256 x i1> %l1, i32 0)
+ store <1024 x i1> %a, ptr %rp1, align 16
+ %l2 = load <256 x i1>, ptr %vp2, align 16
+ %b = call <1024 x i1> @llvm.ppc.mma.dmxxinstdmr256(<1024 x i1> %a, <256 x i1> %l2, i32 1)
+ store <1024 x i1> %b, ptr %rp2, align 16
+ %l3 = load <256 x i1>, ptr %vp3, align 16
+ %c = call <1024 x i1> @llvm.ppc.mma.dmxxinstdmr256(<1024 x i1> %b, <256 x i1> %l2, i32 2)
+ store <1024 x i1> %c, ptr %rp3, align 16
+ %l4 = load <256 x i1>, ptr %vp4, align 16
+ %d = call <1024 x i1> @llvm.ppc.mma.dmxxinstdmr256(<1024 x i1> %c, <256 x i1> %l2, i32 3)
+ store <1024 x i1> %d, ptr %rp4, align 16
+ ret void
+}
+
declare <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
declare <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1>)
declare <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1>, <1024 x i1>)
+declare <1024 x i1> @llvm.ppc.mma.dmxxinstdmr512(<1024 x i1>, <256 x i1>, <256 x i1>, i32)
+declare <1024 x i1> @llvm.ppc.mma.dmxxinstdmr256(<1024 x i1>, <256 x i1>, i32)
+declare { <256 x i1>, <256 x i1> } @llvm.ppc.mma.dmxxextfdmr512(<1024 x i1>, i32)
+declare <256 x i1> @llvm.ppc.mma.dmxxextfdmr256(<1024 x i1>, i32)
>From 71ec8c6f1c1c5d6e192a972d3089cb67f86e7f6e Mon Sep 17 00:00:00 2001
From: Roland Froese <froese at ca.ibm.com>
Date: Mon, 14 Apr 2025 17:23:36 +0000
Subject: [PATCH 3/3] cleanup
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 78 ++++++++++++---------
1 file changed, 46 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index e05854722932a..0800ed5dfce2c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11147,6 +11147,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
case Intrinsic::ppc_mma_dmxxextfdmr512: {
+ assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
"Specify P of 0 or 1 for lower or upper 512 bytes");
@@ -11170,28 +11171,39 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
case Intrinsic::ppc_mma_dmxxextfdmr256: {
+ assert(Subtarget.isISAFuture() && "dmxxextfdmr256 requires ISA Future");
auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
"Specify a dmr row pair 0-3");
unsigned IdxVal = Idx->getSExtValue();
- unsigned Pairx;
+ unsigned Subx;
switch (IdxVal) {
- case 0: Pairx = PPC::sub_dmrrowp0; break;
- case 1: Pairx = PPC::sub_dmrrowp1; break;
- case 2: Pairx = PPC::sub_wacc_hi_then_sub_dmrrowp0; break;
- case 3: Pairx = PPC::sub_wacc_hi_then_sub_dmrrowp1; break;
+ case 0:
+ Subx = PPC::sub_dmrrowp0;
+ break;
+ case 1:
+ Subx = PPC::sub_dmrrowp1;
+ break;
+ case 2:
+ Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
+ break;
+ case 3:
+ Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
+ break;
}
- SDValue Pair(
+ SDValue Subreg(
DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v256i1,
Op.getOperand(1),
- DAG.getTargetConstant(Pairx, dl, MVT::i32)),
+ DAG.getTargetConstant(Subx, dl, MVT::i32)),
0);
- SDValue C = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
+ SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
return SDValue(
- DAG.getMachineNode(PPC::DMXXEXTFDMR256, dl, MVT::v256i1, {Pair, C}), 0);
+ DAG.getMachineNode(PPC::DMXXEXTFDMR256, dl, MVT::v256i1, {Subreg, P}),
+ 0);
}
case Intrinsic::ppc_mma_dmxxinstdmr512: {
+ assert(Subtarget.isISAFuture() && "dmxxinstdmr512 requires ISA Future");
auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4));
assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
"Specify P of 0 or 1 for lower or upper 512 bytes");
@@ -11205,41 +11217,43 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Opcode = PPC::DMXXINSTDMR512_HI;
Subx = PPC::sub_wacc_hi;
}
- SDValue Ops[] = { Op.getOperand(2), Op.getOperand(3) };
- SDValue WideVec =
- SDValue(DAG.getMachineNode(Opcode, dl, MVT::v512i1, Ops), 0);
+ SDValue Ops[] = {Op.getOperand(2), Op.getOperand(3)};
+ SDValue Wacc = SDValue(DAG.getMachineNode(Opcode, dl, MVT::v512i1, Ops), 0);
SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
- return
- SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl,
- MVT::v1024i1, Op.getOperand(1), WideVec,
- SubReg),
- 0);
+ return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
+ Op.getOperand(1), Wacc, SubReg),
+ 0);
}
case Intrinsic::ppc_mma_dmxxinstdmr256: {
+ assert(Subtarget.isISAFuture() && "dmxxinstdmr256 requires ISA Future");
auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3));
assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
"Specify a dmr row pair 0-3");
unsigned IdxVal = Idx->getSExtValue();
- unsigned Pairx;
unsigned Subx;
switch (IdxVal) {
- case 0: Subx = PPC::sub_dmrrowp0; break;
- case 1: Subx = PPC::sub_dmrrowp1; break;
- case 2: Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0; break;
- case 3: Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1; break;
+ case 0:
+ Subx = PPC::sub_dmrrowp0;
+ break;
+ case 1:
+ Subx = PPC::sub_dmrrowp1;
+ break;
+ case 2:
+ Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
+ break;
+ case 3:
+ Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
+ break;
}
SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
- SDValue C = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
- SDValue Ops[] = { Op.getOperand(2), C };
- SDValue WideVec =
- SDValue(DAG.getMachineNode(PPC::DMXXINSTDMR256, dl, MVT::v512i1, Ops),
- 0);
- return
- SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl,
- MVT::v1024i1, Op.getOperand(1), WideVec,
- SubReg),
- 0);
+ SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
+ SDValue Ops[] = {Op.getOperand(2), P};
+ SDValue DMRRowp = SDValue(
+ DAG.getMachineNode(PPC::DMXXINSTDMR256, dl, MVT::v256i1, Ops), 0);
+ return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
+ Op.getOperand(1), DMRRowp, SubReg),
+ 0);
}
case Intrinsic::ppc_mma_xxmfacc:
More information about the llvm-commits
mailing list