[llvm] [PowerPC] Add dense math half-precision floating-point outer-product accumulate to DMR instructions (PR #133272)
Maryam Moghadas via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 22 09:02:14 PDT 2025
https://github.com/maryammo updated https://github.com/llvm/llvm-project/pull/133272
>From 8b7aeb7585ba560815af721d3d92baae3bb8d5f2 Mon Sep 17 00:00:00 2001
From: Maryam Moghadas <maryammo at ca.ibm.com>
Date: Thu, 27 Mar 2025 16:30:41 +0000
Subject: [PATCH] [PowerPC] Add dense math half-precision floating-point
outer-product accumulate to DMR instructions
This patch adds the following Dense Math Facility 16-bit half-precision
floating-point calculation instructions: dmxvf16gerx2, dmxvf16gerx2pp,
dmxvf16gerx2pn, dmxvf16gerx2np, dmxvf16gerx2nn, pmdmxvf16gerx2,
pmdmxvf16gerx2pp, pmdmxvf16gerx2pn, pmdmxvf16gerx2np, pmdmxvf16gerx2nn,
along with their corresponding intrinsics and tests.
---
llvm/include/llvm/IR/IntrinsicsPowerPC.td | 7 +
llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td | 116 ++++-
.../test/CodeGen/PowerPC/dmf-outer-product.ll | 483 ++++++++++++++++++
.../PowerPC/ppc-encoding-ISAFuture.txt | 30 ++
.../PowerPC/ppc64le-encoding-ISAFuture.txt | 30 ++
llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s | 50 ++
6 files changed, 714 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index b57102ef68f09..bcc3fc6f0fc13 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1755,6 +1755,13 @@ let TargetPrefix = "ppc" in {
defm int_ppc_mma_pmdmxvbf16gerx2 :
PowerPC_MMA_DMR_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty]>;
+
+ // MMA+ Half-precision Outer Product Intrinsic Definitions.
+ defm int_ppc_mma_dmxvf16gerx2 :
+ PowerPC_MMA_DMR_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty]>;
+ defm int_ppc_mma_pmdmxvf16gerx2 :
+ PowerPC_MMA_DMR_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty, llvm_i32_ty,
+ llvm_i32_ty, llvm_i32_ty]>;
}
// XL Compat intrinsics.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td
index e9c69f7c16858..def9609f46b05 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td
@@ -207,7 +207,7 @@ multiclass DMR_BF16_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
}
}
-multiclass DMR_UM_M284_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
+multiclass DMR_BF16_UM_M284_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
string asmstr> {
defm NAME : DMR_BF16_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
@@ -228,9 +228,30 @@ multiclass DMR_UM_M284_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
}
}
+multiclass DMR_F16_UM_M284_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
+ string asmstr> {
+ defm NAME : DMR_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
+ let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
+ def PM#NAME :
+ MMIRR_XX3Form_X8Y4P2_XAp5B6<
+ opcode, !or(xo, 0x01), (outs dmr:$AT),
+ !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)),
+ !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"@earlyclobber $AT">;
+ def PM#NAME#PP :
+ MMIRR_XX3Form_X8Y4P2_XAp5B6<
+ opcode, xo, (outs dmr:$AT),
+ !con((ins dmr:$ATi), !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+ !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
+
multiclass DMR_NEG_UM_M284_XOXORf939a0<bits<6> opcode, bits<8> xo, dag IOL,
string asmbase, string asmstr> {
- defm NAME : DMR_UM_M284_XOEO<opcode, xo, IOL, asmbase, asmstr>;
+ defm NAME : DMR_BF16_UM_M284_XOEO<opcode, xo, IOL, asmbase, asmstr>;
let Predicates = [MMA, IsISAFuture] in {
def PN : XX3Form_AT3_XAp5B6<
opcode, !xor(xo, 0xF9), (outs dmr:$AT), !con((ins dmr:$ATi), IOL),
@@ -270,6 +291,48 @@ multiclass DMR_NEG_UM_M284_XOXORf939a0<bits<6> opcode, bits<8> xo, dag IOL,
}
}
+multiclass DMR_NEG_UM_M284_XOXORd11188<bits<6> opcode, bits<8> xo, dag IOL,
+ string asmbase, string asmstr> {
+ defm NAME : DMR_F16_UM_M284_XOEO<opcode, xo, IOL, asmbase, asmstr>;
+ let Predicates = [MMA, IsISAFuture] in {
+ def PN : XX3Form_AT3_XAp5B6<
+ opcode, !xor(xo, 0xD1), (outs dmr:$AT), !con((ins dmr:$ATi), IOL),
+ !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def NP : XX3Form_AT3_XAp5B6<
+ opcode, !xor(xo, 0x11), (outs dmr:$AT), !con((ins dmr:$ATi), IOL),
+ !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def NN : XX3Form_AT3_XAp5B6<
+ opcode, !xor(xo, 0x88), (outs dmr:$AT), !con((ins dmr:$ATi), IOL),
+ !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+ let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
+ def PM#NAME#PN :
+ MMIRR_XX3Form_X8Y4P2_XAp5B6<
+ opcode, !xor(xo, 0xD1), (outs dmr:$AT),
+ !con((ins dmr:$ATi), !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+ !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def PM#NAME#NP :
+ MMIRR_XX3Form_X8Y4P2_XAp5B6<
+ opcode, !xor(xo, 0x11), (outs dmr:$AT),
+ !con((ins dmr:$ATi), !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+ !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def PM#NAME#NN :
+ MMIRR_XX3Form_X8Y4P2_XAp5B6<
+ opcode, !xor(xo, 0x88), (outs dmr:$AT),
+ !con((ins dmr:$ATi), !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+ !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
+
let Predicates = [IsISAFuture] in {
def DMXXEXTFDMR512 : XX3Form_AT3_XABp5_P1<60, 226,
(outs vsrprc:$XAp, vsrprc:$XBp),
@@ -347,6 +410,11 @@ let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
defm DMXVBF16GERX2 : DMR_NEG_UM_M284_XOXORf939a0<59, 74, (ins vsrprc:$XAp, vsrc:$XB),
"dmxvbf16gerx2", "$AT, $XAp, $XB">;
+// DMXVF16GERX2, DMXVF16GERX2PP, DMXVF16GERX2PN, DMXVF16GERX2NP, DMXVF16GERX2NN
+// PMDMXVF16GERX2, PMDMXVF16GERX2PP, PMDMXVF16GERX2PN, PMDMXVF16GERX2NP, PMDMXVF16GERX2NN
+defm DMXVF16GERX2 : DMR_NEG_UM_M284_XOXORd11188<59, 66, (ins vsrprc:$XAp, vsrc:$XB),
+ "dmxvf16gerx2", "$AT, $XAp, $XB">;
+
// MMA+ Intrinsics
let Predicates = [MMA, IsISAFuture] in {
def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4 v256i1:$XAp, v16i8:$XB)),
@@ -371,6 +439,21 @@ let Predicates = [MMA, IsISAFuture] in {
def : Pat<(v1024i1 (int_ppc_mma_dmxvbf16gerx2nn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)),
(DMXVBF16GERX2NN $ATi, $XAp, RCCp.BToVSRC)>;
+
+ def : Pat<(v1024i1 (int_ppc_mma_dmxvf16gerx2 v256i1:$XAp, v16i8:$XB)),
+ (DMXVF16GERX2 $XAp, RCCp.BToVSRC)>;
+
+ def : Pat<(v1024i1 (int_ppc_mma_dmxvf16gerx2pp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)),
+ (DMXVF16GERX2PP $ATi, $XAp, RCCp.BToVSRC)>;
+
+ def : Pat<(v1024i1 (int_ppc_mma_dmxvf16gerx2pn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)),
+ (DMXVF16GERX2PN $ATi, $XAp, RCCp.BToVSRC)>;
+
+ def : Pat<(v1024i1 (int_ppc_mma_dmxvf16gerx2np v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)),
+ (DMXVF16GERX2NP $ATi, $XAp, RCCp.BToVSRC)>;
+
+ def : Pat<(v1024i1 (int_ppc_mma_dmxvf16gerx2nn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)),
+ (DMXVF16GERX2NN $ATi, $XAp, RCCp.BToVSRC)>;
}
let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
@@ -419,4 +502,33 @@ let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
Msk2Imm:$PMSK)),
(PMDMXVBF16GERX2NN $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+
+ def : Pat<(v1024i1 (int_ppc_mma_pmdmxvf16gerx2 v256i1:$XAp, v16i8:$XB, Msk8Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
+ (PMDMXVF16GERX2 $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+
+ def : Pat<(v1024i1 (int_ppc_mma_pmdmxvf16gerx2pp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB,
+ Msk8Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMDMXVF16GERX2PP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+
+ def : Pat<(v1024i1 (int_ppc_mma_pmdmxvf16gerx2pn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB,
+ Msk8Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMDMXVF16GERX2PN $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+
+ def : Pat<(v1024i1 (int_ppc_mma_pmdmxvf16gerx2np v1024i1:$ATi, v256i1:$XAp, v16i8:$XB,
+ Msk8Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMDMXVF16GERX2NP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+
+ def : Pat<(v1024i1 (int_ppc_mma_pmdmxvf16gerx2nn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB,
+ Msk8Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMDMXVF16GERX2NN $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
}
diff --git a/llvm/test/CodeGen/PowerPC/dmf-outer-product.ll b/llvm/test/CodeGen/PowerPC/dmf-outer-product.ll
index c120b0d94f738..4dccbf2d2cc1d 100644
--- a/llvm/test/CodeGen/PowerPC/dmf-outer-product.ll
+++ b/llvm/test/CodeGen/PowerPC/dmf-outer-product.ll
@@ -769,3 +769,486 @@ entry:
store <1024 x i1> %call, ptr %resp, align 64
ret void
}
+
+declare <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2(<256 x i1>, <16 x i8>)
+define void @test_dmxvf16gerx2(ptr %vpp, ptr %vcp, ptr %resp) {
+; CHECK-LABEL: test_dmxvf16gerx2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxv v2, 16(r3)
+; CHECK-NEXT: lxv vs0, 0(r4)
+; CHECK-NEXT: lxv v3, 0(r3)
+; CHECK-NEXT: dmxvf16gerx2 dmr0, vsp34, vs0
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r5)
+; CHECK-NEXT: stxvp vsp36, 64(r5)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r5)
+; CHECK-NEXT: stxvp vsp36, 0(r5)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_dmxvf16gerx2:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxv v2, 0(r3)
+; CHECK-BE-NEXT: lxv vs0, 0(r4)
+; CHECK-BE-NEXT: lxv v3, 16(r3)
+; CHECK-BE-NEXT: dmxvf16gerx2 dmr0, vsp34, vs0
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r5)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r5)
+; CHECK-BE-NEXT: blr
+entry:
+ %v1 = load <256 x i1>, ptr %vpp, align 32
+ %v2 = load <16 x i8>, ptr %vcp, align 32
+ %call = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2(<256 x i1> %v1, <16 x i8> %v2)
+ store <1024 x i1> %call, ptr %resp, align 64
+ ret void
+}
+
+declare <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2pp(<1024 x i1>, <256 x i1>, <16 x i8>)
+
+define void @test_dmxvf16gerx2pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
+; CHECK-LABEL: test_dmxvf16gerx2pp:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r3)
+; CHECK-NEXT: lxvp vsp36, 96(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT: lxv v2, 16(r4)
+; CHECK-NEXT: lxv vs0, 0(r5)
+; CHECK-NEXT: lxv v3, 0(r4)
+; CHECK-NEXT: dmxvf16gerx2pp dmr0, vsp34, vs0
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r6)
+; CHECK-NEXT: stxvp vsp36, 64(r6)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r6)
+; CHECK-NEXT: stxvp vsp36, 0(r6)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_dmxvf16gerx2pp:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT: lxv v2, 0(r4)
+; CHECK-BE-NEXT: lxv vs0, 0(r5)
+; CHECK-BE-NEXT: lxv v3, 16(r4)
+; CHECK-BE-NEXT: dmxvf16gerx2pp dmr0, vsp34, vs0
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r6)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r6)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r6)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r6)
+; CHECK-BE-NEXT: blr
+entry:
+ %v.dmr = load <1024 x i1>, ptr %vop, align 64
+ %v1 = load <256 x i1>, ptr %vpp, align 32
+ %v2 = load <16 x i8>, ptr %vcp, align 32
+ %call = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2pp(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2)
+ store <1024 x i1> %call, ptr %resp, align 64
+ ret void
+}
+
+declare <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2pn(<1024 x i1>, <256 x i1>, <16 x i8>)
+
+define void @test_dmxvf16gerx2pn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
+; CHECK-LABEL: test_dmxvf16gerx2pn:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r3)
+; CHECK-NEXT: lxvp vsp36, 96(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT: lxv v2, 16(r4)
+; CHECK-NEXT: lxv vs0, 0(r5)
+; CHECK-NEXT: lxv v3, 0(r4)
+; CHECK-NEXT: dmxvf16gerx2pn dmr0, vsp34, vs0
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r6)
+; CHECK-NEXT: stxvp vsp36, 64(r6)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r6)
+; CHECK-NEXT: stxvp vsp36, 0(r6)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_dmxvf16gerx2pn:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT: lxv v2, 0(r4)
+; CHECK-BE-NEXT: lxv vs0, 0(r5)
+; CHECK-BE-NEXT: lxv v3, 16(r4)
+; CHECK-BE-NEXT: dmxvf16gerx2pn dmr0, vsp34, vs0
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r6)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r6)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r6)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r6)
+; CHECK-BE-NEXT: blr
+entry:
+ %v.dmr = load <1024 x i1>, ptr %vop, align 64
+ %v1 = load <256 x i1>, ptr %vpp, align 32
+ %v2 = load <16 x i8>, ptr %vcp, align 32
+ %call = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2pn(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2)
+ store <1024 x i1> %call, ptr %resp, align 64
+ ret void
+}
+
+declare <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2np(<1024 x i1>, <256 x i1>, <16 x i8>)
+
+define void @test_dmxvf16gerx2np(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
+; CHECK-LABEL: test_dmxvf16gerx2np:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r3)
+; CHECK-NEXT: lxvp vsp36, 96(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT: lxv v2, 16(r4)
+; CHECK-NEXT: lxv vs0, 0(r5)
+; CHECK-NEXT: lxv v3, 0(r4)
+; CHECK-NEXT: dmxvf16gerx2np dmr0, vsp34, vs0
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r6)
+; CHECK-NEXT: stxvp vsp36, 64(r6)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r6)
+; CHECK-NEXT: stxvp vsp36, 0(r6)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_dmxvf16gerx2np:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT: lxv v2, 0(r4)
+; CHECK-BE-NEXT: lxv vs0, 0(r5)
+; CHECK-BE-NEXT: lxv v3, 16(r4)
+; CHECK-BE-NEXT: dmxvf16gerx2np dmr0, vsp34, vs0
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r6)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r6)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r6)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r6)
+; CHECK-BE-NEXT: blr
+entry:
+ %v.dmr = load <1024 x i1>, ptr %vop, align 64
+ %v1 = load <256 x i1>, ptr %vpp, align 32
+ %v2 = load <16 x i8>, ptr %vcp, align 32
+ %call = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2np(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2)
+ store <1024 x i1> %call, ptr %resp, align 64
+ ret void
+}
+
+declare <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2nn(<1024 x i1>, <256 x i1>, <16 x i8>)
+
+define void @test_dmxvf16gerx2nn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
+; CHECK-LABEL: test_dmxvf16gerx2nn:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r3)
+; CHECK-NEXT: lxvp vsp36, 96(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT: lxv v2, 16(r4)
+; CHECK-NEXT: lxv vs0, 0(r5)
+; CHECK-NEXT: lxv v3, 0(r4)
+; CHECK-NEXT: dmxvf16gerx2nn dmr0, vsp34, vs0
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r6)
+; CHECK-NEXT: stxvp vsp36, 64(r6)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r6)
+; CHECK-NEXT: stxvp vsp36, 0(r6)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_dmxvf16gerx2nn:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT: lxv v2, 0(r4)
+; CHECK-BE-NEXT: lxv vs0, 0(r5)
+; CHECK-BE-NEXT: lxv v3, 16(r4)
+; CHECK-BE-NEXT: dmxvf16gerx2nn dmr0, vsp34, vs0
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r6)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r6)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r6)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r6)
+; CHECK-BE-NEXT: blr
+entry:
+ %v.dmr = load <1024 x i1>, ptr %vop, align 64
+ %v1 = load <256 x i1>, ptr %vpp, align 32
+ %v2 = load <16 x i8>, ptr %vcp, align 32
+ %call = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2nn(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2)
+ store <1024 x i1> %call, ptr %resp, align 64
+ ret void
+}
+
+declare <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2(<256 x i1>, <16 x i8>, i32, i32, i32)
+
+define void @test_pmdmxvf16gerx2(ptr %vpp, ptr %vcp, ptr %resp) {
+; CHECK-LABEL: test_pmdmxvf16gerx2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxv v2, 16(r3)
+; CHECK-NEXT: lxv vs0, 0(r4)
+; CHECK-NEXT: lxv v3, 0(r3)
+; CHECK-NEXT: pmdmxvf16gerx2 dmr0, vsp34, vs0, 33, 5, 2
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r5)
+; CHECK-NEXT: stxvp vsp36, 64(r5)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r5)
+; CHECK-NEXT: stxvp vsp36, 0(r5)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_pmdmxvf16gerx2:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxv v2, 0(r3)
+; CHECK-BE-NEXT: lxv vs0, 0(r4)
+; CHECK-BE-NEXT: lxv v3, 16(r3)
+; CHECK-BE-NEXT: pmdmxvf16gerx2 dmr0, vsp34, vs0, 33, 5, 2
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r5)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r5)
+; CHECK-BE-NEXT: blr
+entry:
+ %v1 = load <256 x i1>, ptr %vpp, align 32
+ %v2 = load <16 x i8>, ptr %vcp, align 32
+ %call = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2(<256 x i1> %v1, <16 x i8> %v2, i32 33, i32 5, i32 2)
+ store <1024 x i1> %call, ptr %resp, align 64
+ ret void
+}
+
+declare <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2pp(<1024 x i1>, <256 x i1>, <16 x i8>, i32, i32, i32)
+
+define void @test_pmdmxvf16gerx2pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
+; CHECK-LABEL: test_pmdmxvf16gerx2pp:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r3)
+; CHECK-NEXT: lxvp vsp36, 96(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT: lxv v2, 16(r4)
+; CHECK-NEXT: lxv vs0, 0(r5)
+; CHECK-NEXT: lxv v3, 0(r4)
+; CHECK-NEXT: pmdmxvf16gerx2pp dmr0, vsp34, vs0, 33, 5, 2
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r6)
+; CHECK-NEXT: stxvp vsp36, 64(r6)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r6)
+; CHECK-NEXT: stxvp vsp36, 0(r6)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_pmdmxvf16gerx2pp:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT: lxv v2, 0(r4)
+; CHECK-BE-NEXT: lxv vs0, 0(r5)
+; CHECK-BE-NEXT: lxv v3, 16(r4)
+; CHECK-BE-NEXT: pmdmxvf16gerx2pp dmr0, vsp34, vs0, 33, 5, 2
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r6)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r6)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r6)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r6)
+; CHECK-BE-NEXT: blr
+entry:
+ %v.dmr = load <1024 x i1>, ptr %vop, align 64
+ %v1 = load <256 x i1>, ptr %vpp, align 32
+ %v2 = load <16 x i8>, ptr %vcp, align 32
+ %call = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2pp(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2, i32 33, i32 5, i32 2)
+ store <1024 x i1> %call, ptr %resp, align 64
+ ret void
+}
+
+declare <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2pn(<1024 x i1>, <256 x i1>, <16 x i8>, i32, i32, i32)
+
+define void @test_pmdmxvf16gerx2pn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
+; CHECK-LABEL: test_pmdmxvf16gerx2pn:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r3)
+; CHECK-NEXT: lxvp vsp36, 96(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT: lxv v2, 16(r4)
+; CHECK-NEXT: lxv vs0, 0(r5)
+; CHECK-NEXT: lxv v3, 0(r4)
+; CHECK-NEXT: pmdmxvf16gerx2pn dmr0, vsp34, vs0, 33, 5, 2
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r6)
+; CHECK-NEXT: stxvp vsp36, 64(r6)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r6)
+; CHECK-NEXT: stxvp vsp36, 0(r6)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_pmdmxvf16gerx2pn:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT: lxv v2, 0(r4)
+; CHECK-BE-NEXT: lxv vs0, 0(r5)
+; CHECK-BE-NEXT: lxv v3, 16(r4)
+; CHECK-BE-NEXT: pmdmxvf16gerx2pn dmr0, vsp34, vs0, 33, 5, 2
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r6)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r6)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r6)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r6)
+; CHECK-BE-NEXT: blr
+entry:
+ %v.dmr = load <1024 x i1>, ptr %vop, align 64
+ %v1 = load <256 x i1>, ptr %vpp, align 32
+ %v2 = load <16 x i8>, ptr %vcp, align 32
+ %call = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2pn(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2, i32 33, i32 5, i32 2)
+ store <1024 x i1> %call, ptr %resp, align 64
+ ret void
+}
+
+declare <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2np(<1024 x i1>, <256 x i1>, <16 x i8>, i32, i32, i32)
+
+define void @test_pmdmxvf16gerx2np(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
+; CHECK-LABEL: test_pmdmxvf16gerx2np:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r3)
+; CHECK-NEXT: lxvp vsp36, 96(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT: lxv v2, 16(r4)
+; CHECK-NEXT: lxv vs0, 0(r5)
+; CHECK-NEXT: lxv v3, 0(r4)
+; CHECK-NEXT: pmdmxvf16gerx2np dmr0, vsp34, vs0, 33, 5, 2
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r6)
+; CHECK-NEXT: stxvp vsp36, 64(r6)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r6)
+; CHECK-NEXT: stxvp vsp36, 0(r6)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_pmdmxvf16gerx2np:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT: lxv v2, 0(r4)
+; CHECK-BE-NEXT: lxv vs0, 0(r5)
+; CHECK-BE-NEXT: lxv v3, 16(r4)
+; CHECK-BE-NEXT: pmdmxvf16gerx2np dmr0, vsp34, vs0, 33, 5, 2
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r6)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r6)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r6)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r6)
+; CHECK-BE-NEXT: blr
+entry:
+ %v.dmr = load <1024 x i1>, ptr %vop, align 64
+ %v1 = load <256 x i1>, ptr %vpp, align 32
+ %v2 = load <16 x i8>, ptr %vcp, align 32
+ %call = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2np(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2, i32 33, i32 5, i32 2)
+ store <1024 x i1> %call, ptr %resp, align 64
+ ret void
+}
+
+declare <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2nn(<1024 x i1>, <256 x i1>, <16 x i8>, i32, i32, i32)
+
+define void @test_pmdmxvf16gerx2nn(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
+; CHECK-LABEL: test_pmdmxvf16gerx2nn:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r3)
+; CHECK-NEXT: lxvp vsp36, 96(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT: lxv v2, 16(r4)
+; CHECK-NEXT: lxv vs0, 0(r5)
+; CHECK-NEXT: lxv v3, 0(r4)
+; CHECK-NEXT: pmdmxvf16gerx2nn dmr0, vsp34, vs0, 33, 5, 2
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r6)
+; CHECK-NEXT: stxvp vsp36, 64(r6)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r6)
+; CHECK-NEXT: stxvp vsp36, 0(r6)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_pmdmxvf16gerx2nn:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT: lxv v2, 0(r4)
+; CHECK-BE-NEXT: lxv vs0, 0(r5)
+; CHECK-BE-NEXT: lxv v3, 16(r4)
+; CHECK-BE-NEXT: pmdmxvf16gerx2nn dmr0, vsp34, vs0, 33, 5, 2
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r6)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r6)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r6)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r6)
+; CHECK-BE-NEXT: blr
+entry:
+ %v.dmr = load <1024 x i1>, ptr %vop, align 64
+ %v1 = load <256 x i1>, ptr %vpp, align 32
+ %v2 = load <16 x i8>, ptr %vcp, align 32
+ %call = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2nn(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2, i32 33, i32 5, i32 2)
+ store <1024 x i1> %call, ptr %resp, align 64
+ ret void
+}
diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt b/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt
index b265b36c91277..3936f4dc3806d 100644
--- a/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt
+++ b/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt
@@ -123,3 +123,33 @@
#CHECK: pmdmxvbf16gerx2nn 1, 2, 4, 8, 4, 2
0x07,0x90,0x80,0x84,0xec,0x82,0x27,0x50
+
+#CHECK: dmxvf16gerx2 1, 0, 2
+0xec,0x80,0x12,0x18
+
+#CHECK: dmxvf16gerx2pp 1, 0, 2
+0xec,0x80,0x12,0x10
+
+#CHECK: dmxvf16gerx2pn 1, 0, 2
+0xec,0x80,0x14,0x98
+
+#CHECK: dmxvf16gerx2np 1, 0, 2
+0xec,0x80,0x12,0x98
+
+#CHECK: dmxvf16gerx2nn 1, 0, 2
+0xec,0x80,0x16,0x50
+
+#CHECK: pmdmxvf16gerx2 0, 2, 4, 12, 5, 3
+0x07,0x90,0xc0,0xc5,0xec,0x02,0x22,0x18
+
+#CHECK: pmdmxvf16gerx2pp 0, 2, 4, 12, 5, 3
+0x07,0x90,0xc0,0xc5,0xec,0x02,0x22,0x10
+
+#CHECK: pmdmxvf16gerx2pn 0, 2, 4, 12, 5, 3
+0x07,0x90,0xc0,0xc5,0xec,0x02,0x24,0x98
+
+#CHECK: pmdmxvf16gerx2np 0, 2, 4, 12, 5, 3
+0x07,0x90,0xc0,0xc5,0xec,0x02,0x22,0x98
+
+#CHECK: pmdmxvf16gerx2nn 0, 2, 4, 12, 5, 3
+0x07,0x90,0xc0,0xc5,0xec,0x02,0x26,0x50
diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt
index d89588845c138..6ac13ec08af54 100644
--- a/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt
+++ b/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt
@@ -117,3 +117,33 @@
#CHECK: pmdmxvbf16gerx2nn 1, 2, 4, 8, 4, 2
0x84,0x80,0x90,0x07,0x50,0x27,0x82,0xec
+
+#CHECK: dmxvf16gerx2 1, 0, 2
+0x18,0x12,0x80,0xec
+
+#CHECK: dmxvf16gerx2pp 1, 0, 2
+0x10,0x12,0x80,0xec
+
+#CHECK: dmxvf16gerx2pn 1, 0, 2
+0x98,0x14,0x80,0xec
+
+#CHECK: dmxvf16gerx2np 1, 0, 2
+0x98,0x12,0x80,0xec
+
+#CHECK: dmxvf16gerx2nn 1, 0, 2
+0x50,0x16,0x80,0xec
+
+#CHECK: pmdmxvf16gerx2 0, 2, 4, 12, 5, 3
+0xc5,0xc0,0x90,0x07,0x18,0x22,0x02,0xec
+
+#CHECK: pmdmxvf16gerx2pp 0, 2, 4, 12, 5, 3
+0xc5,0xc0,0x90,0x07,0x10,0x22,0x02,0xec
+
+#CHECK: pmdmxvf16gerx2pn 0, 2, 4, 12, 5, 3
+0xc5,0xc0,0x90,0x07,0x98,0x24,0x02,0xec
+
+#CHECK: pmdmxvf16gerx2np 0, 2, 4, 12, 5, 3
+0xc5,0xc0,0x90,0x07,0x98,0x22,0x02,0xec
+
+#CHECK: pmdmxvf16gerx2nn 0, 2, 4, 12, 5, 3
+0xc5,0xc0,0x90,0x07,0x50,0x26,0x02,0xec
diff --git a/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s b/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s
index 1160bff24a49a..984ddf7eff0a6 100644
--- a/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s
+++ b/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s
@@ -176,3 +176,53 @@
#CHECK-BE-SAME: 0xec,0x82,0x27,0x50]
#CHECK-LE: pmdmxvbf16gerx2nn 1, 2, 4, 8, 4, 2 # encoding: [0x84,0x80,0x90,0x07,
#CHECK-LE-SAME: 0x50,0x27,0x82,0xec]
+
+ dmxvf16gerx2 1, 0, 2
+#CHECK-BE: dmxvf16gerx2 1, 0, 2 # encoding: [0xec,0x80,0x12,0x18]
+#CHECK-LE: dmxvf16gerx2 1, 0, 2 # encoding: [0x18,0x12,0x80,0xec]
+
+ dmxvf16gerx2pp 1, 0, 2
+#CHECK-BE: dmxvf16gerx2pp 1, 0, 2 # encoding: [0xec,0x80,0x12,0x10]
+#CHECK-LE: dmxvf16gerx2pp 1, 0, 2 # encoding: [0x10,0x12,0x80,0xec]
+
+ dmxvf16gerx2pn 1, 0, 2
+#CHECK-BE: dmxvf16gerx2pn 1, 0, 2 # encoding: [0xec,0x80,0x14,0x98]
+#CHECK-LE: dmxvf16gerx2pn 1, 0, 2 # encoding: [0x98,0x14,0x80,0xec]
+
+ dmxvf16gerx2np 1, 0, 2
+#CHECK-BE: dmxvf16gerx2np 1, 0, 2 # encoding: [0xec,0x80,0x12,0x98]
+#CHECK-LE: dmxvf16gerx2np 1, 0, 2 # encoding: [0x98,0x12,0x80,0xec]
+
+ dmxvf16gerx2nn 1, 0, 2
+#CHECK-BE: dmxvf16gerx2nn 1, 0, 2 # encoding: [0xec,0x80,0x16,0x50]
+#CHECK-LE: dmxvf16gerx2nn 1, 0, 2 # encoding: [0x50,0x16,0x80,0xec]
+
+ pmdmxvf16gerx2 0, 2, 4, 12, 5, 3
+#CHECK-BE: pmdmxvf16gerx2 0, 2, 4, 12, 5, 3 # encoding: [0x07,0x90,0xc0,0xc5,
+#CHECK-BE-SAME: 0xec,0x02,0x22,0x18]
+#CHECK-LE: pmdmxvf16gerx2 0, 2, 4, 12, 5, 3 # encoding: [0xc5,0xc0,0x90,0x07,
+#CHECK-LE-SAME: 0x18,0x22,0x02,0xec]
+
+ pmdmxvf16gerx2pp 0, 2, 4, 12, 5, 3
+#CHECK-BE: pmdmxvf16gerx2pp 0, 2, 4, 12, 5, 3 # encoding: [0x07,0x90,0xc0,0xc5,
+#CHECK-BE-SAME: 0xec,0x02,0x22,0x10]
+#CHECK-LE: pmdmxvf16gerx2pp 0, 2, 4, 12, 5, 3 # encoding: [0xc5,0xc0,0x90,0x07,
+#CHECK-LE-SAME: 0x10,0x22,0x02,0xec]
+
+ pmdmxvf16gerx2pn 0, 2, 4, 12, 5, 3
+#CHECK-BE: pmdmxvf16gerx2pn 0, 2, 4, 12, 5, 3 # encoding: [0x07,0x90,0xc0,0xc5,
+#CHECK-BE-SAME: 0xec,0x02,0x24,0x98]
+#CHECK-LE: pmdmxvf16gerx2pn 0, 2, 4, 12, 5, 3 # encoding: [0xc5,0xc0,0x90,0x07,
+#CHECK-LE-SAME: 0x98,0x24,0x02,0xec]
+
+ pmdmxvf16gerx2np 0, 2, 4, 12, 5, 3
+#CHECK-BE: pmdmxvf16gerx2np 0, 2, 4, 12, 5, 3 # encoding: [0x07,0x90,0xc0,0xc5,
+#CHECK-BE-SAME: 0xec,0x02,0x22,0x98]
+#CHECK-LE: pmdmxvf16gerx2np 0, 2, 4, 12, 5, 3 # encoding: [0xc5,0xc0,0x90,0x07,
+#CHECK-LE-SAME: 0x98,0x22,0x02,0xec]
+
+ pmdmxvf16gerx2nn 0, 2, 4, 12, 5, 3
+#CHECK-BE: pmdmxvf16gerx2nn 0, 2, 4, 12, 5, 3 # encoding: [0x07,0x90,0xc0,0xc5,
+#CHECK-BE-SAME: 0xec,0x02,0x26,0x50]
+#CHECK-LE: pmdmxvf16gerx2nn 0, 2, 4, 12, 5, 3 # encoding: [0xc5,0xc0,0x90,0x07,
+#CHECK-LE-SAME: 0x50,0x26,0x02,0xec]
More information about the llvm-commits
mailing list