[llvm] f3202b3 - [PowerPC] Add assemble disassemble intrinsics for MMA
Ahsan Saghir via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 13 11:22:16 PDT 2020
Author: Ahsan Saghir
Date: 2020-10-13T13:21:58-05:00
New Revision: f3202b30b8e6fea838c595632641e86719ff8264
URL: https://github.com/llvm/llvm-project/commit/f3202b30b8e6fea838c595632641e86719ff8264
DIFF: https://github.com/llvm/llvm-project/commit/f3202b30b8e6fea838c595632641e86719ff8264.diff
LOG: [PowerPC] Add assemble disassemble intrinsics for MMA
This patch adds support for the assemble and disassemble intrinsics
for MMA.
Reviewed By: bsaleil, #powerpc
Differential Revision: https://reviews.llvm.org/D88739
Added:
llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
Modified:
llvm/include/llvm/IR/Intrinsics.td
llvm/include/llvm/IR/IntrinsicsPowerPC.td
llvm/lib/IR/Function.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
llvm/lib/Target/PowerPC/PPCInstrPrefix.td
llvm/utils/TableGen/IntrinsicEmitter.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index d05b9982c487..8e8bb0b35bce 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -260,6 +260,7 @@ def llvm_v16i1_ty : LLVMType<v16i1>; // 16 x i1
def llvm_v32i1_ty : LLVMType<v32i1>; // 32 x i1
def llvm_v64i1_ty : LLVMType<v64i1>; // 64 x i1
def llvm_v128i1_ty : LLVMType<v128i1>; // 128 x i1
+def llvm_v256i1_ty : LLVMType<v256i1>; // 256 x i1
def llvm_v512i1_ty : LLVMType<v512i1>; // 512 x i1
def llvm_v1024i1_ty : LLVMType<v1024i1>; //1024 x i1
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 7ab4ee301bb5..9f3dd637a9f8 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1364,3 +1364,32 @@ def int_ppc_setrnd : GCCBuiltin<"__builtin_setrnd">,
Intrinsic<[llvm_double_ty], [llvm_i32_ty], []>;
}
+
+let TargetPrefix = "ppc" in {
+ def int_ppc_mma_assemble_acc :
+ Intrinsic<[llvm_v512i1_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+
+ def int_ppc_mma_disassemble_acc :
+ Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+ [llvm_v512i1_ty], [IntrNoMem]>;
+
+ def int_ppc_mma_assemble_pair :
+ Intrinsic<[llvm_v256i1_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+
+ def int_ppc_mma_disassemble_pair :
+ Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty],
+ [llvm_v256i1_ty], [IntrNoMem]>;
+
+ def int_ppc_mma_xxmtacc :
+ Intrinsic<[llvm_v512i1_ty], [llvm_v512i1_ty], [IntrNoMem]>;
+
+ def int_ppc_mma_xxmfacc :
+ Intrinsic<[llvm_v512i1_ty], [llvm_v512i1_ty], [IntrNoMem]>;
+
+ def int_ppc_mma_xxsetaccz :
+ Intrinsic<[llvm_v512i1_ty], [], [IntrNoMem]>;
+}
+
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 8d741c3125a8..e228c3e0a4eb 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -834,7 +834,8 @@ enum IIT_Info {
IIT_VEC_OF_BITCASTS_TO_INT = 46,
IIT_V128 = 47,
IIT_BF16 = 48,
- IIT_STRUCT9 = 49
+ IIT_STRUCT9 = 49,
+ IIT_V256 = 50
};
static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
@@ -928,6 +929,10 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
OutputTable.push_back(IITDescriptor::getVector(128, IsScalableVector));
DecodeIITType(NextElt, Infos, Info, OutputTable);
return;
+ case IIT_V256:
+ OutputTable.push_back(IITDescriptor::getVector(256, IsScalableVector));
+ DecodeIITType(NextElt, Infos, Info, OutputTable);
+ return;
case IIT_V512:
OutputTable.push_back(IITDescriptor::getVector(512, IsScalableVector));
DecodeIITType(NextElt, Infos, Info, OutputTable);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 2c2bc8ac3418..8c9c1649d21f 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10416,11 +10416,32 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDLoc dl(Op);
- if (IntrinsicID == Intrinsic::thread_pointer) {
+ switch (IntrinsicID) {
+ case Intrinsic::thread_pointer:
// Reads the thread pointer register, used for __builtin_thread_pointer.
if (Subtarget.isPPC64())
return DAG.getRegister(PPC::X13, MVT::i64);
return DAG.getRegister(PPC::R2, MVT::i32);
+
+ case Intrinsic::ppc_mma_disassemble_acc:
+ case Intrinsic::ppc_mma_disassemble_pair: {
+ int NumVecs = 2;
+ SDValue WideVec = Op.getOperand(1);
+ if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
+ NumVecs = 4;
+ WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
+ }
+ SmallVector<SDValue, 4> RetOps;
+ for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
+ SDValue Extract = DAG.getNode(
+ PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
+ DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
+ : VecNo,
+ dl, MVT::i64));
+ RetOps.push_back(Extract);
+ }
+ return DAG.getMergeValues(RetOps, dl);
+ }
}
// If this is a lowered altivec predicate compare, CompareOpc is set to the
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index cc0779cac6dd..fd83b5b6d4b8 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -672,6 +672,7 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case PPC::V_SETALLONES:
case PPC::CRSET:
case PPC::CRUNSET:
+ case PPC::XXSETACCZ:
return true;
}
return false;
@@ -1340,6 +1341,22 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
PPC::VSSRCRegClass.contains(DestReg, SrcReg))
Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
+ else if (Subtarget.pairedVectorMemops() &&
+ PPC::VSRpRCRegClass.contains(DestReg, SrcReg)) {
+ if (SrcReg > PPC::VSRp15)
+ SrcReg = PPC::V0 + (SrcReg - PPC::VSRp16) * 2;
+ else
+ SrcReg = PPC::VSL0 + (SrcReg - PPC::VSRp0) * 2;
+ if (DestReg > PPC::VSRp15)
+ DestReg = PPC::V0 + (DestReg - PPC::VSRp16) * 2;
+ else
+ DestReg = PPC::VSL0 + (DestReg - PPC::VSRp0) * 2;
+ BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg).
+ addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
+ BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg + 1).
+ addReg(SrcReg + 1).addReg(SrcReg + 1, getKillRegState(KillSrc));
+ return;
+ }
else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::CROR;
else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index 4ca03298ba88..384cf1c08fbd 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1264,12 +1264,14 @@ multiclass ACC_NEG_UM_M42_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
let Predicates = [MMA] in {
def XXMFACC :
XForm_AT3<31, 0, 177, (outs acc:$ASo), (ins acc:$AS), "xxmfacc $AS",
- IIC_VecGeneral, []>, RegConstraint<"$ASo = $AS">,
- NoEncode<"$ASo">;
+ IIC_VecGeneral,
+ [(set v512i1:$ASo, (int_ppc_mma_xxmfacc v512i1:$AS))]>,
+ RegConstraint<"$ASo = $AS">, NoEncode<"$ASo">;
def XXMTACC :
XForm_AT3<31, 1, 177, (outs acc:$AT), (ins acc:$ATi), "xxmtacc $AT",
- IIC_VecGeneral, []>, RegConstraint<"$ATi = $AT">,
- NoEncode<"$ATi">;
+ IIC_VecGeneral,
+ [(set v512i1:$AT, (int_ppc_mma_xxmtacc v512i1:$ATi))]>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def KILL_PAIR : PPCPostRAExpPseudo<(outs vsrprc:$XTp), (ins vsrprc:$XSp),
"#KILL_PAIR", []>,
RegConstraint<"$XTp = $XSp">;
@@ -1280,8 +1282,8 @@ let Predicates = [MMA] in {
// register and this copy is more expensive than calling the intrinsic again.
let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
def XXSETACCZ :
- XForm_AT3<31, 3, 177, (outs acc:$AT), (ins), "xxsetaccz $AT",
- IIC_VecGeneral, []>;
+ XForm_AT3<31, 3, 177, (outs acc:$AT), (ins), "xxsetaccz $AT", IIC_VecGeneral,
+ [(set v512i1:$AT, (int_ppc_mma_xxsetaccz))]>;
}
def XVI8GER4SPP :
XX3Form_AT3_XAB6<59, 99, (outs acc:$AT), (ins acc:$ATi, vsrc:$XA, vsrc:$XB),
@@ -1369,6 +1371,11 @@ let Predicates = [MMA] in {
(XXMTACC Concats.VecsToVecQuad)>;
def : Pat<(v256i1 (PPCPairBuild v4i32:$vs1, v4i32:$vs0)),
Concats.VecsToVecPair0>;
+ def : Pat<(v512i1 (int_ppc_mma_assemble_acc v16i8:$vs1, v16i8:$vs0,
+ v16i8:$vs3, v16i8:$vs2)),
+ (XXMTACC Concats.VecsToVecQuad)>;
+ def : Pat<(v256i1 (int_ppc_mma_assemble_pair v16i8:$vs1, v16i8:$vs0)),
+ Concats.VecsToVecPair0>;
def : Pat<(v512i1 (PPCxxmfacc v512i1:$AS)), (XXMFACC acc:$AS)>;
def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, (i64 0))),
Extracts.Vec0>;
diff --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
new file mode 100644
index 000000000000..8e288ee5837d
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
@@ -0,0 +1,250 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+
+; assemble_acc
+declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
+define void @ass_acc(<512 x i1>* %ptr, <16 x i8> %vc) {
+; CHECK-LABEL: ass_acc:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmr v3, v2
+; CHECK-NEXT: xxlor vs0, v2, v2
+; CHECK-NEXT: xxlor vs1, v3, v3
+; CHECK-NEXT: xxlor vs2, v2, v2
+; CHECK-NEXT: xxlor vs3, v3, v3
+; CHECK-NEXT: xxmtacc acc0
+; CHECK-NEXT: xxmfacc acc0
+; CHECK-NEXT: stxv vs0, 48(r3)
+; CHECK-NEXT: stxv vs1, 32(r3)
+; CHECK-NEXT: stxv vs2, 16(r3)
+; CHECK-NEXT: stxv vs3, 0(r3)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: ass_acc:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vmr v3, v2
+; CHECK-BE-NEXT: xxlor vs0, v2, v2
+; CHECK-BE-NEXT: xxlor vs1, v3, v3
+; CHECK-BE-NEXT: xxlor vs2, v2, v2
+; CHECK-BE-NEXT: xxlor vs3, v3, v3
+; CHECK-BE-NEXT: xxmtacc acc0
+; CHECK-BE-NEXT: xxmfacc acc0
+; CHECK-BE-NEXT: stxv vs1, 16(r3)
+; CHECK-BE-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-NEXT: stxv vs3, 48(r3)
+; CHECK-BE-NEXT: stxv vs2, 32(r3)
+; CHECK-BE-NEXT: blr
+; CHECK-O0-LABEL: ass_acc:
+; CHECK-O0: # %bb.0: # %entry
+; CHECK-BE-O0-LABEL: ass_acc:
+; CHECK-BE-O0: # %bb.0: # %entry
+entry:
+ %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc)
+ store <512 x i1> %0, <512 x i1>* %ptr, align 64
+ ret void
+}
+
+; assemble_pair
+declare <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8>, <16 x i8>)
+define void @ass_pair(<256 x i1>* %ptr, <16 x i8> %vc) {
+; CHECK-LABEL: ass_pair:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmr v3, v2
+; CHECK-NEXT: stxv v2, 16(r3)
+; CHECK-NEXT: stxv v3, 0(r3)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: ass_pair:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vmr v3, v2
+; CHECK-BE-NEXT: stxv v2, 16(r3)
+; CHECK-BE-NEXT: stxv v2, 0(r3)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> %vc, <16 x i8> %vc)
+ store <256 x i1> %0, <256 x i1>* %ptr, align 32
+ ret void
+}
+
+; xxmtacc
+declare <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1>)
+define void @int_xxmtacc(<512 x i1>* %ptr, <16 x i8> %vc) {
+; CHECK-LABEL: int_xxmtacc:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmr v3, v2
+; CHECK-NEXT: xxlor vs0, v2, v2
+; CHECK-NEXT: xxlor vs1, v3, v3
+; CHECK-NEXT: xxlor vs2, v2, v2
+; CHECK-NEXT: xxlor vs3, v3, v3
+; CHECK-NEXT: xxmtacc acc0
+; CHECK-NEXT: xxmtacc acc0
+; CHECK-NEXT: xxmfacc acc0
+; CHECK-NEXT: stxv vs0, 48(r3)
+; CHECK-NEXT: stxv vs1, 32(r3)
+; CHECK-NEXT: stxv vs2, 16(r3)
+; CHECK-NEXT: stxv vs3, 0(r3)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: int_xxmtacc:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vmr v3, v2
+; CHECK-BE-NEXT: xxlor vs0, v2, v2
+; CHECK-BE-NEXT: xxlor vs1, v3, v3
+; CHECK-BE-NEXT: xxlor vs2, v2, v2
+; CHECK-BE-NEXT: xxlor vs3, v3, v3
+; CHECK-BE-NEXT: xxmtacc acc0
+; CHECK-BE-NEXT: xxmtacc acc0
+; CHECK-BE-NEXT: xxmfacc acc0
+; CHECK-BE-NEXT: stxv vs1, 16(r3)
+; CHECK-BE-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-NEXT: stxv vs3, 48(r3)
+; CHECK-BE-NEXT: stxv vs2, 32(r3)
+; CHECK-BE-NEXT: blr
+entry:
+; One xxmtacc is generated from the call to assemble.acc then one xxmtacc is
+; generated from the call to xxmtacc then one xxmfacc is generated for the store
+ %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc)
+ %1 = tail call <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1> %0)
+ store <512 x i1> %1, <512 x i1>* %ptr, align 64
+ ret void
+}
+
+; xxmfacc
+declare <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1>)
+define void @int_xxmfacc(<512 x i1>* %ptr, <16 x i8> %vc) {
+; CHECK-LABEL: int_xxmfacc:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmr v3, v2
+; CHECK-NEXT: xxlor vs0, v2, v2
+; CHECK-NEXT: xxlor vs1, v3, v3
+; CHECK-NEXT: xxlor vs2, v2, v2
+; CHECK-NEXT: xxlor vs3, v3, v3
+; CHECK-NEXT: xxmtacc acc0
+; CHECK-NEXT: xxmfacc acc0
+; CHECK-NEXT: xxmfacc acc0
+; CHECK-NEXT: stxv vs0, 48(r3)
+; CHECK-NEXT: stxv vs1, 32(r3)
+; CHECK-NEXT: stxv vs2, 16(r3)
+; CHECK-NEXT: stxv vs3, 0(r3)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: int_xxmfacc:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: vmr v3, v2
+; CHECK-BE-NEXT: xxlor vs0, v2, v2
+; CHECK-BE-NEXT: xxlor vs1, v3, v3
+; CHECK-BE-NEXT: xxlor vs2, v2, v2
+; CHECK-BE-NEXT: xxlor vs3, v3, v3
+; CHECK-BE-NEXT: xxmtacc acc0
+; CHECK-BE-NEXT: xxmfacc acc0
+; CHECK-BE-NEXT: xxmfacc acc0
+; CHECK-BE-NEXT: stxv vs1, 16(r3)
+; CHECK-BE-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-NEXT: stxv vs3, 48(r3)
+; CHECK-BE-NEXT: stxv vs2, 32(r3)
+; CHECK-BE-NEXT: blr
+entry:
+; One xxmtacc is generated from the call to assemble.acc then one xxmfacc is
+; generated from the call to xxmfacc then one xxmfacc is generated for the store
+ %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc)
+ %1 = tail call <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1> %0)
+ store <512 x i1> %1, <512 x i1>* %ptr, align 64
+ ret void
+}
+
+; xxsetaccz
+declare <512 x i1> @llvm.ppc.mma.xxsetaccz()
+define void @int_xxsetaccz(<512 x i1>* %ptr) {
+; CHECK-LABEL: int_xxsetaccz:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxsetaccz acc0
+; CHECK-NEXT: xxmfacc acc0
+; CHECK-NEXT: stxv vs0, 48(r3)
+; CHECK-NEXT: stxv vs1, 32(r3)
+; CHECK-NEXT: stxv vs2, 16(r3)
+; CHECK-NEXT: stxv vs3, 0(r3)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: int_xxsetaccz:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: xxsetaccz acc0
+; CHECK-BE-NEXT: xxmfacc acc0
+; CHECK-BE-NEXT: stxv vs1, 16(r3)
+; CHECK-BE-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-NEXT: stxv vs3, 48(r3)
+; CHECK-BE-NEXT: stxv vs2, 32(r3)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
+ store <512 x i1> %0, <512 x i1>* %ptr, align 64
+ ret void
+}
+
+; disassemble_acc
+declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>)
+define void @disass_acc(<16 x i8>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3, <16 x i8>* %ptr4) {
+; CHECK-LABEL: disass_acc:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxsetaccz acc0
+; CHECK-NEXT: xxmfacc acc0
+; CHECK-NEXT: stxv vs3, 0(r3)
+; CHECK-NEXT: stxv vs2, 0(r4)
+; CHECK-NEXT: stxv vs1, 0(r5)
+; CHECK-NEXT: stxv vs0, 0(r6)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: disass_acc:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: xxsetaccz acc0
+; CHECK-BE-NEXT: xxmfacc acc0
+; CHECK-BE-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-NEXT: stxv vs1, 0(r4)
+; CHECK-BE-NEXT: stxv vs2, 0(r5)
+; CHECK-BE-NEXT: stxv vs3, 0(r6)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
+ %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %0)
+ %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 0
+ %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 1
+ %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 2
+ %5 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 3
+ store <16 x i8> %2, <16 x i8>* %ptr1, align 16
+ store <16 x i8> %3, <16 x i8>* %ptr2, align 16
+ store <16 x i8> %4, <16 x i8>* %ptr3, align 16
+ store <16 x i8> %5, <16 x i8>* %ptr4, align 16
+ ret void
+}
+
+; disassemble_pair
+declare { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1>)
+define void @disass_pair(<256 x i1>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3) {
+; CHECK-LABEL: disass_pair:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxv vs1, 0(r3)
+; CHECK-NEXT: lxv vs0, 16(r3)
+; CHECK-NEXT: stxv vs1, 0(r4)
+; CHECK-NEXT: stxv vs0, 0(r5)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: disass_pair:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxv vs1, 16(r3)
+; CHECK-BE-NEXT: lxv vs0, 0(r3)
+; CHECK-BE-NEXT: stxv vs0, 0(r4)
+; CHECK-BE-NEXT: stxv vs1, 0(r5)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = load <256 x i1>, <256 x i1>* %ptr1, align 32
+ %1 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %0)
+ %2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0
+ %3 = extractvalue { <16 x i8>, <16 x i8> } %1, 1
+ store <16 x i8> %2, <16 x i8>* %ptr2, align 16
+ store <16 x i8> %3, <16 x i8>* %ptr3, align 16
+ ret void
+}
+
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 6a8a60d00639..4274a4c14ec7 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -247,7 +247,8 @@ enum IIT_Info {
IIT_VEC_OF_BITCASTS_TO_INT = 46,
IIT_V128 = 47,
IIT_BF16 = 48,
- IIT_STRUCT9 = 49
+ IIT_STRUCT9 = 49,
+ IIT_V256 = 50
};
static void EncodeFixedValueType(MVT::SimpleValueType VT,
@@ -385,6 +386,7 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
case 32: Sig.push_back(IIT_V32); break;
case 64: Sig.push_back(IIT_V64); break;
case 128: Sig.push_back(IIT_V128); break;
+ case 256: Sig.push_back(IIT_V256); break;
case 512: Sig.push_back(IIT_V512); break;
case 1024: Sig.push_back(IIT_V1024); break;
}
More information about the llvm-commits
mailing list