[llvm] f3202b3 - [PowerPC] Add assemble disassemble intrinsics for MMA

Ahsan Saghir via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 13 11:22:16 PDT 2020


Author: Ahsan Saghir
Date: 2020-10-13T13:21:58-05:00
New Revision: f3202b30b8e6fea838c595632641e86719ff8264

URL: https://github.com/llvm/llvm-project/commit/f3202b30b8e6fea838c595632641e86719ff8264
DIFF: https://github.com/llvm/llvm-project/commit/f3202b30b8e6fea838c595632641e86719ff8264.diff

LOG: [PowerPC] Add assemble disassemble intrinsics for MMA

This patch adds support for assemble disassemble intrinsics
for MMA.

Reviewed By: bsaleil, #powerpc

Differential Revision: https://reviews.llvm.org/D88739

Added: 
    llvm/test/CodeGen/PowerPC/mma-intrinsics.ll

Modified: 
    llvm/include/llvm/IR/Intrinsics.td
    llvm/include/llvm/IR/IntrinsicsPowerPC.td
    llvm/lib/IR/Function.cpp
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
    llvm/lib/Target/PowerPC/PPCInstrPrefix.td
    llvm/utils/TableGen/IntrinsicEmitter.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index d05b9982c487..8e8bb0b35bce 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -260,6 +260,7 @@ def llvm_v16i1_ty      : LLVMType<v16i1>;    //  16 x i1
 def llvm_v32i1_ty      : LLVMType<v32i1>;    //  32 x i1
 def llvm_v64i1_ty      : LLVMType<v64i1>;    //  64 x i1
 def llvm_v128i1_ty     : LLVMType<v128i1>;   // 128 x i1
+def llvm_v256i1_ty     : LLVMType<v256i1>;   // 256 x i1
 def llvm_v512i1_ty     : LLVMType<v512i1>;   // 512 x i1
 def llvm_v1024i1_ty    : LLVMType<v1024i1>;  //1024 x i1
 

diff  --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 7ab4ee301bb5..9f3dd637a9f8 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1364,3 +1364,32 @@ def int_ppc_setrnd : GCCBuiltin<"__builtin_setrnd">,
       Intrinsic<[llvm_double_ty], [llvm_i32_ty], []>;
 
 }
+
+let TargetPrefix = "ppc" in {
+  def int_ppc_mma_assemble_acc :
+        Intrinsic<[llvm_v512i1_ty],
+                  [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+                  [IntrNoMem]>;
+
+  def int_ppc_mma_disassemble_acc :
+        Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+                  [llvm_v512i1_ty], [IntrNoMem]>;
+
+  def int_ppc_mma_assemble_pair :
+        Intrinsic<[llvm_v256i1_ty],
+                  [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+
+  def int_ppc_mma_disassemble_pair :
+        Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty],
+                  [llvm_v256i1_ty], [IntrNoMem]>;
+
+  def int_ppc_mma_xxmtacc :
+        Intrinsic<[llvm_v512i1_ty], [llvm_v512i1_ty], [IntrNoMem]>;
+
+  def int_ppc_mma_xxmfacc :
+        Intrinsic<[llvm_v512i1_ty], [llvm_v512i1_ty], [IntrNoMem]>;
+
+  def int_ppc_mma_xxsetaccz :
+        Intrinsic<[llvm_v512i1_ty], [], [IntrNoMem]>;
+}
+

diff  --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 8d741c3125a8..e228c3e0a4eb 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -834,7 +834,8 @@ enum IIT_Info {
   IIT_VEC_OF_BITCASTS_TO_INT = 46,
   IIT_V128 = 47,
   IIT_BF16 = 48,
-  IIT_STRUCT9 = 49
+  IIT_STRUCT9 = 49,
+  IIT_V256 = 50
 };
 
 static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
@@ -928,6 +929,10 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
     OutputTable.push_back(IITDescriptor::getVector(128, IsScalableVector));
     DecodeIITType(NextElt, Infos, Info, OutputTable);
     return;
+  case IIT_V256:
+    OutputTable.push_back(IITDescriptor::getVector(256, IsScalableVector));
+    DecodeIITType(NextElt, Infos, Info, OutputTable);
+    return;
   case IIT_V512:
     OutputTable.push_back(IITDescriptor::getVector(512, IsScalableVector));
     DecodeIITType(NextElt, Infos, Info, OutputTable);

diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 2c2bc8ac3418..8c9c1649d21f 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10416,11 +10416,32 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
 
   SDLoc dl(Op);
 
-  if (IntrinsicID == Intrinsic::thread_pointer) {
+  switch (IntrinsicID) {
+  case Intrinsic::thread_pointer:
     // Reads the thread pointer register, used for __builtin_thread_pointer.
     if (Subtarget.isPPC64())
       return DAG.getRegister(PPC::X13, MVT::i64);
     return DAG.getRegister(PPC::R2, MVT::i32);
+
+  case Intrinsic::ppc_mma_disassemble_acc:
+  case Intrinsic::ppc_mma_disassemble_pair: {
+    int NumVecs = 2;
+    SDValue WideVec = Op.getOperand(1);
+    if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
+      NumVecs = 4;
+      WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
+    }
+    SmallVector<SDValue, 4> RetOps;
+    for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
+      SDValue Extract = DAG.getNode(
+          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
+          DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
+                                                     : VecNo,
+                          dl, MVT::i64));
+      RetOps.push_back(Extract);
+    }
+    return DAG.getMergeValues(RetOps, dl);
+  }
   }
 
   // If this is a lowered altivec predicate compare, CompareOpc is set to the

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index cc0779cac6dd..fd83b5b6d4b8 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -672,6 +672,7 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
   case PPC::V_SETALLONES:
   case PPC::CRSET:
   case PPC::CRUNSET:
+  case PPC::XXSETACCZ:
     return true;
   }
   return false;
@@ -1340,6 +1341,22 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
            PPC::VSSRCRegClass.contains(DestReg, SrcReg))
     Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
+  else if (Subtarget.pairedVectorMemops() &&
+           PPC::VSRpRCRegClass.contains(DestReg, SrcReg)) {
+    if (SrcReg > PPC::VSRp15)
+      SrcReg = PPC::V0 + (SrcReg - PPC::VSRp16) * 2;
+    else
+      SrcReg = PPC::VSL0 + (SrcReg - PPC::VSRp0) * 2;
+    if (DestReg > PPC::VSRp15)
+      DestReg = PPC::V0 + (DestReg - PPC::VSRp16) * 2;
+    else
+      DestReg = PPC::VSL0 + (DestReg - PPC::VSRp0) * 2;
+    BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg).
+      addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
+    BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg + 1).
+      addReg(SrcReg + 1).addReg(SrcReg + 1, getKillRegState(KillSrc));
+    return;
+  }
   else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
     Opc = PPC::CROR;
   else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index 4ca03298ba88..384cf1c08fbd 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1264,12 +1264,14 @@ multiclass ACC_NEG_UM_M42_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
 let Predicates = [MMA] in {
   def XXMFACC :
     XForm_AT3<31, 0, 177, (outs acc:$ASo), (ins acc:$AS), "xxmfacc $AS",
-              IIC_VecGeneral, []>, RegConstraint<"$ASo = $AS">,
-              NoEncode<"$ASo">;
+              IIC_VecGeneral,
+              [(set v512i1:$ASo, (int_ppc_mma_xxmfacc v512i1:$AS))]>,
+              RegConstraint<"$ASo = $AS">, NoEncode<"$ASo">;
   def XXMTACC :
     XForm_AT3<31, 1, 177, (outs acc:$AT), (ins acc:$ATi), "xxmtacc $AT",
-              IIC_VecGeneral, []>, RegConstraint<"$ATi = $AT">,
-              NoEncode<"$ATi">;
+              IIC_VecGeneral,
+              [(set v512i1:$AT, (int_ppc_mma_xxmtacc v512i1:$ATi))]>,
+              RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
   def KILL_PAIR : PPCPostRAExpPseudo<(outs vsrprc:$XTp), (ins vsrprc:$XSp),
                                       "#KILL_PAIR", []>,
                                       RegConstraint<"$XTp = $XSp">;
@@ -1280,8 +1282,8 @@ let Predicates = [MMA] in {
   // register and this copy is more expensive than calling the intrinsic again.
   let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
     def XXSETACCZ :
-      XForm_AT3<31, 3, 177, (outs acc:$AT), (ins), "xxsetaccz $AT",
-                IIC_VecGeneral, []>;
+      XForm_AT3<31, 3, 177, (outs acc:$AT), (ins), "xxsetaccz $AT", IIC_VecGeneral,
+                [(set v512i1:$AT, (int_ppc_mma_xxsetaccz))]>;
   }
   def XVI8GER4SPP :
     XX3Form_AT3_XAB6<59, 99, (outs acc:$AT), (ins acc:$ATi, vsrc:$XA, vsrc:$XB),
@@ -1369,6 +1371,11 @@ let Predicates = [MMA] in {
             (XXMTACC Concats.VecsToVecQuad)>;
   def : Pat<(v256i1 (PPCPairBuild v4i32:$vs1, v4i32:$vs0)),
             Concats.VecsToVecPair0>;
+  def : Pat<(v512i1 (int_ppc_mma_assemble_acc v16i8:$vs1, v16i8:$vs0,
+                                              v16i8:$vs3, v16i8:$vs2)),
+            (XXMTACC Concats.VecsToVecQuad)>;
+  def : Pat<(v256i1 (int_ppc_mma_assemble_pair v16i8:$vs1, v16i8:$vs0)),
+            Concats.VecsToVecPair0>;
   def : Pat<(v512i1 (PPCxxmfacc v512i1:$AS)), (XXMFACC acc:$AS)>;
   def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, (i64 0))),
             Extracts.Vec0>;

diff  --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
new file mode 100644
index 000000000000..8e288ee5837d
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
@@ -0,0 +1,250 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+
+; assemble_acc
+declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
+define void @ass_acc(<512 x i1>* %ptr, <16 x i8> %vc) {
+; CHECK-LABEL: ass_acc:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmr v3, v2
+; CHECK-NEXT:    xxlor vs0, v2, v2
+; CHECK-NEXT:    xxlor vs1, v3, v3
+; CHECK-NEXT:    xxlor vs2, v2, v2
+; CHECK-NEXT:    xxlor vs3, v3, v3
+; CHECK-NEXT:    xxmtacc acc0
+; CHECK-NEXT:    xxmfacc acc0
+; CHECK-NEXT:    stxv vs0, 48(r3)
+; CHECK-NEXT:    stxv vs1, 32(r3)
+; CHECK-NEXT:    stxv vs2, 16(r3)
+; CHECK-NEXT:    stxv vs3, 0(r3)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: ass_acc:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vmr v3, v2
+; CHECK-BE-NEXT:    xxlor vs0, v2, v2
+; CHECK-BE-NEXT:    xxlor vs1, v3, v3
+; CHECK-BE-NEXT:    xxlor vs2, v2, v2
+; CHECK-BE-NEXT:    xxlor vs3, v3, v3
+; CHECK-BE-NEXT:    xxmtacc acc0
+; CHECK-BE-NEXT:    xxmfacc acc0
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    blr
+; CHECK-O0-LABEL: ass_acc:
+; CHECK-O0:       # %bb.0: # %entry
+; CHECK-BE-O0-LABEL: ass_acc:
+; CHECK-BE-O0:       # %bb.0: # %entry
+entry:
+  %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc)
+  store <512 x i1> %0, <512 x i1>* %ptr, align 64
+  ret void
+}
+
+; assemble_pair
+declare <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8>, <16 x i8>)
+define void @ass_pair(<256 x i1>* %ptr, <16 x i8> %vc) {
+; CHECK-LABEL: ass_pair:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmr v3, v2
+; CHECK-NEXT:    stxv v2, 16(r3)
+; CHECK-NEXT:    stxv v3, 0(r3)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: ass_pair:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vmr v3, v2
+; CHECK-BE-NEXT:    stxv v2, 16(r3)
+; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> %vc, <16 x i8> %vc)
+  store <256 x i1> %0, <256 x i1>* %ptr, align 32
+  ret void
+}
+
+; xxmtacc
+declare <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1>)
+define void @int_xxmtacc(<512 x i1>* %ptr, <16 x i8> %vc) {
+; CHECK-LABEL: int_xxmtacc:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmr v3, v2
+; CHECK-NEXT:    xxlor vs0, v2, v2
+; CHECK-NEXT:    xxlor vs1, v3, v3
+; CHECK-NEXT:    xxlor vs2, v2, v2
+; CHECK-NEXT:    xxlor vs3, v3, v3
+; CHECK-NEXT:    xxmtacc acc0
+; CHECK-NEXT:    xxmtacc acc0
+; CHECK-NEXT:    xxmfacc acc0
+; CHECK-NEXT:    stxv vs0, 48(r3)
+; CHECK-NEXT:    stxv vs1, 32(r3)
+; CHECK-NEXT:    stxv vs2, 16(r3)
+; CHECK-NEXT:    stxv vs3, 0(r3)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: int_xxmtacc:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vmr v3, v2
+; CHECK-BE-NEXT:    xxlor vs0, v2, v2
+; CHECK-BE-NEXT:    xxlor vs1, v3, v3
+; CHECK-BE-NEXT:    xxlor vs2, v2, v2
+; CHECK-BE-NEXT:    xxlor vs3, v3, v3
+; CHECK-BE-NEXT:    xxmtacc acc0
+; CHECK-BE-NEXT:    xxmtacc acc0
+; CHECK-BE-NEXT:    xxmfacc acc0
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+; One xxmtacc is generated from the call to assemble.acc then one xxmtacc is
+; generated from the call to xxmtacc then one xxmfacc is generated for the store
+  %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc)
+  %1 = tail call <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1> %0)
+  store <512 x i1> %1, <512 x i1>* %ptr, align 64
+  ret void
+}
+
+; xxmfacc
+declare <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1>)
+define void @int_xxmfacc(<512 x i1>* %ptr, <16 x i8> %vc) {
+; CHECK-LABEL: int_xxmfacc:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmr v3, v2
+; CHECK-NEXT:    xxlor vs0, v2, v2
+; CHECK-NEXT:    xxlor vs1, v3, v3
+; CHECK-NEXT:    xxlor vs2, v2, v2
+; CHECK-NEXT:    xxlor vs3, v3, v3
+; CHECK-NEXT:    xxmtacc acc0
+; CHECK-NEXT:    xxmfacc acc0
+; CHECK-NEXT:    xxmfacc acc0
+; CHECK-NEXT:    stxv vs0, 48(r3)
+; CHECK-NEXT:    stxv vs1, 32(r3)
+; CHECK-NEXT:    stxv vs2, 16(r3)
+; CHECK-NEXT:    stxv vs3, 0(r3)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: int_xxmfacc:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vmr v3, v2
+; CHECK-BE-NEXT:    xxlor vs0, v2, v2
+; CHECK-BE-NEXT:    xxlor vs1, v3, v3
+; CHECK-BE-NEXT:    xxlor vs2, v2, v2
+; CHECK-BE-NEXT:    xxlor vs3, v3, v3
+; CHECK-BE-NEXT:    xxmtacc acc0
+; CHECK-BE-NEXT:    xxmfacc acc0
+; CHECK-BE-NEXT:    xxmfacc acc0
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+; One xxmtacc is generated from the call to assemble.acc then one xxmfacc is
+; generated from the call to xxmfacc then one xxmfacc is generated for the store
+  %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc)
+  %1 = tail call <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1> %0)
+  store <512 x i1> %1, <512 x i1>* %ptr, align 64
+  ret void
+}
+
+; xxsetaccz
+declare <512 x i1> @llvm.ppc.mma.xxsetaccz()
+define void @int_xxsetaccz(<512 x i1>* %ptr) {
+; CHECK-LABEL: int_xxsetaccz:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsetaccz acc0
+; CHECK-NEXT:    xxmfacc acc0
+; CHECK-NEXT:    stxv vs0, 48(r3)
+; CHECK-NEXT:    stxv vs1, 32(r3)
+; CHECK-NEXT:    stxv vs2, 16(r3)
+; CHECK-NEXT:    stxv vs3, 0(r3)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: int_xxsetaccz:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsetaccz acc0
+; CHECK-BE-NEXT:    xxmfacc acc0
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
+  store <512 x i1> %0, <512 x i1>* %ptr, align 64
+  ret void
+}
+
+; disassemble_acc
+declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>)
+define void @disass_acc(<16 x i8>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3, <16 x i8>* %ptr4) {
+; CHECK-LABEL: disass_acc:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsetaccz acc0
+; CHECK-NEXT:    xxmfacc acc0
+; CHECK-NEXT:    stxv vs3, 0(r3)
+; CHECK-NEXT:    stxv vs2, 0(r4)
+; CHECK-NEXT:    stxv vs1, 0(r5)
+; CHECK-NEXT:    stxv vs0, 0(r6)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: disass_acc:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsetaccz acc0
+; CHECK-BE-NEXT:    xxmfacc acc0
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs1, 0(r4)
+; CHECK-BE-NEXT:    stxv vs2, 0(r5)
+; CHECK-BE-NEXT:    stxv vs3, 0(r6)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
+  %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %0)
+  %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 0
+  %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 1
+  %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 2
+  %5 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 3
+  store <16 x i8> %2, <16 x i8>* %ptr1, align 16
+  store <16 x i8> %3, <16 x i8>* %ptr2, align 16
+  store <16 x i8> %4, <16 x i8>* %ptr3, align 16
+  store <16 x i8> %5, <16 x i8>* %ptr4, align 16
+  ret void
+}
+
+; disassemble_pair
+declare { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1>)
+define void @disass_pair(<256 x i1>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3) {
+; CHECK-LABEL: disass_pair:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxv vs1, 0(r3)
+; CHECK-NEXT:    lxv vs0, 16(r3)
+; CHECK-NEXT:    stxv vs1, 0(r4)
+; CHECK-NEXT:    stxv vs0, 0(r5)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: disass_pair:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r4)
+; CHECK-BE-NEXT:    stxv vs1, 0(r5)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = load <256 x i1>, <256 x i1>* %ptr1, align 32
+  %1 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %0)
+  %2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0
+  %3 = extractvalue { <16 x i8>, <16 x i8> } %1, 1
+  store <16 x i8> %2, <16 x i8>* %ptr2, align 16
+  store <16 x i8> %3, <16 x i8>* %ptr3, align 16
+  ret void
+}
+

diff  --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 6a8a60d00639..4274a4c14ec7 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -247,7 +247,8 @@ enum IIT_Info {
   IIT_VEC_OF_BITCASTS_TO_INT = 46,
   IIT_V128 = 47,
   IIT_BF16 = 48,
-  IIT_STRUCT9 = 49
+  IIT_STRUCT9 = 49,
+  IIT_V256 = 50
 };
 
 static void EncodeFixedValueType(MVT::SimpleValueType VT,
@@ -385,6 +386,7 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
     case 32: Sig.push_back(IIT_V32); break;
     case 64: Sig.push_back(IIT_V64); break;
     case 128: Sig.push_back(IIT_V128); break;
+    case 256: Sig.push_back(IIT_V256); break;
     case 512: Sig.push_back(IIT_V512); break;
     case 1024: Sig.push_back(IIT_V1024); break;
     }


        


More information about the llvm-commits mailing list