[llvm] 1ac6956 - [PowerPC] Add handling for WACC register spilling.

Stefan Pintilie via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 22 07:37:58 PST 2022


Author: Stefan Pintilie
Date: 2022-11-22T09:37:52-06:00
New Revision: 1ac6956b52c70e00314cac99732d5e3a4ff0b43f

URL: https://github.com/llvm/llvm-project/commit/1ac6956b52c70e00314cac99732d5e3a4ff0b43f
DIFF: https://github.com/llvm/llvm-project/commit/1ac6956b52c70e00314cac99732d5e3a4ff0b43f.diff

LOG: [PowerPC] Add handling for WACC register spilling.

This patch adds spilling for the new WACC registers.

In order to get the spilling test to work, the MMA instructions from Power 10
are now supported for the Future CPU, except that they all use the new WACC
registers instead of the ACC registers from Power 10.

Reviewed By: amyk, saghir

Differential Revision: https://reviews.llvm.org/D136728

Added: 
    llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll
    llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
    llvm/lib/Target/PowerPC/PPCInstrInfo.h
    llvm/lib/Target/PowerPC/PPCInstrInfo.td
    llvm/lib/Target/PowerPC/PPCInstrMMA.td
    llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
    llvm/lib/Target/PowerPC/PPCRegisterInfo.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 7d824c520150e..98db9073f06a1 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1306,7 +1306,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
     setOperationAction(ISD::STORE, MVT::v256i1, Custom);
   }
   if (Subtarget.hasMMA()) {
-    addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
+    if (Subtarget.isISAFuture())
+      addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
+    else
+      addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
     setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
     setOperationAction(ISD::STORE, MVT::v512i1, Custom);
     setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
@@ -10490,7 +10493,46 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       return DAG.getRegister(PPC::X13, MVT::i64);
     return DAG.getRegister(PPC::R2, MVT::i32);
 
-  case Intrinsic::ppc_mma_disassemble_acc:
+  case Intrinsic::ppc_mma_disassemble_acc: {
+    if (Subtarget.isISAFuture()) {
+      EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
+      SDValue WideVec = SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl,
+                                                   makeArrayRef(ReturnTypes, 2),
+                                                   Op.getOperand(1)),
+                                0);
+      SmallVector<SDValue, 4> RetOps;
+      SDValue Value = SDValue(WideVec.getNode(), 0);
+      SDValue Value2 = SDValue(WideVec.getNode(), 1);
+
+      SDValue Extract;
+      Extract = DAG.getNode(
+          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
+          Subtarget.isLittleEndian() ? Value2 : Value,
+          DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
+                          dl, getPointerTy(DAG.getDataLayout())));
+      RetOps.push_back(Extract);
+      Extract = DAG.getNode(
+          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
+          Subtarget.isLittleEndian() ? Value2 : Value,
+          DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
+                          dl, getPointerTy(DAG.getDataLayout())));
+      RetOps.push_back(Extract);
+      Extract = DAG.getNode(
+          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
+          Subtarget.isLittleEndian() ? Value : Value2,
+          DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
+                          dl, getPointerTy(DAG.getDataLayout())));
+      RetOps.push_back(Extract);
+      Extract = DAG.getNode(
+          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
+          Subtarget.isLittleEndian() ? Value : Value2,
+          DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
+                          dl, getPointerTy(DAG.getDataLayout())));
+      RetOps.push_back(Extract);
+      return DAG.getMergeValues(RetOps, dl);
+    }
+    LLVM_FALLTHROUGH;
+  }
   case Intrinsic::ppc_vsx_disassemble_pair: {
     int NumVecs = 2;
     SDValue WideVec = Op.getOperand(1);
@@ -10944,6 +10986,7 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
   SDValue StoreChain = SN->getChain();
   SDValue BasePtr = SN->getBasePtr();
   SDValue Value = SN->getValue();
+  SDValue Value2 = SN->getValue();
   EVT StoreVT = Value.getValueType();
 
   if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
@@ -10960,13 +11003,30 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
   SmallVector<SDValue, 4> Stores;
   unsigned NumVecs = 2;
   if (StoreVT == MVT::v512i1) {
-    Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
+    if (Subtarget.isISAFuture()) {
+      EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
+      MachineSDNode *ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl,
+                          makeArrayRef(ReturnTypes, 2),
+                          Op.getOperand(1));
+
+      Value = SDValue(ExtNode, 0);
+      Value2 = SDValue(ExtNode, 1);
+    } else
+      Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
     NumVecs = 4;
   }
   for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
     unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
-    SDValue Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
-                              DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
+    SDValue Elt;
+    if (Subtarget.isISAFuture()) {
+      VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
+      Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
+                        Idx > 1 ? Value2 : Value,
+                        DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
+    } else
+      Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
+                        DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
+
     SDValue Store =
         DAG.getStore(StoreChain, dl, Elt, BasePtr,
                      SN->getPointerInfo().getWithOffset(Idx * 16),

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 8e3a6fc3d2927..9cbf28454ff57 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1116,6 +1116,7 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(
   case PPC::CRSET:
   case PPC::CRUNSET:
   case PPC::XXSETACCZ:
+  case PPC::XXSETACCZW:
     return true;
   }
   return false;
@@ -1897,6 +1898,10 @@ unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const {
     assert(Subtarget.pairedVectorMemops() &&
            "Register unexpected when paired memops are disabled.");
     OpcodeIndex = SOK_UAccumulatorSpill;
+  } else if (PPC::WACCRCRegClass.hasSubClassEq(RC)) {
+    assert(Subtarget.pairedVectorMemops() &&
+           "Register unexpected when paired memops are disabled.");
+    OpcodeIndex = SOK_WAccumulatorSpill;
   } else if (PPC::VSRpRCRegClass.hasSubClassEq(RC)) {
     assert(Subtarget.pairedVectorMemops() &&
            "Register unexpected when paired memops are disabled.");
@@ -3442,7 +3447,9 @@ unsigned PPCInstrInfo::getSpillTarget() const {
   // With P10, we may need to spill paired vector registers or accumulator
   // registers. MMA implies paired vectors, so we can just check that.
   bool IsP10Variant = Subtarget.isISA3_1() || Subtarget.pairedVectorMemops();
-  return IsP10Variant ? 2 : Subtarget.hasP9Vector() ? 1 : 0;
+  return Subtarget.isISAFuture() ? 3 : IsP10Variant ?
+                                   2 : Subtarget.hasP9Vector() ?
+                                   1 : 0;
 }
 
 ArrayRef<unsigned> PPCInstrInfo::getStoreOpcodesForSpillArray() const {

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 3bd4d0ed204b4..ab2abb93ec098 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -130,6 +130,7 @@ enum SpillOpcodeKey {
   SOK_PairedVecSpill,
   SOK_AccumulatorSpill,
   SOK_UAccumulatorSpill,
+  SOK_WAccumulatorSpill,
   SOK_SPESpill,
   SOK_PairedG8Spill,
   SOK_LastOpcodeSpill // This must be last on the enum.
@@ -141,7 +142,7 @@ enum SpillOpcodeKey {
   {                                                                            \
     PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR,                    \
         PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXVD2X, PPC::LXSDX, PPC::LXSSPX,    \
-        PPC::SPILLTOVSR_LD, NoInstr, NoInstr, NoInstr, PPC::EVLDD,             \
+        PPC::SPILLTOVSR_LD, NoInstr, NoInstr, NoInstr, NoInstr, PPC::EVLDD,    \
         PPC::RESTORE_QUADWORD                                                  \
   }
 
@@ -150,7 +151,7 @@ enum SpillOpcodeKey {
     PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR,                    \
         PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64,                \
         PPC::DFLOADf32, PPC::SPILLTOVSR_LD, NoInstr, NoInstr, NoInstr,         \
-        NoInstr, PPC::RESTORE_QUADWORD                                         \
+        NoInstr, NoInstr, PPC::RESTORE_QUADWORD                                \
   }
 
 #define Pwr10LoadOpcodes                                                       \
@@ -158,14 +159,22 @@ enum SpillOpcodeKey {
     PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR,                    \
         PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64,                \
         PPC::DFLOADf32, PPC::SPILLTOVSR_LD, PPC::LXVP, PPC::RESTORE_ACC,       \
-        PPC::RESTORE_UACC, NoInstr, PPC::RESTORE_QUADWORD                      \
+        PPC::RESTORE_UACC, NoInstr, NoInstr, PPC::RESTORE_QUADWORD             \
+  }
+
+#define FutureLoadOpcodes                                                      \
+  {                                                                            \
+    PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR,                    \
+        PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64,                \
+        PPC::DFLOADf32, PPC::SPILLTOVSR_LD, PPC::LXVP, PPC::RESTORE_ACC,       \
+        PPC::RESTORE_UACC, PPC::RESTORE_WACC, NoInstr, PPC::RESTORE_QUADWORD   \
   }
 
 #define Pwr8StoreOpcodes                                                       \
   {                                                                            \
     PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
         PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX,                    \
-        PPC::SPILLTOVSR_ST, NoInstr, NoInstr, NoInstr, PPC::EVSTDD,            \
+        PPC::SPILLTOVSR_ST, NoInstr, NoInstr, NoInstr, NoInstr, PPC::EVSTDD,   \
         PPC::SPILL_QUADWORD                                                    \
   }
 
@@ -173,7 +182,7 @@ enum SpillOpcodeKey {
   {                                                                            \
     PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
         PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32,                \
-        PPC::SPILLTOVSR_ST, NoInstr, NoInstr, NoInstr, NoInstr,                \
+        PPC::SPILLTOVSR_ST, NoInstr, NoInstr, NoInstr, NoInstr, NoInstr,       \
         PPC::SPILL_QUADWORD                                                    \
   }
 
@@ -182,22 +191,30 @@ enum SpillOpcodeKey {
     PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
         PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32,                \
         PPC::SPILLTOVSR_ST, PPC::STXVP, PPC::SPILL_ACC, PPC::SPILL_UACC,       \
-        NoInstr, PPC::SPILL_QUADWORD                                           \
+        NoInstr, NoInstr, PPC::SPILL_QUADWORD                                  \
+  }
+
+#define FutureStoreOpcodes                                                     \
+  {                                                                            \
+    PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
+        PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32,                \
+        PPC::SPILLTOVSR_ST, PPC::STXVP, PPC::SPILL_ACC, PPC::SPILL_UACC,       \
+        PPC::SPILL_WACC, NoInstr, PPC::SPILL_QUADWORD                          \
   }
 
 // Initialize arrays for load and store spill opcodes on supported subtargets.
 #define StoreOpcodesForSpill                                                   \
-  { Pwr8StoreOpcodes, Pwr9StoreOpcodes, Pwr10StoreOpcodes }
+  { Pwr8StoreOpcodes, Pwr9StoreOpcodes, Pwr10StoreOpcodes, FutureStoreOpcodes }
 #define LoadOpcodesForSpill                                                    \
-  { Pwr8LoadOpcodes, Pwr9LoadOpcodes, Pwr10LoadOpcodes }
+  { Pwr8LoadOpcodes, Pwr9LoadOpcodes, Pwr10LoadOpcodes, FutureLoadOpcodes }
 
 class PPCSubtarget;
 class PPCInstrInfo : public PPCGenInstrInfo {
   PPCSubtarget &Subtarget;
   const PPCRegisterInfo RI;
-  const unsigned StoreSpillOpcodesArray[3][SOK_LastOpcodeSpill] =
+  const unsigned StoreSpillOpcodesArray[4][SOK_LastOpcodeSpill] =
       StoreOpcodesForSpill;
-  const unsigned LoadSpillOpcodesArray[3][SOK_LastOpcodeSpill] =
+  const unsigned LoadSpillOpcodesArray[4][SOK_LastOpcodeSpill] =
       LoadOpcodesForSpill;
 
   void StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill,

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 8c8891b4b05cd..45a69e2865326 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -711,6 +711,7 @@ def ModernAs: Predicate<"!Subtarget->isAIXABI() || Subtarget->HasModernAIXAs">,
 def IsAIX : Predicate<"Subtarget->isAIXABI()">;
 def NotAIX : Predicate<"!Subtarget->isAIXABI()">;
 def IsISAFuture : Predicate<"Subtarget->isISAFuture()">;
+def IsNotISAFuture : Predicate<"!Subtarget->isISAFuture()">;
 
 //===----------------------------------------------------------------------===//
 // PowerPC Multiclass Definitions.

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrMMA.td b/llvm/lib/Target/PowerPC/PPCInstrMMA.td
index a7e85cda781f5..ad2a294c68d42 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrMMA.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrMMA.td
@@ -14,7 +14,7 @@ def MMA : Predicate<"Subtarget->hasMMA()">;
 // is even/odd.
 multiclass ACC_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
                        string asmstr> {
-  let Predicates = [MMA] in {
+  let Predicates = [MMA, IsNotISAFuture] in {
   def NAME :
     XX3Form_AT3_XAB6<opcode, !or(xo, 0x01), (outs acc:$AT), IOL,
                      !strconcat(asmbase#" ", asmstr), IIC_VecFP, []>,
@@ -24,6 +24,16 @@ multiclass ACC_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
                      !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>,
     RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
   }
+  let Predicates = [MMA, IsISAFuture], isCodeGenOnly = 1 in {
+  def NAME#W :
+    XX3Form_AT3_XAB6<opcode, !or(xo, 0x01), (outs wacc:$AT), IOL,
+                     !strconcat(asmbase#" ", asmstr), IIC_VecFP, []>,
+    RegConstraint<"@earlyclobber $AT">;
+  def WPP :
+    XX3Form_AT3_XAB6<opcode, xo, (outs wacc:$AT), !con((ins wacc:$ATi), IOL),
+                     !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>,
+    RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  }
 }
 
 // Defines 4 instructions, masked/unmasked with masks 8, 4, 4 bits.
@@ -31,7 +41,7 @@ multiclass ACC_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
 multiclass ACC_UM_M844_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
                             string asmstr> {
   defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
-  let Predicates = [MMA, PrefixInstrs] in {
+  let Predicates = [MMA, PrefixInstrs, IsNotISAFuture] in {
   def PM#NAME :
     MMIRR_XX3Form_XY4P8_XAB6<
       opcode, !or(xo, 0x01), (outs acc:$AT),
@@ -48,6 +58,23 @@ multiclass ACC_UM_M844_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
       IIC_VecFP, []>,
     RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
   }
+  let Predicates = [MMA, PrefixInstrs, IsISAFuture], isCodeGenOnly = 1 in {
+  def PM#NAME#W :
+    MMIRR_XX3Form_XY4P8_XAB6<
+      opcode, !or(xo, 0x01), (outs wacc:$AT),
+      !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK)),
+      !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
+      IIC_VecFP, []>,
+    RegConstraint<"@earlyclobber $AT">;
+  def PM#NAME#WPP :
+    MMIRR_XX3Form_XY4P8_XAB6<
+      opcode, xo, (outs wacc:$AT),
+      !con((ins wacc:$ATi),
+           !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK))),
+      !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
+      IIC_VecFP, []>,
+    RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  }
 }
 
 // Defines 4 instructions, masked/unmasked with masks 4, 4, 4 bits.
@@ -55,7 +82,7 @@ multiclass ACC_UM_M844_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
 multiclass ACC_UM_M444_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
                             string asmstr> {
   defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
-  let Predicates = [MMA, PrefixInstrs] in {
+  let Predicates = [MMA, PrefixInstrs, IsNotISAFuture] in {
   def PM#NAME :
     MMIRR_XX3Form_XYP4_XAB6<
       opcode, !or(xo, 0x01), (outs acc:$AT),
@@ -72,6 +99,23 @@ multiclass ACC_UM_M444_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
       IIC_VecFP, []>,
     RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
   }
+  let Predicates = [MMA, PrefixInstrs, IsISAFuture], isCodeGenOnly = 1 in {
+  def PM#NAME#W :
+    MMIRR_XX3Form_XYP4_XAB6<
+      opcode, !or(xo, 0x01), (outs wacc:$AT),
+      !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK)),
+      !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
+      IIC_VecFP, []>,
+    RegConstraint<"@earlyclobber $AT">;
+  def PM#NAME#WPP :
+    MMIRR_XX3Form_XYP4_XAB6<
+      opcode, xo, (outs wacc:$AT),
+      !con((ins wacc:$ATi),
+           !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK))),
+      !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
+      IIC_VecFP, []>,
+    RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  }
 }
 
 // Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits.
@@ -79,7 +123,7 @@ multiclass ACC_UM_M444_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
 multiclass ACC_UM_M244_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
                             string asmstr> {
   defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
-  let Predicates = [MMA, PrefixInstrs] in {
+  let Predicates = [MMA, PrefixInstrs, IsNotISAFuture] in {
   def PM#NAME :
     MMIRR_XX3Form_XY4P2_XAB6<
       opcode, !or(xo, 0x01), (outs acc:$AT),
@@ -95,13 +139,29 @@ multiclass ACC_UM_M244_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
       IIC_VecFP, []>,
     RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
   }
+  let Predicates = [MMA, PrefixInstrs, IsISAFuture], isCodeGenOnly = 1 in {
+  def PM#NAME#W :
+    MMIRR_XX3Form_XY4P2_XAB6<
+      opcode, !or(xo, 0x01), (outs wacc:$AT),
+      !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)),
+      !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
+      IIC_VecFP, []>,
+    RegConstraint<"@earlyclobber $AT">;
+  def PM#NAME#WPP :
+    MMIRR_XX3Form_XY4P2_XAB6<
+      opcode, xo, (outs wacc:$AT),
+      !con((ins wacc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+      !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
+      IIC_VecFP, []>,
+    RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  }
 }
 
 // Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits.
 // Upper nibble of XO field for acc/non-acc version is 0x4/0x6.
 multiclass ACC_UM_M244_XO46<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
                             string asmstr> {
-  let Predicates = [MMA] in {
+  let Predicates = [MMA, IsNotISAFuture] in {
   def NAME :
     XX3Form_AT3_XAB6<opcode, xo, (outs acc:$AT), IOL,
                      !strconcat(asmbase#" ", asmstr), IIC_VecFP, []>,
@@ -112,7 +172,7 @@ multiclass ACC_UM_M244_XO46<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
       !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>,
     RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
   }
-  let Predicates = [MMA, PrefixInstrs] in {
+  let Predicates = [MMA, PrefixInstrs, IsNotISAFuture] in {
   def PM#NAME :
     MMIRR_XX3Form_XY4P2_XAB6<
       opcode, xo, (outs acc:$AT),
@@ -129,6 +189,34 @@ multiclass ACC_UM_M244_XO46<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
       IIC_VecFP, []>,
     RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
   }
+  let Predicates = [MMA, IsISAFuture], isCodeGenOnly = 1 in {
+  def NAME#W :
+    XX3Form_AT3_XAB6<opcode, xo, (outs wacc:$AT), IOL,
+                     !strconcat(asmbase#" ", asmstr), IIC_VecFP, []>,
+    RegConstraint<"@earlyclobber $AT">;
+  def WPP :
+    XX3Form_AT3_XAB6<
+      opcode, !or(xo, 0x20), (outs wacc:$AT), !con((ins wacc:$ATi), IOL),
+      !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>,
+    RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  }
+  let Predicates = [MMA, PrefixInstrs, IsISAFuture], isCodeGenOnly = 1 in {
+  def PM#NAME#W :
+    MMIRR_XX3Form_XY4P2_XAB6<
+      opcode, xo, (outs wacc:$AT),
+      !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)),
+      !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
+      IIC_VecFP, []>,
+    RegConstraint<"@earlyclobber $AT">;
+  def PM#NAME#WPP : // $AT is tied to $ATi below, so both must be wacc.
+    MMIRR_XX3Form_XY4P2_XAB6<
+      opcode, !or(xo, 0x20), (outs wacc:$AT),
+      !con((ins wacc:$ATi),
+           !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+      !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
+      IIC_VecFP, []>,
+    RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  }
 }
 
 // Defines 10 instructions, operand negating, unmasked, masked with 2, 4, 4
@@ -136,7 +224,7 @@ multiclass ACC_UM_M244_XO46<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
 multiclass ACC_NEG_UM_M244_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
                                   string asmbase, string asmstr> {
   defm NAME : ACC_UM_M244_XOEO<opcode, xo, IOL, asmbase, asmstr>;
-  let Predicates = [MMA] in {
+  let Predicates = [MMA, IsNotISAFuture] in {
   def PN : XX3Form_AT3_XAB6<
              opcode, !or(xo, 0x80), (outs acc:$AT), !con((ins acc:$ATi), IOL),
              !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>,
@@ -150,7 +238,21 @@ multiclass ACC_NEG_UM_M244_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
              !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>,
            RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
   }
-  let Predicates = [MMA, PrefixInstrs] in {
+  let Predicates = [MMA, IsISAFuture], isCodeGenOnly = 1 in {
+  def WPN : XX3Form_AT3_XAB6<
+              opcode, !or(xo, 0x80), (outs wacc:$AT), !con((ins wacc:$ATi), IOL),
+              !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>,
+           RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  def WNP : XX3Form_AT3_XAB6<
+              opcode, !or(xo, 0x40), (outs wacc:$AT), !con((ins wacc:$ATi), IOL),
+              !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>,
+           RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  def WNN : XX3Form_AT3_XAB6<
+              opcode, !or(xo, 0xC0), (outs wacc:$AT), !con((ins wacc:$ATi), IOL),
+              !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>,
+           RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  }
+  let Predicates = [MMA, PrefixInstrs, IsNotISAFuture] in {
   def PM#NAME#PN :
     MMIRR_XX3Form_XY4P2_XAB6<
       opcode, !or(xo, 0x80), (outs acc:$AT),
@@ -173,6 +275,29 @@ multiclass ACC_NEG_UM_M244_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
       IIC_VecFP, []>,
     RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
   }
+  let Predicates = [MMA, PrefixInstrs, IsISAFuture], isCodeGenOnly = 1 in {
+  def PM#NAME#WPN :
+    MMIRR_XX3Form_XY4P2_XAB6<
+      opcode, !or(xo, 0x80), (outs wacc:$AT),
+      !con((ins wacc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+      !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK, $PMSK"),
+      IIC_VecFP, []>,
+    RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  def PM#NAME#WNP :
+    MMIRR_XX3Form_XY4P2_XAB6<
+      opcode, !or(xo, 0x40), (outs wacc:$AT),
+      !con((ins wacc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+      !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK, $PMSK"),
+      IIC_VecFP, []>,
+    RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  def PM#NAME#WNN :
+    MMIRR_XX3Form_XY4P2_XAB6<
+      opcode, !or(xo, 0xC0), (outs wacc:$AT),
+      !con((ins wacc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+      !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK, $PMSK"),
+      IIC_VecFP, []>,
+    RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  }
 }
 
 // Defines 5 instructions, unmasked, operand negating.
@@ -180,7 +305,7 @@ multiclass ACC_NEG_UM_M244_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
 multiclass ACC_NEG_UM_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
                              string asmbase, string asmstr> {
   defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
-  let Predicates = [MMA] in {
+  let Predicates = [MMA, IsNotISAFuture] in {
   def PN : XX3Form_AT3_XAB6<opcode, !or(xo, 0x80), (outs acc:$AT),
                             !con((ins acc:$ATi), IOL),
                             !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>,
@@ -194,6 +319,20 @@ multiclass ACC_NEG_UM_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
                             !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>,
            RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
   }
+  let Predicates = [MMA, IsISAFuture], isCodeGenOnly = 1 in {
+  def WPN : XX3Form_AT3_XAB6<opcode, !or(xo, 0x80), (outs wacc:$AT),
+                            !con((ins wacc:$ATi), IOL),
+                            !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>,
+           RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  def WNP : XX3Form_AT3_XAB6<opcode, !or(xo, 0x40), (outs wacc:$AT),
+                            !con((ins wacc:$ATi), IOL),
+                            !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>,
+           RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  def WNN : XX3Form_AT3_XAB6<opcode, !or(xo, 0xC0), (outs wacc:$AT),
+                            !con((ins wacc:$ATi), IOL),
+                            !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>,
+           RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  }
 }
 
 // Defines 10 instructions, operand negating, unmasked, masked with 4, 4 bits.
@@ -201,7 +340,7 @@ multiclass ACC_NEG_UM_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
 multiclass ACC_NEG_UM_M44_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
                                  string asmbase, string asmstr> {
   defm NAME : ACC_NEG_UM_XOM84C<opcode, xo, IOL, asmbase, asmstr>;
-  let Predicates = [MMA, PrefixInstrs] in {
+  let Predicates = [MMA, PrefixInstrs, IsNotISAFuture] in {
   def PM#NAME :
     MMIRR_XX3Form_XY4_XAB6<
       opcode, !or(xo, 0x01), (outs acc:$AT),
@@ -238,6 +377,43 @@ multiclass ACC_NEG_UM_M44_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
       IIC_VecFP, []>,
     RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
   }
+  let Predicates = [MMA, PrefixInstrs, IsISAFuture], isCodeGenOnly = 1 in {
+  def PM#NAME#W :
+    MMIRR_XX3Form_XY4_XAB6<
+      opcode, !or(xo, 0x01), (outs wacc:$AT),
+      !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK)),
+      !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"),
+      IIC_VecFP, []>,
+    RegConstraint<"@earlyclobber $AT">;
+  def PM#NAME#WPP :
+    MMIRR_XX3Form_XY4_XAB6<
+      opcode, xo, (outs wacc:$AT),
+      !con((ins wacc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
+      !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"),
+      IIC_VecFP, []>,
+    RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  def PM#NAME#WPN :
+    MMIRR_XX3Form_XY4_XAB6<
+      opcode, !or(xo, 0x80), (outs wacc:$AT),
+      !con((ins wacc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
+      !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"),
+      IIC_VecFP, []>,
+    RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  def PM#NAME#WNP :
+    MMIRR_XX3Form_XY4_XAB6<
+      opcode, !or(xo, 0x40), (outs wacc:$AT),
+      !con((ins wacc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
+      !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"),
+      IIC_VecFP, []>,
+    RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  def PM#NAME#WNN :
+    MMIRR_XX3Form_XY4_XAB6<
+      opcode, !or(xo, 0xC0), (outs wacc:$AT),
+      !con((ins wacc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
+      !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"),
+      IIC_VecFP, []>,
+    RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  }
 }
 
 // Defines 10 instructions, operand negating, unmasked, masked with 4, 2 bits.
@@ -245,7 +421,7 @@ multiclass ACC_NEG_UM_M44_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
 multiclass ACC_NEG_UM_M42_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
                                  string asmbase, string asmstr> {
   defm NAME : ACC_NEG_UM_XOM84C<opcode, xo, IOL, asmbase, asmstr>;
-  let Predicates = [MMA, PrefixInstrs] in {
+  let Predicates = [MMA, PrefixInstrs, IsNotISAFuture] in {
   def PM#NAME :
     MMIRR_XX3Form_X4Y2_XAB6<
       opcode, !or(xo, 0x01), (outs acc:$AT),
@@ -282,12 +458,49 @@ multiclass ACC_NEG_UM_M42_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
       IIC_VecFP, []>,
     RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
   }
+  let Predicates = [MMA, PrefixInstrs, IsISAFuture], isCodeGenOnly = 1 in {
+  def PM#NAME#W :
+    MMIRR_XX3Form_X4Y2_XAB6<
+      opcode, !or(xo, 0x01), (outs wacc:$AT),
+      !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK)),
+      !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"),
+      IIC_VecFP, []>,
+    RegConstraint<"@earlyclobber $AT">;
+  def PM#NAME#WPP :
+    MMIRR_XX3Form_X4Y2_XAB6<
+      opcode, xo, (outs wacc:$AT),
+      !con((ins wacc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
+      !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"),
+      IIC_VecFP, []>,
+    RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  def PM#NAME#WPN :
+    MMIRR_XX3Form_X4Y2_XAB6<
+      opcode, !or(xo, 0x80), (outs wacc:$AT),
+      !con((ins wacc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
+      !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"),
+      IIC_VecFP, []>,
+    RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  def PM#NAME#WNP :
+    MMIRR_XX3Form_X4Y2_XAB6<
+      opcode, !or(xo, 0x40), (outs wacc:$AT),
+      !con((ins wacc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
+      !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"),
+      IIC_VecFP, []>,
+    RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  def PM#NAME#WNN :
+    MMIRR_XX3Form_X4Y2_XAB6<
+      opcode, !or(xo, 0xC0), (outs wacc:$AT),
+      !con((ins wacc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
+      !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"),
+      IIC_VecFP, []>,
+    RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+  }
 }
 
 // End of class definitions.
 //-----------------------------------------------------------------------------
 
-let Predicates = [MMA] in {
+let Predicates = [MMA, IsNotISAFuture] in {
   def XXMFACC :
     XForm_AT3<31, 0, 177, (outs acc:$ASo), (ins acc:$AS), "xxmfacc $AS",
               IIC_VecGeneral,
@@ -329,7 +542,45 @@ let Predicates = [MMA] in {
   }
 }
 
-let Predicates = [MMA, PrefixInstrs] in {
+let Predicates = [MMA, IsISAFuture], isCodeGenOnly = 1 in {
+  // XXMFACCW and XXMTACCW deliberately have no selection patterns on Future.
+  // There the wacc registers no longer overlap the vsr registers, so using
+  // them would require the register allocator to associate four vsr
+  // registers with one wacc register.
+  // Moves between vsr pairs and wacc registers instead use the
+  // DMXXEXTFDMR512 / DMXXINSTFDMR512 instructions.
+  def XXMFACCW :
+    XForm_AT3<31, 0, 177, (outs wacc:$ASo), (ins wacc:$AS), "xxmfacc $AS",
+              IIC_VecGeneral, []>,
+              RegConstraint<"$ASo = $AS">, NoEncode<"$ASo">;
+  def XXMTACCW :
+    XForm_AT3<31, 1, 177, (outs wacc:$AT), (ins wacc:$ATi), "xxmtacc $AT",
+              IIC_VecGeneral, []>,
+              RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+
+  let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
+    def XXSETACCZW : // Takes no inputs; selected for int_ppc_mma_xxsetaccz.
+      XForm_AT3<31, 3, 177, (outs wacc:$AT), (ins), "xxsetaccz $AT",
+                IIC_VecGeneral, [(set v512i1:$AT, (int_ppc_mma_xxsetaccz))]>;
+  }
+
+  def XVI8GER4WSPP : // Accumulating form: $ATi is tied to the result $AT.
+    XX3Form_AT3_XAB6<59, 99, (outs wacc:$AT),
+                     (ins wacc:$ATi, vsrc:$XA, vsrc:$XB),
+                     "xvi8ger4spp $AT, $XA, $XB", IIC_VecGeneral, []>,
+                     RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+
+  let mayStore = 1 in { // Expanded by PPCRegisterInfo::lowerWACCSpilling.
+    def SPILL_WACC: PPCEmitTimePseudo<(outs), (ins wacc:$AT, memrix16:$dst),
+                                      "#SPILL_WACC", []>;
+  }
+  let mayLoad = 1, hasSideEffects = 0 in { // See lowerWACCRestore.
+    def RESTORE_WACC: PPCEmitTimePseudo<(outs wacc:$AT), (ins memrix16:$src),
+                                        "#RESTORE_WACC", []>;
+  }
+}
+
+let Predicates = [MMA, PrefixInstrs, IsNotISAFuture] in {
   def PMXVI8GER4SPP :
     MMIRR_XX3Form_XYP4_XAB6<59, 99, (outs acc:$AT),
                             (ins acc:$ATi, vsrc:$XA,vsrc:$XB, u4imm:$XMSK,
@@ -339,6 +590,16 @@ let Predicates = [MMA, PrefixInstrs] in {
     RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
 }
 
+let Predicates = [MMA, PrefixInstrs, IsISAFuture], isCodeGenOnly = 1 in {
+  def PMXVI8GER4WSPP : // wacc counterpart of PMXVI8GER4SPP (same encoding).
+    MMIRR_XX3Form_XYP4_XAB6<59, 99, (outs wacc:$AT),
+                            (ins wacc:$ATi, vsrc:$XA,vsrc:$XB, u4imm:$XMSK,
+                             u4imm:$YMSK, u4imm:$PMSK),
+                            "pmxvi8ger4spp $AT, $XA, $XB, $XMSK, $YMSK, $PMSK",
+                            IIC_VecGeneral, []>,
+    RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+}
+
 // MMA accumulating/non-accumulating instructions.
 //------------------------------------------------------------------------------
 
@@ -380,7 +641,7 @@ defm XVF64GER : ACC_NEG_UM_M42_XOM84C<59, 58, (ins vsrpevenrc:$XA, vsrc:$XB),
 //------------------------------------------------------------------------------
 
 // MMA Intrinsics
-let Predicates = [MMA] in {
+let Predicates = [MMA, IsNotISAFuture] in {
   def : Pat<(v512i1 (int_ppc_mma_xvi4ger8 v16i8:$XA, v16i8:$XB)),
             (XVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC)>;
   def : Pat<(v512i1 (int_ppc_mma_xvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
@@ -395,7 +656,26 @@ let Predicates = [MMA] in {
             (XVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC)>;
   def : Pat<(v512i1 (int_ppc_mma_xvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
             (XVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+}
 
+let Predicates = [MMA, IsISAFuture] in { // Future: select the wacc (W) forms.
+  def : Pat<(v512i1 (int_ppc_mma_xvi4ger8 v16i8:$XA, v16i8:$XB)),
+            (XVI4GER8W RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+            (XVI4GER8WPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+
+  def : Pat<(v512i1 (int_ppc_mma_xvi8ger4 v16i8:$XA, v16i8:$XB)),
+            (XVI8GER4W RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+            (XVI8GER4WPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+
+  def : Pat<(v512i1 (int_ppc_mma_xvi16ger2s v16i8:$XA, v16i8:$XB)),
+            (XVI16GER2SW RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+            (XVI16GER2SWPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+}
+
+let Predicates = [MMA, IsNotISAFuture] in {
   def : Pat<(v512i1 (int_ppc_mma_xvf16ger2 v16i8:$XA, v16i8:$XB)),
             (XVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
   def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
@@ -406,7 +686,22 @@ let Predicates = [MMA] in {
             (XVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
   def : Pat<(v512i1 (int_ppc_mma_xvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
             (XVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+}
 
+let Predicates = [MMA, IsISAFuture] in { // Future: select the wacc (W) forms.
+  def : Pat<(v512i1 (int_ppc_mma_xvf16ger2 v16i8:$XA, v16i8:$XB)),
+            (XVF16GER2W RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+            (XVF16GER2WPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+            (XVF16GER2WPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+            (XVF16GER2WNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+            (XVF16GER2WNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+}
+
+let Predicates = [MMA, IsNotISAFuture] in {
   def : Pat<(v512i1 (int_ppc_mma_xvf32ger v16i8:$XA, v16i8:$XB)),
             (XVF32GER RCCp.AToVSRC, RCCp.BToVSRC)>;
   def : Pat<(v512i1 (int_ppc_mma_xvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
@@ -446,8 +741,48 @@ let Predicates = [MMA] in {
             (XVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
 }
 
+let Predicates = [MMA, IsISAFuture] in { // Future: v512i1 lives in wacc.
+  def : Pat<(v512i1 (int_ppc_mma_xvf32ger v16i8:$XA, v16i8:$XB)),
+            (XVF32GERW RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+            (XVF32GERWPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+            (XVF32GERWPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+            (XVF32GERWNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+            (XVF32GERWNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvf64ger v256i1:$XA, v16i8:$XB)),
+            (XVF64GERW $XA, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
+            (XVF64GERWPP $ATi, $XA, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
+            (XVF64GERWPN $ATi, $XA, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
+            (XVF64GERWNP $ATi, $XA, RCCp.BToVSRC)>; // W form, like its siblings.
+  def : Pat<(v512i1 (int_ppc_mma_xvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
+            (XVF64GERWNN $ATi, $XA, RCCp.BToVSRC)>;
+
+  def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2 v16i8:$XA, v16i8:$XB)),
+            (XVBF16GER2W RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+            (XVBF16GER2WPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+            (XVBF16GER2WPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+            (XVBF16GER2WNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+            (XVBF16GER2WNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvi16ger2 v16i8:$XA, v16i8:$XB)),
+            (XVI16GER2W RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+            (XVI16GER2WPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+  def : Pat<(v512i1 (int_ppc_mma_xvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+            (XVI8GER4WSPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+}
 // MMA Intrinsics
-let Predicates = [MMA, PrefixInstrs] in {
+
+let Predicates = [MMA, PrefixInstrs, IsNotISAFuture] in {
   def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
                                             Msk4Imm:$YMSK, Msk8Imm:$PMSK)),
             (PMXVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
@@ -583,6 +918,142 @@ let Predicates = [MMA, PrefixInstrs] in {
                            Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
 }
 
+let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
+  def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+                                            Msk4Imm:$YMSK, Msk8Imm:$PMSK)),
+            (PMXVI4GER8W RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                        Msk4Imm:$YMSK, Msk8Imm:$PMSK)>;
+  def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+                                              Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+                                              Msk8Imm:$PMSK)),
+            (PMXVI4GER8WPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                          Msk4Imm:$YMSK, Msk8Imm:$PMSK)>;
+
+  def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+                                            Msk4Imm:$YMSK, Msk4Imm:$PMSK)),
+            (PMXVI8GER4W RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                        Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
+  def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+                                              Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+                                              Msk4Imm:$PMSK)),
+            (PMXVI8GER4WPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                          Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
+
+  def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2s v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+                                              Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
+            (PMXVI16GER2SW RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                          Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+  def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+                                                Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+                                                Msk2Imm:$PMSK)),
+            (PMXVI16GER2SWPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                            Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+  def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+                                             Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
+            (PMXVF16GER2W RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                         Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+  def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+                                               Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+                                               Msk2Imm:$PMSK)),
+            (PMXVF16GER2WPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                           Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+  def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+                                               Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+                                               Msk2Imm:$PMSK)),
+            (PMXVF16GER2WPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                           Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+  def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+                                               Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+                                               Msk2Imm:$PMSK)),
+            (PMXVF16GER2WNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                           Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+  def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+                                               Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+                                               Msk2Imm:$PMSK)),
+            (PMXVF16GER2WNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                           Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+
+  def : Pat<(v512i1 (int_ppc_mma_pmxvf32ger v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+                                            Msk4Imm:$YMSK)),
+            (PMXVF32GERW RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                        Msk4Imm:$YMSK)>;
+  def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+                                              Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
+            (PMXVF32GERWPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                          Msk4Imm:$YMSK)>;
+  def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+                                              Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
+            (PMXVF32GERWPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                          Msk4Imm:$YMSK)>;
+  def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+                                              Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
+            (PMXVF32GERWNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                          Msk4Imm:$YMSK)>;
+  def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+                                              Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
+            (PMXVF32GERWNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                          Msk4Imm:$YMSK)>;
+
+  def : Pat<(v512i1 (int_ppc_mma_pmxvf64ger v256i1:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+                                            Msk2Imm:$YMSK)),
+            (PMXVF64GERW $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, Msk2Imm:$YMSK)>;
+  def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB,
+                                              Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
+            (PMXVF64GERWPP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                          Msk2Imm:$YMSK)>;
+  def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB,
+                                              Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
+            (PMXVF64GERWPN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                          Msk2Imm:$YMSK)>;
+  def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB,
+                                              Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
+            (PMXVF64GERWNP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                          Msk2Imm:$YMSK)>;
+  def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB,
+                                              Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
+            (PMXVF64GERWNN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                          Msk2Imm:$YMSK)>;
+
+  def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+                                              Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
+            (PMXVBF16GER2W RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                          Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+  def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+                                                Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+                                                Msk2Imm:$PMSK)),
+            (PMXVBF16GER2WPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                            Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+  def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+                                                Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+                                                Msk2Imm:$PMSK)),
+            (PMXVBF16GER2WPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                            Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+  def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+                                                Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+                                                Msk2Imm:$PMSK)),
+            (PMXVBF16GER2WNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                            Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+  def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+                                                Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+                                                Msk2Imm:$PMSK)),
+            (PMXVBF16GER2WNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                            Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+  def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+                                             Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
+            (PMXVI16GER2W RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                         Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+  def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+                                               Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+                                               Msk4Imm:$PMSK)), // u4imm operand
+            (PMXVI8GER4WSPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                           Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
+  def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+                                               Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+                                               Msk2Imm:$PMSK)),
+            (PMXVI16GER2WPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+                           Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+}
+
 def ConcatsMMA {
   dag VecsToVecPair0 =
     (v256i1 (INSERT_SUBREG
@@ -608,7 +1079,7 @@ def Extracts {
   dag Vec3 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx1));
 }
 
-let Predicates = [MMA] in {
+let Predicates = [MMA, IsNotISAFuture] in {
   def : Pat<(v512i1 (PPCAccBuild v4i32:$vs1, v4i32:$vs0, v4i32:$vs3, v4i32:$vs2)),
             (XXMTACC ConcatsMMA.VecsToVecQuad)>;
   def : Pat<(v512i1 (int_ppc_mma_assemble_acc v16i8:$vs1, v16i8:$vs0,
@@ -625,4 +1096,11 @@ let Predicates = [MMA] in {
             Extracts.Vec3>;
 }
 
-
+let Predicates = [MMA, IsISAFuture] in { // Build wacc values from vsr pairs.
+  def : Pat<(v512i1 (PPCAccBuild v4i32:$vs1, v4i32:$vs0, v4i32:$vs3, v4i32:$vs2)),
+            (DMXXINSTFDMR512 ConcatsMMA.VecsToVecPair0, ConcatsMMA.VecsToVecPair1)>;
+  def : Pat<(v512i1 (int_ppc_mma_assemble_acc v16i8:$vs1, v16i8:$vs0,
+                                              v16i8:$vs3, v16i8:$vs2)),
+            (DMXXINSTFDMR512 ConcatsMMA.VecsToVecPair0, ConcatsMMA.VecsToVecPair1)>;
+  def : Pat<(v512i1 immAllZerosV), (XXSETACCZW)>; // All-zero accumulator.
+}

diff  --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index a94378a9b1cd7..eaa55af535074 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -575,6 +575,13 @@ bool PPCRegisterInfo::getRegAllocationHints(Register VirtReg,
   // as we are just looking to provide a hint.
   bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
       VirtReg, Order, Hints, MF, VRM, Matrix);
+
+  // Don't compute the ACC-specific allocation hints for ISAFuture.
+  // The WACC registers used in ISAFuture are unlike the ACC registers on
+  // Power 10, so the register allocation hint logic below does not apply.
+  if (MF.getSubtarget<PPCSubtarget>().isISAFuture())
+    return BaseImplRetVal;
+
   // We are interested in instructions that copy values to ACC/UACC.
   // The copy into UACC will be simply a COPY to a subreg so we
   // want to allocate the corresponding physical subreg for the source.
@@ -1234,6 +1241,11 @@ static void spillRegPairs(MachineBasicBlock &MBB,
                           unsigned FrameIndex, bool IsLittleEndian,
                           bool IsKilled, bool TwoPairs) {
   unsigned Offset = 0;
+  // The register arithmetic in this function does not support virtual
+  // registers.
+  assert(!SrcReg.isVirtual() &&
+         "Spilling register pairs does not support virtual registers.");
+
   if (TwoPairs)
     Offset = IsLittleEndian ? 48 : 0;
   else
@@ -1281,6 +1293,18 @@ void PPCRegisterInfo::lowerOctWordSpilling(MachineBasicBlock::iterator II,
   MBB.erase(II);
 }
 
+static void emitWAccSpillRestoreInfo(MachineBasicBlock &MBB, bool IsRestore) {
+#ifdef NDEBUG
+  return; // Builds with NDEBUG defined emit no diagnostics.
+#else
+  if (ReportAccMoves) { // NOTE(review): flag is defined outside this hunk.
+    dbgs() << "Emitting wacc register " << (IsRestore ? "restore" : "spill")
+           << ":\n";
+    MBB.dump(); // Dumps the whole block containing the spill/restore.
+  }
+#endif
+}
+
 /// lowerACCSpilling - Generate the code for spilling the accumulator register.
 /// Similarly to other spills/reloads that use pseudo-ops, we do not actually
 /// eliminate the FrameIndex here nor compute the stack offset. We simply
@@ -1362,6 +1386,73 @@ void PPCRegisterInfo::lowerACCRestore(MachineBasicBlock::iterator II,
   MBB.erase(II);
 }
 
+/// lowerWACCSpilling - Expand SPILL_WACC: extract the wide accumulator into two
+/// VSR pair virtual registers and store both pairs to the spill slot.
+void PPCRegisterInfo::lowerWACCSpilling(MachineBasicBlock::iterator II,
+                                        unsigned FrameIndex) const {
+  MachineInstr &MI = *II; // SPILL_WACC <SrcReg>, <offset>
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+  DebugLoc DL = MI.getDebugLoc();
+  bool IsLittleEndian = Subtarget.isLittleEndian();
+
+  emitWAccSpillRestoreInfo(MBB, false); // IsRestore = false
+
+  const TargetRegisterClass *RC = &PPC::VSRpRCRegClass;
+  Register VSRpReg0 = MF.getRegInfo().createVirtualRegister(RC);
+  Register VSRpReg1 = MF.getRegInfo().createVirtualRegister(RC);
+  Register SrcReg = MI.getOperand(0).getReg();
+
+  BuildMI(MBB, II, DL, TII.get(PPC::DMXXEXTFDMR512), VSRpReg0) // two defs
+      .addDef(VSRpReg1)
+      .addReg(SrcReg);
+
+  addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
+                        .addReg(VSRpReg0, RegState::Kill),
+                    FrameIndex, IsLittleEndian ? 32 : 0); // +32 on LE, +0 on BE
+  addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
+                        .addReg(VSRpReg1, RegState::Kill),
+                    FrameIndex, IsLittleEndian ? 0 : 32); // +0 on LE, +32 on BE
+
+  // Discard the pseudo instruction.
+  MBB.erase(II);
+}
+
+/// lowerWACCRestore - Expand RESTORE_WACC: reload the two VSR pairs from the
+/// spill slot and insert them into the wide accumulator register.
+void PPCRegisterInfo::lowerWACCRestore(MachineBasicBlock::iterator II,
+                                       unsigned FrameIndex) const {
+  MachineInstr &MI = *II; // <DestReg> = RESTORE_WACC <offset>
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+  DebugLoc DL = MI.getDebugLoc();
+  bool IsLittleEndian = Subtarget.isLittleEndian();
+
+  emitWAccSpillRestoreInfo(MBB, true); // IsRestore = true
+
+  const TargetRegisterClass *RC = &PPC::VSRpRCRegClass;
+  Register VSRpReg0 = MF.getRegInfo().createVirtualRegister(RC);
+  Register VSRpReg1 = MF.getRegInfo().createVirtualRegister(RC);
+  Register DestReg = MI.getOperand(0).getReg();
+
+  addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg0),
+                    FrameIndex, IsLittleEndian ? 32 : 0); // +32 on LE, +0 on BE
+  addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg1),
+                    FrameIndex, IsLittleEndian ? 0 : 32); // +0 on LE, +32 on BE
+
+  // Both temporary pairs are last used here, so mark them killed.
+  BuildMI(MBB, II, DL, TII.get(PPC::DMXXINSTFDMR512), DestReg)
+      .addReg(VSRpReg0, RegState::Kill)
+      .addReg(VSRpReg1, RegState::Kill);
+
+  // Discard the pseudo instruction.
+  MBB.erase(II);
+}
+
 /// lowerQuadwordSpilling - Generate code to spill paired general register.
 void PPCRegisterInfo::lowerQuadwordSpilling(MachineBasicBlock::iterator II,
                                             unsigned FrameIndex) const {
@@ -1562,6 +1653,12 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   } else if (OpC == PPC::STXVP && DisableAutoPairedVecSt) {
     lowerOctWordSpilling(II, FrameIndex);
     return true;
+  } else if (OpC == PPC::SPILL_WACC) {
+    lowerWACCSpilling(II, FrameIndex);
+    return true;
+  } else if (OpC == PPC::RESTORE_WACC) {
+    lowerWACCRestore(II, FrameIndex);
+    return true;
   } else if (OpC == PPC::SPILL_QUADWORD) {
     lowerQuadwordSpilling(II, FrameIndex);
     return true;

diff  --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index 069163fed8764..781f6255dc72b 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -137,6 +137,11 @@ class PPCRegisterInfo : public PPCGenRegisterInfo {
   void lowerACCRestore(MachineBasicBlock::iterator II,
                        unsigned FrameIndex) const;
 
+  void lowerWACCSpilling(MachineBasicBlock::iterator II,
+                         unsigned FrameIndex) const;
+  void lowerWACCRestore(MachineBasicBlock::iterator II,
+                        unsigned FrameIndex) const;
+
   void lowerQuadwordSpilling(MachineBasicBlock::iterator II,
                              unsigned FrameIndex) const;
   void lowerQuadwordRestore(MachineBasicBlock::iterator II,

diff  --git a/llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll b/llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll
new file mode 100644
index 0000000000000..be395b468a17c
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll
@@ -0,0 +1,116 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; This test is a copy of mma-acc-spill.ll except that it uses mcpu=future.
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -disable-auto-paired-vec-st=false \
+; RUN:   -mcpu=future -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -disable-auto-paired-vec-st=false \
+; RUN:   -mcpu=future -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+
+declare <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>)
+declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
+declare void @foo()
+define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i8> %vc4, ptr %ptr) {
+; CHECK-LABEL: intrinsics1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -176(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 176
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset r30, -16
+; CHECK-NEXT:    .cfi_offset v28, -80
+; CHECK-NEXT:    .cfi_offset v29, -64
+; CHECK-NEXT:    .cfi_offset v30, -48
+; CHECK-NEXT:    .cfi_offset v31, -32
+; CHECK-NEXT:    stxv v28, 96(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv v29, 112(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv v30, 128(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv v31, 144(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    vmr v31, v5
+; CHECK-NEXT:    vmr v30, v4
+; CHECK-NEXT:    vmr v29, v3
+; CHECK-NEXT:    vmr v28, v2
+; CHECK-NEXT:    std r30, 160(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    ld r30, 272(r1)
+; CHECK-NEXT:    dmxxinstfdmr512 wacc0, vsp60, vsp62, 0
+; CHECK-NEXT:    xvf16ger2pp wacc0, v2, v4
+; CHECK-NEXT:    dmxxextfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT:    stxvp vsp36, 64(r1)
+; CHECK-NEXT:    stxvp vsp34, 32(r1)
+; CHECK-NEXT:    bl foo at notoc
+; CHECK-NEXT:    lxvp vsp34, 64(r1)
+; CHECK-NEXT:    lxvp vsp36, 32(r1)
+; CHECK-NEXT:    dmxxinstfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-NEXT:    xvf16ger2pp wacc0, v28, v30
+; CHECK-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-NEXT:    stxv v4, 48(r30)
+; CHECK-NEXT:    stxv v5, 32(r30)
+; CHECK-NEXT:    stxv v2, 16(r30)
+; CHECK-NEXT:    stxv v3, 0(r30)
+; CHECK-NEXT:    lxv v31, 144(r1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv v30, 128(r1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv v29, 112(r1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv v28, 96(r1) # 16-byte Folded Reload
+; CHECK-NEXT:    ld r30, 160(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    addi r1, r1, 176
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: intrinsics1:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    mflr r0
+; CHECK-BE-NEXT:    std r0, 16(r1)
+; CHECK-BE-NEXT:    stdu r1, -256(r1)
+; CHECK-BE-NEXT:    .cfi_def_cfa_offset 256
+; CHECK-BE-NEXT:    .cfi_offset lr, 16
+; CHECK-BE-NEXT:    .cfi_offset r30, -16
+; CHECK-BE-NEXT:    .cfi_offset v28, -80
+; CHECK-BE-NEXT:    .cfi_offset v29, -64
+; CHECK-BE-NEXT:    .cfi_offset v30, -48
+; CHECK-BE-NEXT:    .cfi_offset v31, -32
+; CHECK-BE-NEXT:    stxv v28, 176(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT:    stxv v29, 192(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT:    stxv v30, 208(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT:    stxv v31, 224(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT:    vmr v31, v5
+; CHECK-BE-NEXT:    vmr v30, v4
+; CHECK-BE-NEXT:    vmr v29, v3
+; CHECK-BE-NEXT:    vmr v28, v2
+; CHECK-BE-NEXT:    std r30, 240(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    ld r30, 368(r1)
+; CHECK-BE-NEXT:    dmxxinstfdmr512 wacc0, vsp60, vsp62, 0
+; CHECK-BE-NEXT:    xvf16ger2pp wacc0, v2, v4
+; CHECK-BE-NEXT:    dmxxextfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT:    stxvp vsp36, 112(r1)
+; CHECK-BE-NEXT:    stxvp vsp34, 144(r1)
+; CHECK-BE-NEXT:    bl foo
+; CHECK-BE-NEXT:    nop
+; CHECK-BE-NEXT:    lxvp vsp34, 112(r1)
+; CHECK-BE-NEXT:    lxvp vsp36, 144(r1)
+; CHECK-BE-NEXT:    dmxxinstfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-BE-NEXT:    xvf16ger2pp wacc0, v28, v30
+; CHECK-BE-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-BE-NEXT:    stxv v5, 48(r30)
+; CHECK-BE-NEXT:    stxv v4, 32(r30)
+; CHECK-BE-NEXT:    stxv v3, 16(r30)
+; CHECK-BE-NEXT:    stxv v2, 0(r30)
+; CHECK-BE-NEXT:    lxv v31, 224(r1) # 16-byte Folded Reload
+; CHECK-BE-NEXT:    lxv v30, 208(r1) # 16-byte Folded Reload
+; CHECK-BE-NEXT:    lxv v29, 192(r1) # 16-byte Folded Reload
+; CHECK-BE-NEXT:    lxv v28, 176(r1) # 16-byte Folded Reload
+; CHECK-BE-NEXT:    ld r30, 240(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    addi r1, r1, 256
+; CHECK-BE-NEXT:    ld r0, 16(r1)
+; CHECK-BE-NEXT:    mtlr r0
+; CHECK-BE-NEXT:    blr
+  %1 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i8> %vc4)
+  %2 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %1, <16 x i8> %vc1, <16 x i8> %vc3)
+  tail call void @foo()
+  %3 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %2, <16 x i8> %vc1, <16 x i8> %vc3)
+  store <512 x i1> %3, ptr %ptr, align 64
+  ret void
+}

diff  --git a/llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll
new file mode 100644
index 0000000000000..9dbac7e2f3739
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll
@@ -0,0 +1,386 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; This test is a copy of mma-intrinsics.ll except that it uses mcpu=future.
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=future -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=future -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=future -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr -O0 < %s | FileCheck %s --check-prefix=CHECK-O0
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=future -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr -O0 < %s | FileCheck %s --check-prefix=CHECK-O0-BE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-aix- \
+; RUN:   -mcpu=future -vec-extabi \
+; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-AIX64
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-aix- \
+; RUN:   -mcpu=future -vec-extabi \
+; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-AIX32
+
+; TODO: Some tests from mma-intrinsics.ll are omitted here because they do not
+;       work with -mcpu=future yet. Once the fixes are in, they should be added
+;       to this file.
+
+; assemble_acc: assemble four copies of %vc into an accumulator, store to %ptr
+declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
+define void @ass_acc(ptr %ptr, <16 x i8> %vc) {
+; CHECK-LABEL: ass_acc:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmr v3, v2
+; CHECK-NEXT:    dmxxinstfdmr512 wacc0, vsp34, vsp34, 0
+; CHECK-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-NEXT:    stxv v4, 48(r3)
+; CHECK-NEXT:    stxv v5, 32(r3)
+; CHECK-NEXT:    stxv v2, 16(r3)
+; CHECK-NEXT:    stxv v3, 0(r3)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: ass_acc:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vmr v3, v2
+; CHECK-BE-NEXT:    dmxxinstfdmr512 wacc0, vsp34, vsp34, 0
+; CHECK-BE-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-BE-NEXT:    stxv v5, 48(r3)
+; CHECK-BE-NEXT:    stxv v4, 32(r3)
+; CHECK-BE-NEXT:    stxv v3, 16(r3)
+; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-O0-LABEL: ass_acc:
+; CHECK-O0:       # %bb.0: # %entry
+; CHECK-O0-NEXT:    vmr v4, v2
+; CHECK-O0-NEXT:    # implicit-def: $vsrp17
+; CHECK-O0-NEXT:    vmr v3, v4
+; CHECK-O0-NEXT:    vmr v2, v4
+; CHECK-O0-NEXT:    dmxxinstfdmr512 wacc0, vsp34, vsp34, 0
+; CHECK-O0-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-O0-NEXT:    xxlor vs0, v4, v4
+; CHECK-O0-NEXT:    stxv vs0, 48(r3)
+; CHECK-O0-NEXT:    xxlor vs0, v5, v5
+; CHECK-O0-NEXT:    stxv vs0, 32(r3)
+; CHECK-O0-NEXT:    xxlor vs0, v2, v2
+; CHECK-O0-NEXT:    stxv vs0, 16(r3)
+; CHECK-O0-NEXT:    xxlor vs0, v3, v3
+; CHECK-O0-NEXT:    stxv vs0, 0(r3)
+; CHECK-O0-NEXT:    blr
+;
+; CHECK-O0-BE-LABEL: ass_acc:
+; CHECK-O0-BE:       # %bb.0: # %entry
+; CHECK-O0-BE-NEXT:    vmr v4, v2
+; CHECK-O0-BE-NEXT:    # implicit-def: $vsrp17
+; CHECK-O0-BE-NEXT:    vmr v3, v4
+; CHECK-O0-BE-NEXT:    vmr v2, v4
+; CHECK-O0-BE-NEXT:    dmxxinstfdmr512 wacc0, vsp34, vsp34, 0
+; CHECK-O0-BE-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-O0-BE-NEXT:    xxlor vs0, v5, v5
+; CHECK-O0-BE-NEXT:    stxv vs0, 48(r3)
+; CHECK-O0-BE-NEXT:    xxlor vs0, v4, v4
+; CHECK-O0-BE-NEXT:    stxv vs0, 32(r3)
+; CHECK-O0-BE-NEXT:    xxlor vs0, v3, v3
+; CHECK-O0-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-O0-BE-NEXT:    xxlor vs0, v2, v2
+; CHECK-O0-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-O0-BE-NEXT:    blr
+;
+; CHECK-AIX64-LABEL: ass_acc:
+; CHECK-AIX64:       # %bb.0: # %entry
+; CHECK-AIX64-NEXT:    vmr 3, 2
+; CHECK-AIX64-NEXT:    dmxxinstfdmr512 0, 34, 34, 0
+; CHECK-AIX64-NEXT:    dmxxextfdmr512 0, 34, 36, 0
+; CHECK-AIX64-NEXT:    stxv 5, 48(3)
+; CHECK-AIX64-NEXT:    stxv 4, 32(3)
+; CHECK-AIX64-NEXT:    stxv 3, 16(3)
+; CHECK-AIX64-NEXT:    stxv 2, 0(3)
+; CHECK-AIX64-NEXT:    blr
+;
+; CHECK-AIX32-LABEL: ass_acc:
+; CHECK-AIX32:       # %bb.0: # %entry
+; CHECK-AIX32-NEXT:    vmr 3, 2
+; CHECK-AIX32-NEXT:    dmxxinstfdmr512 0, 34, 34, 0
+; CHECK-AIX32-NEXT:    dmxxextfdmr512 0, 34, 36, 0
+; CHECK-AIX32-NEXT:    stxv 5, 48(3)
+; CHECK-AIX32-NEXT:    stxv 4, 32(3)
+; CHECK-AIX32-NEXT:    stxv 3, 16(3)
+; CHECK-AIX32-NEXT:    stxv 2, 0(3)
+; CHECK-AIX32-NEXT:    blr
+entry:
+  %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc)
+  store <512 x i1> %0, ptr %ptr, align 64
+  ret void
+}
+
+; xxsetaccz: store the result of the xxsetaccz intrinsic to %ptr
+declare <512 x i1> @llvm.ppc.mma.xxsetaccz()
+define void @int_xxsetaccz(ptr %ptr) {
+; CHECK-LABEL: int_xxsetaccz:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsetaccz wacc0
+; CHECK-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-NEXT:    stxv v4, 48(r3)
+; CHECK-NEXT:    stxv v5, 32(r3)
+; CHECK-NEXT:    stxv v2, 16(r3)
+; CHECK-NEXT:    stxv v3, 0(r3)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: int_xxsetaccz:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsetaccz wacc0
+; CHECK-BE-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-BE-NEXT:    stxv v5, 48(r3)
+; CHECK-BE-NEXT:    stxv v4, 32(r3)
+; CHECK-BE-NEXT:    stxv v3, 16(r3)
+; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-O0-LABEL: int_xxsetaccz:
+; CHECK-O0:       # %bb.0: # %entry
+; CHECK-O0-NEXT:    xxsetaccz wacc0
+; CHECK-O0-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-O0-NEXT:    xxlor vs0, v4, v4
+; CHECK-O0-NEXT:    stxv vs0, 48(r3)
+; CHECK-O0-NEXT:    xxlor vs0, v5, v5
+; CHECK-O0-NEXT:    stxv vs0, 32(r3)
+; CHECK-O0-NEXT:    xxlor vs0, v2, v2
+; CHECK-O0-NEXT:    stxv vs0, 16(r3)
+; CHECK-O0-NEXT:    xxlor vs0, v3, v3
+; CHECK-O0-NEXT:    stxv vs0, 0(r3)
+; CHECK-O0-NEXT:    blr
+;
+; CHECK-O0-BE-LABEL: int_xxsetaccz:
+; CHECK-O0-BE:       # %bb.0: # %entry
+; CHECK-O0-BE-NEXT:    xxsetaccz wacc0
+; CHECK-O0-BE-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-O0-BE-NEXT:    xxlor vs0, v5, v5
+; CHECK-O0-BE-NEXT:    stxv vs0, 48(r3)
+; CHECK-O0-BE-NEXT:    xxlor vs0, v4, v4
+; CHECK-O0-BE-NEXT:    stxv vs0, 32(r3)
+; CHECK-O0-BE-NEXT:    xxlor vs0, v3, v3
+; CHECK-O0-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-O0-BE-NEXT:    xxlor vs0, v2, v2
+; CHECK-O0-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-O0-BE-NEXT:    blr
+;
+; CHECK-AIX64-LABEL: int_xxsetaccz:
+; CHECK-AIX64:       # %bb.0: # %entry
+; CHECK-AIX64-NEXT:    xxsetaccz 0
+; CHECK-AIX64-NEXT:    dmxxextfdmr512 0, 34, 36, 0
+; CHECK-AIX64-NEXT:    stxv 5, 48(3)
+; CHECK-AIX64-NEXT:    stxv 4, 32(3)
+; CHECK-AIX64-NEXT:    stxv 3, 16(3)
+; CHECK-AIX64-NEXT:    stxv 2, 0(3)
+; CHECK-AIX64-NEXT:    blr
+;
+; CHECK-AIX32-LABEL: int_xxsetaccz:
+; CHECK-AIX32:       # %bb.0: # %entry
+; CHECK-AIX32-NEXT:    xxsetaccz 0
+; CHECK-AIX32-NEXT:    dmxxextfdmr512 0, 34, 36, 0
+; CHECK-AIX32-NEXT:    stxv 5, 48(3)
+; CHECK-AIX32-NEXT:    stxv 4, 32(3)
+; CHECK-AIX32-NEXT:    stxv 3, 16(3)
+; CHECK-AIX32-NEXT:    stxv 2, 0(3)
+; CHECK-AIX32-NEXT:    blr
+entry:
+  %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
+  store <512 x i1> %0, ptr %ptr, align 64
+  ret void
+}
+
+; disassemble_acc: split an xxsetaccz result into four vectors, one per pointer
+declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>)
+define void @disass_acc(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4) {
+; CHECK-LABEL: disass_acc:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsetaccz wacc0
+; CHECK-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-NEXT:    stxv v5, 0(r3)
+; CHECK-NEXT:    stxv v4, 0(r4)
+; CHECK-NEXT:    stxv v3, 0(r5)
+; CHECK-NEXT:    stxv v2, 0(r6)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: disass_acc:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsetaccz wacc0
+; CHECK-BE-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    stxv v3, 0(r4)
+; CHECK-BE-NEXT:    stxv v4, 0(r5)
+; CHECK-BE-NEXT:    stxv v5, 0(r6)
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-O0-LABEL: disass_acc:
+; CHECK-O0:       # %bb.0: # %entry
+; CHECK-O0-NEXT:    xxsetaccz wacc0
+; CHECK-O0-NEXT:    dmxxextfdmr512 wacc0, vsp32, vsp36, 0
+; CHECK-O0-NEXT:    vmr v2, v0
+; CHECK-O0-NEXT:    xxlor vs0, v1, v1
+; CHECK-O0-NEXT:    xxlor vs1, v4, v4
+; CHECK-O0-NEXT:    xxlor vs2, v5, v5
+; CHECK-O0-NEXT:    stxv vs2, 0(r3)
+; CHECK-O0-NEXT:    stxv vs1, 0(r4)
+; CHECK-O0-NEXT:    stxv vs0, 0(r5)
+; CHECK-O0-NEXT:    stxv v2, 0(r6)
+; CHECK-O0-NEXT:    blr
+;
+; CHECK-O0-BE-LABEL: disass_acc:
+; CHECK-O0-BE:       # %bb.0: # %entry
+; CHECK-O0-BE-NEXT:    xxsetaccz wacc0
+; CHECK-O0-BE-NEXT:    dmxxextfdmr512 wacc0, vsp36, vsp32, 0
+; CHECK-O0-BE-NEXT:    vmr v2, v1
+; CHECK-O0-BE-NEXT:    xxlor vs0, v0, v0
+; CHECK-O0-BE-NEXT:    xxlor vs1, v5, v5
+; CHECK-O0-BE-NEXT:    xxlor vs2, v4, v4
+; CHECK-O0-BE-NEXT:    stxv vs2, 0(r3)
+; CHECK-O0-BE-NEXT:    stxv vs1, 0(r4)
+; CHECK-O0-BE-NEXT:    stxv vs0, 0(r5)
+; CHECK-O0-BE-NEXT:    stxv v2, 0(r6)
+; CHECK-O0-BE-NEXT:    blr
+;
+; CHECK-AIX64-LABEL: disass_acc:
+; CHECK-AIX64:       # %bb.0: # %entry
+; CHECK-AIX64-NEXT:    xxsetaccz 0
+; CHECK-AIX64-NEXT:    dmxxextfdmr512 0, 34, 36, 0
+; CHECK-AIX64-NEXT:    stxv 2, 0(3)
+; CHECK-AIX64-NEXT:    stxv 3, 0(4)
+; CHECK-AIX64-NEXT:    stxv 4, 0(5)
+; CHECK-AIX64-NEXT:    stxv 5, 0(6)
+; CHECK-AIX64-NEXT:    blr
+;
+; CHECK-AIX32-LABEL: disass_acc:
+; CHECK-AIX32:       # %bb.0: # %entry
+; CHECK-AIX32-NEXT:    xxsetaccz 0
+; CHECK-AIX32-NEXT:    dmxxextfdmr512 0, 34, 36, 0
+; CHECK-AIX32-NEXT:    stxv 2, 0(3)
+; CHECK-AIX32-NEXT:    stxv 3, 0(4)
+; CHECK-AIX32-NEXT:    stxv 4, 0(5)
+; CHECK-AIX32-NEXT:    stxv 5, 0(6)
+; CHECK-AIX32-NEXT:    blr
+entry:
+  %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
+  %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %0)
+  %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 0
+  %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 1
+  %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 2
+  %5 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 3
+  store <16 x i8> %2, ptr %ptr1, align 16
+  store <16 x i8> %3, ptr %ptr2, align 16
+  store <16 x i8> %4, ptr %ptr3, align 16
+  store <16 x i8> %5, ptr %ptr4, align 16
+  ret void
+}
+
+declare <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1>, <16 x i8>, <16 x i8>)
+declare <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1>, <16 x i8>, <16 x i8>)
+declare <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1>, <16 x i8>, <16 x i8>)
+; testcse: duplicate xxsetaccz/xvf32gerpp chains; CHECKs expect one of each
+define void @testcse(ptr %res, <16 x i8> %vc) {
+; CHECK-LABEL: testcse:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsetaccz wacc0
+; CHECK-NEXT:    xvf32gerpp wacc0, v2, v2
+; CHECK-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-NEXT:    stxv v4, 48(r3)
+; CHECK-NEXT:    stxv v5, 32(r3)
+; CHECK-NEXT:    stxv v2, 16(r3)
+; CHECK-NEXT:    stxv v3, 0(r3)
+; CHECK-NEXT:    stxv v4, 112(r3)
+; CHECK-NEXT:    stxv v5, 96(r3)
+; CHECK-NEXT:    stxv v2, 80(r3)
+; CHECK-NEXT:    stxv v3, 64(r3)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: testcse:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsetaccz wacc0
+; CHECK-BE-NEXT:    xvf32gerpp wacc0, v2, v2
+; CHECK-BE-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-BE-NEXT:    stxv v5, 48(r3)
+; CHECK-BE-NEXT:    stxv v4, 32(r3)
+; CHECK-BE-NEXT:    stxv v3, 16(r3)
+; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    stxv v5, 112(r3)
+; CHECK-BE-NEXT:    stxv v4, 96(r3)
+; CHECK-BE-NEXT:    stxv v3, 80(r3)
+; CHECK-BE-NEXT:    stxv v2, 64(r3)
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-O0-LABEL: testcse:
+; CHECK-O0:       # %bb.0: # %entry
+; CHECK-O0-NEXT:    xxsetaccz wacc0
+; CHECK-O0-NEXT:    xvf32gerpp wacc0, v2, v2
+; CHECK-O0-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-O0-NEXT:    xxlor vs3, v4, v4
+; CHECK-O0-NEXT:    stxv vs3, 48(r3)
+; CHECK-O0-NEXT:    xxlor vs2, v5, v5
+; CHECK-O0-NEXT:    stxv vs2, 32(r3)
+; CHECK-O0-NEXT:    xxlor vs1, v2, v2
+; CHECK-O0-NEXT:    stxv vs1, 16(r3)
+; CHECK-O0-NEXT:    xxlor vs0, v3, v3
+; CHECK-O0-NEXT:    stxv vs0, 0(r3)
+; CHECK-O0-NEXT:    stxv vs3, 112(r3)
+; CHECK-O0-NEXT:    stxv vs2, 96(r3)
+; CHECK-O0-NEXT:    stxv vs1, 80(r3)
+; CHECK-O0-NEXT:    stxv vs0, 64(r3)
+; CHECK-O0-NEXT:    blr
+;
+; CHECK-O0-BE-LABEL: testcse:
+; CHECK-O0-BE:       # %bb.0: # %entry
+; CHECK-O0-BE-NEXT:    xxsetaccz wacc0
+; CHECK-O0-BE-NEXT:    xvf32gerpp wacc0, v2, v2
+; CHECK-O0-BE-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-O0-BE-NEXT:    xxlor vs3, v5, v5
+; CHECK-O0-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-O0-BE-NEXT:    xxlor vs2, v4, v4
+; CHECK-O0-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-O0-BE-NEXT:    xxlor vs1, v3, v3
+; CHECK-O0-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-O0-BE-NEXT:    xxlor vs0, v2, v2
+; CHECK-O0-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-O0-BE-NEXT:    stxv vs3, 112(r3)
+; CHECK-O0-BE-NEXT:    stxv vs2, 96(r3)
+; CHECK-O0-BE-NEXT:    stxv vs1, 80(r3)
+; CHECK-O0-BE-NEXT:    stxv vs0, 64(r3)
+; CHECK-O0-BE-NEXT:    blr
+;
+; CHECK-AIX64-LABEL: testcse:
+; CHECK-AIX64:       # %bb.0: # %entry
+; CHECK-AIX64-NEXT:    xxsetaccz 0
+; CHECK-AIX64-NEXT:    xvf32gerpp 0, 2, 2
+; CHECK-AIX64-NEXT:    dmxxextfdmr512 0, 34, 36, 0
+; CHECK-AIX64-NEXT:    stxv 5, 48(3)
+; CHECK-AIX64-NEXT:    stxv 4, 32(3)
+; CHECK-AIX64-NEXT:    stxv 3, 16(3)
+; CHECK-AIX64-NEXT:    stxv 2, 0(3)
+; CHECK-AIX64-NEXT:    stxv 5, 112(3)
+; CHECK-AIX64-NEXT:    stxv 4, 96(3)
+; CHECK-AIX64-NEXT:    stxv 3, 80(3)
+; CHECK-AIX64-NEXT:    stxv 2, 64(3)
+; CHECK-AIX64-NEXT:    blr
+;
+; CHECK-AIX32-LABEL: testcse:
+; CHECK-AIX32:       # %bb.0: # %entry
+; CHECK-AIX32-NEXT:    xxsetaccz 0
+; CHECK-AIX32-NEXT:    xvf32gerpp 0, 2, 2
+; CHECK-AIX32-NEXT:    dmxxextfdmr512 0, 34, 36, 0
+; CHECK-AIX32-NEXT:    stxv 5, 48(3)
+; CHECK-AIX32-NEXT:    stxv 4, 32(3)
+; CHECK-AIX32-NEXT:    stxv 3, 16(3)
+; CHECK-AIX32-NEXT:    stxv 2, 0(3)
+; CHECK-AIX32-NEXT:    stxv 5, 112(3)
+; CHECK-AIX32-NEXT:    stxv 4, 96(3)
+; CHECK-AIX32-NEXT:    stxv 3, 80(3)
+; CHECK-AIX32-NEXT:    stxv 2, 64(3)
+; CHECK-AIX32-NEXT:    blr
+entry:
+  %0 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
+  %1 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
+  %2 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
+  %3 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
+  %4 = getelementptr inbounds <512 x i1>, ptr %res, i64 0
+  %5 = getelementptr inbounds <512 x i1>, ptr %res, i64 1
+  store <512 x i1> %2, ptr %4, align 64
+  store <512 x i1> %3, ptr %5, align 64
+  ret void
+}


        


More information about the llvm-commits mailing list