[llvm] aded4f0 - [X86-64] Support Intel AMX instructions

Xiang1 Zhang via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 1 17:57:43 PDT 2020


Author: Xiang1 Zhang
Date: 2020-07-02T08:57:04+08:00
New Revision: aded4f0cc070fcef6763c9a3c2ba764d652b692e

URL: https://github.com/llvm/llvm-project/commit/aded4f0cc070fcef6763c9a3c2ba764d652b692e
DIFF: https://github.com/llvm/llvm-project/commit/aded4f0cc070fcef6763c9a3c2ba764d652b692e.diff

LOG: [X86-64] Support Intel AMX instructions

Summary:
INTEL ADVANCED MATRIX EXTENSIONS (AMX).
AMX is a new programming paradigm consisting of a set of 2-dimensional registers
(TILES), which represent sub-arrays from a larger 2-dimensional memory image, and
instructions that operate on those TILES.

The spec can be found in Chapter 3 of the document at https://software.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html

Reviewers: LuoYuanke, annita.zhang, pengfei, RKSimon, xiangzhangllvm

Reviewed By: xiangzhangllvm

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D82705

Added: 
    llvm/lib/Target/X86/X86InstrAMX.td
    llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-bf16-att.txt
    llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-bf16-intel.txt
    llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-error.txt
    llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-int8-att.txt
    llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-int8-intel.txt
    llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tile-att.txt
    llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tile-intel.txt
    llvm/test/MC/X86/AMX/x86-64-amx-bf16-att.s
    llvm/test/MC/X86/AMX/x86-64-amx-bf16-intel.s
    llvm/test/MC/X86/AMX/x86-64-amx-error.s
    llvm/test/MC/X86/AMX/x86-64-amx-int8-att.s
    llvm/test/MC/X86/AMX/x86-64-amx-int8-intel.s
    llvm/test/MC/X86/AMX/x86-64-amx-tile-att.s
    llvm/test/MC/X86/AMX/x86-64-amx-tile-intel.s

Modified: 
    llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
    llvm/lib/Support/Host.cpp
    llvm/lib/Target/X86/AsmParser/X86Operand.h
    llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
    llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
    llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
    llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
    llvm/lib/Target/X86/X86.td
    llvm/lib/Target/X86/X86InstrFormats.td
    llvm/lib/Target/X86/X86InstrInfo.td
    llvm/lib/Target/X86/X86RegisterInfo.td
    llvm/lib/Target/X86/X86Subtarget.h
    llvm/test/CodeGen/X86/ipra-reg-usage.ll
    llvm/tools/llvm-exegesis/lib/X86/Target.cpp
    llvm/utils/TableGen/X86DisassemblerTables.cpp
    llvm/utils/TableGen/X86ModRMFilters.cpp
    llvm/utils/TableGen/X86ModRMFilters.h
    llvm/utils/TableGen/X86RecognizableInstr.cpp
    llvm/utils/TableGen/X86RecognizableInstr.h

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
index 8e5311054e24..b03b0dd1a87f 100644
--- a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
+++ b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
@@ -323,6 +323,7 @@ typedef uint16_t InstrUID;
   ENUM_ENTRY(MODRM_SPLITRM)   \
   ENUM_ENTRY(MODRM_SPLITMISC)  \
   ENUM_ENTRY(MODRM_SPLITREG)  \
+  ENUM_ENTRY(MODRM_SPLITREGM) \
   ENUM_ENTRY(MODRM_FULL)
 
 #define ENUM_ENTRY(n) n,
@@ -361,6 +362,7 @@ enum ModRMDecisionType {
   ENUM_ENTRY(ENCODING_RM_CD16,"R/M operand with CDisp scaling of 16")          \
   ENUM_ENTRY(ENCODING_RM_CD32,"R/M operand with CDisp scaling of 32")          \
   ENUM_ENTRY(ENCODING_RM_CD64,"R/M operand with CDisp scaling of 64")          \
+  ENUM_ENTRY(ENCODING_SIB,      "Force SIB operand in ModR/M byte.")           \
   ENUM_ENTRY(ENCODING_VSIB,     "VSIB operand in ModR/M byte.")                \
   ENUM_ENTRY(ENCODING_VSIB_CD2, "VSIB operand with CDisp scaling of 2")        \
   ENUM_ENTRY(ENCODING_VSIB_CD4, "VSIB operand with CDisp scaling of 4")        \
@@ -411,6 +413,7 @@ enum OperandEncoding {
   ENUM_ENTRY(TYPE_IMM,        "immediate operand")                             \
   ENUM_ENTRY(TYPE_UIMM8,      "1-byte unsigned immediate operand")             \
   ENUM_ENTRY(TYPE_M,          "Memory operand")                                \
+  ENUM_ENTRY(TYPE_MSIB,       "Memory operand force sib encoding")             \
   ENUM_ENTRY(TYPE_MVSIBX,     "Memory operand using XMM index")                \
   ENUM_ENTRY(TYPE_MVSIBY,     "Memory operand using YMM index")                \
   ENUM_ENTRY(TYPE_MVSIBZ,     "Memory operand using ZMM index")                \
@@ -424,6 +427,7 @@ enum OperandEncoding {
   ENUM_ENTRY(TYPE_ZMM,        "64-byte")                                       \
   ENUM_ENTRY(TYPE_VK,         "mask register")                                 \
   ENUM_ENTRY(TYPE_VK_PAIR,    "mask register pair")                            \
+  ENUM_ENTRY(TYPE_TMM,        "tile")                                          \
   ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand")                      \
   ENUM_ENTRY(TYPE_DEBUGREG,   "Debug register operand")                        \
   ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand")                      \

diff  --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp
index e4c6b879884a..3db39e79c3e2 100644
--- a/llvm/lib/Support/Host.cpp
+++ b/llvm/lib/Support/Host.cpp
@@ -1392,8 +1392,8 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
   // indicates that the AVX registers will be saved and restored on context
   // switch, then we have full AVX support.
-  bool HasAVXSave = ((ECX >> 27) & 1) && ((ECX >> 28) & 1) &&
-                    !getX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6);
+  bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
+  bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6);
 #if defined(__APPLE__)
   // Darwin lazily saves the AVX512 context on first use: trust that the OS will
   // save the AVX512 context if we use AVX512 instructions, even the bit is not
@@ -1403,6 +1403,9 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
   // AVX512 requires additional context to be saved by the OS.
   bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
 #endif
+  // AMX requires additional context to be saved by the OS.
+  const unsigned AMXBits = (1 << 17) | (1 << 18);
+  bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);
 
   Features["avx"]   = HasAVXSave;
   Features["fma"]   = ((ECX >> 12) & 1) && HasAVXSave;
@@ -1493,6 +1496,9 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
   // detecting features using the "-march=native" flag.
   // For more info, see X86 ISA docs.
   Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
+  Features["amx-bf16"]   = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave;
+  Features["amx-tile"]   = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave;
+  Features["amx-int8"]   = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
   bool HasLeaf7Subleaf1 =
       MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
   Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;

diff  --git a/llvm/lib/Target/X86/AsmParser/X86Operand.h b/llvm/lib/Target/X86/AsmParser/X86Operand.h
index 5393d90f16ba..5cf4516ede97 100644
--- a/llvm/lib/Target/X86/AsmParser/X86Operand.h
+++ b/llvm/lib/Target/X86/AsmParser/X86Operand.h
@@ -315,6 +315,11 @@ struct X86Operand final : public MCParsedAsmOperand {
   bool isMem512() const {
     return Kind == Memory && (!Mem.Size || Mem.Size == 512);
   }
+
+  bool isSibMem() const {
+    return isMem() && Mem.BaseReg != X86::RIP && Mem.BaseReg != X86::EIP;
+  }
+
   bool isMemIndexReg(unsigned LowR, unsigned HighR) const {
     assert(Kind == Memory && "Invalid access!");
     return Mem.IndexReg >= LowR && Mem.IndexReg <= HighR;

diff  --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index ea8c606d1564..befdb2c608cf 100644
--- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -166,6 +166,9 @@ static InstrUID decode(OpcodeType type, InstructionContext insnContext,
     if (modFromModRM(modRM) == 0x3)
       return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3) + 8];
     return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];
+  case MODRM_SPLITREGM:
+    assert(modFromModRM(modRM) == 0x3);
+    return modRMTable[dec->instructionIDs+(modRM & 0x7)];
   case MODRM_SPLITMISC:
     if (modFromModRM(modRM) == 0x3)
       return modRMTable[dec->instructionIDs + (modRM & 0x3f) + 8];
@@ -776,6 +779,10 @@ static int readModRM(struct InternalInstruction *insn) {
       return prefix##_YMM0 + index;                                            \
     case TYPE_XMM:                                                             \
       return prefix##_XMM0 + index;                                            \
+    case TYPE_TMM:                                                             \
+      if (index > 7)                                                           \
+        *valid = 0;                                                            \
+      return prefix##_TMM0 + index;                                            \
     case TYPE_VK:                                                              \
       index &= 0xf;                                                            \
       if (index > 7)                                                           \
@@ -849,6 +856,7 @@ static int fixupReg(struct InternalInstruction *insn,
     if (!valid)
       return -1;
     break;
+  case ENCODING_SIB:
   CASE_ENCODING_RM:
     if (insn->eaBase >= insn->eaRegBase) {
       insn->eaBase = (EABase)fixupRMValue(
@@ -1533,6 +1541,15 @@ static int readOperands(struct InternalInstruction *insn) {
       if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
         insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);
       break;
+    case ENCODING_SIB:
+      // Reject if SIB wasn't used.
+      if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
+        return -1;
+      if (readModRM(insn))
+        return -1;
+      if (fixupReg(insn, &Op))
+        return -1;
+      break;
     case ENCODING_REG:
     CASE_ENCODING_RM:
       if (readModRM(insn))
@@ -2006,9 +2023,11 @@ static bool translateRMRegister(MCInst &mcInst,
 /// @param mcInst       - The MCInst to append to.
 /// @param insn         - The instruction to extract Mod, R/M, and SIB fields
 ///                       from.
+/// @param ForceSIB     - The instruction must use SIB.
 /// @return             - 0 on success; nonzero otherwise
 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
-                              const MCDisassembler *Dis) {
+                              const MCDisassembler *Dis,
+                              bool ForceSIB = false) {
   // Addresses in an MCInst are represented as five operands:
   //   1. basereg       (register)  The R/M base, or (if there is a SIB) the
   //                                SIB base
@@ -2067,11 +2086,12 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
       // -Any base register used other than ESP/RSP/R12D/R12. Using these as a
       //  base always requires a SIB byte.
       // -A scale other than 1 is used.
-      if (insn.sibScale != 1 ||
-          (insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) ||
-          (insn.sibBase != SIB_BASE_NONE &&
-           insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP &&
-           insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12)) {
+      if (!ForceSIB &&
+          (insn.sibScale != 1 ||
+           (insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) ||
+           (insn.sibBase != SIB_BASE_NONE &&
+            insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP &&
+            insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) {
         indexReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIZ :
                                                                 X86::RIZ);
       } else
@@ -2182,6 +2202,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
   case TYPE_XMM:
   case TYPE_YMM:
   case TYPE_ZMM:
+  case TYPE_TMM:
   case TYPE_VK_PAIR:
   case TYPE_VK:
   case TYPE_DEBUGREG:
@@ -2193,6 +2214,8 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
   case TYPE_MVSIBY:
   case TYPE_MVSIBZ:
     return translateRMMemory(mcInst, insn, Dis);
+  case TYPE_MSIB:
+    return translateRMMemory(mcInst, insn, Dis, true);
   }
 }
 
@@ -2242,6 +2265,7 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
     return false;
   case ENCODING_WRITEMASK:
     return translateMaskRegister(mcInst, insn.writemask);
+  case ENCODING_SIB:
   CASE_ENCODING_RM:
   CASE_ENCODING_VSIB:
     return translateRM(mcInst, operand, insn, Dis);

diff  --git a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index a950deb95b26..4318c17f03a0 100644
--- a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -380,6 +380,17 @@ namespace X86Disassembler {
   ENTRY(BND2)         \
   ENTRY(BND3)
 
+#undef  REGS_TMM
+#define REGS_TMM  \
+  ENTRY(TMM0)     \
+  ENTRY(TMM1)     \
+  ENTRY(TMM2)     \
+  ENTRY(TMM3)     \
+  ENTRY(TMM4)     \
+  ENTRY(TMM5)     \
+  ENTRY(TMM6)     \
+  ENTRY(TMM7)
+
 #define ALL_EA_BASES  \
   EA_BASES_16BIT      \
   EA_BASES_32BIT      \
@@ -404,6 +415,7 @@ namespace X86Disassembler {
   REGS_DEBUG          \
   REGS_CONTROL        \
   REGS_BOUND          \
+  REGS_TMM            \
   ENTRY(RIP)
 
 /// All possible values of the base field for effective-address

diff  --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 01e865ac684c..79f07d3c7792 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -623,6 +623,15 @@ namespace X86II {
     /// information.  In the intel manual these are represented as /0, /1, ...
     ///
 
+    // Instructions operate on a register Reg/Opcode operand not the r/m field.
+    MRMr0 = 21,
+
+    /// MRMSrcMem - But force to use the SIB field.
+    MRMSrcMemFSIB  = 22,
+
+    /// MRMDestMem - But force to use the SIB field.
+    MRMDestMemFSIB = 23,
+
     /// MRMDestMem - This form is used for instructions that use the Mod/RM byte
     /// to specify a destination, which in this case is memory.
     ///
@@ -1082,8 +1091,10 @@ namespace X86II {
     case X86II::PrefixByte:
       return -1;
     case X86II::MRMDestMem:
+    case X86II::MRMDestMemFSIB:
       return 0;
     case X86II::MRMSrcMem:
+    case X86II::MRMSrcMemFSIB:
       // Start from 1, skip any registers encoded in VEX_VVVV or I8IMM, or a
       // mask register.
       return 1 + HasVEX_4V + HasEVEX_K;
@@ -1103,6 +1114,7 @@ namespace X86II {
     case X86II::MRMSrcRegOp4:
     case X86II::MRMSrcRegCC:
     case X86II::MRMXrCC:
+    case X86II::MRMr0:
     case X86II::MRMXr:
     case X86II::MRM0r: case X86II::MRM1r:
     case X86II::MRM2r: case X86II::MRM3r:

diff  --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 98330f60e63d..7dea0760a831 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -78,7 +78,8 @@ class X86MCCodeEmitter : public MCCodeEmitter {
   void emitMemModRMByte(const MCInst &MI, unsigned Op, unsigned RegOpcodeField,
                         uint64_t TSFlags, bool HasREX, uint64_t StartByte,
                         raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
-                        const MCSubtargetInfo &STI) const;
+                        const MCSubtargetInfo &STI,
+                        bool ForceSIB = false) const;
 
   bool emitPrefixImpl(unsigned &CurOp, const MCInst &MI,
                       const MCSubtargetInfo &STI, raw_ostream &OS) const;
@@ -382,7 +383,8 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
                                         uint64_t TSFlags, bool HasREX,
                                         uint64_t StartByte, raw_ostream &OS,
                                         SmallVectorImpl<MCFixup> &Fixups,
-                                        const MCSubtargetInfo &STI) const {
+                                        const MCSubtargetInfo &STI,
+                                        bool ForceSIB) const {
   const MCOperand &Disp = MI.getOperand(Op + X86::AddrDisp);
   const MCOperand &Base = MI.getOperand(Op + X86::AddrBaseReg);
   const MCOperand &Scale = MI.getOperand(Op + X86::AddrScaleAmt);
@@ -395,7 +397,8 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
       BaseReg == X86::EIP) { // [disp32+rIP] in X86-64 mode
     assert(STI.hasFeature(X86::Mode64Bit) &&
            "Rip-relative addressing requires 64-bit mode");
-    assert(IndexReg.getReg() == 0 && "Invalid rip-relative address");
+    assert(IndexReg.getReg() == 0 && !ForceSIB &&
+           "Invalid rip-relative address");
     emitByte(modRMByte(0, RegOpcodeField, 5), OS);
 
     unsigned Opcode = MI.getOpcode();
@@ -510,7 +513,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
   // 2-7) and absolute references.
 
   if ( // The SIB byte must be used if there is an index register.
-      IndexReg.getReg() == 0 &&
+      !ForceSIB && IndexReg.getReg() == 0 &&
       // The SIB byte must be used if the base is ESP/RSP/R12, all of which
       // encode to an R/M value of 4, which indicates that a SIB byte is
       // present.
@@ -883,9 +886,11 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
   switch (TSFlags & X86II::FormMask) {
   default:
     llvm_unreachable("Unexpected form in emitVEXOpcodePrefix!");
+  case X86II::MRM_C0:
   case X86II::RawFrm:
   case X86II::PrefixByte:
     break;
+  case X86II::MRMDestMemFSIB:
   case X86II::MRMDestMem: {
     // MRMDestMem instructions forms:
     //  MemAddr, src1(ModR/M)
@@ -916,6 +921,7 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
     EVEX_R2 = ~(RegEnc >> 4) & 1;
     break;
   }
+  case X86II::MRMSrcMemFSIB:
   case X86II::MRMSrcMem: {
     // MRMSrcMem instructions forms:
     //  src1(ModR/M), MemAddr
@@ -1097,6 +1103,15 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
       EncodeRC = true;
     break;
   }
+  case X86II::MRMr0: {
+    // MRMr0 instructions forms:
+    //  11:rrr:000
+    //  dst(ModR/M)
+    unsigned RegEnc = getX86RegEncoding(MI, CurOp++);
+    VEX_R = ~(RegEnc >> 3) & 1;
+    EVEX_R2 = ~(RegEnc >> 4) & 1;
+    break;
+  }
   case X86II::MRM0r:
   case X86II::MRM1r:
   case X86II::MRM2r:
@@ -1267,6 +1282,11 @@ bool X86MCCodeEmitter::emitREXPrefix(int MemOperand, const MCInst &MI,
     case X86II::MRM7r:
       REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
       break;
+    case X86II::MRMr0:
+      REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
+      break;
+    case X86II::MRMDestMemFSIB:
+      llvm_unreachable("FSIB format never need REX prefix!");
     }
     if (REX && UsesHighByteReg)
       report_fatal_error(
@@ -1481,6 +1501,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
     CurOp = SrcRegNum + 1;
     break;
   }
+  case X86II::MRMDestMemFSIB:
   case X86II::MRMDestMem: {
     emitByte(BaseOpcode, OS);
     unsigned SrcRegNum = CurOp + X86::AddrNumOperands;
@@ -1491,8 +1512,9 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
     if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
       ++SrcRegNum;
 
+    bool ForceSIB = (Form == X86II::MRMDestMemFSIB);
     emitMemModRMByte(MI, CurOp, getX86RegNum(MI.getOperand(SrcRegNum)), TSFlags,
-                     HasREX, StartByte, OS, Fixups, STI);
+                     HasREX, StartByte, OS, Fixups, STI, ForceSIB);
     CurOp = SrcRegNum + 1;
     break;
   }
@@ -1553,6 +1575,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
                      getX86RegNum(MI.getOperand(FirstOp)), OS);
     break;
   }
+  case X86II::MRMSrcMemFSIB:
   case X86II::MRMSrcMem: {
     unsigned FirstMemOp = CurOp + 1;
 
@@ -1564,8 +1587,9 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
 
     emitByte(BaseOpcode, OS);
 
+    bool ForceSIB = (Form == X86II::MRMSrcMemFSIB);
     emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(CurOp)),
-                     TSFlags, HasREX, StartByte, OS, Fixups, STI);
+                     TSFlags, HasREX, StartByte, OS, Fixups, STI, ForceSIB);
     CurOp = FirstMemOp + X86::AddrNumOperands;
     if (HasVEX_I8Reg)
       I8RegNum = getX86RegEncoding(MI, CurOp++);
@@ -1637,6 +1661,10 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
     emitRegModRMByte(MI.getOperand(CurOp++),
                      (Form == X86II::MRMXr) ? 0 : Form - X86II::MRM0r, OS);
     break;
+  case X86II::MRMr0:
+    emitByte(BaseOpcode, OS);
+    emitByte(modRMByte(3, getX86RegNum(MI.getOperand(CurOp++)),0), OS);
+    break;
 
   case X86II::MRMXmCC: {
     unsigned FirstMemOp = CurOp;

diff  --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 1d8b5f4f6899..eb50e6bf9ff1 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -249,6 +249,14 @@ def FeaturePTWRITE  : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
 // target-feature attribute.
 def FeatureDeprecatedMPX : SubtargetFeature<"mpx", "DeprecatedHasMPX", "false",
                                       "Deprecated. Support MPX instructions">;
+def FeatureAMXTILE     : SubtargetFeature<"amx-tile", "HasAMXTILE", "true",
+                                      "Support AMX-TILE instructions">;
+def FeatureAMXINT8     : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
+                                      "Support AMX-INT8 instructions",
+                                      [FeatureAMXTILE]>;
+def FeatureAMXBF16     : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
+                                      "Support AMX-BF16 instructions",
+                                      [FeatureAMXTILE]>;
 def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
                                      "Use LEA for adjusting the stack pointer">;
 def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",

diff  --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td
new file mode 100644
index 000000000000..deefb3eecf39
--- /dev/null
+++ b/llvm/lib/Target/X86/X86InstrAMX.td
@@ -0,0 +1,76 @@
+//===---- X86InstrAMX.td - AMX Instruction Set Extension --*- tablegen -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the instructions that make up the Intel AMX instruction
+// set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// AMX instructions
+
+let Predicates = [HasAMXTILE, In64BitMode] in {
+  let SchedRW = [WriteSystem] in {
+    let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
+    def LDTILECFG : I <0x49, MRM0m, (outs), (ins opaquemem:$src),
+                       "ldtilecfg\t$src", []>, VEX, T8PS;
+    def STTILECFG : I <0x49, MRM0m, (outs), (ins opaquemem:$src),
+                       "sttilecfg\t$src", []>, VEX, T8PD;
+    def TILELOADD : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
+                      (ins sibmem:$src),
+                      "tileloadd\t{$src, $dst|$dst, $src}", []>,
+                      VEX, T8XD;
+    def TILELOADDT1 : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
+                        (ins sibmem:$src),
+                        "tileloaddt1\t{$src, $dst|$dst, $src}", []>,
+                        VEX, T8PD;
+    let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
+    def TILERELEASE : I<0x49, MRM_C0, (outs), (ins),
+                        "tilerelease", []>, VEX, T8PS;
+    def TILESTORED : I<0x4b, MRMDestMemFSIB, (outs),
+                       (ins sibmem:$dst, TILE:$src),
+                       "tilestored\t{$src, $dst|$dst, $src}", []>,
+                       VEX, T8XS;
+    def TILEZERO : I<0x49, MRMr0, (outs TILE:$dst), (ins),
+                     "tilezero\t$dst", []>,
+                     VEX, T8XD;
+  } // SchedRW
+} // HasAMXTILE
+
+let Predicates = [HasAMXINT8, In64BitMode] in {
+  let SchedRW = [WriteSystem] in {
+    let Constraints = "$src1 = $dst" in {
+      def TDPBSSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
+                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
+                      "tdpbssd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
+                      VEX_4V, T8XD;
+      def TDPBSUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
+                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
+                      "tdpbsud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
+                      VEX_4V, T8XS;
+      def TDPBUSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
+                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
+                      "tdpbusd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
+                      VEX_4V, T8PD;
+      def TDPBUUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
+                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
+                      "tdpbuud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
+                      VEX_4V, T8PS;
+    }
+  }
+} // HasAMXINT8
+
+let Predicates = [HasAMXBF16, In64BitMode] in {
+  let SchedRW = [WriteSystem] in {
+    let Constraints = "$src1 = $dst" in
+    def TDPBF16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
+                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
+                      "tdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+                      []>, VEX_4V, T8XS;
+  }
+} // HasAMXTILE, HasAMXBF16

diff  --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td
index 41326a6bbe07..d7752e656b55 100644
--- a/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/llvm/lib/Target/X86/X86InstrFormats.td
@@ -28,6 +28,9 @@ def RawFrmImm8    : Format<7>;
 def RawFrmImm16   : Format<8>;
 def AddCCFrm      : Format<9>;
 def PrefixByte    : Format<10>;
+def MRMr0          : Format<21>;
+def MRMSrcMemFSIB  : Format<22>;
+def MRMDestMemFSIB : Format<23>;
 def MRMDestMem     : Format<24>;
 def MRMSrcMem      : Format<25>;
 def MRMSrcMem4VOp3 : Format<26>;

diff  --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 46ca1896e4e2..23841c3d7e50 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -361,6 +361,8 @@ let RenderMethod = "addMemOperands", SuperClasses = [X86MemAsmOperand] in {
   def X86Mem512_RC256XOperand : AsmOperandClass { let Name = "Mem512_RC256X"; }
   def X86Mem256_RC512Operand  : AsmOperandClass { let Name = "Mem256_RC512"; }
   def X86Mem512_RC512Operand  : AsmOperandClass { let Name = "Mem512_RC512"; }
+
+  def X86SibMemOperand : AsmOperandClass { let Name = "SibMem"; }
 }
 
 def X86AbsMemAsmOperand : AsmOperandClass {
@@ -392,6 +394,8 @@ def X86any_fcmp : PatFrags<(ops node:$lhs, node:$rhs),
 // restrict to only unsized memory.
 def opaquemem : X86MemOperand<"printMemReference">;
 
+def sibmem: X86MemOperand<"printMemReference", X86SibMemOperand>;
+
 def i8mem   : X86MemOperand<"printbytemem",   X86Mem8AsmOperand>;
 def i16mem  : X86MemOperand<"printwordmem",  X86Mem16AsmOperand>;
 def i32mem  : X86MemOperand<"printdwordmem",  X86Mem32AsmOperand>;
@@ -955,6 +959,9 @@ def HasPCONFIG   : Predicate<"Subtarget->hasPCONFIG()">;
 def HasENQCMD    : Predicate<"Subtarget->hasENQCMD()">;
 def HasSERIALIZE : Predicate<"Subtarget->hasSERIALIZE()">;
 def HasTSXLDTRK  : Predicate<"Subtarget->hasTSXLDTRK()">;
+def HasAMXTILE   : Predicate<"Subtarget->hasAMXTILE()">;
+def HasAMXBF16   : Predicate<"Subtarget->hasAMXBF16()">;
+def HasAMXINT8   : Predicate<"Subtarget->hasAMXINT8()">;
 def Not64BitMode : Predicate<"!Subtarget->is64Bit()">,
                              AssemblerPredicate<(all_of (not Mode64Bit)), "Not 64-bit mode">;
 def In64BitMode  : Predicate<"Subtarget->is64Bit()">,
@@ -3070,6 +3077,9 @@ include "X86InstrSVM.td"
 include "X86InstrTSX.td"
 include "X86InstrSGX.td"
 
+// AMX instructions
+include "X86InstrAMX.td"
+
 // System instructions.
 include "X86InstrSystem.td"
 

diff  --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td
index b2c9025f1f71..8de5b94bbffa 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -265,6 +265,16 @@ let SubRegIndices = [sub_ymm] in {
   }
 }
 
+// Tile "registers".
+def TMM0:  X86Reg<"tmm0",   0>;
+def TMM1:  X86Reg<"tmm1",   1>;
+def TMM2:  X86Reg<"tmm2",   2>;
+def TMM3:  X86Reg<"tmm3",   3>;
+def TMM4:  X86Reg<"tmm4",   4>;
+def TMM5:  X86Reg<"tmm5",   5>;
+def TMM6:  X86Reg<"tmm6",   6>;
+def TMM7:  X86Reg<"tmm7",   7>;
+
 // Mask Registers, used by AVX-512 instructions.
 def K0 : X86Reg<"k0", 0>, DwarfRegNum<[118,  93,  93]>;
 def K1 : X86Reg<"k1", 1>, DwarfRegNum<[119,  94,  94]>;
@@ -621,3 +631,8 @@ def VK64WM  : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;}
 
 // Bound registers
 def BNDR : RegisterClass<"X86", [v2i64], 128, (sequence "BND%u", 0, 3)>;
+
+// Tiles
+let isAllocatable = 0 in
+def TILE : RegisterClass<"X86", [untyped], 0,
+                         (sequence "TMM%u", 0, 7)> {let Size = 8192;}

diff  --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 16483e835f50..6a2879e4a5d7 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -403,6 +403,11 @@ class X86Subtarget final : public X86GenSubtargetInfo {
   /// Processor supports TSXLDTRK instruction
   bool HasTSXLDTRK = false;
 
+  /// Processor has AMX support
+  bool HasAMXTILE = false;
+  bool HasAMXBF16 = false;
+  bool HasAMXINT8 = false;
+
   /// Processor has a single uop BEXTR implementation.
   bool HasFastBEXTR = false;
 
@@ -735,6 +740,9 @@ class X86Subtarget final : public X86GenSubtargetInfo {
   bool useRetpolineIndirectBranches() const {
     return UseRetpolineIndirectBranches;
   }
+  bool hasAMXTILE() const { return HasAMXTILE; }
+  bool hasAMXBF16() const { return HasAMXBF16; }
+  bool hasAMXINT8() const { return HasAMXINT8; }
   bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
 
   // These are generic getters that OR together all of the thunk types

diff  --git a/llvm/test/CodeGen/X86/ipra-reg-usage.ll b/llvm/test/CodeGen/X86/ipra-reg-usage.ll
index 7d0560040490..e9039da29aa2 100644
--- a/llvm/test/CodeGen/X86/ipra-reg-usage.ll
+++ b/llvm/test/CodeGen/X86/ipra-reg-usage.ll
@@ -3,7 +3,7 @@
 target triple = "x86_64-unknown-unknown"
 declare void @bar1()
 define preserve_allcc void @foo()#0 {
-; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $fpcw $fpsw $fs $gs $hip $ip $mxcsr $rip $riz $ss $ssp $bnd0 $bnd1 $bnd2 $bnd3 $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11bh $r11d $r11w $r11wh
+; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $fpcw $fpsw $fs $gs $hip $ip $mxcsr $rip $riz $ss $ssp $bnd0 $bnd1 $bnd2 $bnd3 $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $tmm0 $tmm1 $tmm2 $tmm3 $tmm4 $tmm5 $tmm6 $tmm7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11bh $r11d $r11w $r11wh
   call void @bar1()
   call void @bar2()
   ret void

diff  --git a/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-bf16-att.txt b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-bf16-att.txt
new file mode 100644
index 000000000000..e6725de1dc17
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-bf16-att.txt
@@ -0,0 +1,25 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 | FileCheck %s
+
+# CHECK: tdpbf16ps %tmm4, %tmm5, %tmm6
+0xc4,0xe2,0x5a,0x5c,0xf5
+
+# CHECK: tdpbf16ps %tmm1, %tmm2, %tmm3
+0xc4,0xe2,0x72,0x5c,0xda
+
+# CHECK: tdpbf16ps %tmm4, %tmm5, %tmm6
+0xc4,0xe2,0x5a,0x5c,0xf5
+
+# CHECK: tdpbf16ps %tmm1, %tmm2, %tmm3
+0xc4,0xe2,0x72,0x5c,0xda
+
+# CHECK: tdpbf16ps %tmm4, %tmm5, %tmm6
+0xc4,0xe2,0x5a,0x5c,0xf5
+
+# CHECK: tdpbf16ps %tmm1, %tmm2, %tmm3
+0xc4,0xe2,0x72,0x5c,0xda
+
+# CHECK: tdpbf16ps %tmm4, %tmm5, %tmm6
+0xc4,0xe2,0x5a,0x5c,0xf5
+
+# CHECK: tdpbf16ps %tmm1, %tmm2, %tmm3
+0xc4,0xe2,0x72,0x5c,0xda

diff  --git a/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-bf16-intel.txt b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-bf16-intel.txt
new file mode 100644
index 000000000000..8aa22cc0010a
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-bf16-intel.txt
@@ -0,0 +1,25 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s
+
+# CHECK: tdpbf16ps tmm6, tmm5, tmm4
+0xc4,0xe2,0x5a,0x5c,0xf5
+
+# CHECK: tdpbf16ps tmm3, tmm2, tmm1
+0xc4,0xe2,0x72,0x5c,0xda
+
+# CHECK: tdpbf16ps tmm6, tmm5, tmm4
+0xc4,0xe2,0x5a,0x5c,0xf5
+
+# CHECK: tdpbf16ps tmm3, tmm2, tmm1
+0xc4,0xe2,0x72,0x5c,0xda
+
+# CHECK: tdpbf16ps tmm6, tmm5, tmm4
+0xc4,0xe2,0x5a,0x5c,0xf5
+
+# CHECK: tdpbf16ps tmm3, tmm2, tmm1
+0xc4,0xe2,0x72,0x5c,0xda
+
+# CHECK: tdpbf16ps tmm6, tmm5, tmm4
+0xc4,0xe2,0x5a,0x5c,0xf5
+
+# CHECK: tdpbf16ps tmm3, tmm2, tmm1
+0xc4,0xe2,0x72,0x5c,0xda

diff  --git a/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-error.txt b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-error.txt
new file mode 100644
index 000000000000..b5a1bb8db2af
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-error.txt
@@ -0,0 +1,4 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 2>&1 | FileCheck %s
+
+# CHECK: invalid instruction encoding
+0xc4,0xe2,0x1a,0x5c,0xf5

diff  --git a/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-int8-att.txt b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-int8-att.txt
new file mode 100644
index 000000000000..a6b55313c601
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-int8-att.txt
@@ -0,0 +1,97 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 | FileCheck %s
+
+# CHECK: tdpbssd %tmm4, %tmm5, %tmm6
+0xc4,0xe2,0x5b,0x5e,0xf5
+
+# CHECK: tdpbssd %tmm1, %tmm2, %tmm3
+0xc4,0xe2,0x73,0x5e,0xda
+
+# CHECK: tdpbsud %tmm4, %tmm5, %tmm6
+0xc4,0xe2,0x5a,0x5e,0xf5
+
+# CHECK: tdpbsud %tmm1, %tmm2, %tmm3
+0xc4,0xe2,0x72,0x5e,0xda
+
+# CHECK: tdpbusd %tmm4, %tmm5, %tmm6
+0xc4,0xe2,0x59,0x5e,0xf5
+
+# CHECK: tdpbusd %tmm1, %tmm2, %tmm3
+0xc4,0xe2,0x71,0x5e,0xda
+
+# CHECK: tdpbuud %tmm4, %tmm5, %tmm6
+0xc4,0xe2,0x58,0x5e,0xf5
+
+# CHECK: tdpbuud %tmm1, %tmm2, %tmm3
+0xc4,0xe2,0x70,0x5e,0xda
+
+# CHECK: tdpbssd %tmm4, %tmm5, %tmm6
+0xc4,0xe2,0x5b,0x5e,0xf5
+
+# CHECK: tdpbssd %tmm1, %tmm2, %tmm3
+0xc4,0xe2,0x73,0x5e,0xda
+
+# CHECK: tdpbsud %tmm4, %tmm5, %tmm6
+0xc4,0xe2,0x5a,0x5e,0xf5
+
+# CHECK: tdpbsud %tmm1, %tmm2, %tmm3
+0xc4,0xe2,0x72,0x5e,0xda
+
+# CHECK: tdpbusd %tmm4, %tmm5, %tmm6
+0xc4,0xe2,0x59,0x5e,0xf5
+
+# CHECK: tdpbusd %tmm1, %tmm2, %tmm3
+0xc4,0xe2,0x71,0x5e,0xda
+
+# CHECK: tdpbuud %tmm4, %tmm5, %tmm6
+0xc4,0xe2,0x58,0x5e,0xf5
+
+# CHECK: tdpbuud %tmm1, %tmm2, %tmm3
+0xc4,0xe2,0x70,0x5e,0xda
+
+# CHECK: tdpbssd %tmm4, %tmm5, %tmm6
+0xc4,0xe2,0x5b,0x5e,0xf5
+
+# CHECK: tdpbssd %tmm1, %tmm2, %tmm3
+0xc4,0xe2,0x73,0x5e,0xda
+
+# CHECK: tdpbsud %tmm4, %tmm5, %tmm6
+0xc4,0xe2,0x5a,0x5e,0xf5
+
+# CHECK: tdpbsud %tmm1, %tmm2, %tmm3
+0xc4,0xe2,0x72,0x5e,0xda
+
+# CHECK: tdpbusd %tmm4, %tmm5, %tmm6
+0xc4,0xe2,0x59,0x5e,0xf5
+
+# CHECK: tdpbusd %tmm1, %tmm2, %tmm3
+0xc4,0xe2,0x71,0x5e,0xda
+
+# CHECK: tdpbuud %tmm4, %tmm5, %tmm6
+0xc4,0xe2,0x58,0x5e,0xf5
+
+# CHECK: tdpbuud %tmm1, %tmm2, %tmm3
+0xc4,0xe2,0x70,0x5e,0xda
+
+# CHECK: tdpbssd %tmm4, %tmm5, %tmm6
+0xc4,0xe2,0x5b,0x5e,0xf5
+
+# CHECK: tdpbssd %tmm1, %tmm2, %tmm3
+0xc4,0xe2,0x73,0x5e,0xda
+
+# CHECK: tdpbsud %tmm4, %tmm5, %tmm6
+0xc4,0xe2,0x5a,0x5e,0xf5
+
+# CHECK: tdpbsud %tmm1, %tmm2, %tmm3
+0xc4,0xe2,0x72,0x5e,0xda
+
+# CHECK: tdpbusd %tmm4, %tmm5, %tmm6
+0xc4,0xe2,0x59,0x5e,0xf5
+
+# CHECK: tdpbusd %tmm1, %tmm2, %tmm3
+0xc4,0xe2,0x71,0x5e,0xda
+
+# CHECK: tdpbuud %tmm4, %tmm5, %tmm6
+0xc4,0xe2,0x58,0x5e,0xf5
+
+# CHECK: tdpbuud %tmm1, %tmm2, %tmm3
+0xc4,0xe2,0x70,0x5e,0xda

diff  --git a/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-int8-intel.txt b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-int8-intel.txt
new file mode 100644
index 000000000000..8ef714b1ac8d
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-int8-intel.txt
@@ -0,0 +1,97 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s
+
+# CHECK: tdpbssd tmm6, tmm5, tmm4
+0xc4,0xe2,0x5b,0x5e,0xf5
+
+# CHECK: tdpbssd tmm3, tmm2, tmm1
+0xc4,0xe2,0x73,0x5e,0xda
+
+# CHECK: tdpbsud tmm6, tmm5, tmm4
+0xc4,0xe2,0x5a,0x5e,0xf5
+
+# CHECK: tdpbsud tmm3, tmm2, tmm1
+0xc4,0xe2,0x72,0x5e,0xda
+
+# CHECK: tdpbusd tmm6, tmm5, tmm4
+0xc4,0xe2,0x59,0x5e,0xf5
+
+# CHECK: tdpbusd tmm3, tmm2, tmm1
+0xc4,0xe2,0x71,0x5e,0xda
+
+# CHECK: tdpbuud tmm6, tmm5, tmm4
+0xc4,0xe2,0x58,0x5e,0xf5
+
+# CHECK: tdpbuud tmm3, tmm2, tmm1
+0xc4,0xe2,0x70,0x5e,0xda
+
+# CHECK: tdpbssd tmm6, tmm5, tmm4
+0xc4,0xe2,0x5b,0x5e,0xf5
+
+# CHECK: tdpbssd tmm3, tmm2, tmm1
+0xc4,0xe2,0x73,0x5e,0xda
+
+# CHECK: tdpbsud tmm6, tmm5, tmm4
+0xc4,0xe2,0x5a,0x5e,0xf5
+
+# CHECK: tdpbsud tmm3, tmm2, tmm1
+0xc4,0xe2,0x72,0x5e,0xda
+
+# CHECK: tdpbusd tmm6, tmm5, tmm4
+0xc4,0xe2,0x59,0x5e,0xf5
+
+# CHECK: tdpbusd tmm3, tmm2, tmm1
+0xc4,0xe2,0x71,0x5e,0xda
+
+# CHECK: tdpbuud tmm6, tmm5, tmm4
+0xc4,0xe2,0x58,0x5e,0xf5
+
+# CHECK: tdpbuud tmm3, tmm2, tmm1
+0xc4,0xe2,0x70,0x5e,0xda
+
+# CHECK: tdpbssd tmm6, tmm5, tmm4
+0xc4,0xe2,0x5b,0x5e,0xf5
+
+# CHECK: tdpbssd tmm3, tmm2, tmm1
+0xc4,0xe2,0x73,0x5e,0xda
+
+# CHECK: tdpbsud tmm6, tmm5, tmm4
+0xc4,0xe2,0x5a,0x5e,0xf5
+
+# CHECK: tdpbsud tmm3, tmm2, tmm1
+0xc4,0xe2,0x72,0x5e,0xda
+
+# CHECK: tdpbusd tmm6, tmm5, tmm4
+0xc4,0xe2,0x59,0x5e,0xf5
+
+# CHECK: tdpbusd tmm3, tmm2, tmm1
+0xc4,0xe2,0x71,0x5e,0xda
+
+# CHECK: tdpbuud tmm6, tmm5, tmm4
+0xc4,0xe2,0x58,0x5e,0xf5
+
+# CHECK: tdpbuud tmm3, tmm2, tmm1
+0xc4,0xe2,0x70,0x5e,0xda
+
+# CHECK: tdpbssd tmm6, tmm5, tmm4
+0xc4,0xe2,0x5b,0x5e,0xf5
+
+# CHECK: tdpbssd tmm3, tmm2, tmm1
+0xc4,0xe2,0x73,0x5e,0xda
+
+# CHECK: tdpbsud tmm6, tmm5, tmm4
+0xc4,0xe2,0x5a,0x5e,0xf5
+
+# CHECK: tdpbsud tmm3, tmm2, tmm1
+0xc4,0xe2,0x72,0x5e,0xda
+
+# CHECK: tdpbusd tmm6, tmm5, tmm4
+0xc4,0xe2,0x59,0x5e,0xf5
+
+# CHECK: tdpbusd tmm3, tmm2, tmm1
+0xc4,0xe2,0x71,0x5e,0xda
+
+# CHECK: tdpbuud tmm6, tmm5, tmm4
+0xc4,0xe2,0x58,0x5e,0xf5
+
+# CHECK: tdpbuud tmm3, tmm2, tmm1
+0xc4,0xe2,0x70,0x5e,0xda

diff  --git a/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tile-att.txt b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tile-att.txt
new file mode 100644
index 000000000000..2bb5c12fd1b7
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tile-att.txt
@@ -0,0 +1,145 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 | FileCheck %s
+
+# CHECK: tilerelease
+0xc4,0xe2,0x78,0x49,0xc0
+
+# CHECK: tilezero %tmm6
+0xc4,0xe2,0x7b,0x49,0xf0
+
+# CHECK: tilezero %tmm3
+0xc4,0xe2,0x7b,0x49,0xd8
+
+# CHECK: tilezero %tmm6
+0xc4,0xe2,0x7b,0x49,0xf0
+
+# CHECK: tilezero %tmm3
+0xc4,0xe2,0x7b,0x49,0xd8
+
+# CHECK: ldtilecfg  268435456(%rbp,%r14,8)
+0xc4,0xa2,0x78,0x49,0x84,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: ldtilecfg  291(%r8,%rax,4)
+0xc4,0xc2,0x78,0x49,0x84,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: ldtilecfg  (%rip)
+0xc4,0xe2,0x78,0x49,0x05,0x00,0x00,0x00,0x00
+
+# CHECK: ldtilecfg  -2048(,%rbp,2)
+0xc4,0xe2,0x78,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: sttilecfg  268435456(%rbp,%r14,8)
+0xc4,0xa2,0x79,0x49,0x84,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: sttilecfg  291(%r8,%rax,4)
+0xc4,0xc2,0x79,0x49,0x84,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: sttilecfg  (%rip)
+0xc4,0xe2,0x79,0x49,0x05,0x00,0x00,0x00,0x00
+
+# CHECK: sttilecfg  -2048(,%rbp,2)
+0xc4,0xe2,0x79,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: tileloadd 268435456(%rbp,%r14,8), %tmm6
+0xc4,0xa2,0x7b,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: tileloadd 291(%r8,%rax,4), %tmm3
+0xc4,0xc2,0x7b,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: tileloadd -32(,%rbp,2), %tmm3
+0xc4,0xe2,0x7b,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff
+
+# CHECK: tileloadd 64(%rbx), %tmm4
+0xc4,0xe2,0x7b,0x4b,0x64,0x23,0x40
+
+# CHECK: tileloaddt1 268435456(%rbp,%r14,8), %tmm6
+0xc4,0xa2,0x79,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: tileloaddt1 291(%r8,%rax,4), %tmm3
+0xc4,0xc2,0x79,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: tileloaddt1 -32(,%rbp,2), %tmm3
+0xc4,0xe2,0x79,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff
+
+# CHECK: tileloaddt1 16(%rbp), %tmm6
+0xc4,0xe2,0x79,0x4b,0x74,0x25,0x10
+
+# CHECK: tilerelease
+0xc4,0xe2,0x78,0x49,0xc0
+
+# CHECK: tilestored %tmm6, 268435456(%rbp,%r14,8)
+0xc4,0xa2,0x7a,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: tilestored %tmm3, 291(%r8,%rax,4)
+0xc4,0xc2,0x7a,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: tilestored %tmm3, -32(,%rbp,2)
+0xc4,0xe2,0x7a,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff
+
+# CHECK: tilezero %tmm6
+0xc4,0xe2,0x7b,0x49,0xf0
+
+# CHECK: tilezero %tmm3
+0xc4,0xe2,0x7b,0x49,0xd8
+
+# CHECK: ldtilecfg  268435456(%rbp,%r14,8)
+0xc4,0xa2,0x78,0x49,0x84,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: ldtilecfg  291(%r8,%rax,4)
+0xc4,0xc2,0x78,0x49,0x84,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: ldtilecfg  (%rip)
+0xc4,0xe2,0x78,0x49,0x05,0x00,0x00,0x00,0x00
+
+# CHECK: ldtilecfg  -2048(,%rbp,2)
+0xc4,0xe2,0x78,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: sttilecfg  268435456(%rbp,%r14,8)
+0xc4,0xa2,0x79,0x49,0x84,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: sttilecfg  291(%r8,%rax,4)
+0xc4,0xc2,0x79,0x49,0x84,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: sttilecfg  (%rip)
+0xc4,0xe2,0x79,0x49,0x05,0x00,0x00,0x00,0x00
+
+# CHECK: sttilecfg  -2048(,%rbp,2)
+0xc4,0xe2,0x79,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: tileloadd 268435456(%rbp,%r14,8), %tmm6
+0xc4,0xa2,0x7b,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: tileloadd 291(%r8,%rax,4), %tmm3
+0xc4,0xc2,0x7b,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: tileloadd -32(,%rbp,2), %tmm3
+0xc4,0xe2,0x7b,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff
+
+# CHECK: tileloaddt1 268435456(%rbp,%r14,8), %tmm6
+0xc4,0xa2,0x79,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: tileloaddt1 291(%r8,%rax,4), %tmm3
+0xc4,0xc2,0x79,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: tileloaddt1 -32(,%rbp,2), %tmm3
+0xc4,0xe2,0x79,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff
+
+# CHECK: tilerelease
+0xc4,0xe2,0x78,0x49,0xc0
+
+# CHECK: tilestored %tmm6, 268435456(%rbp,%r14,8)
+0xc4,0xa2,0x7a,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: tilestored %tmm3, 291(%r8,%rax,4)
+0xc4,0xc2,0x7a,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: tilestored %tmm3, -32(,%rbp,2)
+0xc4,0xe2,0x7a,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff
+
+# CHECK: tilestored %tmm3, (%r8)
+0xc4,0xc2,0x7a,0x4b,0x1c,0x20
+
+# CHECK: tilezero %tmm6
+0xc4,0xe2,0x7b,0x49,0xf0
+
+# CHECK: tilezero %tmm3
+0xc4,0xe2,0x7b,0x49,0xd8

diff  --git a/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tile-intel.txt b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tile-intel.txt
new file mode 100644
index 000000000000..2893f3d5a09e
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tile-intel.txt
@@ -0,0 +1,148 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s
+
+# CHECK: tilerelease
+0xc4,0xe2,0x78,0x49,0xc0
+
+# CHECK: tilezero tmm6
+0xc4,0xe2,0x7b,0x49,0xf0
+
+# CHECK: tilezero tmm3
+0xc4,0xe2,0x7b,0x49,0xd8
+
+# CHECK: tilerelease
+0xc4,0xe2,0x78,0x49,0xc0
+
+# CHECK: tilezero tmm6
+0xc4,0xe2,0x7b,0x49,0xf0
+
+# CHECK: tilezero tmm3
+0xc4,0xe2,0x7b,0x49,0xd8
+
+# CHECK: ldtilecfg [rbp + 8*r14 + 268435456]
+0xc4,0xa2,0x78,0x49,0x84,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: ldtilecfg [r8 + 4*rax + 291]
+0xc4,0xc2,0x78,0x49,0x84,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: ldtilecfg [rip]
+0xc4,0xe2,0x78,0x49,0x05,0x00,0x00,0x00,0x00
+
+# CHECK: ldtilecfg [2*rbp - 2048]
+0xc4,0xe2,0x78,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: sttilecfg [rbp + 8*r14 + 268435456]
+0xc4,0xa2,0x79,0x49,0x84,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: sttilecfg [r8 + 4*rax + 291]
+0xc4,0xc2,0x79,0x49,0x84,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: sttilecfg [rip]
+0xc4,0xe2,0x79,0x49,0x05,0x00,0x00,0x00,0x00
+
+# CHECK: sttilecfg [2*rbp - 2048]
+0xc4,0xe2,0x79,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: tileloadd tmm6, [rbp + 8*r14 + 268435456]
+0xc4,0xa2,0x7b,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: tileloadd tmm3, [r8 + 4*rax + 291]
+0xc4,0xc2,0x7b,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: tileloadd tmm3, [2*rbp - 32]
+0xc4,0xe2,0x7b,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff
+
+# CHECK: tileloadd tmm4, [rbx + 64]
+0xc4,0xe2,0x7b,0x4b,0x64,0x23,0x40
+
+# CHECK: tileloaddt1 tmm6, [rbp + 8*r14 + 268435456]
+0xc4,0xa2,0x79,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: tileloaddt1 tmm3, [r8 + 4*rax + 291]
+0xc4,0xc2,0x79,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: tileloaddt1 tmm3, [2*rbp - 32]
+0xc4,0xe2,0x79,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff
+
+# CHECK: tileloaddt1     tmm6, [rbp + 16]
+0xc4,0xe2,0x79,0x4b,0x74,0x25,0x10
+
+# CHECK: tilerelease
+0xc4,0xe2,0x78,0x49,0xc0
+
+# CHECK: tilestored [rbp + 8*r14 + 268435456], tmm6
+0xc4,0xa2,0x7a,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: tilestored [r8 + 4*rax + 291], tmm3
+0xc4,0xc2,0x7a,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: tilestored [2*rbp - 32], tmm3
+0xc4,0xe2,0x7a,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff
+
+# CHECK: tilestored [r8], tmm3
+0xc4,0xc2,0x7a,0x4b,0x1c,0x20
+
+# CHECK: tilezero tmm6
+0xc4,0xe2,0x7b,0x49,0xf0
+
+# CHECK: tilezero tmm3
+0xc4,0xe2,0x7b,0x49,0xd8
+
+# CHECK: ldtilecfg [rbp + 8*r14 + 268435456]
+0xc4,0xa2,0x78,0x49,0x84,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: ldtilecfg [r8 + 4*rax + 291]
+0xc4,0xc2,0x78,0x49,0x84,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: ldtilecfg [rip]
+0xc4,0xe2,0x78,0x49,0x05,0x00,0x00,0x00,0x00
+
+# CHECK: ldtilecfg [2*rbp - 2048]
+0xc4,0xe2,0x78,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: sttilecfg [rbp + 8*r14 + 268435456]
+0xc4,0xa2,0x79,0x49,0x84,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: sttilecfg [r8 + 4*rax + 291]
+0xc4,0xc2,0x79,0x49,0x84,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: sttilecfg [rip]
+0xc4,0xe2,0x79,0x49,0x05,0x00,0x00,0x00,0x00
+
+# CHECK: sttilecfg [2*rbp - 2048]
+0xc4,0xe2,0x79,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: tileloadd tmm6, [rbp + 8*r14 + 268435456]
+0xc4,0xa2,0x7b,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: tileloadd tmm3, [r8 + 4*rax + 291]
+0xc4,0xc2,0x7b,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: tileloadd tmm3, [2*rbp - 32]
+0xc4,0xe2,0x7b,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff
+
+# CHECK: tileloaddt1 tmm6, [rbp + 8*r14 + 268435456]
+0xc4,0xa2,0x79,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: tileloaddt1 tmm3, [r8 + 4*rax + 291]
+0xc4,0xc2,0x79,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: tileloaddt1 tmm3, [2*rbp - 32]
+0xc4,0xe2,0x79,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff
+
+# CHECK: tilerelease
+0xc4,0xe2,0x78,0x49,0xc0
+
+# CHECK: tilestored [rbp + 8*r14 + 268435456], tmm6
+0xc4,0xa2,0x7a,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: tilestored [r8 + 4*rax + 291], tmm3
+0xc4,0xc2,0x7a,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: tilestored [2*rbp - 32], tmm3
+0xc4,0xe2,0x7a,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff
+
+# CHECK: tilezero tmm6
+0xc4,0xe2,0x7b,0x49,0xf0
+
+# CHECK: tilezero tmm3
+0xc4,0xe2,0x7b,0x49,0xd8

diff  --git a/llvm/test/MC/X86/AMX/x86-64-amx-bf16-att.s b/llvm/test/MC/X86/AMX/x86-64-amx-bf16-att.s
new file mode 100644
index 000000000000..5a8e759d6309
--- /dev/null
+++ b/llvm/test/MC/X86/AMX/x86-64-amx-bf16-att.s
@@ -0,0 +1,34 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -show-encoding %s | FileCheck %s
+// Some AMX instructions must use SIB.
+
+// CHECK: tdpbf16ps %tmm4, %tmm5, %tmm6
+// CHECK: encoding: [0xc4,0xe2,0x5a,0x5c,0xf5]
+          tdpbf16ps %tmm4, %tmm5, %tmm6
+
+// CHECK: tdpbf16ps %tmm1, %tmm2, %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x72,0x5c,0xda]
+          tdpbf16ps %tmm1, %tmm2, %tmm3
+
+// CHECK: tdpbf16ps %tmm4, %tmm5, %tmm6
+// CHECK: encoding: [0xc4,0xe2,0x5a,0x5c,0xf5]
+          tdpbf16ps %tmm4, %tmm5, %tmm6
+
+// CHECK: tdpbf16ps %tmm1, %tmm2, %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x72,0x5c,0xda]
+          tdpbf16ps %tmm1, %tmm2, %tmm3
+
+// CHECK: tdpbf16ps %tmm4, %tmm5, %tmm6
+// CHECK: encoding: [0xc4,0xe2,0x5a,0x5c,0xf5]
+          tdpbf16ps %tmm4, %tmm5, %tmm6
+
+// CHECK: tdpbf16ps %tmm1, %tmm2, %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x72,0x5c,0xda]
+          tdpbf16ps %tmm1, %tmm2, %tmm3
+
+// CHECK: tdpbf16ps %tmm4, %tmm5, %tmm6
+// CHECK: encoding: [0xc4,0xe2,0x5a,0x5c,0xf5]
+          tdpbf16ps %tmm4, %tmm5, %tmm6
+
+// CHECK: tdpbf16ps %tmm1, %tmm2, %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x72,0x5c,0xda]
+          tdpbf16ps %tmm1, %tmm2, %tmm3

diff  --git a/llvm/test/MC/X86/AMX/x86-64-amx-bf16-intel.s b/llvm/test/MC/X86/AMX/x86-64-amx-bf16-intel.s
new file mode 100644
index 000000000000..b3b1e275ed3b
--- /dev/null
+++ b/llvm/test/MC/X86/AMX/x86-64-amx-bf16-intel.s
@@ -0,0 +1,33 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: tdpbf16ps tmm6, tmm5, tmm4
+// CHECK: encoding: [0xc4,0xe2,0x5a,0x5c,0xf5]
+          tdpbf16ps tmm6, tmm5, tmm4
+
+// CHECK: tdpbf16ps tmm3, tmm2, tmm1
+// CHECK: encoding: [0xc4,0xe2,0x72,0x5c,0xda]
+          tdpbf16ps tmm3, tmm2, tmm1
+
+// CHECK: tdpbf16ps tmm6, tmm5, tmm4
+// CHECK: encoding: [0xc4,0xe2,0x5a,0x5c,0xf5]
+          tdpbf16ps tmm6, tmm5, tmm4
+
+// CHECK: tdpbf16ps tmm3, tmm2, tmm1
+// CHECK: encoding: [0xc4,0xe2,0x72,0x5c,0xda]
+          tdpbf16ps tmm3, tmm2, tmm1
+
+// CHECK: tdpbf16ps tmm6, tmm5, tmm4
+// CHECK: encoding: [0xc4,0xe2,0x5a,0x5c,0xf5]
+          tdpbf16ps tmm6, tmm5, tmm4
+
+// CHECK: tdpbf16ps tmm3, tmm2, tmm1
+// CHECK: encoding: [0xc4,0xe2,0x72,0x5c,0xda]
+          tdpbf16ps tmm3, tmm2, tmm1
+
+// CHECK: tdpbf16ps tmm6, tmm5, tmm4
+// CHECK: encoding: [0xc4,0xe2,0x5a,0x5c,0xf5]
+          tdpbf16ps tmm6, tmm5, tmm4
+
+// CHECK: tdpbf16ps tmm3, tmm2, tmm1
+// CHECK: encoding: [0xc4,0xe2,0x72,0x5c,0xda]
+          tdpbf16ps tmm3, tmm2, tmm1

diff  --git a/llvm/test/MC/X86/AMX/x86-64-amx-error.s b/llvm/test/MC/X86/AMX/x86-64-amx-error.s
new file mode 100644
index 000000000000..e92fe9ee9a83
--- /dev/null
+++ b/llvm/test/MC/X86/AMX/x86-64-amx-error.s
@@ -0,0 +1,10 @@
+// RUN: not llvm-mc -triple x86_64-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s
+
+// CHECK: invalid operand for instruction
+tileloadd (%rip), %tmm0
+
+// CHECK: invalid operand for instruction
+tileloaddt1 1(%rip), %tmm1
+
+// CHECK: invalid operand for instruction
+tilestored %tmm2, (%rip)

diff  --git a/llvm/test/MC/X86/AMX/x86-64-amx-int8-att.s b/llvm/test/MC/X86/AMX/x86-64-amx-int8-att.s
new file mode 100644
index 000000000000..c9e693c144de
--- /dev/null
+++ b/llvm/test/MC/X86/AMX/x86-64-amx-int8-att.s
@@ -0,0 +1,130 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -show-encoding %s | FileCheck %s
+// Some AMX instructions must use SIB.
+
+// CHECK: tdpbssd %tmm4, %tmm5, %tmm6
+// CHECK: encoding: [0xc4,0xe2,0x5b,0x5e,0xf5]
+          tdpbssd %tmm4, %tmm5, %tmm6
+
+// CHECK: tdpbssd %tmm1, %tmm2, %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x73,0x5e,0xda]
+          tdpbssd %tmm1, %tmm2, %tmm3
+
+// CHECK: tdpbsud %tmm4, %tmm5, %tmm6
+// CHECK: encoding: [0xc4,0xe2,0x5a,0x5e,0xf5]
+          tdpbsud %tmm4, %tmm5, %tmm6
+
+// CHECK: tdpbsud %tmm1, %tmm2, %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x72,0x5e,0xda]
+          tdpbsud %tmm1, %tmm2, %tmm3
+
+// CHECK: tdpbusd %tmm4, %tmm5, %tmm6
+// CHECK: encoding: [0xc4,0xe2,0x59,0x5e,0xf5]
+          tdpbusd %tmm4, %tmm5, %tmm6
+
+// CHECK: tdpbusd %tmm1, %tmm2, %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x71,0x5e,0xda]
+          tdpbusd %tmm1, %tmm2, %tmm3
+
+// CHECK: tdpbuud %tmm4, %tmm5, %tmm6
+// CHECK: encoding: [0xc4,0xe2,0x58,0x5e,0xf5]
+          tdpbuud %tmm4, %tmm5, %tmm6
+
+// CHECK: tdpbuud %tmm1, %tmm2, %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x70,0x5e,0xda]
+          tdpbuud %tmm1, %tmm2, %tmm3
+
+// CHECK: tdpbssd %tmm4, %tmm5, %tmm6
+// CHECK: encoding: [0xc4,0xe2,0x5b,0x5e,0xf5]
+          tdpbssd %tmm4, %tmm5, %tmm6
+
+// CHECK: tdpbssd %tmm1, %tmm2, %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x73,0x5e,0xda]
+          tdpbssd %tmm1, %tmm2, %tmm3
+
+// CHECK: tdpbsud %tmm4, %tmm5, %tmm6
+// CHECK: encoding: [0xc4,0xe2,0x5a,0x5e,0xf5]
+          tdpbsud %tmm4, %tmm5, %tmm6
+
+// CHECK: tdpbsud %tmm1, %tmm2, %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x72,0x5e,0xda]
+          tdpbsud %tmm1, %tmm2, %tmm3
+
+// CHECK: tdpbusd %tmm4, %tmm5, %tmm6
+// CHECK: encoding: [0xc4,0xe2,0x59,0x5e,0xf5]
+          tdpbusd %tmm4, %tmm5, %tmm6
+
+// CHECK: tdpbusd %tmm1, %tmm2, %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x71,0x5e,0xda]
+          tdpbusd %tmm1, %tmm2, %tmm3
+
+// CHECK: tdpbuud %tmm4, %tmm5, %tmm6
+// CHECK: encoding: [0xc4,0xe2,0x58,0x5e,0xf5]
+          tdpbuud %tmm4, %tmm5, %tmm6
+
+// CHECK: tdpbuud %tmm1, %tmm2, %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x70,0x5e,0xda]
+          tdpbuud %tmm1, %tmm2, %tmm3
+
+// CHECK: tdpbssd %tmm4, %tmm5, %tmm6
+// CHECK: encoding: [0xc4,0xe2,0x5b,0x5e,0xf5]
+          tdpbssd %tmm4, %tmm5, %tmm6
+
+// CHECK: tdpbssd %tmm1, %tmm2, %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x73,0x5e,0xda]
+          tdpbssd %tmm1, %tmm2, %tmm3
+
+// CHECK: tdpbsud %tmm4, %tmm5, %tmm6
+// CHECK: encoding: [0xc4,0xe2,0x5a,0x5e,0xf5]
+          tdpbsud %tmm4, %tmm5, %tmm6
+
+// CHECK: tdpbsud %tmm1, %tmm2, %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x72,0x5e,0xda]
+          tdpbsud %tmm1, %tmm2, %tmm3
+
+// CHECK: tdpbusd %tmm4, %tmm5, %tmm6
+// CHECK: encoding: [0xc4,0xe2,0x59,0x5e,0xf5]
+          tdpbusd %tmm4, %tmm5, %tmm6
+
+// CHECK: tdpbusd %tmm1, %tmm2, %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x71,0x5e,0xda]
+          tdpbusd %tmm1, %tmm2, %tmm3
+
+// CHECK: tdpbuud %tmm4, %tmm5, %tmm6
+// CHECK: encoding: [0xc4,0xe2,0x58,0x5e,0xf5]
+          tdpbuud %tmm4, %tmm5, %tmm6
+
+// CHECK: tdpbuud %tmm1, %tmm2, %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x70,0x5e,0xda]
+          tdpbuud %tmm1, %tmm2, %tmm3
+
+// CHECK: tdpbssd %tmm4, %tmm5, %tmm6
+// CHECK: encoding: [0xc4,0xe2,0x5b,0x5e,0xf5]
+          tdpbssd %tmm4, %tmm5, %tmm6
+
+// CHECK: tdpbssd %tmm1, %tmm2, %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x73,0x5e,0xda]
+          tdpbssd %tmm1, %tmm2, %tmm3
+
+// CHECK: tdpbsud %tmm4, %tmm5, %tmm6
+// CHECK: encoding: [0xc4,0xe2,0x5a,0x5e,0xf5]
+          tdpbsud %tmm4, %tmm5, %tmm6
+
+// CHECK: tdpbsud %tmm1, %tmm2, %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x72,0x5e,0xda]
+          tdpbsud %tmm1, %tmm2, %tmm3
+
+// CHECK: tdpbusd %tmm4, %tmm5, %tmm6
+// CHECK: encoding: [0xc4,0xe2,0x59,0x5e,0xf5]
+          tdpbusd %tmm4, %tmm5, %tmm6
+
+// CHECK: tdpbusd %tmm1, %tmm2, %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x71,0x5e,0xda]
+          tdpbusd %tmm1, %tmm2, %tmm3
+
+// CHECK: tdpbuud %tmm4, %tmm5, %tmm6
+// CHECK: encoding: [0xc4,0xe2,0x58,0x5e,0xf5]
+          tdpbuud %tmm4, %tmm5, %tmm6
+
+// CHECK: tdpbuud %tmm1, %tmm2, %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x70,0x5e,0xda]
+          tdpbuud %tmm1, %tmm2, %tmm3

diff  --git a/llvm/test/MC/X86/AMX/x86-64-amx-int8-intel.s b/llvm/test/MC/X86/AMX/x86-64-amx-int8-intel.s
new file mode 100644
index 000000000000..6b28b80c6660
--- /dev/null
+++ b/llvm/test/MC/X86/AMX/x86-64-amx-int8-intel.s
@@ -0,0 +1,129 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: tdpbssd tmm6, tmm5, tmm4
+// CHECK: encoding: [0xc4,0xe2,0x5b,0x5e,0xf5]
+          tdpbssd tmm6, tmm5, tmm4
+
+// CHECK: tdpbssd tmm3, tmm2, tmm1
+// CHECK: encoding: [0xc4,0xe2,0x73,0x5e,0xda]
+          tdpbssd tmm3, tmm2, tmm1
+
+// CHECK: tdpbsud tmm6, tmm5, tmm4
+// CHECK: encoding: [0xc4,0xe2,0x5a,0x5e,0xf5]
+          tdpbsud tmm6, tmm5, tmm4
+
+// CHECK: tdpbsud tmm3, tmm2, tmm1
+// CHECK: encoding: [0xc4,0xe2,0x72,0x5e,0xda]
+          tdpbsud tmm3, tmm2, tmm1
+
+// CHECK: tdpbusd tmm6, tmm5, tmm4
+// CHECK: encoding: [0xc4,0xe2,0x59,0x5e,0xf5]
+          tdpbusd tmm6, tmm5, tmm4
+
+// CHECK: tdpbusd tmm3, tmm2, tmm1
+// CHECK: encoding: [0xc4,0xe2,0x71,0x5e,0xda]
+          tdpbusd tmm3, tmm2, tmm1
+
+// CHECK: tdpbuud tmm6, tmm5, tmm4
+// CHECK: encoding: [0xc4,0xe2,0x58,0x5e,0xf5]
+          tdpbuud tmm6, tmm5, tmm4
+
+// CHECK: tdpbuud tmm3, tmm2, tmm1
+// CHECK: encoding: [0xc4,0xe2,0x70,0x5e,0xda]
+          tdpbuud tmm3, tmm2, tmm1
+
+// CHECK: tdpbssd tmm6, tmm5, tmm4
+// CHECK: encoding: [0xc4,0xe2,0x5b,0x5e,0xf5]
+          tdpbssd tmm6, tmm5, tmm4
+
+// CHECK: tdpbssd tmm3, tmm2, tmm1
+// CHECK: encoding: [0xc4,0xe2,0x73,0x5e,0xda]
+          tdpbssd tmm3, tmm2, tmm1
+
+// CHECK: tdpbsud tmm6, tmm5, tmm4
+// CHECK: encoding: [0xc4,0xe2,0x5a,0x5e,0xf5]
+          tdpbsud tmm6, tmm5, tmm4
+
+// CHECK: tdpbsud tmm3, tmm2, tmm1
+// CHECK: encoding: [0xc4,0xe2,0x72,0x5e,0xda]
+          tdpbsud tmm3, tmm2, tmm1
+
+// CHECK: tdpbusd tmm6, tmm5, tmm4
+// CHECK: encoding: [0xc4,0xe2,0x59,0x5e,0xf5]
+          tdpbusd tmm6, tmm5, tmm4
+
+// CHECK: tdpbusd tmm3, tmm2, tmm1
+// CHECK: encoding: [0xc4,0xe2,0x71,0x5e,0xda]
+          tdpbusd tmm3, tmm2, tmm1
+
+// CHECK: tdpbuud tmm6, tmm5, tmm4
+// CHECK: encoding: [0xc4,0xe2,0x58,0x5e,0xf5]
+          tdpbuud tmm6, tmm5, tmm4
+
+// CHECK: tdpbuud tmm3, tmm2, tmm1
+// CHECK: encoding: [0xc4,0xe2,0x70,0x5e,0xda]
+          tdpbuud tmm3, tmm2, tmm1
+
+// CHECK: tdpbssd tmm6, tmm5, tmm4
+// CHECK: encoding: [0xc4,0xe2,0x5b,0x5e,0xf5]
+          tdpbssd tmm6, tmm5, tmm4
+
+// CHECK: tdpbssd tmm3, tmm2, tmm1
+// CHECK: encoding: [0xc4,0xe2,0x73,0x5e,0xda]
+          tdpbssd tmm3, tmm2, tmm1
+
+// CHECK: tdpbsud tmm6, tmm5, tmm4
+// CHECK: encoding: [0xc4,0xe2,0x5a,0x5e,0xf5]
+          tdpbsud tmm6, tmm5, tmm4
+
+// CHECK: tdpbsud tmm3, tmm2, tmm1
+// CHECK: encoding: [0xc4,0xe2,0x72,0x5e,0xda]
+          tdpbsud tmm3, tmm2, tmm1
+
+// CHECK: tdpbusd tmm6, tmm5, tmm4
+// CHECK: encoding: [0xc4,0xe2,0x59,0x5e,0xf5]
+          tdpbusd tmm6, tmm5, tmm4
+
+// CHECK: tdpbusd tmm3, tmm2, tmm1
+// CHECK: encoding: [0xc4,0xe2,0x71,0x5e,0xda]
+          tdpbusd tmm3, tmm2, tmm1
+
+// CHECK: tdpbuud tmm6, tmm5, tmm4
+// CHECK: encoding: [0xc4,0xe2,0x58,0x5e,0xf5]
+          tdpbuud tmm6, tmm5, tmm4
+
+// CHECK: tdpbuud tmm3, tmm2, tmm1
+// CHECK: encoding: [0xc4,0xe2,0x70,0x5e,0xda]
+          tdpbuud tmm3, tmm2, tmm1
+
+// CHECK: tdpbssd tmm6, tmm5, tmm4
+// CHECK: encoding: [0xc4,0xe2,0x5b,0x5e,0xf5]
+          tdpbssd tmm6, tmm5, tmm4
+
+// CHECK: tdpbssd tmm3, tmm2, tmm1
+// CHECK: encoding: [0xc4,0xe2,0x73,0x5e,0xda]
+          tdpbssd tmm3, tmm2, tmm1
+
+// CHECK: tdpbsud tmm6, tmm5, tmm4
+// CHECK: encoding: [0xc4,0xe2,0x5a,0x5e,0xf5]
+          tdpbsud tmm6, tmm5, tmm4
+
+// CHECK: tdpbsud tmm3, tmm2, tmm1
+// CHECK: encoding: [0xc4,0xe2,0x72,0x5e,0xda]
+          tdpbsud tmm3, tmm2, tmm1
+
+// CHECK: tdpbusd tmm6, tmm5, tmm4
+// CHECK: encoding: [0xc4,0xe2,0x59,0x5e,0xf5]
+          tdpbusd tmm6, tmm5, tmm4
+
+// CHECK: tdpbusd tmm3, tmm2, tmm1
+// CHECK: encoding: [0xc4,0xe2,0x71,0x5e,0xda]
+          tdpbusd tmm3, tmm2, tmm1
+
+// CHECK: tdpbuud tmm6, tmm5, tmm4
+// CHECK: encoding: [0xc4,0xe2,0x58,0x5e,0xf5]
+          tdpbuud tmm6, tmm5, tmm4
+
+// CHECK: tdpbuud tmm3, tmm2, tmm1
+// CHECK: encoding: [0xc4,0xe2,0x70,0x5e,0xda]
+          tdpbuud tmm3, tmm2, tmm1

diff  --git a/llvm/test/MC/X86/AMX/x86-64-amx-tile-att.s b/llvm/test/MC/X86/AMX/x86-64-amx-tile-att.s
new file mode 100644
index 000000000000..9762c821f971
--- /dev/null
+++ b/llvm/test/MC/X86/AMX/x86-64-amx-tile-att.s
@@ -0,0 +1,198 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -show-encoding %s | FileCheck %s
+// Some AMX instructions must use SIB.
+
+// CHECK: tilerelease
+// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+          tilerelease
+
+// CHECK: tilezero %tmm6
+// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xf0]
+          tilezero %tmm6
+
+// CHECK: tilezero %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xd8]
+          tilezero %tmm3
+
+// CHECK: tilerelease
+// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+          tilerelease
+
+// CHECK: tilezero %tmm6
+// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xf0]
+          tilezero %tmm6
+
+// CHECK: tilezero %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xd8]
+          tilezero %tmm3
+
+// CHECK: ldtilecfg  268435456(%rbp,%r14,8)
+// CHECK: encoding: [0xc4,0xa2,0x78,0x49,0x84,0xf5,0x00,0x00,0x00,0x10]
+          ldtilecfg  268435456(%rbp,%r14,8)
+
+// CHECK: ldtilecfg  291(%r8,%rax,4)
+// CHECK: encoding: [0xc4,0xc2,0x78,0x49,0x84,0x80,0x23,0x01,0x00,0x00]
+          ldtilecfg  291(%r8,%rax,4)
+
+// CHECK: ldtilecfg  (%rip)
+// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0x05,0x00,0x00,0x00,0x00]
+          ldtilecfg  (%rip)
+
+// CHECK: ldtilecfg  -2048(,%rbp,2)
+// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff]
+          ldtilecfg  -2048(,%rbp,2)
+
+// CHECK: sttilecfg  268435456(%rbp,%r14,8)
+// CHECK: encoding: [0xc4,0xa2,0x79,0x49,0x84,0xf5,0x00,0x00,0x00,0x10]
+          sttilecfg  268435456(%rbp,%r14,8)
+
+// CHECK: sttilecfg  291(%r8,%rax,4)
+// CHECK: encoding: [0xc4,0xc2,0x79,0x49,0x84,0x80,0x23,0x01,0x00,0x00]
+          sttilecfg  291(%r8,%rax,4)
+
+// CHECK: sttilecfg  (%rip)
+// CHECK: encoding: [0xc4,0xe2,0x79,0x49,0x05,0x00,0x00,0x00,0x00]
+          sttilecfg  (%rip)
+
+// CHECK: sttilecfg  -2048(,%rbp,2)
+// CHECK: encoding: [0xc4,0xe2,0x79,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff]
+          sttilecfg  -2048(,%rbp,2)
+
+// CHECK: tileloadd 268435456(%rbp,%r14,8), %tmm6
+// CHECK: encoding: [0xc4,0xa2,0x7b,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          tileloadd 268435456(%rbp,%r14,8), %tmm6
+
+// CHECK: tileloadd 291(%r8,%rax,4), %tmm3
+// CHECK: encoding: [0xc4,0xc2,0x7b,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00]
+          tileloadd 291(%r8,%rax,4), %tmm3
+
+// CHECK: tileloadd 64(%rbx), %tmm4
+// CHECK: encoding: [0xc4,0xe2,0x7b,0x4b,0x64,0x23,0x40]
+          tileloadd 64(%rbx), %tmm4
+
+// CHECK: tileloadd -32(,%rbp,2), %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x7b,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff]
+          tileloadd -32(,%rbp,2), %tmm3
+
+// CHECK: tileloaddt1 268435456(%rbp,%r14,8), %tmm6
+// CHECK: encoding: [0xc4,0xa2,0x79,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          tileloaddt1 268435456(%rbp,%r14,8), %tmm6
+
+// CHECK: tileloaddt1 291(%r8,%rax,4), %tmm3
+// CHECK: encoding: [0xc4,0xc2,0x79,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00]
+          tileloaddt1 291(%r8,%rax,4), %tmm3
+
+// CHECK: tileloaddt1 -32(,%rbp,2), %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x79,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff]
+          tileloaddt1 -32(,%rbp,2), %tmm3
+
+// CHECK: tileloaddt1 16(%rbp), %tmm6
+// CHECK: encoding: [0xc4,0xe2,0x79,0x4b,0x74,0x25,0x10]
+          tileloaddt1 16(%rbp), %tmm6
+
+// CHECK: tilerelease
+// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+          tilerelease
+
+// CHECK: tilestored %tmm6, 268435456(%rbp,%r14,8)
+// CHECK: encoding: [0xc4,0xa2,0x7a,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          tilestored %tmm6, 268435456(%rbp,%r14,8)
+
+// CHECK: tilestored %tmm3, 291(%r8,%rax,4)
+// CHECK: encoding: [0xc4,0xc2,0x7a,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00]
+          tilestored %tmm3, 291(%r8,%rax,4)
+
+// CHECK: tilestored %tmm3, -32(,%rbp,2)
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff]
+          tilestored %tmm3, -32(,%rbp,2)
+
+// CHECK: tilestored %tmm3, (%r8)
+// CHECK: encoding: [0xc4,0xc2,0x7a,0x4b,0x1c,0x20]
+          tilestored %tmm3, (%r8)
+
+// CHECK: tilezero %tmm6
+// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xf0]
+          tilezero %tmm6
+
+// CHECK: tilezero %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xd8]
+          tilezero %tmm3
+
+// CHECK: ldtilecfg  268435456(%rbp,%r14,8)
+// CHECK: encoding: [0xc4,0xa2,0x78,0x49,0x84,0xf5,0x00,0x00,0x00,0x10]
+          ldtilecfg  268435456(%rbp,%r14,8)
+
+// CHECK: ldtilecfg  291(%r8,%rax,4)
+// CHECK: encoding: [0xc4,0xc2,0x78,0x49,0x84,0x80,0x23,0x01,0x00,0x00]
+          ldtilecfg  291(%r8,%rax,4)
+
+// CHECK: ldtilecfg  (%rip)
+// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0x05,0x00,0x00,0x00,0x00]
+          ldtilecfg  (%rip)
+
+// CHECK: ldtilecfg  -2048(,%rbp,2)
+// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff]
+          ldtilecfg  -2048(,%rbp,2)
+
+// CHECK: sttilecfg  268435456(%rbp,%r14,8)
+// CHECK: encoding: [0xc4,0xa2,0x79,0x49,0x84,0xf5,0x00,0x00,0x00,0x10]
+          sttilecfg  268435456(%rbp,%r14,8)
+
+// CHECK: sttilecfg  291(%r8,%rax,4)
+// CHECK: encoding: [0xc4,0xc2,0x79,0x49,0x84,0x80,0x23,0x01,0x00,0x00]
+          sttilecfg  291(%r8,%rax,4)
+
+// CHECK: sttilecfg  (%rip)
+// CHECK: encoding: [0xc4,0xe2,0x79,0x49,0x05,0x00,0x00,0x00,0x00]
+          sttilecfg  (%rip)
+
+// CHECK: sttilecfg  -2048(,%rbp,2)
+// CHECK: encoding: [0xc4,0xe2,0x79,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff]
+          sttilecfg  -2048(,%rbp,2)
+
+// CHECK: tileloadd 268435456(%rbp,%r14,8), %tmm6
+// CHECK: encoding: [0xc4,0xa2,0x7b,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          tileloadd 268435456(%rbp,%r14,8), %tmm6
+
+// CHECK: tileloadd 291(%r8,%rax,4), %tmm3
+// CHECK: encoding: [0xc4,0xc2,0x7b,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00]
+          tileloadd 291(%r8,%rax,4), %tmm3
+
+// CHECK: tileloadd -32(,%rbp,2), %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x7b,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff]
+          tileloadd -32(,%rbp,2), %tmm3
+
+// CHECK: tileloaddt1 268435456(%rbp,%r14,8), %tmm6
+// CHECK: encoding: [0xc4,0xa2,0x79,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          tileloaddt1 268435456(%rbp,%r14,8), %tmm6
+
+// CHECK: tileloaddt1 291(%r8,%rax,4), %tmm3
+// CHECK: encoding: [0xc4,0xc2,0x79,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00]
+          tileloaddt1 291(%r8,%rax,4), %tmm3
+
+// CHECK: tileloaddt1 -32(,%rbp,2), %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x79,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff]
+          tileloaddt1 -32(,%rbp,2), %tmm3
+
+// CHECK: tilerelease
+// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+          tilerelease
+
+// CHECK: tilestored %tmm6, 268435456(%rbp,%r14,8)
+// CHECK: encoding: [0xc4,0xa2,0x7a,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          tilestored %tmm6, 268435456(%rbp,%r14,8)
+
+// CHECK: tilestored %tmm3, 291(%r8,%rax,4)
+// CHECK: encoding: [0xc4,0xc2,0x7a,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00]
+          tilestored %tmm3, 291(%r8,%rax,4)
+
+// CHECK: tilestored %tmm3, -32(,%rbp,2)
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff]
+          tilestored %tmm3, -32(,%rbp,2)
+
+// CHECK: tilezero %tmm6
+// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xf0]
+          tilezero %tmm6
+
+// CHECK: tilezero %tmm3
+// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xd8]
+          tilezero %tmm3

diff  --git a/llvm/test/MC/X86/AMX/x86-64-amx-tile-intel.s b/llvm/test/MC/X86/AMX/x86-64-amx-tile-intel.s
new file mode 100644
index 000000000000..7fad214dac6a
--- /dev/null
+++ b/llvm/test/MC/X86/AMX/x86-64-amx-tile-intel.s
@@ -0,0 +1,197 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: tilerelease
+// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+          tilerelease
+
+// CHECK: tilezero tmm6
+// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xf0]
+          tilezero tmm6
+
+// CHECK: tilezero tmm3
+// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xd8]
+          tilezero tmm3
+
+// CHECK: tilerelease
+// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+          tilerelease
+
+// CHECK: tilezero tmm6
+// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xf0]
+          tilezero tmm6
+
+// CHECK: tilezero tmm3
+// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xd8]
+          tilezero tmm3
+
+// CHECK: ldtilecfg [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0xa2,0x78,0x49,0x84,0xf5,0x00,0x00,0x00,0x10]
+          ldtilecfg [rbp + 8*r14 + 268435456]
+
+// CHECK: ldtilecfg [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0xc2,0x78,0x49,0x84,0x80,0x23,0x01,0x00,0x00]
+          ldtilecfg [r8 + 4*rax + 291]
+
+// CHECK: ldtilecfg [rip]
+// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0x05,0x00,0x00,0x00,0x00]
+          ldtilecfg [rip]
+
+// CHECK: ldtilecfg [2*rbp - 2048]
+// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff]
+          ldtilecfg [2*rbp - 2048]
+
+// CHECK: sttilecfg [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0xa2,0x79,0x49,0x84,0xf5,0x00,0x00,0x00,0x10]
+          sttilecfg [rbp + 8*r14 + 268435456]
+
+// CHECK: sttilecfg [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0xc2,0x79,0x49,0x84,0x80,0x23,0x01,0x00,0x00]
+          sttilecfg [r8 + 4*rax + 291]
+
+// CHECK: sttilecfg [rip]
+// CHECK: encoding: [0xc4,0xe2,0x79,0x49,0x05,0x00,0x00,0x00,0x00]
+          sttilecfg [rip]
+
+// CHECK: sttilecfg [2*rbp - 2048]
+// CHECK: encoding: [0xc4,0xe2,0x79,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff]
+          sttilecfg [2*rbp - 2048]
+
+// CHECK: tileloadd tmm6, [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0xa2,0x7b,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          tileloadd tmm6, [rbp + 8*r14 + 268435456]
+
+// CHECK: tileloadd tmm3, [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0xc2,0x7b,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00]
+          tileloadd tmm3, [r8 + 4*rax + 291]
+
+// CHECK: tileloadd tmm3, [2*rbp - 32]
+// CHECK: encoding: [0xc4,0xe2,0x7b,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff]
+          tileloadd tmm3, [2*rbp - 32]
+
+// CHECK: tileloadd tmm4, [rbx + 64]
+// CHECK: encoding: [0xc4,0xe2,0x7b,0x4b,0x64,0x23,0x40]
+          tileloadd tmm4, [rbx + 64]
+
+// CHECK: tileloaddt1 tmm6, [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0xa2,0x79,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          tileloaddt1 tmm6, [rbp + 8*r14 + 268435456]
+
+// CHECK: tileloaddt1 tmm3, [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0xc2,0x79,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00]
+          tileloaddt1 tmm3, [r8 + 4*rax + 291]
+
+// CHECK: tileloaddt1 tmm3, [2*rbp - 32]
+// CHECK: encoding: [0xc4,0xe2,0x79,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff]
+          tileloaddt1 tmm3, [2*rbp - 32]
+
+// CHECK: tileloaddt1     tmm6, [rbp + 16]
+// CHECK: encoding: [0xc4,0xe2,0x79,0x4b,0x74,0x25,0x10]
+          tileloaddt1     tmm6, [rbp + 16]
+
+// CHECK: tilerelease
+// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+          tilerelease
+
+// CHECK: tilestored [rbp + 8*r14 + 268435456], tmm6
+// CHECK: encoding: [0xc4,0xa2,0x7a,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          tilestored [rbp + 8*r14 + 268435456], tmm6
+
+// CHECK: tilestored [r8 + 4*rax + 291], tmm3
+// CHECK: encoding: [0xc4,0xc2,0x7a,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00]
+          tilestored [r8 + 4*rax + 291], tmm3
+
+// CHECK: tilestored [2*rbp - 32], tmm3
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff]
+          tilestored [2*rbp - 32], tmm3
+
+// CHECK: tilestored [r8], tmm3
+// CHECK: encoding: [0xc4,0xc2,0x7a,0x4b,0x1c,0x20]
+          tilestored [r8], tmm3
+
+// CHECK: tilezero tmm6
+// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xf0]
+          tilezero tmm6
+
+// CHECK: tilezero tmm3
+// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xd8]
+          tilezero tmm3
+
+// CHECK: ldtilecfg [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0xa2,0x78,0x49,0x84,0xf5,0x00,0x00,0x00,0x10]
+          ldtilecfg [rbp + 8*r14 + 268435456]
+
+// CHECK: ldtilecfg [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0xc2,0x78,0x49,0x84,0x80,0x23,0x01,0x00,0x00]
+          ldtilecfg [r8 + 4*rax + 291]
+
+// CHECK: ldtilecfg [rip]
+// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0x05,0x00,0x00,0x00,0x00]
+          ldtilecfg [rip]
+
+// CHECK: ldtilecfg [2*rbp - 2048]
+// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff]
+          ldtilecfg [2*rbp - 2048]
+
+// CHECK: sttilecfg [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0xa2,0x79,0x49,0x84,0xf5,0x00,0x00,0x00,0x10]
+          sttilecfg [rbp + 8*r14 + 268435456]
+
+// CHECK: sttilecfg [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0xc2,0x79,0x49,0x84,0x80,0x23,0x01,0x00,0x00]
+          sttilecfg [r8 + 4*rax + 291]
+
+// CHECK: sttilecfg [rip]
+// CHECK: encoding: [0xc4,0xe2,0x79,0x49,0x05,0x00,0x00,0x00,0x00]
+          sttilecfg [rip]
+
+// CHECK: sttilecfg [2*rbp - 2048]
+// CHECK: encoding: [0xc4,0xe2,0x79,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff]
+          sttilecfg [2*rbp - 2048]
+
+// CHECK: tileloadd tmm6, [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0xa2,0x7b,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          tileloadd tmm6, [rbp + 8*r14 + 268435456]
+
+// CHECK: tileloadd tmm3, [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0xc2,0x7b,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00]
+          tileloadd tmm3, [r8 + 4*rax + 291]
+
+// CHECK: tileloadd tmm3, [2*rbp - 32]
+// CHECK: encoding: [0xc4,0xe2,0x7b,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff]
+          tileloadd tmm3, [2*rbp - 32]
+
+// CHECK: tileloaddt1 tmm6, [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0xa2,0x79,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          tileloaddt1 tmm6, [rbp + 8*r14 + 268435456]
+
+// CHECK: tileloaddt1 tmm3, [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0xc2,0x79,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00]
+          tileloaddt1 tmm3, [r8 + 4*rax + 291]
+
+// CHECK: tileloaddt1 tmm3, [2*rbp - 32]
+// CHECK: encoding: [0xc4,0xe2,0x79,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff]
+          tileloaddt1 tmm3, [2*rbp - 32]
+
+// CHECK: tilerelease
+// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+          tilerelease
+
+// CHECK: tilestored [rbp + 8*r14 + 268435456], tmm6
+// CHECK: encoding: [0xc4,0xa2,0x7a,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          tilestored [rbp + 8*r14 + 268435456], tmm6
+
+// CHECK: tilestored [r8 + 4*rax + 291], tmm3
+// CHECK: encoding: [0xc4,0xc2,0x7a,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00]
+          tilestored [r8 + 4*rax + 291], tmm3
+
+// CHECK: tilestored [2*rbp - 32], tmm3
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff]
+          tilestored [2*rbp - 32], tmm3
+
+// CHECK: tilezero tmm6
+// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xf0]
+          tilezero tmm6
+
+// CHECK: tilezero tmm3
+// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xd8]
+          tilezero tmm3

diff  --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
index fe235e8a3dfb..7a84f936e0d0 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -40,6 +40,7 @@ static const char *isInvalidMemoryInstr(const Instruction &Instr) {
   case X86II::MRMSrcRegOp4:
   case X86II::MRMSrcRegCC:
   case X86II::MRMXrCC:
+  case X86II::MRMr0:
   case X86II::MRMXr:
   case X86II::MRM0r:
   case X86II::MRM1r:

diff  --git a/llvm/utils/TableGen/X86DisassemblerTables.cpp b/llvm/utils/TableGen/X86DisassemblerTables.cpp
index 76e4fd9a13ee..3a95df23f420 100644
--- a/llvm/utils/TableGen/X86DisassemblerTables.cpp
+++ b/llvm/utils/TableGen/X86DisassemblerTables.cpp
@@ -595,6 +595,7 @@ static ModRMDecisionType getDecisionType(ModRMDecision &decision) {
   bool satisfiesOneEntry = true;
   bool satisfiesSplitRM = true;
   bool satisfiesSplitReg = true;
+  bool satisfiesSplitRegM = true;
   bool satisfiesSplitMisc = true;
 
   for (unsigned index = 0; index < 256; ++index) {
@@ -616,6 +617,10 @@ static ModRMDecisionType getDecisionType(ModRMDecision &decision) {
     if (((index & 0xc0) != 0xc0) &&
        (decision.instructionIDs[index] != decision.instructionIDs[index&0x38]))
       satisfiesSplitMisc = false;
+
+    if (((index & 0xc0) == 0xc0) &&
+       (decision.instructionIDs[index] != decision.instructionIDs[index&0xc7]))
+      satisfiesSplitRegM = false;
   }
 
   if (satisfiesOneEntry)
@@ -627,6 +632,9 @@ static ModRMDecisionType getDecisionType(ModRMDecision &decision) {
   if (satisfiesSplitReg && satisfiesSplitMisc)
     return MODRM_SPLITREG;
 
+  if (satisfiesSplitRegM)
+    return MODRM_SPLITREGM;
+
   if (satisfiesSplitMisc)
     return MODRM_SPLITMISC;
 
@@ -691,6 +699,10 @@ void DisassemblerTables::emitModRMDecision(raw_ostream &o1, raw_ostream &o2,
       for (unsigned index = 0xc0; index < 256; index += 8)
         ModRMDecision.push_back(decision.instructionIDs[index]);
       break;
+    case MODRM_SPLITREGM:
+      for (unsigned index = 0xc0; index < 256; index += 8)
+        ModRMDecision.push_back(decision.instructionIDs[index]);
+      break;
     case MODRM_SPLITMISC:
       for (unsigned index = 0; index < 64; index += 8)
         ModRMDecision.push_back(decision.instructionIDs[index]);
@@ -732,6 +744,9 @@ void DisassemblerTables::emitModRMDecision(raw_ostream &o1, raw_ostream &o2,
     case MODRM_SPLITREG:
       sEntryNumber += 16;
       break;
+    case MODRM_SPLITREGM:
+      sEntryNumber += 8;
+      break;
     case MODRM_SPLITMISC:
       sEntryNumber += 8 + 64;
       break;

diff  --git a/llvm/utils/TableGen/X86ModRMFilters.cpp b/llvm/utils/TableGen/X86ModRMFilters.cpp
index 98e6fb6104d7..cf7507094fa7 100644
--- a/llvm/utils/TableGen/X86ModRMFilters.cpp
+++ b/llvm/utils/TableGen/X86ModRMFilters.cpp
@@ -18,4 +18,6 @@ void ModFilter::anchor() { }
 
 void ExtendedFilter::anchor() { }
 
+void ExtendedRMFilter::anchor() { }
+
 void ExactFilter::anchor() { }

diff  --git a/llvm/utils/TableGen/X86ModRMFilters.h b/llvm/utils/TableGen/X86ModRMFilters.h
index c77b4c21aec4..f0b8af5fb82a 100644
--- a/llvm/utils/TableGen/X86ModRMFilters.h
+++ b/llvm/utils/TableGen/X86ModRMFilters.h
@@ -108,6 +108,29 @@ class ExtendedFilter : public ModRMFilter {
   }
 };
 
+/// ExtendedRMFilter - Accepts ModR/M bytes whose mod field [bits 7-6] is 11
+///   and whose low three bits [bits 2-0] equal the stored nnn value.
+class ExtendedRMFilter : public ModRMFilter {
+  void anchor() override;
+  bool R;
+  uint8_t NNN;
+public:
+  /// Constructor
+  ///
+  /// \param r   True requires mod == 11; if false, accepts() rejects every byte.
+  ///            The name is explained at ModFilter.
+  /// \param nnn The value that bits 2-0 of the ModR/M byte must equal.
+  ExtendedRMFilter(bool r, uint8_t nnn) :
+    ModRMFilter(),
+    R(r),
+    NNN(nnn) {
+  }
+
+  bool accepts(uint8_t modRM) const override {
+    return ((R && ((modRM & 0xc0) == 0xc0)) &&
+            ((modRM & 0x7) == NNN));
+  }
+};
 /// ExactFilter - The occasional extended opcode (such as VMCALL or MONITOR)
 ///   requires the ModR/M byte to have a specific value.
 class ExactFilter : public ModRMFilter {

diff  --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp
index f31e43bd9fca..84f6d5210d74 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.cpp
+++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp
@@ -352,10 +352,13 @@ void RecognizableInstr::adjustOperandEncoding(OperandEncoding &encoding) {
   // The scaling factor for AVX512 compressed displacement encoding is an
   // instruction attribute.  Adjust the ModRM encoding type to include the
   // scale for compressed displacement.
-  if ((encoding != ENCODING_RM && encoding != ENCODING_VSIB) ||CD8_Scale == 0)
+  if ((encoding != ENCODING_RM &&
+       encoding != ENCODING_VSIB &&
+       encoding != ENCODING_SIB) ||CD8_Scale == 0)
     return;
   encoding = (OperandEncoding)(encoding + Log2_32(CD8_Scale));
   assert(((encoding >= ENCODING_RM && encoding <= ENCODING_RM_CD64) ||
+          (encoding == ENCODING_SIB) ||
           (encoding >= ENCODING_VSIB && encoding <= ENCODING_VSIB_CD64)) &&
          "Invalid CDisp scaling");
 }
@@ -519,6 +522,7 @@ void RecognizableInstr::emitInstructionSpecifier() {
     HANDLE_OPTIONAL(immediate)
     break;
   case X86Local::MRMDestMem:
+  case X86Local::MRMDestMemFSIB:
     // Operand 1 is a memory operand (possibly SIB-extended)
     // Operand 2 is a register operand in the Reg/Opcode field.
     // - In AVX, there is a register operand in the VEX.vvvv field here -
@@ -589,6 +593,7 @@ void RecognizableInstr::emitInstructionSpecifier() {
     HANDLE_OPERAND(opcodeModifier)
     break;
   case X86Local::MRMSrcMem:
+  case X86Local::MRMSrcMemFSIB:
     // Operand 1 is a register operand in the Reg/Opcode field.
     // Operand 2 is a memory operand (possibly SIB-extended)
     // - In AVX, there is a register operand in the VEX.vvvv field here -
@@ -641,6 +646,10 @@ void RecognizableInstr::emitInstructionSpecifier() {
     HANDLE_OPERAND(rmRegister)
     HANDLE_OPERAND(opcodeModifier)
     break;
+  case X86Local::MRMr0:
+    // Operand 1 is a register operand in the Reg/Opcode field.
+    HANDLE_OPERAND(roRegister)
+    break;
   case X86Local::MRMXr:
   case X86Local::MRM0r:
   case X86Local::MRM1r:
@@ -772,7 +781,9 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
     filter = std::make_unique<ModFilter>(true);
     break;
   case X86Local::MRMDestMem:
+  case X86Local::MRMDestMemFSIB:
   case X86Local::MRMSrcMem:
+  case X86Local::MRMSrcMemFSIB:
   case X86Local::MRMSrcMem4VOp3:
   case X86Local::MRMSrcMemOp4:
   case X86Local::MRMSrcMemCC:
@@ -792,6 +803,9 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
   case X86Local::MRM6X: case X86Local::MRM7X:
     filter = std::make_unique<ExtendedFilter>(true, Form - X86Local::MRM0X);
     break;
+  case X86Local::MRMr0:
+    filter = std::make_unique<ExtendedRMFilter>(true, Form - X86Local::MRMr0);
+    break;
   case X86Local::MRM0m: case X86Local::MRM1m:
   case X86Local::MRM2m: case X86Local::MRM3m:
   case X86Local::MRM4m: case X86Local::MRM5m:
@@ -911,6 +925,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
   TYPE("i64imm",              TYPE_IMM)
   TYPE("anymem",              TYPE_M)
   TYPE("opaquemem",           TYPE_M)
+  TYPE("sibmem",              TYPE_MSIB)
   TYPE("SEGMENT_REG",         TYPE_SEGMENTREG)
   TYPE("DEBUG_REG",           TYPE_DEBUGREG)
   TYPE("CONTROL_REG",         TYPE_CONTROLREG)
@@ -969,6 +984,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
   TYPE("vz256mem",            TYPE_MVSIBZ)
   TYPE("vz512mem",            TYPE_MVSIBZ)
   TYPE("BNDR",                TYPE_BNDR)
+  TYPE("TILE",                TYPE_TMM)
   errs() << "Unhandled type string " << s << "\n";
   llvm_unreachable("Unhandled type string");
 }
@@ -1008,6 +1024,7 @@ RecognizableInstr::immediateEncodingFromString(const std::string &s,
   ENCODING("VR128X",          ENCODING_IB)
   ENCODING("VR256X",          ENCODING_IB)
   ENCODING("VR512",           ENCODING_IB)
+  ENCODING("TILE",            ENCODING_IB)
   errs() << "Unhandled immediate encoding " << s << "\n";
   llvm_unreachable("Unhandled immediate encoding");
 }
@@ -1046,6 +1063,7 @@ RecognizableInstr::rmRegisterEncodingFromString(const std::string &s,
   ENCODING("VK8PAIR",         ENCODING_RM)
   ENCODING("VK16PAIR",        ENCODING_RM)
   ENCODING("BNDR",            ENCODING_RM)
+  ENCODING("TILE",            ENCODING_RM)
   errs() << "Unhandled R/M register encoding " << s << "\n";
   llvm_unreachable("Unhandled R/M register encoding");
 }
@@ -1092,6 +1110,7 @@ RecognizableInstr::roRegisterEncodingFromString(const std::string &s,
   ENCODING("VK32WM",          ENCODING_REG)
   ENCODING("VK64WM",          ENCODING_REG)
   ENCODING("BNDR",            ENCODING_REG)
+  ENCODING("TILE",            ENCODING_REG)
   errs() << "Unhandled reg/opcode register encoding " << s << "\n";
   llvm_unreachable("Unhandled reg/opcode register encoding");
 }
@@ -1123,6 +1142,7 @@ RecognizableInstr::vvvvRegisterEncodingFromString(const std::string &s,
   ENCODING("VK4PAIR",         ENCODING_VVVV)
   ENCODING("VK8PAIR",         ENCODING_VVVV)
   ENCODING("VK16PAIR",        ENCODING_VVVV)
+  ENCODING("TILE",            ENCODING_VVVV)
   errs() << "Unhandled VEX.vvvv register encoding " << s << "\n";
   llvm_unreachable("Unhandled VEX.vvvv register encoding");
 }
@@ -1163,6 +1183,7 @@ RecognizableInstr::memoryEncodingFromString(const std::string &s,
   ENCODING("lea64mem",        ENCODING_RM)
   ENCODING("anymem",          ENCODING_RM)
   ENCODING("opaquemem",       ENCODING_RM)
+  ENCODING("sibmem",          ENCODING_SIB)
   ENCODING("vx64mem",         ENCODING_VSIB)
   ENCODING("vx128mem",        ENCODING_VSIB)
   ENCODING("vx256mem",        ENCODING_VSIB)

diff  --git a/llvm/utils/TableGen/X86RecognizableInstr.h b/llvm/utils/TableGen/X86RecognizableInstr.h
index 2bf6d1a39803..a7b88b4d12ed 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.h
+++ b/llvm/utils/TableGen/X86RecognizableInstr.h
@@ -103,6 +103,9 @@ namespace X86Local {
     RawFrmImm16   = 8,
     AddCCFrm      = 9,
     PrefixByte    = 10,
+    MRMr0          = 21,
+    MRMSrcMemFSIB  = 22,
+    MRMDestMemFSIB = 23,
     MRMDestMem     = 24,
     MRMSrcMem      = 25,
     MRMSrcMem4VOp3 = 26,


        


More information about the llvm-commits mailing list