[llvm] [LLVM][TableGen] Parameterize NumToSkip in DecoderEmitter (PR #136187)

Thu Apr 17 13:29:18 PDT 2025

https://github.com/jurahul updated https://github.com/llvm/llvm-project/pull/136187

>From 9de5ff58197c1a394080ebb80bd5758f3e055a45 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Wed, 16 Apr 2025 18:27:37 -0700
Subject: [PATCH 1/2] Reapply "Reapply "[LLVM][TableGen] Parameterize NumToSkip
 in DecoderEmitter" (#136017)" (#136068)

This reverts commit 6d8bf3cf3dcc5d85bec7b1e70a59a02cdfdaa1b4.
---
 llvm/lib/Target/AArch64/CMakeLists.txt    |   2 +-
 llvm/test/TableGen/VarLenDecoder.td       |   4 +-
 llvm/test/TableGen/trydecode-emission.td  |  10 +-
 llvm/test/TableGen/trydecode-emission2.td |  16 +--
 llvm/test/TableGen/trydecode-emission3.td |   2 +-
 llvm/test/TableGen/trydecode-emission4.td |   2 +-
 llvm/utils/TableGen/DecoderEmitter.cpp    | 115 ++++++++++++----------
 7 files changed, 83 insertions(+), 68 deletions(-)

diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index 2300e479bc110..ba1d1605ec104 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -7,7 +7,7 @@ tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer)
 tablegen(LLVM AArch64GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
 tablegen(LLVM AArch64GenCallingConv.inc -gen-callingconv)
 tablegen(LLVM AArch64GenDAGISel.inc -gen-dag-isel)
-tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler)
+tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler --num-to-skip-size=3)
 tablegen(LLVM AArch64GenFastISel.inc -gen-fast-isel)
 tablegen(LLVM AArch64GenGlobalISel.inc -gen-global-isel)
 tablegen(LLVM AArch64GenO0PreLegalizeGICombiner.inc -gen-global-isel-combiner
diff --git a/llvm/test/TableGen/VarLenDecoder.td b/llvm/test/TableGen/VarLenDecoder.td
index 5cf0bf8911859..b77702ff7c5c1 100644
--- a/llvm/test/TableGen/VarLenDecoder.td
+++ b/llvm/test/TableGen/VarLenDecoder.td
@@ -47,9 +47,9 @@ def FOO32 : MyVarInst<MemOp32> {
 }
 
 // CHECK:      MCD::OPC_ExtractField, 3, 5,  // Inst{7-3} ...
-// CHECK-NEXT: MCD::OPC_FilterValue, 8, 4, 0, 0, // Skip to: 12
+// CHECK-NEXT: MCD::OPC_FilterValue, 8, 4, 0, // Skip to: 11
 // CHECK-NEXT: MCD::OPC_Decode, {{[0-9]+}}, {{[0-9]+}}, 0, // Opcode: FOO16
-// CHECK-NEXT: MCD::OPC_FilterValue, 9, 4, 0, 0, // Skip to: 21
+// CHECK-NEXT: MCD::OPC_FilterValue, 9, 4, 0, // Skip to: 19
 // CHECK-NEXT: MCD::OPC_Decode, {{[0-9]+}}, {{[0-9]+}}, 1, // Opcode: FOO32
 // CHECK-NEXT: MCD::OPC_Fail,
 
diff --git a/llvm/test/TableGen/trydecode-emission.td b/llvm/test/TableGen/trydecode-emission.td
index 20d2446eeac7f..2b4239f4fbe65 100644
--- a/llvm/test/TableGen/trydecode-emission.td
+++ b/llvm/test/TableGen/trydecode-emission.td
@@ -34,10 +34,10 @@ def InstB : TestInstruction {
 }
 
 // CHECK:      /* 0 */       MCD::OPC_ExtractField, 4, 4,  // Inst{7-4} ...
-// CHECK-NEXT: /* 3 */       MCD::OPC_FilterValue, 0, 18, 0, 0, // Skip to: 26
-// CHECK-NEXT: /* 8 */       MCD::OPC_CheckField, 2, 2, 0, 7, 0, 0, // Skip to: 22
-// CHECK-NEXT: /* 15 */      MCD::OPC_TryDecode, {{[0-9]+}}, {{[0-9]+}}, 0, 0, 0, 0, // Opcode: InstB, skip to: 22
-// CHECK-NEXT: /* 22 */      MCD::OPC_Decode, {{[0-9]+}}, {{[0-9]+}}, 1, // Opcode: InstA
-// CHECK-NEXT: /* 26 */      MCD::OPC_Fail,
+// CHECK-NEXT: /* 3 */       MCD::OPC_FilterValue, 0, 16, 0, // Skip to: 23
+// CHECK-NEXT: /* 7 */       MCD::OPC_CheckField, 2, 2, 0, 6, 0, // Skip to: 19
+// CHECK-NEXT: /* 13 */      MCD::OPC_TryDecode, {{[0-9]+}}, {{[0-9]+}}, 0, 0, 0, // Opcode: InstB, skip to: 19
+// CHECK-NEXT: /* 19 */      MCD::OPC_Decode, {{[0-9]+}}, {{[0-9]+}}, 1, // Opcode: InstA
+// CHECK-NEXT: /* 23 */      MCD::OPC_Fail,
 
 // CHECK: if (!Check(S, DecodeInstB(MI, insn, Address, Decoder))) { DecodeComplete = false; return MCDisassembler::Fail; }
diff --git a/llvm/test/TableGen/trydecode-emission2.td b/llvm/test/TableGen/trydecode-emission2.td
index 0584034e41233..7d30474058f73 100644
--- a/llvm/test/TableGen/trydecode-emission2.td
+++ b/llvm/test/TableGen/trydecode-emission2.td
@@ -31,14 +31,14 @@ def InstB : TestInstruction {
 }
 
 // CHECK:      /* 0 */       MCD::OPC_ExtractField, 2, 1,  // Inst{2} ...
-// CHECK-NEXT: /* 3 */       MCD::OPC_FilterValue, 0, 36, 0, 0, // Skip to: 44
-// CHECK-NEXT: /* 8 */       MCD::OPC_ExtractField, 5, 3,  // Inst{7-5} ...
-// CHECK-NEXT: /* 11 */      MCD::OPC_FilterValue, 0, 28, 0, 0, // Skip to: 44
-// CHECK-NEXT: /* 16 */      MCD::OPC_CheckField, 0, 2, 3, 7, 0, 0, // Skip to: 30
-// CHECK-NEXT: /* 23 */      MCD::OPC_TryDecode, {{[0-9]+}}, {{[0-9]+}}, 0, 0, 0, 0, // Opcode: InstB, skip to: 30
-// CHECK-NEXT: /* 30 */      MCD::OPC_CheckField, 3, 2, 0, 7, 0, 0, // Skip to: 44
-// CHECK-NEXT: /* 37 */      MCD::OPC_TryDecode, {{[0-9]+}}, {{[0-9]+}}, 1, 0, 0, 0, // Opcode: InstA, skip to: 44
-// CHECK-NEXT: /* 44 */      MCD::OPC_Fail,
+// CHECK-NEXT: /* 3 */       MCD::OPC_FilterValue, 0, 31, 0, // Skip to: 38
+// CHECK-NEXT: /* 7 */       MCD::OPC_ExtractField, 5, 3,  // Inst{7-5} ...
+// CHECK-NEXT: /* 10 */      MCD::OPC_FilterValue, 0, 24, 0, // Skip to: 38
+// CHECK-NEXT: /* 14 */      MCD::OPC_CheckField, 0, 2, 3, 6, 0, // Skip to: 26
+// CHECK-NEXT: /* 20 */      MCD::OPC_TryDecode, {{[0-9]+}}, {{[0-9]+}}, 0, 0, 0, // Opcode: InstB, skip to: 26
+// CHECK-NEXT: /* 26 */      MCD::OPC_CheckField, 3, 2, 0, 6, 0, // Skip to: 38
+// CHECK-NEXT: /* 32 */      MCD::OPC_TryDecode, {{[0-9]+}}, {{[0-9]+}}, 1, 0, 0, // Opcode: InstA, skip to: 38
+// CHECK-NEXT: /* 38 */      MCD::OPC_Fail,
 
 // CHECK: if (!Check(S, DecodeInstB(MI, insn, Address, Decoder))) { DecodeComplete = false; return MCDisassembler::Fail; }
 // CHECK: if (!Check(S, DecodeInstA(MI, insn, Address, Decoder))) { DecodeComplete = false; return MCDisassembler::Fail; }
diff --git a/llvm/test/TableGen/trydecode-emission3.td b/llvm/test/TableGen/trydecode-emission3.td
index 4c5be7e1af229..0abbe62fe337e 100644
--- a/llvm/test/TableGen/trydecode-emission3.td
+++ b/llvm/test/TableGen/trydecode-emission3.td
@@ -1,4 +1,4 @@
-// RUN: llvm-tblgen -gen-disassembler -I %p/../../include %s | FileCheck %s
+ // RUN: llvm-tblgen -gen-disassembler --num-to-skip-size=3 -I %p/../../include %s  | FileCheck %s
 
 include "llvm/Target/Target.td"
 
diff --git a/llvm/test/TableGen/trydecode-emission4.td b/llvm/test/TableGen/trydecode-emission4.td
index 1e51ba5e40768..413e4a0d1275a 100644
--- a/llvm/test/TableGen/trydecode-emission4.td
+++ b/llvm/test/TableGen/trydecode-emission4.td
@@ -1,4 +1,4 @@
-// RUN: llvm-tblgen -gen-disassembler -I %p/../../include %s | FileCheck %s
+// RUN: llvm-tblgen -gen-disassembler --num-to-skip-size=3 -I %p/../../include %s | FileCheck %s
 
 // Test for OPC_ExtractField/OPC_CheckField with start bit > 255.
 // These large start values may arise for architectures with long instruction
diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 9c6015cc24576..eff63c6b45bb3 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -32,8 +32,10 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/LEB128.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
@@ -76,6 +78,12 @@ static cl::opt<SuppressLevel> DecoderEmitterSuppressDuplicates(
             "significantly reducing Table Duplications")),
     cl::init(SUPPRESSION_DISABLE), cl::cat(DisassemblerEmitterCat));
 
+static cl::opt<uint32_t>
+    NumToSkipSizeInBytes("num-to-skip-size",
+                         cl::desc("number of bytes to use for num-to-skip "
+                                  "entries in the decoder table (2 or 3)"),
+                         cl::init(2), cl::cat(DisassemblerEmitterCat));
+
 STATISTIC(NumEncodings, "Number of encodings considered");
 STATISTIC(NumEncodingsLackingDisasm,
           "Number of encodings without disassembler info");
@@ -130,10 +138,29 @@ struct DecoderTable : public std::vector<uint8_t> {
   // in the table for patching.
   size_t insertNumToSkip() {
     size_t Size = size();
-    insert(end(), 3, 0);
+    insert(end(), NumToSkipSizeInBytes, 0);
     return Size;
   }
+
+  void patchNumToSkip(size_t FixupIdx, uint32_t DestIdx) {
+    // Calculate the distance from the byte following the fixup entry byte
+    // to the destination. The Target is calculated from after the
+    // `NumToSkipSizeInBytes`-byte NumToSkip entry itself, so subtract
+    // `NumToSkipSizeInBytes` from the displacement here to account for that.
+    assert(DestIdx >= FixupIdx + NumToSkipSizeInBytes &&
+           "Expecting a forward jump in the decoding table");
+    uint32_t Delta = DestIdx - FixupIdx - NumToSkipSizeInBytes;
+    if (!isUIntN(8 * NumToSkipSizeInBytes, Delta))
+      PrintFatalError(
+          "disassembler decoding table too large, try --num-to-skip-size=3");
+
+    (*this)[FixupIdx] = static_cast<uint8_t>(Delta);
+    (*this)[FixupIdx + 1] = static_cast<uint8_t>(Delta >> 8);
+    if (NumToSkipSizeInBytes == 3)
+      (*this)[FixupIdx + 2] = static_cast<uint8_t>(Delta >> 16);
+  }
 };
+
 struct DecoderTableInfo {
   DecoderTable Table;
   FixupScopeList FixupStack;
@@ -690,19 +717,8 @@ static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups,
                                uint32_t DestIdx) {
   // Any NumToSkip fixups in the current scope can resolve to the
   // current location.
-  for (uint32_t FixupIdx : reverse(Fixups)) {
-    // Calculate the distance from the byte following the fixup entry byte
-    // to the destination. The Target is calculated from after the 24-bit
-    // NumToSkip entry itself, so subtract three from the displacement here
-    // to account for that.
-    uint32_t Delta = DestIdx - FixupIdx - 3;
-    // Our NumToSkip entries are 24-bits. Make sure our table isn't too
-    // big.
-    assert(isUInt<24>(Delta));
-    Table[FixupIdx] = (uint8_t)Delta;
-    Table[FixupIdx + 1] = (uint8_t)(Delta >> 8);
-    Table[FixupIdx + 2] = (uint8_t)(Delta >> 16);
-  }
+  for (uint32_t FixupIdx : Fixups)
+    Table.patchNumToSkip(FixupIdx, DestIdx);
 }
 
 // Emit table entries to decode instructions given a segment or segments
@@ -759,15 +775,9 @@ void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const {
     Delegate->emitTableEntries(TableInfo);
 
     // Now that we've emitted the body of the handler, update the NumToSkip
-    // of the filter itself to be able to skip forward when false. Subtract
-    // three as to account for the width of the NumToSkip field itself.
-    if (PrevFilter) {
-      uint32_t NumToSkip = Table.size() - PrevFilter - 3;
-      assert(isUInt<24>(NumToSkip) && "disassembler decoding table too large!");
-      Table[PrevFilter] = (uint8_t)NumToSkip;
-      Table[PrevFilter + 1] = (uint8_t)(NumToSkip >> 8);
-      Table[PrevFilter + 2] = (uint8_t)(NumToSkip >> 16);
-    }
+    // of the filter itself to be able to skip forward when false.
+    if (PrevFilter)
+      Table.patchNumToSkip(PrevFilter, Table.size());
   }
 
   // If there is no fallthrough, then the final filter should get fixed
@@ -814,7 +824,8 @@ void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
     OS << (unsigned)*I++ << ", ";
   };
 
-  // Emit 24-bit numtoskip value to OS, returning the NumToSkip value.
+  // Emit `NumToSkipSizeInBytes`-byte numtoskip value to OS, returning the
+  // NumToSkip value.
   auto emitNumToSkip = [](DecoderTable::const_iterator &I,
                           formatted_raw_ostream &OS) {
     uint8_t Byte = *I++;
@@ -823,9 +834,11 @@ void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
     Byte = *I++;
     OS << (unsigned)Byte << ", ";
     NumToSkip |= Byte << 8;
-    Byte = *I++;
-    OS << (unsigned)(Byte) << ", ";
-    NumToSkip |= Byte << 16;
+    if (NumToSkipSizeInBytes == 3) {
+      Byte = *I++;
+      OS << (unsigned)(Byte) << ", ";
+      NumToSkip |= Byte << 16;
+    }
     return NumToSkip;
   };
 
@@ -867,7 +880,7 @@ void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
       // The filter value is ULEB128 encoded.
       emitULEB128(I, OS);
 
-      // 24-bit numtoskip value.
+      // numtoskip value.
       uint32_t NumToSkip = emitNumToSkip(I, OS);
       OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
       break;
@@ -883,7 +896,7 @@ void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
       // ULEB128 encoded field value.
       emitULEB128(I, OS);
 
-      // 24-bit numtoskip value.
+      // numtoskip value.
       uint32_t NumToSkip = emitNumToSkip(I, OS);
       OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
       break;
@@ -893,7 +906,7 @@ void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
       OS << Indent << "MCD::OPC_CheckPredicate, ";
       emitULEB128(I, OS);
 
-      // 24-bit numtoskip value.
+      // numtoskip value.
       uint32_t NumToSkip = emitNumToSkip(I, OS);
       OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
       break;
@@ -925,7 +938,7 @@ void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
 
       // Fallthrough for OPC_TryDecode.
 
-      // 24-bit numtoskip value.
+      // numtoskip value.
       uint32_t NumToSkip = emitNumToSkip(I, OS);
 
       OS << "// Opcode: " << NumberedEncodings[EncodingID]
@@ -1411,9 +1424,9 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
     TableInfo.Table.push_back(NumBits);
     TableInfo.Table.insertULEB128(Ilnd.FieldVal);
 
-    // The fixup is always 24-bits, so go ahead and allocate the space
-    // in the table so all our relative position calculations work OK even
-    // before we fully resolve the real value here.
+    // Allocate space in the table for fixup (NumToSkipSizeInBytes) so all
+    // our relative position calculations work OK even before we fully
+    // resolve the real value here.
 
     // Push location for NumToSkip backpatching.
     TableInfo.FixupStack.back().push_back(TableInfo.Table.insertNumToSkip());
@@ -2157,7 +2170,18 @@ insertBits(InsnType &field, uint64_t bits, unsigned startBit, unsigned numBits)
 // decodeInstruction().
 static void emitDecodeInstruction(formatted_raw_ostream &OS,
                                   bool IsVarLenInst) {
+  OS << formatv("\nconstexpr unsigned NumToSkipSizeInBytes = {};\n",
+                NumToSkipSizeInBytes);
+
   OS << R"(
+inline unsigned decodeNumToSkip(const uint8_t *&Ptr) {
+  unsigned NumToSkip = *Ptr++;
+  NumToSkip |= (*Ptr++) << 8;
+  if constexpr (NumToSkipSizeInBytes == 3)
+    NumToSkip |= (*Ptr++) << 16;
+  return NumToSkip;
+}
+
 template <typename InsnType>
 static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
                                       InsnType insn, uint64_t Address,
@@ -2195,10 +2219,7 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
       // Decode the field value.
       uint64_t Val = decodeULEB128AndIncUnsafe(++Ptr);
       bool Failed = Val != CurFieldValue;
-      // NumToSkip is a plain 24-bit integer.
-      unsigned NumToSkip = *Ptr++;
-      NumToSkip |= (*Ptr++) << 8;
-      NumToSkip |= (*Ptr++) << 16;
+      unsigned NumToSkip = decodeNumToSkip(Ptr);
 
       // Perform the filter operation.
       if (Failed)
@@ -2222,10 +2243,7 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
       uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen);
       Ptr += PtrLen;
       bool Failed = ExpectedValue != FieldValue;
-      // NumToSkip is a plain 24-bit integer.
-      unsigned NumToSkip = *Ptr++;
-      NumToSkip |= (*Ptr++) << 8;
-      NumToSkip |= (*Ptr++) << 16;
+      unsigned NumToSkip = decodeNumToSkip(Ptr);
 
       // If the actual and expected values don't match, skip.
       if (Failed)
@@ -2240,10 +2258,7 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
     case MCD::OPC_CheckPredicate: {
       // Decode the Predicate Index value.
       unsigned PIdx = decodeULEB128AndIncUnsafe(++Ptr);
-      // NumToSkip is a plain 24-bit integer.
-      unsigned NumToSkip = *Ptr++;
-      NumToSkip |= (*Ptr++) << 8;
-      NumToSkip |= (*Ptr++) << 16;
+      unsigned NumToSkip = decodeNumToSkip(Ptr);
       // Check the predicate.
       bool Failed = !checkDecoderPredicate(PIdx, Bits);
       if (Failed)
@@ -2278,10 +2293,7 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
       // Decode the Opcode value.
       unsigned Opc = decodeULEB128AndIncUnsafe(++Ptr);
       unsigned DecodeIdx = decodeULEB128AndIncUnsafe(Ptr);
-      // NumToSkip is a plain 24-bit integer.
-      unsigned NumToSkip = *Ptr++;
-      NumToSkip |= (*Ptr++) << 8;
-      NumToSkip |= (*Ptr++) << 16;
+      unsigned NumToSkip = decodeNumToSkip(Ptr);
 
       // Perform the decode operation.
       MCInst TmpMI;
@@ -2406,6 +2418,9 @@ handleHwModesUnrelatedEncodings(const CodeGenInstruction *Instr,
 
 // Emits disassembler code for instruction decoding.
 void DecoderEmitter::run(raw_ostream &o) {
+  if (NumToSkipSizeInBytes != 2 && NumToSkipSizeInBytes != 3)
+    PrintFatalError("Invalid value for num-to-skip-size, must be 2 or 3");
+
   formatted_raw_ostream OS(o);
   OS << R"(
 #include "llvm/MC/MCInst.h"

>From 83e5b4f7547be90e0e78448d13f7f0e10ced6b1e Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Thu, 17 Apr 2025 12:28:49 -0700
Subject: [PATCH 2/2] Fix for incorrect compilation with gcc/expensive checks.

Add wrapper structs to discriminate between decode tables that use 2 vs
3 byte for NumToSkip, and overload the `decodeInstruction` function
based on this type. This ensures that if we have a mix of 2 and 3 byte
coode, it works correctly. Without this, 2 different compilation units
may generate 2 different versions of `decodeInstruction` and can cause
problems.
---
 .../Disassembler/AArch64Disassembler.cpp      |  4 +-
 .../Disassembler/AMDGPUDisassembler.cpp       | 98 ++++++++++++++-----
 .../AMDGPU/Disassembler/AMDGPUDisassembler.h  | 42 +-------
 .../ARM/Disassembler/ARMDisassembler.cpp      | 19 ++--
 .../AVR/Disassembler/AVRDisassembler.cpp      |  3 +-
 .../Disassembler/HexagonDisassembler.cpp      |  2 +-
 .../Disassembler/MSP430Disassembler.cpp       |  5 +-
 .../RISCV/Disassembler/RISCVDisassembler.cpp  |  2 +-
 .../Disassembler/SystemZDisassembler.cpp      |  2 +-
 llvm/utils/TableGen/DecoderEmitter.cpp        | 70 +++++++++----
 10 files changed, 143 insertions(+), 104 deletions(-)

diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index bab0cbe7788e9..263c91840c5c7 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -244,9 +244,9 @@ DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
   uint32_t Insn =
       (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0);
 
-  const uint8_t *Tables[] = {DecoderTable32, DecoderTableFallback32};
+  const DecoderTable3Bytes Tables[] = {DecoderTable32, DecoderTableFallback32};
 
-  for (const auto *Table : Tables) {
+  for (const auto Table : Tables) {
     DecodeStatus Result =
         decodeInstruction(Table, MI, Insn, Address, this, STI);
 
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 847121f251361..701776b16b4c0 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -487,6 +487,47 @@ static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
 //
 //===----------------------------------------------------------------------===//
 
+template <typename InsnType>
+static DecodeStatus tryDecodeInst(DecoderTable2Bytes Table, MCInst &MI,
+                                  InsnType Inst, uint64_t Address,
+                                  raw_ostream &Comments,
+                                  const AMDGPUDisassembler *Asm) {
+  assert(MI.getOpcode() == 0);
+  assert(MI.getNumOperands() == 0);
+  MCInst TmpInst;
+  Asm->setHasLiteral(false);
+  const auto SavedBytes = Asm->getBytes();
+
+  SmallString<64> LocalComments;
+  raw_svector_ostream LocalCommentStream(LocalComments);
+  Asm->CommentStream = &LocalCommentStream;
+
+  DecodeStatus Res = decodeInstruction(Table, TmpInst, Inst, Address, Asm,
+                                       Asm->getSubtargetInfo());
+
+  Asm->CommentStream = nullptr;
+
+  if (Res != MCDisassembler::Fail) {
+    MI = TmpInst;
+    Comments << LocalComments;
+    return MCDisassembler::Success;
+  }
+  Asm->setBytes(SavedBytes);
+  return MCDisassembler::Fail;
+}
+
+template <typename InsnType>
+static DecodeStatus
+tryDecodeInst(DecoderTable2Bytes Table1, DecoderTable2Bytes Table2, MCInst &MI,
+              InsnType Inst, uint64_t Address, raw_ostream &Comments,
+              const AMDGPUDisassembler *Asm) {
+  for (DecoderTable2Bytes T : {Table1, Table2}) {
+    if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments, Asm))
+      return Res;
+  }
+  return MCDisassembler::Fail;
+}
+
 template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
   assert(Bytes.size() >= sizeof(T));
   const auto Res =
@@ -539,16 +580,16 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
 
       if (isGFX11() &&
           tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
-                        DecW, Address, CS))
+                        DecW, Address, CS, this))
         break;
 
       if (isGFX12() &&
           tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
-                        DecW, Address, CS))
+                        DecW, Address, CS, this))
         break;
 
       if (isGFX12() &&
-          tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
+          tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS, this))
         break;
 
       // Reinitialize Bytes
@@ -557,7 +598,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
     } else if (Bytes.size() >= 16 &&
                STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
       DecoderUInt128 DecW = eat16Bytes(Bytes);
-      if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS))
+      if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS, this))
         break;
 
       // Reinitialize Bytes
@@ -568,58 +609,61 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
       const uint64_t QW = eatBytes<uint64_t>(Bytes);
 
       if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
-          tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
+          tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS, this))
         break;
 
       if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
-          tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
+          tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS,
+                        this))
         break;
 
       if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
-          tryDecodeInst(DecoderTableGFX95064, MI, QW, Address, CS))
+          tryDecodeInst(DecoderTableGFX95064, MI, QW, Address, CS, this))
         break;
 
       // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
       // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
       // table first so we print the correct name.
       if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
-          tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
+          tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS, this))
         break;
 
       if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
-          tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
+          tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS, this))
         break;
 
       if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
-          tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
+          tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS, this))
         break;
 
       if ((isVI() || isGFX9()) &&
-          tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
+          tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS, this))
         break;
 
-      if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
+      if (isGFX9() &&
+          tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS, this))
         break;
 
-      if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
+      if (isGFX10() &&
+          tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS, this))
         break;
 
       if (isGFX12() &&
           tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
-                        Address, CS))
+                        Address, CS, this))
         break;
 
       if (isGFX11() &&
           tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
-                        Address, CS))
+                        Address, CS, this))
         break;
 
       if (isGFX11() &&
-          tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
+          tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS, this))
         break;
 
       if (isGFX12() &&
-          tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
+          tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS, this))
         break;
 
       // Reinitialize Bytes
@@ -631,38 +675,40 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
       const uint32_t DW = eatBytes<uint32_t>(Bytes);
 
       if ((isVI() || isGFX9()) &&
-          tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
+          tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS, this))
         break;
 
-      if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
+      if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS, this))
         break;
 
-      if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
+      if (isGFX9() &&
+          tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS, this))
         break;
 
       if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
-          tryDecodeInst(DecoderTableGFX95032, MI, DW, Address, CS))
+          tryDecodeInst(DecoderTableGFX95032, MI, DW, Address, CS, this))
         break;
 
       if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
-          tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
+          tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS, this))
         break;
 
       if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
-          tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
+          tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS, this))
         break;
 
-      if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
+      if (isGFX10() &&
+          tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS, this))
         break;
 
       if (isGFX11() &&
           tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
-                        Address, CS))
+                        Address, CS, this))
         break;
 
       if (isGFX12() &&
           tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
-                        Address, CS))
+                        Address, CS, this))
         break;
     }
 
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 29452166e21a0..4f4bf6b2731c2 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -128,44 +128,6 @@ class AMDGPUDisassembler : public MCDisassembler {
 
   MCOperand errOperand(unsigned V, const Twine& ErrMsg) const;
 
-  template <typename InsnType>
-  DecodeStatus tryDecodeInst(const uint8_t *Table, MCInst &MI, InsnType Inst,
-                             uint64_t Address, raw_ostream &Comments) const {
-    assert(MI.getOpcode() == 0);
-    assert(MI.getNumOperands() == 0);
-    MCInst TmpInst;
-    HasLiteral = false;
-    const auto SavedBytes = Bytes;
-
-    SmallString<64> LocalComments;
-    raw_svector_ostream LocalCommentStream(LocalComments);
-    CommentStream = &LocalCommentStream;
-
-    DecodeStatus Res =
-        decodeInstruction(Table, TmpInst, Inst, Address, this, STI);
-
-    CommentStream = nullptr;
-
-    if (Res != Fail) {
-      MI = TmpInst;
-      Comments << LocalComments;
-      return MCDisassembler::Success;
-    }
-    Bytes = SavedBytes;
-    return MCDisassembler::Fail;
-  }
-
-  template <typename InsnType>
-  DecodeStatus tryDecodeInst(const uint8_t *Table1, const uint8_t *Table2,
-                             MCInst &MI, InsnType Inst, uint64_t Address,
-                             raw_ostream &Comments) const {
-    for (const uint8_t *T : {Table1, Table2}) {
-      if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
-        return Res;
-    }
-    return MCDisassembler::Fail;
-  }
-
   Expected<bool> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
                                ArrayRef<uint8_t> Bytes,
                                uint64_t Address) const override;
@@ -294,6 +256,10 @@ class AMDGPUDisassembler : public MCDisassembler {
   bool hasKernargPreload() const;
 
   bool isMacDPP(MCInst &MI) const;
+
+  void setHasLiteral(bool Val) const { HasLiteral = Val; }
+  void setBytes(ArrayRef<uint8_t> Val) const { Bytes = Val; }
+  ArrayRef<uint8_t> getBytes() const { return Bytes; }
 };
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index ef30b1aafb28b..d9da9f06a64b9 100644
--- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -809,25 +809,20 @@ DecodeStatus ARMDisassembler::getARMInstruction(MCInst &MI, uint64_t &Size,
     return checkDecodedInstruction(MI, Size, Address, CS, Insn, Result);
   }
 
-  struct DecodeTable {
-    const uint8_t *P;
-    bool DecodePred;
-  };
-
-  const DecodeTable Tables[] = {
+  constexpr std::pair<DecoderTable2Bytes, bool> Tables[] = {
       {DecoderTableVFP32, false},      {DecoderTableVFPV832, false},
       {DecoderTableNEONData32, true},  {DecoderTableNEONLoadStore32, true},
       {DecoderTableNEONDup32, true},   {DecoderTablev8NEON32, false},
       {DecoderTablev8Crypto32, false},
   };
 
-  for (auto Table : Tables) {
-    Result = decodeInstruction(Table.P, MI, Insn, Address, this, STI);
+  for (auto [Table, DecodePred] : Tables) {
+    Result = decodeInstruction(Table, MI, Insn, Address, this, STI);
     if (Result != MCDisassembler::Fail) {
       Size = 4;
       // Add a fake predicate operand, because we share these instruction
       // definitions with Thumb2 where these instructions are predicable.
-      if (Table.DecodePred && !DecodePredicateOperand(MI, 0xE, Address, this))
+      if (DecodePred && !DecodePredicateOperand(MI, 0xE, Address, this))
         return MCDisassembler::Fail;
       return Result;
     }
@@ -1254,9 +1249,9 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size,
   }
 
   uint32_t Coproc = fieldFromInstruction(Insn32, 8, 4);
-  const uint8_t *DecoderTable = ARM::isCDECoproc(Coproc, STI)
-                                    ? DecoderTableThumb2CDE32
-                                    : DecoderTableThumb2CoProc32;
+  const auto DecoderTable = ARM::isCDECoproc(Coproc, STI)
+                                ? DecoderTableThumb2CDE32
+                                : DecoderTableThumb2CoProc32;
   Result =
       decodeInstruction(DecoderTable, MI, Insn32, Address, this, STI);
   if (Result != MCDisassembler::Fail) {
diff --git a/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp b/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
index 70428673fcd8d..5172f47ea0537 100644
--- a/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
+++ b/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
@@ -458,8 +458,7 @@ static DecodeStatus readInstruction32(ArrayRef<uint8_t> Bytes, uint64_t Address,
   return MCDisassembler::Success;
 }
 
-static const uint8_t *getDecoderTable(uint64_t Size) {
-
+static DecoderTable2Bytes getDecoderTable(uint64_t Size) {
   switch (Size) {
   case 2:
     return DecoderTable16;
diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 98b711f6b014b..ef4a71903f334 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -327,7 +327,7 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
   if ((Instruction & HexagonII::INST_PARSE_MASK) ==
       HexagonII::INST_PARSE_DUPLEX) {
     unsigned duplexIClass;
-    uint8_t const *DecodeLow, *DecodeHigh;
+    DecoderTable2Bytes DecodeLow, DecodeHigh;
     duplexIClass = ((Instruction >> 28) & 0xe) | ((Instruction >> 13) & 0x1);
     switch (duplexIClass) {
     default:
diff --git a/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp b/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
index 519bba763204f..9c41a2d054026 100644
--- a/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
+++ b/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
@@ -201,7 +201,7 @@ static AddrMode DecodeDstAddrMode(unsigned Insn) {
   return Ad ? amIndexed : amRegister;
 }
 
-static const uint8_t *getDecoderTable(AddrMode SrcAM, unsigned Words) {
+static DecoderTable2Bytes getDecoderTable(AddrMode SrcAM, unsigned Words) {
   assert(0 < Words && Words < 4 && "Incorrect number of words");
   switch (SrcAM) {
   default:
@@ -308,7 +308,8 @@ DecodeStatus MSP430Disassembler::getInstructionII(MCInst &MI, uint64_t &Size,
     break;
   }
 
-  const uint8_t *DecoderTable = Words == 2 ? DecoderTable32 : DecoderTable16;
+  DecoderTable2Bytes DecoderTable =
+      Words == 2 ? DecoderTable32 : DecoderTable16;
   DecodeStatus Result = decodeInstruction(DecoderTable, MI, Insn, Address,
                                           this, STI);
   if (Result != MCDisassembler::Fail) {
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index 27809d96b647c..24f6ebd1a6268 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -675,7 +675,7 @@ void RISCVDisassembler::addSPOperands(MCInst &MI) const {
 namespace {
 
 struct DecoderListEntry {
-  const uint8_t *Table;
+  DecoderTable2Bytes Table;
   FeatureBitset ContainedFeatures;
   const char *Desc;
 
diff --git a/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index 8f7367b4f2dd8..068232e31799e 100644
--- a/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -331,7 +331,7 @@ DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
     return MCDisassembler::Fail;
 
   // The top 2 bits of the first byte specify the size.
-  const uint8_t *Table;
+  DecoderTable2Bytes Table;
   if (Bytes[0] < 0x40) {
     Size = 2;
     Table = DecoderTable16;
diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index eff63c6b45bb3..ba54af147ed05 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -811,8 +811,9 @@ void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
   for (const auto &EI : EncodingIDs)
     OpcodeToEncodingID[EI.Opcode] = EI.EncodingID;
 
-  OS << Indent << "static const uint8_t DecoderTable" << Namespace << BitWidth
-     << "[] = {\n";
+  // First emit the raw table as an array of uint8_t.
+  OS << Indent << "static constexpr uint8_t DecoderTable" << Namespace
+     << BitWidth << "RawData[] = {\n";
 
   Indent += 2;
 
@@ -985,10 +986,14 @@ void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
     }
   }
   OS << Indent << "0\n";
-
   Indent -= 2;
+  OS << Indent << "};\n";
 
-  OS << Indent << "};\n\n";
+  // Wrap the raw table data into an appropriate DecoderTable2Bytes
+  // or DecoderTable3Bytes struct.
+  OS << Indent << "static constexpr DecoderTable" << NumToSkipSizeInBytes
+     << "Bytes DecoderTable" << Namespace << BitWidth << "{DecoderTable"
+     << Namespace << BitWidth << "RawData};\n\n";
 }
 
 void DecoderEmitter::emitInstrLenTable(formatted_raw_ostream &OS,
@@ -2170,10 +2175,8 @@ insertBits(InsnType &field, uint64_t bits, unsigned startBit, unsigned numBits)
 // decodeInstruction().
 static void emitDecodeInstruction(formatted_raw_ostream &OS,
                                   bool IsVarLenInst) {
-  OS << formatv("\nconstexpr unsigned NumToSkipSizeInBytes = {};\n",
-                NumToSkipSizeInBytes);
-
   OS << R"(
+template<int NumToSkipSizeInBytes>
 inline unsigned decodeNumToSkip(const uint8_t *&Ptr) {
   unsigned NumToSkip = *Ptr++;
   NumToSkip |= (*Ptr++) << 8;
@@ -2183,8 +2186,11 @@ inline unsigned decodeNumToSkip(const uint8_t *&Ptr) {
 }
 
 template <typename InsnType>
-static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
-                                      InsnType insn, uint64_t Address,
+static DecodeStatus decodeInstruction()";
+  OS << formatv("DecoderTable{}Bytes DecodeTable,", NumToSkipSizeInBytes);
+  OS << R"(
+                                      MCInst &MI, InsnType insn,
+                                      uint64_t Address,
                                       const MCDisassembler *DisAsm,
                                       const MCSubtargetInfo &STI)";
   if (IsVarLenInst) {
@@ -2194,11 +2200,12 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
   OS << R"() {
   const FeatureBitset &Bits = STI.getFeatureBits();
 
-  const uint8_t *Ptr = DecodeTable;
+  const uint8_t *const Data = DecodeTable.Data.data();
+  const uint8_t *Ptr = Data;
   uint64_t CurFieldValue = 0;
   DecodeStatus S = MCDisassembler::Success;
   while (true) {
-    ptrdiff_t Loc = Ptr - DecodeTable;
+    ptrdiff_t Loc = Ptr - Data;
     switch (*Ptr) {
     default:
       errs() << Loc << ": Unexpected decode table opcode!\n";
@@ -2219,14 +2226,16 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
       // Decode the field value.
       uint64_t Val = decodeULEB128AndIncUnsafe(++Ptr);
       bool Failed = Val != CurFieldValue;
-      unsigned NumToSkip = decodeNumToSkip(Ptr);
+      unsigned NumToSkip = decodeNumToSkip<)";
+  OS << NumToSkipSizeInBytes;
+  OS << R"(>(Ptr);
 
       // Perform the filter operation.
       if (Failed)
         Ptr += NumToSkip;
       LLVM_DEBUG(dbgs() << Loc << ": OPC_FilterValue(" << Val << ", " << NumToSkip
                    << "): " << (Failed ? "FAIL:" : "PASS:")
-                   << " continuing at " << (Ptr - DecodeTable) << "\n");
+                   << " continuing at " << (Ptr - Data) << "\n");
 
       break;
     }
@@ -2243,7 +2252,9 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
       uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen);
       Ptr += PtrLen;
       bool Failed = ExpectedValue != FieldValue;
-      unsigned NumToSkip = decodeNumToSkip(Ptr);
+      unsigned NumToSkip = decodeNumToSkip<)";
+  OS << NumToSkipSizeInBytes;
+  OS << R"(>(Ptr);
 
       // If the actual and expected values don't match, skip.
       if (Failed)
@@ -2258,7 +2269,9 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
     case MCD::OPC_CheckPredicate: {
       // Decode the Predicate Index value.
       unsigned PIdx = decodeULEB128AndIncUnsafe(++Ptr);
-      unsigned NumToSkip = decodeNumToSkip(Ptr);
+      unsigned NumToSkip = decodeNumToSkip<)";
+  OS << NumToSkipSizeInBytes;
+  OS << R"(>(Ptr);
       // Check the predicate.
       bool Failed = !checkDecoderPredicate(PIdx, Bits);
       if (Failed)
@@ -2293,7 +2306,9 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
       // Decode the Opcode value.
       unsigned Opc = decodeULEB128AndIncUnsafe(++Ptr);
       unsigned DecodeIdx = decodeULEB128AndIncUnsafe(Ptr);
-      unsigned NumToSkip = decodeNumToSkip(Ptr);
+      unsigned NumToSkip = decodeNumToSkip<)";
+  OS << NumToSkipSizeInBytes;
+  OS << R"(>(Ptr);
 
       // Perform the decode operation.
       MCInst TmpMI;
@@ -2312,7 +2327,7 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
         assert(S == MCDisassembler::Fail);
         // If the decoding was incomplete, skip.
         Ptr += NumToSkip;
-        LLVM_DEBUG(dbgs() << "FAIL: continuing at " << (Ptr - DecodeTable) << "\n");
+        LLVM_DEBUG(dbgs() << "FAIL: continuing at " << (Ptr - Data) << "\n");
         // Reset decode status. This also drops a SoftFail status that could be
         // set before the decode attempt.
         S = MCDisassembler::Success;
@@ -2336,8 +2351,7 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
     }
   }
   llvm_unreachable("bogosity detected in disassembler state machine!");
-}
-
+} // decodeInstruction
 )";
 }
 
@@ -2355,6 +2369,22 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) {
 )";
 }
 
+// Helper to emit declarations of decoder table struct types.
+// These structs carry the raw decoder table data as their payload, but
+// the 2 different types helps discriminate between 2 vs 3 byte NumToSkip
+// encoding and generate an overloaded `decodeInstruction` function.
+static void emitDecoderTableStructTypes(formatted_raw_ostream &OS) {
+  OS << R"(
+struct DecoderTable2Bytes { // Decoder Table with 2 bytes NumToSkip.
+   ArrayRef<uint8_t> Data;
+};
+struct DecoderTable3Bytes { // Decoder Table with 3 bytes NumToSkip.
+   ArrayRef<uint8_t> Data;
+};
+
+)";
+}
+
 // Collect all HwModes referenced by the target for encoding purposes,
 // returning a vector of corresponding names.
 static void collectHwModesReferencedForEncodings(
@@ -2438,6 +2468,7 @@ namespace llvm {
   emitFieldFromInstruction(OS);
   emitInsertBits(OS);
   emitCheck(OS);
+  emitDecoderTableStructTypes(OS);
 
   Target.reverseBitsForLittleEndianEncoding();
 
@@ -2546,6 +2577,7 @@ namespace llvm {
     TableInfo.FixupStack.clear();
     TableInfo.Table.reserve(16384);
     TableInfo.FixupStack.emplace_back();
+
     FC.emitTableEntries(TableInfo);
     // Any NumToSkip fixups in the top level scope can resolve to the
     // OPC_Fail at the end of the table.