[llvm] [TableGen] Extend OPC_ExtractField/OPC_CheckField start value widths. (PR #79723)

Sat Jan 27 19:28:27 PST 2024

https://github.com/nvjle created https://github.com/llvm/llvm-project/pull/79723

Both OPC_ExtractField and OPC_CheckField are currently defined to take an unsigned 8-bit start value. On some architectures with long instruction words, this value can silently overflow, resulting in a bad decoder table.

This patch changes each to take a ULE128B-encoded start value instead. Additionally, a range assertion is added for the 8-bit length to prominently notify a user in case that field ever overflows.

This problem isn't currently exposed upstream since all in-tree targets use small instruction words (i.e., bitwidth <= 64 bits). It does show up in at least one downstream target with instructions > 64 bits long.

>From 9c0489e172c7da91520302f2cab220c6c479407b Mon Sep 17 00:00:00 2001
From: Jason Eckhardt <jeckhardt at nvidia.com>
Date: Sat, 27 Jan 2024 20:54:49 -0600
Subject: [PATCH] [TableGen] Extend OPC_ExtractField/OPC_CheckField start value
 widths.

Both OPC_ExtractField and OPC_CheckField are currently defined to take
an unsigned 8-bit start value. On some architectures with long instruction
words, this value can silently overflow, resulting in a bad decoder table.

This patch changes each to take a ULE128B-encoded start value instead.
Additionally, a range assertion is added for the 8-bit length to
prominently notify a user in case that field ever overflows.

This problem isn't currently exposed upstream since all in-tree targets
use small instruction words (i.e., bitwidth <= 64 bits). It does show
up in at least one downstream target with instructions > 64 bits long.
---
 llvm/include/llvm/MC/MCDecoderOps.h       |  4 +-
 llvm/test/TableGen/trydecode-emission4.td | 44 +++++++++++++++
 llvm/utils/TableGen/DecoderEmitter.cpp    | 66 ++++++++++++++++-------
 3 files changed, 93 insertions(+), 21 deletions(-)
 create mode 100644 llvm/test/TableGen/trydecode-emission4.td

diff --git a/llvm/include/llvm/MC/MCDecoderOps.h b/llvm/include/llvm/MC/MCDecoderOps.h
index c1956993fca212c..3c0b68101e346c7 100644
--- a/llvm/include/llvm/MC/MCDecoderOps.h
+++ b/llvm/include/llvm/MC/MCDecoderOps.h
@@ -15,9 +15,9 @@ namespace llvm {
 namespace MCD {
 // Disassembler state machine opcodes.
 enum DecoderOps {
-  OPC_ExtractField = 1, // OPC_ExtractField(uint8_t Start, uint8_t Len)
+  OPC_ExtractField = 1, // OPC_ExtractField(uleb128 Start, uint8_t Len)
   OPC_FilterValue,      // OPC_FilterValue(uleb128 Val, uint16_t NumToSkip)
-  OPC_CheckField,       // OPC_CheckField(uint8_t Start, uint8_t Len,
+  OPC_CheckField,       // OPC_CheckField(uleb128 Start, uint8_t Len,
                         //                uleb128 Val, uint16_t NumToSkip)
   OPC_CheckPredicate,   // OPC_CheckPredicate(uleb128 PIdx, uint16_t NumToSkip)
   OPC_Decode,           // OPC_Decode(uleb128 Opcode, uleb128 DIdx)
diff --git a/llvm/test/TableGen/trydecode-emission4.td b/llvm/test/TableGen/trydecode-emission4.td
new file mode 100644
index 000000000000000..1e51ba5e4076859
--- /dev/null
+++ b/llvm/test/TableGen/trydecode-emission4.td
@@ -0,0 +1,44 @@
+// RUN: llvm-tblgen -gen-disassembler -I %p/../../include %s | FileCheck %s
+
+// Test for OPC_ExtractField/OPC_CheckField with start bit > 255.
+// These large start values may arise for architectures with long instruction
+// words.
+
+include "llvm/Target/Target.td"
+
+def archInstrInfo : InstrInfo { }
+
+def arch : Target {
+  let InstructionSet = archInstrInfo;
+}
+
+class TestInstruction : Instruction {
+  let Size = 64;
+  let OutOperandList = (outs);
+  let InOperandList = (ins);
+  field bits<512> Inst;
+  field bits<512> SoftFail = 0;
+}
+
+def InstA : TestInstruction {
+  let Inst{509-502} = {0,0,0,0,?,?,?,?};
+  let AsmString = "InstA";
+}
+
+def InstB : TestInstruction {
+  let Inst{509-502} = {0,0,0,0,0,0,?,?};
+  let AsmString = "InstB";
+  let DecoderMethod = "DecodeInstB";
+  let hasCompleteDecoder = 0;
+}
+
+
+// CHECK:      /* 0 */       MCD::OPC_ExtractField, 250, 3, 4,  // Inst{509-506} ...
+// CHECK-NEXT: /* 4 */       MCD::OPC_FilterValue, 0, 19, 0, 0, // Skip to: 28
+// CHECK-NEXT: /* 9 */       MCD::OPC_CheckField, 248, 3, 2, 0, 7, 0, 0, // Skip to: 24
+// CHECK-NEXT: /* 17 */      MCD::OPC_TryDecode, {{[0-9]+}}, {{[0-9]+}}, 0, 0, 0, 0, // Opcode: InstB, skip to: 24
+// CHECK-NEXT: /* 24 */      MCD::OPC_Decode, {{[0-9]+}}, {{[0-9]+}}, 1, // Opcode: InstA
+// CHECK-NEXT: /* 28 */      MCD::OPC_Fail,
+
+// CHECK: if (!Check(S, DecodeInstB(MI, insn, Address, Decoder))) { DecodeComplete = false; return MCDisassembler::Fail; }
+
diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 607f19653c7a0da..525aae02ded9039 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -676,8 +676,13 @@ static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups,
 // Emit table entries to decode instructions given a segment or segments
 // of bits.
 void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const {
+  assert((NumBits < (1u << 8)) && "NumBits overflowed uint8 table entry!");
   TableInfo.Table.push_back(MCD::OPC_ExtractField);
-  TableInfo.Table.push_back(StartBit);
+
+  SmallString<16> SBytes;
+  raw_svector_ostream S(SBytes);
+  encodeULEB128(StartBit, S);
+  TableInfo.Table.insert(TableInfo.Table.end(), SBytes.begin(), SBytes.end());
   TableInfo.Table.push_back(NumBits);
 
   // A new filter entry begins a new scope for fixup resolution.
@@ -786,10 +791,20 @@ void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
       PrintFatalError("invalid decode table opcode");
     case MCD::OPC_ExtractField: {
       ++I;
-      unsigned Start = *I++;
+      OS.indent(Indentation) << "MCD::OPC_ExtractField, ";
+
+      // ULEB128 encoded start value.
+      uint8_t Buffer[16], *P = Buffer;
+      while ((*P++ = *I++) >= 128)
+        assert((P - Buffer) <= (ptrdiff_t)sizeof(Buffer) &&
+               "ULEB128 value too large!");
+      unsigned Start = decodeULEB128(Buffer);
+      for (P = Buffer; *P >= 128; ++P)
+        OS << (unsigned)*P << ", ";
+      OS << (unsigned)*P << ", ";
+
       unsigned Len = *I++;
-      OS.indent(Indentation) << "MCD::OPC_ExtractField, " << Start << ", "
-        << Len << ",  // Inst{";
+      OS << Len << ",  // Inst{";
       if (Len > 1)
         OS << (Start + Len - 1) << "-";
       OS << Start << "} ...\n";
@@ -818,10 +833,14 @@ void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
     }
     case MCD::OPC_CheckField: {
       ++I;
-      unsigned Start = *I++;
+      OS.indent(Indentation) << "MCD::OPC_CheckField, ";
+      // ULEB128 encoded start value.
+      for (; *I >= 128; ++I)
+        OS << (unsigned)*I << ", ";
+      OS << (unsigned)*I++ << ", ";
+      // 8-bit length.
       unsigned Len = *I++;
-      OS.indent(Indentation) << "MCD::OPC_CheckField, " << Start << ", "
-        << Len << ", ";// << Val << ", " << NumToSkip << ",\n";
+      OS << Len << ", ";
       // ULEB128 encoded field value.
       for (; *I >= 128; ++I)
         OS << (unsigned)*I << ", ";
@@ -1416,15 +1435,19 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
 
   // Check any additional encoding fields needed.
   for (unsigned I = Size; I != 0; --I) {
-    unsigned NumBits = EndBits[I-1] - StartBits[I-1] + 1;
+    unsigned NumBits = EndBits[I - 1] - StartBits[I - 1] + 1;
+    assert((NumBits < (1u << 8)) && "NumBits overflowed uint8 table entry!");
     TableInfo.Table.push_back(MCD::OPC_CheckField);
-    TableInfo.Table.push_back(StartBits[I-1]);
+    uint8_t Buffer[16], *P;
+    encodeULEB128(StartBits[I - 1], Buffer);
+    for (P = Buffer; *P >= 128; ++P)
+      TableInfo.Table.push_back(*P);
+    TableInfo.Table.push_back(*P);
     TableInfo.Table.push_back(NumBits);
-    uint8_t Buffer[16], *p;
-    encodeULEB128(FieldVals[I-1], Buffer);
-    for (p = Buffer; *p >= 128 ; ++p)
-      TableInfo.Table.push_back(*p);
-    TableInfo.Table.push_back(*p);
+    encodeULEB128(FieldVals[I - 1], Buffer);
+    for (P = Buffer; *P >= 128; ++P)
+      TableInfo.Table.push_back(*P);
+    TableInfo.Table.push_back(*P);
     // Push location for NumToSkip backpatching.
     TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
     // The fixup is always 24-bits, so go ahead and allocate the space
@@ -2227,9 +2250,11 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS,
      << "      errs() << Loc << \": Unexpected decode table opcode!\\n\";\n"
      << "      return MCDisassembler::Fail;\n"
      << "    case MCD::OPC_ExtractField: {\n"
-     << "      unsigned Start = *++Ptr;\n"
-     << "      unsigned Len = *++Ptr;\n"
-     << "      ++Ptr;\n";
+     << "      // Decode the start value.\n"
+     << "      unsigned DecodedLen;\n"
+     << "      unsigned Start = decodeULEB128(++Ptr, &DecodedLen);\n"
+     << "      Ptr += DecodedLen;\n"
+     << "      unsigned Len = *Ptr++;\n";
   if (IsVarLenInst)
     OS << "      makeUp(insn, Start + Len);\n";
   OS << "      CurFieldValue = fieldFromInstruction(insn, Start, Len);\n"
@@ -2261,8 +2286,11 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS,
      << "      break;\n"
      << "    }\n"
      << "    case MCD::OPC_CheckField: {\n"
-     << "      unsigned Start = *++Ptr;\n"
-     << "      unsigned Len = *++Ptr;\n";
+     << "      // Decode the start value.\n"
+     << "      unsigned Len;\n"
+     << "      unsigned Start = decodeULEB128(++Ptr, &Len);\n"
+     << "      Ptr += Len;\n"
+     << "      Len = *Ptr;\n";
   if (IsVarLenInst)
     OS << "      makeUp(insn, Start + Len);\n";
   OS << "      uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);\n"