[llvm] [TableGen] Extend OPC_ExtractField/OPC_CheckField start value widths. (PR #79723)
Jason Eckhardt via llvm-commits
llvm-commits at lists.llvm.org
Sat Jan 27 19:28:27 PST 2024
https://github.com/nvjle created https://github.com/llvm/llvm-project/pull/79723
Both OPC_ExtractField and OPC_CheckField are currently defined to take an unsigned 8-bit start value. On some architectures with long instruction words, this value can silently overflow, resulting in a bad decoder table.
This patch changes each to take a ULE128B-encoded start value instead. Additionally, a range assertion is added for the 8-bit length to prominently notify a user in case that field ever overflows.
This problem isn't currently exposed upstream since all in-tree targets use small instruction words (i.e., bitwidth <= 64 bits). It does show up in at least one downstream target with instructions > 64 bits long.
>From 9c0489e172c7da91520302f2cab220c6c479407b Mon Sep 17 00:00:00 2001
From: Jason Eckhardt <jeckhardt at nvidia.com>
Date: Sat, 27 Jan 2024 20:54:49 -0600
Subject: [PATCH] [TableGen] Extend OPC_ExtractField/OPC_CheckField start value
widths.
Both OPC_ExtractField and OPC_CheckField are currently defined to take
an unsigned 8-bit start value. On some architectures with long instruction
words, this value can silently overflow, resulting in a bad decoder table.
This patch changes each to take a ULE128B-encoded start value instead.
Additionally, a range assertion is added for the 8-bit length to
prominently notify a user in case that field ever overflows.
This problem isn't currently exposed upstream since all in-tree targets
use small instruction words (i.e., bitwidth <= 64 bits). It does show
up in at least one downstream target with instructions > 64 bits long.
---
llvm/include/llvm/MC/MCDecoderOps.h | 4 +-
llvm/test/TableGen/trydecode-emission4.td | 44 +++++++++++++++
llvm/utils/TableGen/DecoderEmitter.cpp | 66 ++++++++++++++++-------
3 files changed, 93 insertions(+), 21 deletions(-)
create mode 100644 llvm/test/TableGen/trydecode-emission4.td
diff --git a/llvm/include/llvm/MC/MCDecoderOps.h b/llvm/include/llvm/MC/MCDecoderOps.h
index c1956993fca212c..3c0b68101e346c7 100644
--- a/llvm/include/llvm/MC/MCDecoderOps.h
+++ b/llvm/include/llvm/MC/MCDecoderOps.h
@@ -15,9 +15,9 @@ namespace llvm {
namespace MCD {
// Disassembler state machine opcodes.
enum DecoderOps {
- OPC_ExtractField = 1, // OPC_ExtractField(uint8_t Start, uint8_t Len)
+ OPC_ExtractField = 1, // OPC_ExtractField(uleb128 Start, uint8_t Len)
OPC_FilterValue, // OPC_FilterValue(uleb128 Val, uint16_t NumToSkip)
- OPC_CheckField, // OPC_CheckField(uint8_t Start, uint8_t Len,
+ OPC_CheckField, // OPC_CheckField(uleb128 Start, uint8_t Len,
// uleb128 Val, uint16_t NumToSkip)
OPC_CheckPredicate, // OPC_CheckPredicate(uleb128 PIdx, uint16_t NumToSkip)
OPC_Decode, // OPC_Decode(uleb128 Opcode, uleb128 DIdx)
diff --git a/llvm/test/TableGen/trydecode-emission4.td b/llvm/test/TableGen/trydecode-emission4.td
new file mode 100644
index 000000000000000..1e51ba5e4076859
--- /dev/null
+++ b/llvm/test/TableGen/trydecode-emission4.td
@@ -0,0 +1,44 @@
+// RUN: llvm-tblgen -gen-disassembler -I %p/../../include %s | FileCheck %s
+
+// Test for OPC_ExtractField/OPC_CheckField with start bit > 255.
+// These large start values may arise for architectures with long instruction
+// words.
+
+include "llvm/Target/Target.td"
+
+def archInstrInfo : InstrInfo { }
+
+def arch : Target {
+ let InstructionSet = archInstrInfo;
+}
+
+class TestInstruction : Instruction {
+ let Size = 64;
+ let OutOperandList = (outs);
+ let InOperandList = (ins);
+ field bits<512> Inst;
+ field bits<512> SoftFail = 0;
+}
+
+def InstA : TestInstruction {
+ let Inst{509-502} = {0,0,0,0,?,?,?,?};
+ let AsmString = "InstA";
+}
+
+def InstB : TestInstruction {
+ let Inst{509-502} = {0,0,0,0,0,0,?,?};
+ let AsmString = "InstB";
+ let DecoderMethod = "DecodeInstB";
+ let hasCompleteDecoder = 0;
+}
+
+
+// CHECK: /* 0 */ MCD::OPC_ExtractField, 250, 3, 4, // Inst{509-506} ...
+// CHECK-NEXT: /* 4 */ MCD::OPC_FilterValue, 0, 19, 0, 0, // Skip to: 28
+// CHECK-NEXT: /* 9 */ MCD::OPC_CheckField, 248, 3, 2, 0, 7, 0, 0, // Skip to: 24
+// CHECK-NEXT: /* 17 */ MCD::OPC_TryDecode, {{[0-9]+}}, {{[0-9]+}}, 0, 0, 0, 0, // Opcode: InstB, skip to: 24
+// CHECK-NEXT: /* 24 */ MCD::OPC_Decode, {{[0-9]+}}, {{[0-9]+}}, 1, // Opcode: InstA
+// CHECK-NEXT: /* 28 */ MCD::OPC_Fail,
+
+// CHECK: if (!Check(S, DecodeInstB(MI, insn, Address, Decoder))) { DecodeComplete = false; return MCDisassembler::Fail; }
+
diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 607f19653c7a0da..525aae02ded9039 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -676,8 +676,13 @@ static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups,
// Emit table entries to decode instructions given a segment or segments
// of bits.
void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const {
+ assert((NumBits < (1u << 8)) && "NumBits overflowed uint8 table entry!");
TableInfo.Table.push_back(MCD::OPC_ExtractField);
- TableInfo.Table.push_back(StartBit);
+
+ SmallString<16> SBytes;
+ raw_svector_ostream S(SBytes);
+ encodeULEB128(StartBit, S);
+ TableInfo.Table.insert(TableInfo.Table.end(), SBytes.begin(), SBytes.end());
TableInfo.Table.push_back(NumBits);
// A new filter entry begins a new scope for fixup resolution.
@@ -786,10 +791,20 @@ void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
PrintFatalError("invalid decode table opcode");
case MCD::OPC_ExtractField: {
++I;
- unsigned Start = *I++;
+ OS.indent(Indentation) << "MCD::OPC_ExtractField, ";
+
+ // ULEB128 encoded start value.
+ uint8_t Buffer[16], *P = Buffer;
+ while ((*P++ = *I++) >= 128)
+ assert((P - Buffer) <= (ptrdiff_t)sizeof(Buffer) &&
+ "ULEB128 value too large!");
+ unsigned Start = decodeULEB128(Buffer);
+ for (P = Buffer; *P >= 128; ++P)
+ OS << (unsigned)*P << ", ";
+ OS << (unsigned)*P << ", ";
+
unsigned Len = *I++;
- OS.indent(Indentation) << "MCD::OPC_ExtractField, " << Start << ", "
- << Len << ", // Inst{";
+ OS << Len << ", // Inst{";
if (Len > 1)
OS << (Start + Len - 1) << "-";
OS << Start << "} ...\n";
@@ -818,10 +833,14 @@ void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
}
case MCD::OPC_CheckField: {
++I;
- unsigned Start = *I++;
+ OS.indent(Indentation) << "MCD::OPC_CheckField, ";
+ // ULEB128 encoded start value.
+ for (; *I >= 128; ++I)
+ OS << (unsigned)*I << ", ";
+ OS << (unsigned)*I++ << ", ";
+ // 8-bit length.
unsigned Len = *I++;
- OS.indent(Indentation) << "MCD::OPC_CheckField, " << Start << ", "
- << Len << ", ";// << Val << ", " << NumToSkip << ",\n";
+ OS << Len << ", ";
// ULEB128 encoded field value.
for (; *I >= 128; ++I)
OS << (unsigned)*I << ", ";
@@ -1416,15 +1435,19 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
// Check any additional encoding fields needed.
for (unsigned I = Size; I != 0; --I) {
- unsigned NumBits = EndBits[I-1] - StartBits[I-1] + 1;
+ unsigned NumBits = EndBits[I - 1] - StartBits[I - 1] + 1;
+ assert((NumBits < (1u << 8)) && "NumBits overflowed uint8 table entry!");
TableInfo.Table.push_back(MCD::OPC_CheckField);
- TableInfo.Table.push_back(StartBits[I-1]);
+ uint8_t Buffer[16], *P;
+ encodeULEB128(StartBits[I - 1], Buffer);
+ for (P = Buffer; *P >= 128; ++P)
+ TableInfo.Table.push_back(*P);
+ TableInfo.Table.push_back(*P);
TableInfo.Table.push_back(NumBits);
- uint8_t Buffer[16], *p;
- encodeULEB128(FieldVals[I-1], Buffer);
- for (p = Buffer; *p >= 128 ; ++p)
- TableInfo.Table.push_back(*p);
- TableInfo.Table.push_back(*p);
+ encodeULEB128(FieldVals[I - 1], Buffer);
+ for (P = Buffer; *P >= 128; ++P)
+ TableInfo.Table.push_back(*P);
+ TableInfo.Table.push_back(*P);
// Push location for NumToSkip backpatching.
TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
// The fixup is always 24-bits, so go ahead and allocate the space
@@ -2227,9 +2250,11 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS,
<< " errs() << Loc << \": Unexpected decode table opcode!\\n\";\n"
<< " return MCDisassembler::Fail;\n"
<< " case MCD::OPC_ExtractField: {\n"
- << " unsigned Start = *++Ptr;\n"
- << " unsigned Len = *++Ptr;\n"
- << " ++Ptr;\n";
+ << " // Decode the start value.\n"
+ << " unsigned DecodedLen;\n"
+ << " unsigned Start = decodeULEB128(++Ptr, &DecodedLen);\n"
+ << " Ptr += DecodedLen;\n"
+ << " unsigned Len = *Ptr++;\n";
if (IsVarLenInst)
OS << " makeUp(insn, Start + Len);\n";
OS << " CurFieldValue = fieldFromInstruction(insn, Start, Len);\n"
@@ -2261,8 +2286,11 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS,
<< " break;\n"
<< " }\n"
<< " case MCD::OPC_CheckField: {\n"
- << " unsigned Start = *++Ptr;\n"
- << " unsigned Len = *++Ptr;\n";
+ << " // Decode the start value.\n"
+ << " unsigned Len;\n"
+ << " unsigned Start = decodeULEB128(++Ptr, &Len);\n"
+ << " Ptr += Len;\n"
+ << " Len = *Ptr;\n";
if (IsVarLenInst)
OS << " makeUp(insn, Start + Len);\n";
OS << " uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);\n"
More information about the llvm-commits
mailing list