[llvm] [NFCI][MC][DecoderEmitter] Fix BitWidth for fixed length inst encodings (PR #154934)
Rahul Joshi via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 22 05:16:41 PDT 2025
https://github.com/jurahul created https://github.com/llvm/llvm-project/pull/154934
Change `InstructionEncoding` to use `Size` field to derive the BitWidth for fixed length instruction as opposed to the number of bits in the `Inst` field. For some backends, `Inst` has more bits that `Size`, but `Size` is the true size of the instruction.
Also add validation that `Inst` has atleast `Size * 8` bits and any bits in `Inst` beyond that are either 0 or unset.
>From 713aced479d3cdcf5dc2a7183b36ab4956b64225 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Fri, 22 Aug 2025 04:47:46 -0700
Subject: [PATCH] [NFCI][MC][DecoderEmitter] Fix BitWidth for fixed length
insts encodings
Change `InstructionEncoding` to use `Size` field to derive the BitWidth
for fixed length instruction as opposed to the number of bits in the
`Inst` field. For some backends, `Inst` has more bits that `Size`, but
`Size` is the true size of the instruction.
Also add validation that `Inst` has atleast `Size * 8` bits and any
bits in `Inst` beyond that are either 0 or unset.
---
.../FixedLenDecoderEmitter/InvalidEncoding.td | 36 +++++++++++++++++++
llvm/utils/TableGen/DecoderEmitter.cpp | 32 ++++++++++++++++-
2 files changed, 67 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/TableGen/FixedLenDecoderEmitter/InvalidEncoding.td
diff --git a/llvm/test/TableGen/FixedLenDecoderEmitter/InvalidEncoding.td b/llvm/test/TableGen/FixedLenDecoderEmitter/InvalidEncoding.td
new file mode 100644
index 0000000000000..251046648fbe9
--- /dev/null
+++ b/llvm/test/TableGen/FixedLenDecoderEmitter/InvalidEncoding.td
@@ -0,0 +1,36 @@
+// RUN: not llvm-tblgen -gen-disassembler -I %p/../../../include %s -DTEST1 2>&1 | FileCheck %s -DFILE=%s --check-prefix=CHECK-TEST1
+// RUN: not llvm-tblgen -gen-disassembler -I %p/../../../include %s -DTEST2 2>&1 | FileCheck %s -DFILE=%s --check-prefix=CHECK-TEST2
+include "llvm/Target/Target.td"
+
+def archInstrInfo : InstrInfo { }
+
+def arch : Target {
+ let InstructionSet = archInstrInfo;
+}
+
+#ifdef TEST1
+// CHECK-TEST1: [[FILE]]:[[@LINE+1]]:5: error: foo: Size is 16 bits, but Inst bits beyond that are not zero/unset
+def foo : Instruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins i32imm:$factor);
+ let Size = 2;
+ field bits<24> Inst;
+ field bits<24> SoftFail = 0;
+ bits<8> factor;
+ let Inst{15...8} = factor{7...0};
+ let Inst{20} = 1;
+}
+#endif
+
+#ifdef TEST2
+// CHECK-TEST2: [[FILE]]:[[@LINE+1]]:5: error: bar: Size is 16 bits, but Inst specifies only 8 bits
+def bar : Instruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins i32imm:$factor);
+ let Size = 2;
+ field bits<8> Inst;
+ field bits<8> SoftFail = 0;
+ bits<4> factor;
+ let Inst{4...1} = factor{3...0};
+}
+#endif
diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 740035aecc69f..94e2e9505f0c2 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -2023,6 +2023,7 @@ InstructionEncoding::InstructionEncoding(const Record *EncodingDef,
HasCompleteDecoder = EncodingDef->getValueAsBit("hasCompleteDecoder");
const RecordVal *InstField = EncodingDef->getValue("Inst");
+
if (const auto *DI = dyn_cast<DagInit>(InstField->getValue())) {
VarLenInst VLI(DI, InstField);
BitWidth = VLI.size();
@@ -2031,7 +2032,36 @@ InstructionEncoding::InstructionEncoding(const Record *EncodingDef,
parseVarLenOperands(VLI);
} else {
const auto *BI = cast<BitsInit>(InstField->getValue());
- BitWidth = BI->getNumBits();
+ unsigned InstNumBits = BI->getNumBits();
+
+ // For fixed length instructions, sometimes the `Inst` field specifies more
+ // bits than the actual size of the instruction. In such cases, derive the
+ // BitWidth from the size and do some basic validation.
+ BitWidth = EncodingDef->getValueAsInt("Size") * 8;
+ if (InstNumBits < BitWidth)
+ PrintFatalError(EncodingDef, Twine(Name) + ": Size is " +
+ Twine(BitWidth) +
+ " bits, but Inst specifies only " +
+ Twine(InstNumBits) + " bits");
+
+ if (InstNumBits > BitWidth) {
+ // Ensure that all the bits beyond 'Size' are 0 or unset (i.e., carry no
+ // actual encoding).
+ ArrayRef<const Init *> UpperBits = BI->getBits().drop_front(BitWidth);
+ bool UpperZeroOrUnset = llvm::all_of(UpperBits, [](const Init *Bit) {
+ if (const auto *BI = dyn_cast<BitInit>(Bit))
+ return !BI->getValue();
+ return isa<UnsetInit>(Bit);
+ });
+ if (!UpperZeroOrUnset) {
+ InstField->print(errs());
+ PrintFatalError(
+ EncodingDef,
+ Twine(Name) + ": Size is " + Twine(BitWidth) +
+ " bits, but Inst bits beyond that are not zero/unset");
+ }
+ }
+
// If the encoding has a custom decoder, don't bother parsing the operands.
if (DecoderMethod.empty())
parseFixedLenOperands(*BI);
More information about the llvm-commits
mailing list