[llvm] [NFCI][MC][DecoderEmitter] Fix BitWidth for fixed length inst encodings (PR #154934)

Fri Aug 22 05:16:41 PDT 2025

https://github.com/jurahul created https://github.com/llvm/llvm-project/pull/154934

Change `InstructionEncoding` to use `Size` field to derive the BitWidth for fixed length instruction as opposed to the number of bits in the `Inst` field. For some backends, `Inst` has more bits that `Size`, but `Size` is the true size of the instruction.

Also add validation that `Inst` has atleast `Size * 8` bits and any bits in `Inst` beyond that are either 0 or unset.

>From 713aced479d3cdcf5dc2a7183b36ab4956b64225 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Fri, 22 Aug 2025 04:47:46 -0700
Subject: [PATCH] [NFCI][MC][DecoderEmitter] Fix BitWidth for fixed length
 insts encodings

Change `InstructionEncoding` to use `Size` field to derive the BitWidth
for fixed length instruction as opposed to the number of bits in the
`Inst` field. For some backends, `Inst` has more bits that `Size`, but
`Size` is the true size of the instruction.

Also add validation that `Inst` has atleast `Size * 8` bits and any
bits in `Inst` beyond that are either 0 or unset.
---
 .../FixedLenDecoderEmitter/InvalidEncoding.td | 36 +++++++++++++++++++
 llvm/utils/TableGen/DecoderEmitter.cpp        | 32 ++++++++++++++++-
 2 files changed, 67 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/TableGen/FixedLenDecoderEmitter/InvalidEncoding.td

diff --git a/llvm/test/TableGen/FixedLenDecoderEmitter/InvalidEncoding.td b/llvm/test/TableGen/FixedLenDecoderEmitter/InvalidEncoding.td
new file mode 100644
index 0000000000000..251046648fbe9
--- /dev/null
+++ b/llvm/test/TableGen/FixedLenDecoderEmitter/InvalidEncoding.td
@@ -0,0 +1,36 @@
+// RUN: not llvm-tblgen -gen-disassembler -I %p/../../../include %s -DTEST1 2>&1 | FileCheck %s -DFILE=%s --check-prefix=CHECK-TEST1
+// RUN: not llvm-tblgen -gen-disassembler -I %p/../../../include %s -DTEST2 2>&1 | FileCheck %s -DFILE=%s --check-prefix=CHECK-TEST2
+include "llvm/Target/Target.td"
+
+def archInstrInfo : InstrInfo { }
+
+def arch : Target {
+    let InstructionSet = archInstrInfo;
+}
+
+#ifdef TEST1
+// CHECK-TEST1: [[FILE]]:[[@LINE+1]]:5: error: foo: Size is 16 bits, but Inst bits beyond that are not zero/unset
+def foo : Instruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins i32imm:$factor);  
+  let Size = 2;
+  field bits<24> Inst;
+  field bits<24> SoftFail = 0;
+  bits<8> factor;
+  let Inst{15...8} = factor{7...0};
+  let Inst{20} = 1;
+}
+#endif
+
+#ifdef TEST2
+// CHECK-TEST2: [[FILE]]:[[@LINE+1]]:5: error: bar: Size is 16 bits, but Inst specifies only 8 bits
+def bar : Instruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins i32imm:$factor);  
+  let Size = 2;
+  field bits<8> Inst;
+  field bits<8> SoftFail = 0;
+  bits<4> factor;
+  let Inst{4...1} = factor{3...0};
+}
+#endif
diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 740035aecc69f..94e2e9505f0c2 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -2023,6 +2023,7 @@ InstructionEncoding::InstructionEncoding(const Record *EncodingDef,
     HasCompleteDecoder = EncodingDef->getValueAsBit("hasCompleteDecoder");
 
   const RecordVal *InstField = EncodingDef->getValue("Inst");
+
   if (const auto *DI = dyn_cast<DagInit>(InstField->getValue())) {
     VarLenInst VLI(DI, InstField);
     BitWidth = VLI.size();
@@ -2031,7 +2032,36 @@ InstructionEncoding::InstructionEncoding(const Record *EncodingDef,
       parseVarLenOperands(VLI);
   } else {
     const auto *BI = cast<BitsInit>(InstField->getValue());
-    BitWidth = BI->getNumBits();
+    unsigned InstNumBits = BI->getNumBits();
+
+    // For fixed length instructions, sometimes the `Inst` field specifies more
+    // bits than the actual size of the instruction. In such cases, derive the
+    // BitWidth from the size and do some basic validation.
+    BitWidth = EncodingDef->getValueAsInt("Size") * 8;
+    if (InstNumBits < BitWidth)
+      PrintFatalError(EncodingDef, Twine(Name) + ": Size is " +
+                                       Twine(BitWidth) +
+                                       " bits, but Inst specifies only " +
+                                       Twine(InstNumBits) + " bits");
+
+    if (InstNumBits > BitWidth) {
+      // Ensure that all the bits beyond 'Size' are 0 or unset (i.e., carry no
+      // actual encoding).
+      ArrayRef<const Init *> UpperBits = BI->getBits().drop_front(BitWidth);
+      bool UpperZeroOrUnset = llvm::all_of(UpperBits, [](const Init *Bit) {
+        if (const auto *BI = dyn_cast<BitInit>(Bit))
+          return !BI->getValue();
+        return isa<UnsetInit>(Bit);
+      });
+      if (!UpperZeroOrUnset) {
+        InstField->print(errs());
+        PrintFatalError(
+            EncodingDef,
+            Twine(Name) + ": Size is " + Twine(BitWidth) +
+                " bits, but Inst bits beyond that are not zero/unset");
+      }
+    }
+
     // If the encoding has a custom decoder, don't bother parsing the operands.
     if (DecoderMethod.empty())
       parseFixedLenOperands(*BI);