[llvm] [NFCI][MC][DecoderEmitter] Fix BitWidth for fixed length inst encodings (PR #154934)

Fri Aug 22 07:26:14 PDT 2025

https://github.com/jurahul updated https://github.com/llvm/llvm-project/pull/154934

>From 979a836ae35d99f8c5bd233b730f568d5156afcc Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Fri, 22 Aug 2025 04:47:46 -0700
Subject: [PATCH 1/2] [NFCI][MC][DecoderEmitter] Fix BitWidth for fixed length
 inst encodings

Change `InstructionEncoding` to use the `Size` field to derive the BitWidth
for fixed length instructions, as opposed to the number of bits in the
`Inst` field. For some backends, `Inst` has more bits than `Size`, but
`Size` is the true size of the instruction.

Also add validation that `Inst` has at least `Size * 8` bits and that any
bits in `Inst` beyond that are either 0 or unset.
---
 .../FixedLenDecoderEmitter/InvalidEncoding.td | 37 ++++++++
 llvm/utils/TableGen/DecoderEmitter.cpp        | 95 ++++++++++++++-----
 2 files changed, 109 insertions(+), 23 deletions(-)
 create mode 100644 llvm/test/TableGen/FixedLenDecoderEmitter/InvalidEncoding.td

diff --git a/llvm/test/TableGen/FixedLenDecoderEmitter/InvalidEncoding.td b/llvm/test/TableGen/FixedLenDecoderEmitter/InvalidEncoding.td
new file mode 100644
index 0000000000000..fa9627ec0b10e
--- /dev/null
+++ b/llvm/test/TableGen/FixedLenDecoderEmitter/InvalidEncoding.td
@@ -0,0 +1,37 @@
+// RUN: not llvm-tblgen -gen-disassembler -I %p/../../../include %s -DTEST1 2>&1 | FileCheck %s -DFILE=%s --check-prefix=CHECK-TEST1
+// RUN: not llvm-tblgen -gen-disassembler -I %p/../../../include %s -DTEST2 2>&1 | FileCheck %s -DFILE=%s --check-prefix=CHECK-TEST2
+
+include "llvm/Target/Target.td"
+
+def archInstrInfo : InstrInfo { }
+
+def arch : Target {
+    let InstructionSet = archInstrInfo;
+}
+
+#ifdef TEST1
+// CHECK-TEST1: [[FILE]]:[[@LINE+1]]:5: error: foo: Size is 16 bits, but Inst bits beyond that are not zero/unset
+def foo : Instruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins i32imm:$factor);  
+  let Size = 2;
+  field bits<24> Inst;
+  field bits<24> SoftFail = 0;
+  bits<8> factor;
+  let Inst{15...8} = factor{7...0};
+  let Inst{20} = 1;
+}
+#endif
+
+#ifdef TEST2
+// CHECK-TEST2: [[FILE]]:[[@LINE+1]]:5: error: bar: Size is 16 bits, but Inst specifies only 8 bits
+def bar : Instruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins i32imm:$factor);  
+  let Size = 2;
+  field bits<8> Inst;
+  field bits<8> SoftFail = 0;
+  bits<4> factor;
+  let Inst{4...1} = factor{3...0};
+}
+#endif
diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 0d394ed87a4b8..26c2ef817e83c 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -1787,12 +1787,51 @@ void InstructionEncoding::parseVarLenEncoding(const VarLenInst &VLI) {
   assert(I == VLI.size());
 }
 
-void InstructionEncoding::parseFixedLenEncoding(const BitsInit &Bits) {
-  InstBits = KnownBits(Bits.getNumBits());
-  SoftFailBits = APInt(Bits.getNumBits(), 0);
+void InstructionEncoding::parseFixedLenEncoding(const BitsInit &InstBits) {
+  // For fixed length instructions, sometimes the `Inst` field specifies more
+  // bits than the actual size of the instruction, which is specified in `Size`.
+  // In such cases, we do some basic validation and drop the upper bits.
+  unsigned BitWidth = EncodingDef->getValueAsInt("Size") * 8;
+  unsigned InstNumBits = InstBits.getNumBits();
+
+  // Returns true if all bits in `Bits` are zero or unset.
+  auto CheckAllZeroOrUnset = [&](ArrayRef<const Init *> Bits, const RecordVal *Field) {
+    bool AllZeroOrUnset = llvm::all_of(Bits, [](const Init *Bit) {
+      if (const auto *BI = dyn_cast<BitInit>(Bit))
+        return !BI->getValue();
+      return isa<UnsetInit>(Bit);
+    });
+    if (AllZeroOrUnset)
+      return;
+    PrintNote([Field](raw_ostream &OS) {
+      Field->print(OS);
+    });
+    PrintFatalError(
+        EncodingDef,
+        Twine(Name) + ": Size is " + Twine(BitWidth) +
+            " bits, but " +  Field->getName() + " bits beyond that are not zero/unset");
+  };
+
+  if (InstNumBits < BitWidth)
+    PrintFatalError(EncodingDef, Twine(Name) + ": Size is " +
+                                      Twine(BitWidth) +
+                                      " bits, but Inst specifies only " +
+                                      Twine(InstNumBits) + " bits");
+
+  if (InstNumBits > BitWidth) {
+    // Ensure that all the bits beyond 'Size' are 0 or unset (i.e., carry no
+    // actual encoding).
+    ArrayRef<const Init *> UpperBits = InstBits.getBits().drop_front(BitWidth);
+    const RecordVal *InstField = EncodingDef->getValue("Inst");
+    CheckAllZeroOrUnset(UpperBits, InstField);
+  }
+
+  ArrayRef<const Init *> ActiveInstBits = InstBits.getBits().take_front(BitWidth);
+  InstBits = KnownBits(BitWidth);
+  SoftFailBits = APInt(BitWidth, 0);
 
   // Parse Inst field.
-  for (auto [I, V] : enumerate(Bits.getBits())) {
+  for (auto [I, V] : enumerate(ActiveInstBits)) {
     if (const auto *B = dyn_cast<BitInit>(V)) {
       if (B->getValue())
         InstBits.One.setBit(I);
@@ -1802,26 +1841,36 @@ void InstructionEncoding::parseFixedLenEncoding(const BitsInit &Bits) {
   }
 
   // Parse SoftFail field.
-  if (const RecordVal *SoftFailField = EncodingDef->getValue("SoftFail")) {
-    const auto *SFBits = dyn_cast<BitsInit>(SoftFailField->getValue());
-    if (!SFBits || SFBits->getNumBits() != Bits.getNumBits()) {
-      PrintNote(EncodingDef->getLoc(), "in record");
-      PrintFatalError(SoftFailField,
-                      formatv("SoftFail field, if defined, must be "
-                              "of the same type as Inst, which is bits<{}>",
-                              Bits.getNumBits()));
-    }
-    for (auto [I, V] : enumerate(SFBits->getBits())) {
-      if (const auto *B = dyn_cast<BitInit>(V); B && B->getValue()) {
-        if (!InstBits.Zero[I] && !InstBits.One[I]) {
-          PrintNote(EncodingDef->getLoc(), "in record");
-          PrintError(SoftFailField,
-                     formatv("SoftFail{{{0}} = 1 requires Inst{{{0}} "
-                             "to be fully defined (0 or 1, not '?')",
-                             I));
-        }
-        SoftFailBits.setBit(I);
+  const RecordVal *SoftFailField = EncodingDef->getValue("SoftFail");
+  if (!SoftFailField)
+    return;
+
+  const auto *SFBits = dyn_cast<BitsInit>(SoftFailField->getValue());
+  if (!SFBits || SFBits->getNumBits() != Bits.getNumBits()) {
+    PrintNote(EncodingDef->getLoc(), "in record");
+    PrintFatalError(SoftFailField,
+                    formatv("SoftFail field, if defined, must be "
+                            "of the same type as Inst, which is bits<{}>",
+                            Bits.getNumBits()));
+  }
+
+  if (InstNumBits > BitWidth) {
+    // Ensure that all upper bits of `SoftFail` are 0 or unset.
+    ArrayRef<const Init *> UpperBits = SFBits->getBits().drop_front(BitWidth);
+    CheckAllZeroOrUnset(UpperBits, SoftFailField);
+  }
+
+  ArrayRef<const Init *> ActiveSFBits = SFBits->getBits().take_front(BitWidth);
+  for (auto [I, V] : enumerate(ActiveSFBits)) {
+    if (const auto *B = dyn_cast<BitInit>(V); B && B->getValue()) {
+      if (!InstBits.Zero[I] && !InstBits.One[I]) {
+        PrintNote(EncodingDef->getLoc(), "in record");
+        PrintError(SoftFailField,
+                    formatv("SoftFail{{{0}} = 1 requires Inst{{{0}} "
+                            "to be fully defined (0 or 1, not '?')",
+                            I));
       }
+      SoftFailBits.setBit(I);
     }
   }
 }

>From 3c37ad64a242f0de760c73f74326b3fec118c395 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Fri, 22 Aug 2025 06:47:18 -0700
Subject: [PATCH 2/2] Resolve conflicts

---
 .../FixedLenDecoderEmitter/InvalidEncoding.td | 28 +++++++++---
 llvm/utils/TableGen/DecoderEmitter.cpp        | 44 +++++++++----------
 2 files changed, 44 insertions(+), 28 deletions(-)

diff --git a/llvm/test/TableGen/FixedLenDecoderEmitter/InvalidEncoding.td b/llvm/test/TableGen/FixedLenDecoderEmitter/InvalidEncoding.td
index fa9627ec0b10e..074c7c53e85c4 100644
--- a/llvm/test/TableGen/FixedLenDecoderEmitter/InvalidEncoding.td
+++ b/llvm/test/TableGen/FixedLenDecoderEmitter/InvalidEncoding.td
@@ -1,5 +1,6 @@
-// RUN: not llvm-tblgen -gen-disassembler -I %p/../../../include %s -DTEST1 2>&1 | FileCheck %s -DFILE=%s --check-prefix=CHECK-TEST1
-// RUN: not llvm-tblgen -gen-disassembler -I %p/../../../include %s -DTEST2 2>&1 | FileCheck %s -DFILE=%s --check-prefix=CHECK-TEST2
+// RUN: not llvm-tblgen -gen-disassembler -I %p/../../../include %s -DTEST1 2>&1 | FileCheck %s --check-prefix=CHECK-TEST1
+// RUN: not llvm-tblgen -gen-disassembler -I %p/../../../include %s -DTEST2 2>&1 | FileCheck %s --check-prefix=CHECK-TEST2
+// RUN: not llvm-tblgen -gen-disassembler -I %p/../../../include %s -DTEST3 2>&1 | FileCheck %s --check-prefix=CHECK-TEST3
 
 include "llvm/Target/Target.td"
 
@@ -10,10 +11,10 @@ def arch : Target {
 }
 
 #ifdef TEST1
-// CHECK-TEST1: [[FILE]]:[[@LINE+1]]:5: error: foo: Size is 16 bits, but Inst bits beyond that are not zero/unset
+// CHECK-TEST1: [[#@LINE+1]]:5: error: foo: Size is 16 bits, but Inst bits beyond that are not zero/unset
 def foo : Instruction {
   let OutOperandList = (outs);
-  let InOperandList = (ins i32imm:$factor);  
+  let InOperandList = (ins i32imm:$factor);
   let Size = 2;
   field bits<24> Inst;
   field bits<24> SoftFail = 0;
@@ -24,10 +25,25 @@ def foo : Instruction {
 #endif
 
 #ifdef TEST2
-// CHECK-TEST2: [[FILE]]:[[@LINE+1]]:5: error: bar: Size is 16 bits, but Inst specifies only 8 bits
+// CHECK-TEST2: [[#@LINE+1]]:5: error: foo: Size is 16 bits, but SoftFail bits beyond that are not zero/unset
+def foo : Instruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins i32imm:$factor);
+  let Size = 2;
+  field bits<24> Inst;
+  field bits<24> SoftFail = 0;
+  bits<8> factor;
+  let Inst{15...8} = factor{7...0};
+  let Inst{23...16} = 0;
+  let SoftFail{20} = 1;
+}
+#endif
+
+#ifdef TEST3
+// CHECK-TEST3: [[#@LINE+1]]:5: error: bar: Size is 16 bits, but Inst specifies only 8 bits
 def bar : Instruction {
   let OutOperandList = (outs);
-  let InOperandList = (ins i32imm:$factor);  
+  let InOperandList = (ins i32imm:$factor);
   let Size = 2;
   field bits<8> Inst;
   field bits<8> SoftFail = 0;
diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 26c2ef817e83c..02eb1410813bf 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -212,7 +212,7 @@ class InstructionEncoding {
 
 private:
   void parseVarLenEncoding(const VarLenInst &VLI);
-  void parseFixedLenEncoding(const BitsInit &Bits);
+  void parseFixedLenEncoding(const BitsInit &RecordInstBits);
 
   void parseVarLenOperands(const VarLenInst &VLI);
   void parseFixedLenOperands(const BitsInit &Bits);
@@ -1787,15 +1787,17 @@ void InstructionEncoding::parseVarLenEncoding(const VarLenInst &VLI) {
   assert(I == VLI.size());
 }
 
-void InstructionEncoding::parseFixedLenEncoding(const BitsInit &InstBits) {
+void InstructionEncoding::parseFixedLenEncoding(
+    const BitsInit &RecordInstBits) {
   // For fixed length instructions, sometimes the `Inst` field specifies more
   // bits than the actual size of the instruction, which is specified in `Size`.
   // In such cases, we do some basic validation and drop the upper bits.
   unsigned BitWidth = EncodingDef->getValueAsInt("Size") * 8;
-  unsigned InstNumBits = InstBits.getNumBits();
+  unsigned InstNumBits = RecordInstBits.getNumBits();
 
   // Returns true if all bits in `Bits` are zero or unset.
-  auto CheckAllZeroOrUnset = [&](ArrayRef<const Init *> Bits, const RecordVal *Field) {
+  auto CheckAllZeroOrUnset = [&](ArrayRef<const Init *> Bits,
+                                 const RecordVal *Field) {
     bool AllZeroOrUnset = llvm::all_of(Bits, [](const Init *Bit) {
       if (const auto *BI = dyn_cast<BitInit>(Bit))
         return !BI->getValue();
@@ -1803,30 +1805,28 @@ void InstructionEncoding::parseFixedLenEncoding(const BitsInit &InstBits) {
     });
     if (AllZeroOrUnset)
       return;
-    PrintNote([Field](raw_ostream &OS) {
-      Field->print(OS);
-    });
-    PrintFatalError(
-        EncodingDef,
-        Twine(Name) + ": Size is " + Twine(BitWidth) +
-            " bits, but " +  Field->getName() + " bits beyond that are not zero/unset");
+    PrintNote([Field](raw_ostream &OS) { Field->print(OS); });
+    PrintFatalError(EncodingDef, Twine(Name) + ": Size is " + Twine(BitWidth) +
+                                     " bits, but " + Field->getName() +
+                                     " bits beyond that are not zero/unset");
   };
 
   if (InstNumBits < BitWidth)
-    PrintFatalError(EncodingDef, Twine(Name) + ": Size is " +
-                                      Twine(BitWidth) +
-                                      " bits, but Inst specifies only " +
-                                      Twine(InstNumBits) + " bits");
+    PrintFatalError(EncodingDef, Twine(Name) + ": Size is " + Twine(BitWidth) +
+                                     " bits, but Inst specifies only " +
+                                     Twine(InstNumBits) + " bits");
 
   if (InstNumBits > BitWidth) {
     // Ensure that all the bits beyond 'Size' are 0 or unset (i.e., carry no
     // actual encoding).
-    ArrayRef<const Init *> UpperBits = InstBits.getBits().drop_front(BitWidth);
+    ArrayRef<const Init *> UpperBits =
+        RecordInstBits.getBits().drop_front(BitWidth);
     const RecordVal *InstField = EncodingDef->getValue("Inst");
     CheckAllZeroOrUnset(UpperBits, InstField);
   }
 
-  ArrayRef<const Init *> ActiveInstBits = InstBits.getBits().take_front(BitWidth);
+  ArrayRef<const Init *> ActiveInstBits =
+      RecordInstBits.getBits().take_front(BitWidth);
   InstBits = KnownBits(BitWidth);
   SoftFailBits = APInt(BitWidth, 0);
 
@@ -1846,12 +1846,12 @@ void InstructionEncoding::parseFixedLenEncoding(const BitsInit &InstBits) {
     return;
 
   const auto *SFBits = dyn_cast<BitsInit>(SoftFailField->getValue());
-  if (!SFBits || SFBits->getNumBits() != Bits.getNumBits()) {
+  if (!SFBits || SFBits->getNumBits() != InstNumBits) {
     PrintNote(EncodingDef->getLoc(), "in record");
     PrintFatalError(SoftFailField,
                     formatv("SoftFail field, if defined, must be "
                             "of the same type as Inst, which is bits<{}>",
-                            Bits.getNumBits()));
+                            InstNumBits));
   }
 
   if (InstNumBits > BitWidth) {
@@ -1866,9 +1866,9 @@ void InstructionEncoding::parseFixedLenEncoding(const BitsInit &InstBits) {
       if (!InstBits.Zero[I] && !InstBits.One[I]) {
         PrintNote(EncodingDef->getLoc(), "in record");
         PrintError(SoftFailField,
-                    formatv("SoftFail{{{0}} = 1 requires Inst{{{0}} "
-                            "to be fully defined (0 or 1, not '?')",
-                            I));
+                   formatv("SoftFail{{{0}} = 1 requires Inst{{{0}} "
+                           "to be fully defined (0 or 1, not '?')",
+                           I));
       }
       SoftFailBits.setBit(I);
     }