[llvm] [ARM] Auto-decode Thumb1 S-bit (PR #159956)

Sergei Barannikov via llvm-commits llvm-commits at lists.llvm.org
Sat Sep 20 16:32:57 PDT 2025


https://github.com/s-barannikov created https://github.com/llvm/llvm-project/pull/159956

None

>From 372ce871c90b62f93fcd3fe23442ebc0db4d26f2 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sun, 21 Sep 2025 01:17:44 +0300
Subject: [PATCH 1/2] [TableGen][Decoder] Always handle `bits<0>`

Previously, `bits<0>` only had effect if `ignore-non-decodable-operands`
wasn't specified. Handle it even if the option is specified. This
should allow for a smoother transition.
---
 llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp | 8 ++++++++
 llvm/lib/Target/RISCV/RISCVInstrInfoC.td                 | 5 ++---
 llvm/utils/TableGen/Common/InstructionEncoding.cpp       | 8 ++++++++
 llvm/utils/TableGen/DecoderEmitter.cpp                   | 2 --
 4 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index ff07122b61378..9f070fb2ff3e2 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -206,6 +206,14 @@ static DecodeStatus DecodeSPRegisterClass(MCInst &Inst,
   return MCDisassembler::Success;
 }
 
+static DecodeStatus DecodeSPRegisterClass(MCInst &Inst, uint64_t RegNo,
+                                          uint32_t Address,
+                                          const MCDisassembler *Decoder) {
+  assert(RegNo == 2);
+  Inst.addOperand(MCOperand::createReg(RISCV::X2));
+  return MCDisassembler::Success;
+}
+
 static DecodeStatus DecodeGPRX5RegisterClass(MCInst &Inst,
                                              const MCDisassembler *Decoder) {
   Inst.addOperand(MCOperand::createReg(RISCV::X5));
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index 9fc73662d9704..24e7a0ee5a79f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -298,7 +298,7 @@ def C_ADDI4SPN : RVInst16CIW<0b000, 0b00, (outs GPRC:$rd),
                              (ins SP:$rs1, uimm10_lsb00nonzero:$imm),
                              "c.addi4spn", "$rd, $rs1, $imm">,
                              Sched<[WriteIALU, ReadIALU]> {
-  bits<5> rs1;
+  bits<0> rs1;
   let Inst{12-11} = imm{5-4};
   let Inst{10-7} = imm{9-6};
   let Inst{6} = imm{2};
@@ -404,8 +404,8 @@ def C_ADDI16SP : RVInst16CI<0b011, 0b01, (outs SP:$rd_wb),
                             "c.addi16sp", "$rd, $imm">,
                  Sched<[WriteIALU, ReadIALU]> {
   let Constraints = "$rd = $rd_wb";
+  let rd = 2;
   let Inst{12} = imm{9};
-  let Inst{11-7} = 2;
   let Inst{6} = imm{4};
   let Inst{5} = imm{6};
   let Inst{4-3} = imm{8-7};
@@ -965,4 +965,3 @@ let Predicates = [HasStdExtCOrZcd, HasStdExtD] in {
   def : CompressPat<(FSD FPR64:$rs2, SPMem:$rs1, uimm9_lsb000:$imm),
                     (C_FSDSP FPR64:$rs2, SPMem:$rs1, uimm9_lsb000:$imm)>;
 } // Predicates = [HasStdExtCOrZcd, HasStdExtD]
-
diff --git a/llvm/utils/TableGen/Common/InstructionEncoding.cpp b/llvm/utils/TableGen/Common/InstructionEncoding.cpp
index 7260ee3d9b534..30bbac463c0f4 100644
--- a/llvm/utils/TableGen/Common/InstructionEncoding.cpp
+++ b/llvm/utils/TableGen/Common/InstructionEncoding.cpp
@@ -316,6 +316,14 @@ static void addOneOperandFields(const Record *EncodingDef,
     else
       OpInfo.addField(I, J - I, Offset);
   }
+
+  if (!OpInfo.InitValue && OpInfo.fields().empty()) {
+    // We found a field in InstructionEncoding record that corresponds to the
+    // named operand, but that field has no constant bits and doesn't contribute
+    // to the Inst field. For now, treat that field as if it didn't exist.
+    // TODO: Remove along with IgnoreNonDecodableOperands.
+    OpInfo.HasNoEncoding = true;
+  }
 }
 
 void InstructionEncoding::parseFixedLenOperands(const BitsInit &Bits) {
diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index e83df47d541c6..961dc2815f6b9 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -696,8 +696,6 @@ static void emitBinaryParser(raw_ostream &OS, indent Indent,
 
   // Special case for 'bits<0>'.
   if (OpInfo.Fields.empty() && !OpInfo.InitValue) {
-    if (IgnoreNonDecodableOperands)
-      return;
     assert(!OpInfo.Decoder.empty());
     // The operand has no encoding, so the corresponding argument is omitted.
     // This avoids confusion and allows the function to be overloaded if the

>From a89c80e015b9de2dfe994e04548cca6876f9068a Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sun, 21 Sep 2025 00:32:02 +0300
Subject: [PATCH 2/2] [ARM] Auto-decode Thumb1 S-bit

---
 llvm/lib/Target/ARM/ARMInstrFormats.td        |  1 +
 .../ARM/Disassembler/ARMDisassembler.cpp      | 41 ++++++-------------
 2 files changed, 14 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td
index e50740f7d57c5..1ad2485dce17f 100644
--- a/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -1219,6 +1219,7 @@ class Thumb1sI<dag oops, dag iops, AddrMode am, int sz,
                InstrItinClass itin,
                string opc, string asm, string cstr, list<dag> pattern>
   : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+  bits<0> s;
   let OutOperandList = !con(oops, (outs s_cc_out:$s));
   let InOperandList = !con(iops, (ins pred:$p));
   let AsmString = !strconcat(opc, "${s}${p}", asm);
diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 56112112a0293..b25b7e7104f20 100644
--- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -119,6 +119,8 @@ class VPTStatus {
 class ARMDisassembler : public MCDisassembler {
 public:
   std::unique_ptr<const MCInstrInfo> MCII;
+  mutable ITStatus ITBlock;
+  mutable VPTStatus VPTBlock;
 
   ARMDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
                   const MCInstrInfo *MCII)
@@ -146,10 +148,6 @@ class ARMDisassembler : public MCDisassembler {
                                    ArrayRef<uint8_t> Bytes, uint64_t Address,
                                    raw_ostream &CStream) const;
 
-  mutable ITStatus ITBlock;
-  mutable VPTStatus VPTBlock;
-
-  void AddThumb1SBit(MCInst &MI, bool InITBlock) const;
   bool isVectorPredicable(const MCInst &MI) const;
   DecodeStatus AddThumbPredicate(MCInst&) const;
   void UpdateThumbPredicate(DecodeStatus &S, MCInst &MI) const;
@@ -636,6 +634,17 @@ static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val,
   return MCDisassembler::Success;
 }
 
+// This overload is called when decoding `s_cc_out` operand, which is not
+// encoded into instruction. It is only used in Thumb1 instructions.
+static DecodeStatus DecodeCCOutOperand(MCInst &Inst,
+                                       const MCDisassembler *Decoder) {
+  const auto *D = static_cast<const ARMDisassembler *>(Decoder);
+  // Thumb1 instructions define CPSR unless they are inside an IT block.
+  MCRegister CCR = D->ITBlock.instrInITBlock() ? ARM::NoRegister : ARM::CPSR;
+  Inst.addOperand(MCOperand::createReg(CCR));
+  return MCDisassembler::Success;
+}
+
 static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val,
                                           uint64_t Address,
                                           const MCDisassembler *Decoder) {
@@ -6130,26 +6139,6 @@ DecodeStatus ARMDisassembler::getARMInstruction(MCInst &MI, uint64_t &Size,
   return MCDisassembler::Fail;
 }
 
-// Thumb1 instructions don't have explicit S bits.  Rather, they
-// implicitly set CPSR.  Since it's not represented in the encoding, the
-// auto-generated decoder won't inject the CPSR operand.  We need to fix
-// that as a post-pass.
-void ARMDisassembler::AddThumb1SBit(MCInst &MI, bool InITBlock) const {
-  const MCInstrDesc &MCID = MCII->get(MI.getOpcode());
-  MCInst::iterator I = MI.begin();
-  for (unsigned i = 0; i < MCID.NumOperands; ++i, ++I) {
-    if (I == MI.end()) break;
-    if (MCID.operands()[i].isOptionalDef() &&
-        MCID.operands()[i].RegClass == ARM::CCRRegClassID) {
-      if (i > 0 && MCID.operands()[i - 1].isPredicate())
-        continue;
-      MI.insert(I,
-                MCOperand::createReg(InITBlock ? ARM::NoRegister : ARM::CPSR));
-      return;
-    }
-  }
-}
-
 bool ARMDisassembler::isVectorPredicable(const MCInst &MI) const {
   const MCInstrDesc &MCID = MCII->get(MI.getOpcode());
   for (unsigned i = 0; i < MCID.NumOperands; ++i) {
@@ -6343,9 +6332,7 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size,
                              STI);
   if (Result) {
     Size = 2;
-    bool InITBlock = ITBlock.instrInITBlock();
     Check(Result, AddThumbPredicate(MI));
-    AddThumb1SBit(MI, InITBlock);
     return Result;
   }
 
@@ -6411,9 +6398,7 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size,
       decodeInstruction(DecoderTableThumb32, MI, Insn32, Address, this, STI);
   if (Result != MCDisassembler::Fail) {
     Size = 4;
-    bool InITBlock = ITBlock.instrInITBlock();
     Check(Result, AddThumbPredicate(MI));
-    AddThumb1SBit(MI, InITBlock);
     return Result;
   }
 



More information about the llvm-commits mailing list