[llvm] [LLVM][DecoderEmitter] Add option to use lambdas in decodeToMCInst (PR #144814)

Rahul Joshi via llvm-commits llvm-commits at lists.llvm.org
Sat Jun 21 10:28:38 PDT 2025


https://github.com/jurahul updated https://github.com/llvm/llvm-project/pull/144814

>From 3a8b95f70f9ba5ac3a206921aa3aa471fbcb1a3e Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Tue, 3 Jun 2025 22:25:00 -0700
Subject: [PATCH 1/2] [LLVM][TableGen][DecoderEmitter] Add option to use
 lambdas in decodeToMCInst

Add option `use-lambda-in-decode-to-mcinst` to use a table of lambdas
instead of a switch case in the generated `decodeToMCInst` function.

When the number of switch cases in this function is large, the generated
code takes a long time to compile in release builds. Using a table of
lambdas instead improves the compile time significantly (~3x speedup
in compiling the code in a downstream target). This option allows
targets to opt into this mode if they want better build times.

Tested with `check-llvm-mc` with the option enabled by default.
---
 llvm/test/TableGen/DecoderEmitterLambda.td | 84 ++++++++++++++++++++++
 llvm/utils/TableGen/DecoderEmitter.cpp     | 55 +++++++++++---
 2 files changed, 131 insertions(+), 8 deletions(-)
 create mode 100644 llvm/test/TableGen/DecoderEmitterLambda.td

diff --git a/llvm/test/TableGen/DecoderEmitterLambda.td b/llvm/test/TableGen/DecoderEmitterLambda.td
new file mode 100644
index 0000000000000..4926c8d7def66
--- /dev/null
+++ b/llvm/test/TableGen/DecoderEmitterLambda.td
@@ -0,0 +1,84 @@
+// RUN: llvm-tblgen -gen-disassembler -use-lambda-in-decode-to-mcinst -I %p/../../include %s | FileCheck %s
+
+include "llvm/Target/Target.td"
+
+def archInstrInfo : InstrInfo { }
+
+def arch : Target {
+  let InstructionSet = archInstrInfo;
+}
+
+let Namespace = "arch" in {
+  def R0 : Register<"r0">;
+  def R1 : Register<"r1">;
+  def R2 : Register<"r2">;
+  def R3 : Register<"r3">;
+}
+def Regs : RegisterClass<"Regs", [i32], 32, (add R0, R1, R2, R3)>;
+
+class TestInstruction : Instruction {
+  let Size = 1;
+  let OutOperandList = (outs);
+  field bits<8> Inst;
+  field bits<8> SoftFail = 0;
+}
+
+// Define instructions to generate 4 cases in decodeToMCInst.
+// Lower 2 bits define the number of operands. Each register operand
+// needs 2 bits to encode.
+
+// An instruction with no inputs. Encoded with lower 2 bits = 0 and upper
+// 6 bits = 0 as well.
+def Inst0 : TestInstruction {
+  let Inst = 0x0;
+  let InOperandList = (ins);
+  let AsmString = "Inst0";
+}
+
+// An instruction with a single input. Encoded with lower 2 bits = 1 and the
+// single input in bits 2-3.
+def Inst1 : TestInstruction {
+  bits<2> r0;
+  let Inst{1-0} = 1;
+  let Inst{3-2} = r0;
+  let InOperandList = (ins Regs:$r0);
+  let AsmString = "Inst1";
+}
+
+// An instruction with two inputs. Encoded with lower 2 bits = 2 and the
+// inputs in bits 2-3 and 4-5.
+def Inst2 : TestInstruction {
+  bits<2> r0;
+  bits<2> r1;
+  let Inst{1-0} = 2;
+  let Inst{3-2} = r0;
+  let Inst{5-4} = r1;
+  let InOperandList = (ins Regs:$r0, Regs:$r1);
+  let AsmString = "Inst2";
+}
+
+// An instruction with three inputs. Encoded with lower 2 bits = 3 and the
+// inputs in bits 2-3 and 4-5 and 6-7.
+def Inst3 : TestInstruction {
+  bits<2> r0;
+  bits<2> r1;
+  bits<2> r2;
+  let Inst{1-0} = 3;
+  let Inst{3-2} = r0;
+  let Inst{5-4} = r1;
+  let Inst{7-6} = r2;
+  let InOperandList = (ins Regs:$r0, Regs:$r1, Regs:$r2);
+  let AsmString = "Inst3";
+}
+
+// CHECK-LABEL: decodeToMCInst
+// CHECK: decodeLambda0 =
+// CHECK: decodeLambda1 =
+// CHECK: decodeLambda2 =
+// CHECK: decodeLambda3 =
+// CHECK: decodeLambdaTable[]
+// CHECK-NEXT: decodeLambda0
+// CHECK-NEXT: decodeLambda1
+// CHECK-NEXT: decodeLambda2
+// CHECK-NEXT: decodeLambda3
+// CHECK: return decodeLambdaTable[Idx]
diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 2e8ff2aa47d96..824b0ad2a2645 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -83,6 +83,13 @@ static cl::opt<bool> LargeTable(
              "in the table instead of the default 16 bits."),
     cl::init(false), cl::cat(DisassemblerEmitterCat));
 
+static cl::opt<bool> UseLambdaInDecodetoMCInst(
+    "use-lambda-in-decode-to-mcinst",
+    cl::desc("Use a table of lambdas instead of a switch case in the\n"
+             "generated `decodeToMCInst` function. Helps improve compile time\n"
+             "of the generated code."),
+    cl::init(false), cl::cat(DisassemblerEmitterCat));
+
 STATISTIC(NumEncodings, "Number of encodings considered");
 STATISTIC(NumEncodingsLackingDisasm,
           "Number of encodings without disassembler info");
@@ -1082,15 +1089,47 @@ void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS,
      << "using TmpType = "
         "std::conditional_t<std::is_integral<InsnType>::"
         "value, InsnType, uint64_t>;\n";
-  OS << Indent << "TmpType tmp;\n";
-  OS << Indent << "switch (Idx) {\n";
-  OS << Indent << "default: llvm_unreachable(\"Invalid index!\");\n";
-  for (const auto &[Index, Decoder] : enumerate(Decoders)) {
-    OS << Indent << "case " << Index << ":\n";
-    OS << Decoder;
-    OS << Indent + 2 << "return S;\n";
+
+  if (UseLambdaInDecodetoMCInst) {
+    // Emit one lambda for each case first.
+    for (const auto &[Index, Decoder] : enumerate(Decoders)) {
+      OS << Indent << "auto decodeLambda" << Index << " = [](DecodeStatus S,\n"
+         << Indent << "                   InsnType insn, MCInst &MI,\n"
+         << Indent << "                   uint64_t Address, \n"
+         << Indent << "                   const MCDisassembler *Decoder,\n"
+         << Indent << "                   bool &DecodeComplete) {\n";
+      OS << Indent + 2 << "[[maybe_unused]] TmpType tmp;\n";
+      OS << Decoder;
+      OS << Indent + 2 << "return S;\n";
+      OS << Indent << "};\n";
+    }
+    // Build a table of lambdas.
+
+    OS << R"(
+  using LambdaTy =
+      function_ref<DecodeStatus(DecodeStatus, InsnType, MCInst &, uint64_t,
+                                const MCDisassembler *, bool &)>;
+    )";
+    OS << Indent << "const static LambdaTy decodeLambdaTable[] = {\n";
+    for (size_t Index : llvm::seq(Decoders.size()))
+      OS << Indent + 2 << "decodeLambda" << Index << ",\n";
+    OS << Indent << "};\n";
+    OS << Indent << "if (Idx >= " << Decoders.size() << ")\n";
+    OS << Indent + 2 << "llvm_unreachable(\"Invalid index!\");\n";
+    OS << Indent
+       << "return decodeLambdaTable[Idx](S, insn, MI, Address, Decoder, "
+          "DecodeComplete);\n";
+  } else {
+    OS << Indent << "TmpType tmp;\n";
+    OS << Indent << "switch (Idx) {\n";
+    OS << Indent << "default: llvm_unreachable(\"Invalid index!\");\n";
+    for (const auto &[Index, Decoder] : enumerate(Decoders)) {
+      OS << Indent << "case " << Index << ":\n";
+      OS << Decoder;
+      OS << Indent + 2 << "return S;\n";
+    }
+    OS << Indent << "}\n";
   }
-  OS << Indent << "}\n";
   Indent -= 2;
   OS << Indent << "}\n";
 }

>From e3eb094f00422dd9d273a8423caa8ecbe0a6e2c3 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Sat, 21 Jun 2025 10:15:41 -0700
Subject: [PATCH 2/2] Use a static table of function pointers instead of lambdas

---
 ...tterLambda.td => DecoderEmitterFnTable.td} | 22 ++---
 llvm/utils/TableGen/DecoderEmitter.cpp        | 88 ++++++++++---------
 2 files changed, 58 insertions(+), 52 deletions(-)
 rename llvm/test/TableGen/{DecoderEmitterLambda.td => DecoderEmitterFnTable.td} (81%)

diff --git a/llvm/test/TableGen/DecoderEmitterLambda.td b/llvm/test/TableGen/DecoderEmitterFnTable.td
similarity index 81%
rename from llvm/test/TableGen/DecoderEmitterLambda.td
rename to llvm/test/TableGen/DecoderEmitterFnTable.td
index 4926c8d7def66..ad21179b5c98c 100644
--- a/llvm/test/TableGen/DecoderEmitterLambda.td
+++ b/llvm/test/TableGen/DecoderEmitterFnTable.td
@@ -1,4 +1,4 @@
-// RUN: llvm-tblgen -gen-disassembler -use-lambda-in-decode-to-mcinst -I %p/../../include %s | FileCheck %s
+// RUN: llvm-tblgen -gen-disassembler -use-fn-table-in-decode-to-mcinst -I %p/../../include %s | FileCheck %s
 
 include "llvm/Target/Target.td"
 
@@ -71,14 +71,14 @@ def Inst3 : TestInstruction {
   let AsmString = "Inst3";
 }
 
+// CHECK-LABEL: decodeFn0
+// CHECK-LABEL: decodeFn1
+// CHECK-LABEL: decodeFn2
+// CHECK-LABEL: decodeFn3
 // CHECK-LABEL: decodeToMCInst
-// CHECK: decodeLambda0 =
-// CHECK: decodeLambda1 =
-// CHECK: decodeLambda2 =
-// CHECK: decodeLambda3 =
-// CHECK: decodeLambdaTable[]
-// CHECK-NEXT: decodeLambda0
-// CHECK-NEXT: decodeLambda1
-// CHECK-NEXT: decodeLambda2
-// CHECK-NEXT: decodeLambda3
-// CHECK: return decodeLambdaTable[Idx]
+// CHECK: static constexpr DecodeFnTy decodeFnTable[]
+// CHECK-NEXT: decodeFn0,
+// CHECK-NEXT: decodeFn1,
+// CHECK-NEXT: decodeFn2,
+// CHECK-NEXT: decodeFn3,
+// CHECK: return decodeFnTable[Idx]
diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 824b0ad2a2645..af25975f7c7ec 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -83,11 +83,12 @@ static cl::opt<bool> LargeTable(
              "in the table instead of the default 16 bits."),
     cl::init(false), cl::cat(DisassemblerEmitterCat));
 
-static cl::opt<bool> UseLambdaInDecodetoMCInst(
-    "use-lambda-in-decode-to-mcinst",
-    cl::desc("Use a table of lambdas instead of a switch case in the\n"
-             "generated `decodeToMCInst` function. Helps improve compile time\n"
-             "of the generated code."),
+static cl::opt<bool> UseFnTableInDecodetoMCInst(
+    "use-fn-table-in-decode-to-mcinst",
+    cl::desc(
+        "Use a table of function pointers instead of a switch case in the\n"
+        "generated `decodeToMCInst` function. Helps improve compile time\n"
+        "of the generated code."),
     cl::init(false), cl::cat(DisassemblerEmitterCat));
 
 STATISTIC(NumEncodings, "Number of encodings considered");
@@ -1073,53 +1074,57 @@ void DecoderEmitter::emitPredicateFunction(formatted_raw_ostream &OS,
 void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS,
                                          DecoderSet &Decoders,
                                          indent Indent) const {
-  // The decoder function is just a big switch statement based on the
-  // input decoder index.
-  OS << Indent << "template <typename InsnType>\n";
-  OS << Indent << "static DecodeStatus decodeToMCInst(DecodeStatus S,"
-     << " unsigned Idx, InsnType insn, MCInst &MI,\n";
-  OS << Indent << "                                   uint64_t "
-     << "Address, const MCDisassembler *Decoder, bool &DecodeComplete) {\n";
-  Indent += 2;
-  OS << Indent << "DecodeComplete = true;\n";
+  // The decoder function is just a big switch statement or a table of function
+  // pointers based on the input decoder index.
+
   // TODO: When InsnType is large, using uint64_t limits all fields to 64 bits
   // It would be better for emitBinaryParser to use a 64-bit tmp whenever
   // possible but fall back to an InsnType-sized tmp for truly large fields.
-  OS << Indent
-     << "using TmpType = "
-        "std::conditional_t<std::is_integral<InsnType>::"
-        "value, InsnType, uint64_t>;\n";
-
-  if (UseLambdaInDecodetoMCInst) {
-    // Emit one lambda for each case first.
+  StringRef TmpTypeDecl =
+      "using TmpType = std::conditional_t<std::is_integral<InsnType>::value, "
+      "InsnType, uint64_t>;\n";
+  StringRef DecodeParams =
+      "DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const "
+      "MCDisassembler *Decoder, bool &DecodeComplete";
+
+  if (UseFnTableInDecodetoMCInst) {
+    // Emit a function for each case first.
     for (const auto &[Index, Decoder] : enumerate(Decoders)) {
-      OS << Indent << "auto decodeLambda" << Index << " = [](DecodeStatus S,\n"
-         << Indent << "                   InsnType insn, MCInst &MI,\n"
-         << Indent << "                   uint64_t Address, \n"
-         << Indent << "                   const MCDisassembler *Decoder,\n"
-         << Indent << "                   bool &DecodeComplete) {\n";
-      OS << Indent + 2 << "[[maybe_unused]] TmpType tmp;\n";
+      OS << Indent << "template <typename InsnType>\n";
+      OS << Indent << "DecodeStatus decodeFn" << Index << "(" << DecodeParams
+         << ") {\n";
+      Indent += 2;
+      OS << Indent << TmpTypeDecl;
+      OS << Indent << "[[maybe_unused]] TmpType tmp;\n";
       OS << Decoder;
-      OS << Indent + 2 << "return S;\n";
-      OS << Indent << "};\n";
+      OS << Indent << "return S;\n";
+      Indent -= 2;
+      OS << Indent << "}\n\n";
     }
-    // Build a table of lambdas.
+  }
 
-    OS << R"(
-  using LambdaTy =
-      function_ref<DecodeStatus(DecodeStatus, InsnType, MCInst &, uint64_t,
-                                const MCDisassembler *, bool &)>;
-    )";
-    OS << Indent << "const static LambdaTy decodeLambdaTable[] = {\n";
+  OS << Indent << "// Handling " << Decoders.size() << " cases.\n";
+  OS << Indent << "template <typename InsnType>\n";
+  OS << Indent << "static DecodeStatus decodeToMCInst(unsigned Idx, "
+     << DecodeParams << ") {\n";
+  Indent += 2;
+  OS << Indent << "DecodeComplete = true;\n";
+
+  if (UseFnTableInDecodetoMCInst) {
+    // Build a table of function pointers.
+    OS << Indent << "using DecodeFnTy = DecodeStatus (*)(" << DecodeParams
+       << ");\n";
+    OS << Indent << "static constexpr DecodeFnTy decodeFnTable[] = {\n";
     for (size_t Index : llvm::seq(Decoders.size()))
-      OS << Indent + 2 << "decodeLambda" << Index << ",\n";
+      OS << Indent + 2 << "decodeFn" << Index << ",\n";
     OS << Indent << "};\n";
     OS << Indent << "if (Idx >= " << Decoders.size() << ")\n";
     OS << Indent + 2 << "llvm_unreachable(\"Invalid index!\");\n";
     OS << Indent
-       << "return decodeLambdaTable[Idx](S, insn, MI, Address, Decoder, "
+       << "return decodeFnTable[Idx](S, insn, MI, Address, Decoder, "
           "DecodeComplete);\n";
   } else {
+    OS << Indent << TmpTypeDecl;
     OS << Indent << "TmpType tmp;\n";
     OS << Indent << "switch (Idx) {\n";
     OS << Indent << "default: llvm_unreachable(\"Invalid index!\");\n";
@@ -1306,7 +1311,8 @@ std::pair<unsigned, bool> FilterChooser::getDecoderIndex(DecoderSet &Decoders,
   // FIXME: emitDecoder() function can take a buffer directly rather than
   // a stream.
   raw_svector_ostream S(Decoder);
-  bool HasCompleteDecoder = emitDecoder(S, indent(4), Opc);
+  indent Indent(UseFnTableInDecodetoMCInst ? 2 : 4);
+  bool HasCompleteDecoder = emitDecoder(S, Indent, Opc);
 
   // Using the full decoder string as the key value here is a bit
   // heavyweight, but is effective. If the string comparisons become a
@@ -2410,7 +2416,7 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
        << "      makeUp(insn, Len);";
   }
   OS << R"(
-      S = decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm, DecodeComplete);
+      S = decodeToMCInst(DecodeIdx, S, insn, MI, Address, DisAsm, DecodeComplete);
       assert(DecodeComplete);
 
       LLVM_DEBUG(dbgs() << Loc << ": OPC_Decode: opcode " << Opc
@@ -2432,7 +2438,7 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
       MCInst TmpMI;
       TmpMI.setOpcode(Opc);
       bool DecodeComplete;
-      S = decodeToMCInst(S, DecodeIdx, insn, TmpMI, Address, DisAsm, DecodeComplete);
+      S = decodeToMCInst(DecodeIdx, S, insn, TmpMI, Address, DisAsm, DecodeComplete);
       LLVM_DEBUG(dbgs() << Loc << ": OPC_TryDecode: opcode " << Opc
                    << ", using decoder " << DecodeIdx << ": ");
 



More information about the llvm-commits mailing list