[llvm] 85f5953 - [LLVM][MC] Unique per-hw mode field encoding code in CodeEmitterGen (#172764)

via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 18 09:32:03 PST 2025


Author: Rahul Joshi
Date: 2025-12-18T09:31:59-08:00
New Revision: 85f5953fcad80fbe3b42a35276c38661f0eb4349

URL: https://github.com/llvm/llvm-project/commit/85f5953fcad80fbe3b42a35276c38661f0eb4349
DIFF: https://github.com/llvm/llvm-project/commit/85f5953fcad80fbe3b42a35276c38661f0eb4349.diff

LOG: [LLVM][MC] Unique per-hw mode field encoding code in CodeEmitterGen (#172764)

Change CodeEmitterGen to de-duplicate case statements emitted for
encoding instruction fields for different HW modes when they contain the
same code. When 2 or more HW modes share the same code for encoding the
fields of an instruction, we currently generate a case statement for
each mode and emit the same code in each case body. Instead, unique the
case statement bodies and emit each body just once.

Some minor refactoring to help with this:
1. Make `emitCaseMap` a standalone static function and use
`ListSeparator` to emit the case statements.
2. Add a type-alias for the map of cases.

No upstream target seems to use this feature (`EncodingInfos`), but this
results in a ~3% code size reduction in a downstream target.

Added: 
    

Modified: 
    llvm/test/TableGen/HwModeEncodeAPInt.td
    llvm/test/TableGen/HwModeEncodeDecode3.td
    llvm/utils/TableGen/CodeEmitterGen.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/test/TableGen/HwModeEncodeAPInt.td b/llvm/test/TableGen/HwModeEncodeAPInt.td
index 82d99940aa0ff..5056259de6494 100644
--- a/llvm/test/TableGen/HwModeEncodeAPInt.td
+++ b/llvm/test/TableGen/HwModeEncodeAPInt.td
@@ -165,22 +165,17 @@ def unrelated: Instruction {
 // ENCODER: Value = Inst;
 // ENCODER: switch (HwMode) {
 // ENCODER: default: llvm_unreachable("Unhandled HwMode");
-// ENCODER: case 0: {
-// ENCODER: op.clearAllBits();
-// ENCODER: getMachineOpValue(MI, MI.getOperand(0), op, Fixups, STI);
-// ENCODER: Value.insertBits(op.extractBitsAsZExtValue(8, 0), 120, 8);
-// ENCODER: break;
-// ENCODER: }
-// ENCODER: case 1: {
+// ENCODER: case 1:
+// ENCODER: case 2: {
 // ENCODER: op.clearAllBits();
 // ENCODER: getMachineOpValue(MI, MI.getOperand(0), op, Fixups, STI);
 // ENCODER: Value.insertBits(op.extractBitsAsZExtValue(8, 0), 112, 8);
 // ENCODER: break;
 // ENCODER: }
-// ENCODER: case 2: {
+// ENCODER: case 0: {
 // ENCODER: op.clearAllBits();
 // ENCODER: getMachineOpValue(MI, MI.getOperand(0), op, Fixups, STI);
-// ENCODER: Value.insertBits(op.extractBitsAsZExtValue(8, 0), 112, 8);
+// ENCODER: Value.insertBits(op.extractBitsAsZExtValue(8, 0), 120, 8);
 // ENCODER: break;
 // ENCODER: }
 // ENCODER: case 3: {
@@ -214,24 +209,18 @@ def unrelated: Instruction {
 // ENCODER:   unsigned HwMode = STI.getHwMode(MCSubtargetInfo::HwMode_EncodingInfo);
 // ENCODER:   switch (HwMode) {
 // ENCODER:   default: llvm_unreachable("Unhandled HwMode");
-// ENCODER:   case 0: {
-// ENCODER:   switch (OpNum) {
-// ENCODER:   case 0:
-// ENCODER:     return 120;
-// ENCODER:   }
-// ENCODER:   break;
-// ENCODER:   }
-// ENCODER:   case 1: {
+// ENCODER:   case 1:
+// ENCODER:   case 2: {
 // ENCODER:   switch (OpNum) {
 // ENCODER:   case 0:
 // ENCODER:     return 112;
 // ENCODER:   }
 // ENCODER:   break;
 // ENCODER:   }
-// ENCODER:   case 2: {
+// ENCODER:   case 0: {
 // ENCODER:   switch (OpNum) {
 // ENCODER:   case 0:
-// ENCODER:     return 112;
+// ENCODER:     return 120;
 // ENCODER:   }
 // ENCODER:   break;
 // ENCODER:   }

diff  --git a/llvm/test/TableGen/HwModeEncodeDecode3.td b/llvm/test/TableGen/HwModeEncodeDecode3.td
index 36c65214d2719..cb34c2b3d7b49 100644
--- a/llvm/test/TableGen/HwModeEncodeDecode3.td
+++ b/llvm/test/TableGen/HwModeEncodeDecode3.td
@@ -228,14 +228,15 @@ def unrelated: Instruction {
 // ENCODER: Value = InstBitsByHw[TableIndex];
 // ENCODER: switch (HwMode) {
 // ENCODER: default: llvm_unreachable("Unhandled HwMode");
-// ENCODER: case 0: {
+// ENCODER: case 0:
+// ENCODER: case 1: {
 // ENCODER: op = getMachineOpValue(MI, MI.getOperand(0), Fixups, STI);
 // ENCODER: Value |= (op & 0xf0);
 // ENCODER: break;
 // ENCODER: }
-// ENCODER: case 1: {
+// ENCODER: case 3: {
 // ENCODER: op = getMachineOpValue(MI, MI.getOperand(0), Fixups, STI);
-// ENCODER: Value |= (op & 0xf0);
+// ENCODER: Value |= (op & 0xff) << 24;
 // ENCODER: break;
 // ENCODER: }
 // ENCODER: case 2: {
@@ -243,11 +244,6 @@ def unrelated: Instruction {
 // ENCODER: Value |= (op & 0xff) << 8;
 // ENCODER: break;
 // ENCODER: }
-// ENCODER: case 3: {
-// ENCODER: op = getMachineOpValue(MI, MI.getOperand(0), Fixups, STI);
-// ENCODER: Value |= (op & 0xff) << 24;
-// ENCODER: break;
-// ENCODER: }
 // ENCODER-LABEL: case ::baz: {
 // ENCODER: unsigned HwMode = STI.getHwMode(MCSubtargetInfo::HwMode_EncodingInfo);
 // ENCODER: switch (HwMode) {

diff  --git a/llvm/utils/TableGen/CodeEmitterGen.cpp b/llvm/utils/TableGen/CodeEmitterGen.cpp
index 278fcd74cecdb..e7686f3867304 100644
--- a/llvm/utils/TableGen/CodeEmitterGen.cpp
+++ b/llvm/utils/TableGen/CodeEmitterGen.cpp
@@ -48,6 +48,10 @@ using namespace llvm;
 
 namespace {
 
+// A map of uniqued case statements. The key is the body of the case statement
+// and the value is a list of cases which share the same body.
+using CaseMapT = std::map<std::string, std::vector<std::string>>;
+
 class CodeEmitterGen {
   const RecordKeeper &RK;
   CodeGenTarget Target;
@@ -72,9 +76,6 @@ class CodeEmitterGen {
   void emitInstructionBaseValues(
       raw_ostream &O, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
       unsigned HwMode = DefaultMode);
-  void
-  emitCaseMap(raw_ostream &O,
-              const std::map<std::string, std::vector<std::string>> &CaseMap);
   unsigned BitWidth = 0u;
   bool UseAPInt = false;
 };
@@ -218,6 +219,18 @@ bool CodeEmitterGen::addCodeToMergeInOperand(const Record *R,
   return true;
 }
 
+static void emitCaseMap(raw_ostream &O, const CaseMapT &CaseMap) {
+  for (const auto &[CaseBody, Cases] : CaseMap) {
+    ListSeparator LS("\n");
+    for (const auto &Case : Cases)
+      O << LS << "    case " << Case << ":";
+    O << " {\n";
+    O << CaseBody;
+    O << "      break;\n"
+      << "    }\n";
+  }
+}
+
 std::pair<std::string, std::string>
 CodeEmitterGen::getInstructionCases(const Record *R) {
   std::string Case, BitOffsetCase;
@@ -264,12 +277,26 @@ CodeEmitterGen::getInstructionCases(const Record *R) {
 
     Append("      switch (HwMode) {\n");
     Append("      default: llvm_unreachable(\"Unhandled HwMode\");\n");
+
+    // Attempt to unique the per-hw-mode encoding case statements. This helps
+    // reduce the code size if 2 or more hw-modes share the same encoding for
+    // the fields of the instruction.
+    CaseMapT CaseMap, BitOffsetCaseMap;
+    std::string ModeCase, ModeBitOffsetCase;
+
     for (auto &[ModeId, Encoding] : EBM) {
-      Append("      case " + itostr(ModeId) + ": {\n");
-      addInstructionCasesForEncoding(R, Encoding, Case, BitOffsetCase);
-      Append("      break;\n");
-      Append("      }\n");
+      ModeCase.clear();
+      ModeBitOffsetCase.clear();
+      addInstructionCasesForEncoding(R, Encoding, ModeCase, ModeBitOffsetCase);
+      CaseMap[ModeCase].push_back(utostr(ModeId));
+      BitOffsetCaseMap[ModeBitOffsetCase].push_back(utostr(ModeId));
     }
+
+    raw_string_ostream CaseOS(Case);
+    raw_string_ostream BitOffsetCaseOS(BitOffsetCase);
+    emitCaseMap(CaseOS, CaseMap);
+    emitCaseMap(BitOffsetCaseOS, BitOffsetCaseMap);
+
     Append("      }\n");
     return {std::move(Case), std::move(BitOffsetCase)};
   }
@@ -368,24 +395,6 @@ void CodeEmitterGen::emitInstructionBaseValues(
   O << "  };\n";
 }
 
-void CodeEmitterGen::emitCaseMap(
-    raw_ostream &O,
-    const std::map<std::string, std::vector<std::string>> &CaseMap) {
-  for (const auto &[Case, InstList] : CaseMap) {
-    bool First = true;
-    for (const auto &Inst : InstList) {
-      if (!First)
-        O << "\n";
-      O << "    case " << Inst << ":";
-      First = false;
-    }
-    O << " {\n";
-    O << Case;
-    O << "      break;\n"
-      << "    }\n";
-  }
-}
-
 CodeEmitterGen::CodeEmitterGen(const RecordKeeper &RK)
     : RK(RK), Target(RK), CGH(Target.getHwModes()) {
   // For little-endian instruction bit encodings, reverse the bit order.
@@ -451,8 +460,7 @@ void CodeEmitterGen::run(raw_ostream &O) {
   }
 
   // Map to accumulate all the cases.
-  std::map<std::string, std::vector<std::string>> CaseMap;
-  std::map<std::string, std::vector<std::string>> BitOffsetCaseMap;
+  CaseMapT CaseMap, BitOffsetCaseMap;
 
   // Construct all cases statement for each opcode
   for (const CodeGenInstruction *CGI : EncodedInstructions) {


        


More information about the llvm-commits mailing list