[llvm] [TableGen] More efficiency improvements for encode/decode emission. (PR #84647)

Jason Eckhardt via llvm-commits llvm-commits at lists.llvm.org
Sat Mar 9 10:01:55 PST 2024


https://github.com/nvjle created https://github.com/llvm/llvm-project/pull/84647

DecoderEmitter and CodeEmitterGen perform repeated linear walks over the entire instruction list. This patch eliminates two more such walks.

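The eliminated traversals visit every instruction merely to determine whether the target has variable-length encodings. For a target with variable-length encodings, the original any_of terminates quickly. But all targets other than M68k use fixed-length encodings, and thus any_of must visit the entire instruction list. Instead, CodeGenTarget now records this property once, while it reads the instruction records, and both emitters query the cached flag via hasVariableLengthEncodings().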

>From ef7a9a5e259c8f71b7b02a3246c512959c6c62a6 Mon Sep 17 00:00:00 2001
From: Jason Eckhardt <jeckhardt at nvidia.com>
Date: Sat, 9 Mar 2024 11:53:08 -0600
Subject: [PATCH] [TableGen] More efficiency improvements for encode/decode
 emission.

DecoderEmitter and CodeEmitterGen perform repeated linear walks over the
entire instruction list. This patch eliminates two more such walks.

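The eliminated traversals visit every instruction merely to determine whether
the target has variable-length encodings. For a target with variable-length
encodings, the original any_of terminates quickly. But all targets other
than M68k use fixed-length encodings, and thus any_of must visit the entire
instruction list. Instead, CodeGenTarget now records this property once, while
it reads the instruction records, and both emitters query the cached flag via
hasVariableLengthEncodings().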
---
 llvm/utils/TableGen/CodeEmitterGen.cpp   |  5 +----
 llvm/utils/TableGen/CodeGenInstruction.h |  7 +++++++
 llvm/utils/TableGen/CodeGenTarget.cpp    |  7 +++++--
 llvm/utils/TableGen/CodeGenTarget.h      |  4 ++++
 llvm/utils/TableGen/DecoderEmitter.cpp   | 18 ++++++------------
 5 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/llvm/utils/TableGen/CodeEmitterGen.cpp b/llvm/utils/TableGen/CodeEmitterGen.cpp
index 1e80eb6b1ad50e..9194c13ccdcb08 100644
--- a/llvm/utils/TableGen/CodeEmitterGen.cpp
+++ b/llvm/utils/TableGen/CodeEmitterGen.cpp
@@ -434,10 +434,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
   ArrayRef<const CodeGenInstruction *> NumberedInstructions =
       Target.getInstructionsByEnumValue();
 
-  if (any_of(NumberedInstructions, [](const CodeGenInstruction *CGI) {
-        Record *R = CGI->TheDef;
-        return R->getValue("Inst") && isa<DagInit>(R->getValueInit("Inst"));
-      })) {
+  if (Target.hasVariableLengthEncodings()) {
     emitVarLenCodeEmitter(Records, o);
   } else {
     const CodeGenHwModes &HWM = Target.getHwModes();
diff --git a/llvm/utils/TableGen/CodeGenInstruction.h b/llvm/utils/TableGen/CodeGenInstruction.h
index 963c9f0b259259..d250091c50f230 100644
--- a/llvm/utils/TableGen/CodeGenInstruction.h
+++ b/llvm/utils/TableGen/CodeGenInstruction.h
@@ -17,6 +17,7 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGenTypes/MachineValueType.h"
+#include "llvm/TableGen/Record.h"
 #include <cassert>
 #include <string>
 #include <utility>
@@ -333,6 +334,12 @@ class CodeGenInstruction {
     return isOperandImpl("InOperandList", i, "IsImmediate");
   }
 
+  /// Return true if the instruction uses a variable length encoding.
+  bool isVariableLengthEncoding() const {
+    const RecordVal *RV = TheDef->getValue("Inst");
+    return RV && isa<DagInit>(RV->getValue());
+  }
+
 private:
   bool isOperandImpl(StringRef OpListName, unsigned i,
                      StringRef PropertyName) const;
diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp
index 980c9bdb6367f7..e1cf33e7f62ffc 100644
--- a/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -480,8 +480,11 @@ void CodeGenTarget::ReadInstructions() const {
     PrintFatalError("No 'Instruction' subclasses defined!");
 
   // Parse the instructions defined in the .td file.
-  for (unsigned i = 0, e = Insts.size(); i != e; ++i)
-    Instructions[Insts[i]] = std::make_unique<CodeGenInstruction>(Insts[i]);
+  for (Record *R : Insts) {
+    Instructions[R] = std::make_unique<CodeGenInstruction>(R);
+    if (Instructions[R]->isVariableLengthEncoding())
+      HasVariableLengthEncodings = true;
+  }
 }
 
 static const CodeGenInstruction *GetInstByName(
diff --git a/llvm/utils/TableGen/CodeGenTarget.h b/llvm/utils/TableGen/CodeGenTarget.h
index 2ae3a3a2204dd0..e109c717dc018e 100644
--- a/llvm/utils/TableGen/CodeGenTarget.h
+++ b/llvm/utils/TableGen/CodeGenTarget.h
@@ -65,6 +65,7 @@ class CodeGenTarget {
   mutable SmallVector<ValueTypeByHwMode, 8> LegalValueTypes;
   CodeGenHwModes CGH;
   std::vector<Record *> MacroFusions;
+  mutable bool HasVariableLengthEncodings = false;
 
   void ReadRegAltNameIndices() const;
   void ReadInstructions() const;
@@ -209,6 +210,9 @@ class CodeGenTarget {
   }
   inst_iterator inst_end() const { return getInstructionsByEnumValue().end(); }
 
+  /// Return whether instructions have variable length encodings on this target.
+  bool hasVariableLengthEncodings() const { return HasVariableLengthEncodings; }
+
   /// isLittleEndianEncoding - are instruction bit patterns defined as  [0..n]?
   ///
   bool isLittleEndianEncoding() const;
diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 27ff84bce4058e..88f24523813828 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -2499,8 +2499,8 @@ void DecoderEmitter::run(raw_ostream &o) {
   const auto &NumberedInstructions = Target.getInstructionsByEnumValue();
   NumberedEncodings.reserve(NumberedInstructions.size());
   for (const auto &NumberedInstruction : NumberedInstructions) {
-    if (const RecordVal *RV =
-            NumberedInstruction->TheDef->getValue("EncodingInfos")) {
+    const Record *InstDef = NumberedInstruction->TheDef;
+    if (const RecordVal *RV = InstDef->getValue("EncodingInfos")) {
       if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
         EncodingInfoByHwMode EBM(DI->getDef(), HWM);
         for (auto &KV : EBM)
@@ -2513,12 +2513,11 @@ void DecoderEmitter::run(raw_ostream &o) {
     // This instruction is encoded the same on all HwModes. Emit it for all
     // HwModes by default, otherwise leave it in a single common table.
     if (DecoderEmitterSuppressDuplicates) {
-      NumberedEncodings.emplace_back(NumberedInstruction->TheDef,
-                                     NumberedInstruction, "AllModes");
+      NumberedEncodings.emplace_back(InstDef, NumberedInstruction, "AllModes");
     } else {
       for (StringRef HwModeName : HwModeNames)
-        NumberedEncodings.emplace_back(NumberedInstruction->TheDef,
-                                       NumberedInstruction, HwModeName);
+        NumberedEncodings.emplace_back(InstDef, NumberedInstruction,
+                                       HwModeName);
     }
   }
   for (const auto &NumberedAlias :
@@ -2531,12 +2530,7 @@ void DecoderEmitter::run(raw_ostream &o) {
       OpcMap;
   std::map<unsigned, std::vector<OperandInfo>> Operands;
   std::vector<unsigned> InstrLen;
-
-  bool IsVarLenInst =
-      any_of(NumberedInstructions, [](const CodeGenInstruction *CGI) {
-        RecordVal *RV = CGI->TheDef->getValue("Inst");
-        return RV && isa<DagInit>(RV->getValue());
-      });
+  bool IsVarLenInst = Target.hasVariableLengthEncodings();
   unsigned MaxInstLen = 0;
 
   for (unsigned i = 0; i < NumberedEncodings.size(); ++i) {



More information about the llvm-commits mailing list