[llvm] [TableGen] Efficiency improvements for encoding HwMode collection. (PR #82902)

Jason Eckhardt via llvm-commits llvm-commits at lists.llvm.org
Sat Feb 24 18:37:42 PST 2024


https://github.com/nvjle created https://github.com/llvm/llvm-project/pull/82902

Currently the DecoderEmitter spends a fair number of cycles performing repeated linear walks over the entire instruction list. This patch eliminates one such walk during HwMode collection for EncodingInfos.

The eliminated traversal visits every instruction and then every EncodingInfos entry for that instruction merely to collect all referenced HwModes. That information is already present in the HwModeSelects created during the one-time construction of CodeGenHwModes. We instead traverse the HwModeSelects, collecting each HwMode that selects an InstructionEncoding. This set is small, and its size does not generally grow with the size of the instruction set.

From d1fb89829a64bcea660dbf982305a6649519f5f5 Mon Sep 17 00:00:00 2001
From: Jason Eckhardt <jeckhardt at nvidia.com>
Date: Sat, 24 Feb 2024 20:15:06 -0600
Subject: [PATCH] [TableGen] Efficiency improvements for encoding HwMode
 collection.

Currently the DecoderEmitter spends a fair number of cycles performing
repeated linear walks over the entire instruction list. This patch
eliminates one such walk during HwMode collection for EncodingInfos.

The eliminated traversal visits every instruction and then every
EncodingInfos entry for that instruction merely to collect all referenced
HwModes. That information is already present in the HwModeSelects created
during the one-time construction of CodeGenHwModes. We instead traverse
the HwModeSelects, collecting each HwMode that selects an
InstructionEncoding. This set is small, and its size does not generally
grow with the size of the instruction set.
---
 llvm/utils/TableGen/CodeGenHwModes.h   |  3 ++
 llvm/utils/TableGen/DecoderEmitter.cpp | 45 ++++++++++++++------------
 2 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/llvm/utils/TableGen/CodeGenHwModes.h b/llvm/utils/TableGen/CodeGenHwModes.h
index 9a5b7a8c2c1c9d..56639f741ede11 100644
--- a/llvm/utils/TableGen/CodeGenHwModes.h
+++ b/llvm/utils/TableGen/CodeGenHwModes.h
@@ -53,6 +53,9 @@ struct CodeGenHwModes {
     return Modes[Id - 1];
   }
   const HwModeSelect &getHwModeSelect(Record *R) const;
+  const std::map<Record *, HwModeSelect> &getHwModeSelects() const {
+    return ModeSelects;
+  }
   unsigned getNumModeIds() const { return Modes.size() + 1; }
   void dump() const;
 
diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 36f437f02cf514..4ce5a73d775668 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/CachedHashString.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
@@ -2448,6 +2449,22 @@ static void emitCheck(formatted_raw_ostream &OS) {
      << "}\n\n";
 }
 
+// Collect all HwModes referenced by the target for encoding purposes,
+// appending the name of each such mode to the caller-provided Names vector.
+static void
+collectHwModesReferencedForEncodings(const CodeGenHwModes &HWM,
+                                     std::vector<StringRef> &Names) {
+  SmallBitVector BV(HWM.getNumModeIds());
+  for (const auto &MS : HWM.getHwModeSelects()) {
+    for (const HwModeSelect::PairType &P : MS.second.Items) {
+      if (P.second->isSubClassOf("InstructionEncoding"))
+        BV.set(P.first);
+    }
+  }
+  transform(BV.set_bits(), std::back_inserter(Names),
+            [&HWM](const int &M) { return HWM.getMode(M).Name; });
+}
+
 // Emits disassembler code for instruction decoding.
 void DecoderEmitter::run(raw_ostream &o) {
   formatted_raw_ostream OS(o);
@@ -2469,37 +2486,25 @@ void DecoderEmitter::run(raw_ostream &o) {
   Target.reverseBitsForLittleEndianEncoding();
 
   // Parameterize the decoders based on namespace and instruction width.
-  std::set<StringRef> HwModeNames;
-  const auto &NumberedInstructions = Target.getInstructionsByEnumValue();
-  NumberedEncodings.reserve(NumberedInstructions.size());
-  // First, collect all HwModes referenced by the target.
-  for (const auto &NumberedInstruction : NumberedInstructions) {
-    if (const RecordVal *RV =
-            NumberedInstruction->TheDef->getValue("EncodingInfos")) {
-      if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
-        const CodeGenHwModes &HWM = Target.getHwModes();
-        EncodingInfoByHwMode EBM(DI->getDef(), HWM);
-        for (auto &KV : EBM)
-          HwModeNames.insert(HWM.getMode(KV.first).Name);
-      }
-    }
-  }
 
+  // First, collect all encoding-related HwModes referenced by the target.
   // If HwModeNames is empty, add the empty string so we always have one HwMode.
+  const CodeGenHwModes &HWM = Target.getHwModes();
+  std::vector<StringRef> HwModeNames;
+  collectHwModesReferencedForEncodings(HWM, HwModeNames);
   if (HwModeNames.empty())
-    HwModeNames.insert("");
+    HwModeNames.push_back("");
 
+  const auto &NumberedInstructions = Target.getInstructionsByEnumValue();
+  NumberedEncodings.reserve(NumberedInstructions.size());
   for (const auto &NumberedInstruction : NumberedInstructions) {
     if (const RecordVal *RV =
             NumberedInstruction->TheDef->getValue("EncodingInfos")) {
       if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
-        const CodeGenHwModes &HWM = Target.getHwModes();
         EncodingInfoByHwMode EBM(DI->getDef(), HWM);
-        for (auto &KV : EBM) {
+        for (auto &KV : EBM)
           NumberedEncodings.emplace_back(KV.second, NumberedInstruction,
                                          HWM.getMode(KV.first).Name);
-          HwModeNames.insert(HWM.getMode(KV.first).Name);
-        }
         continue;
       }
     }



More information about the llvm-commits mailing list