[llvm] [TableGen] Optimize intrinsic info type signature encoding (PR #106809)
Rahul Joshi via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 30 16:23:00 PDT 2024
https://github.com/jurahul created https://github.com/llvm/llvm-project/pull/106809
Add an intrinsic emitter option to change the "fixed encoding" table used for encoding intrinsic type signatures to use 16-bit entries as opposed to 32-bit ones.
To better segregate LLVM Core from this encoding detail, add a function `decodeIITFixedEncoding` to the emitted code to decode the fixed encoding. This allows the TableGen intrinsic emitter to choose 16- or 32-bit fixed encoding without changing the LLVM code.
With the 16-bit encoding, the total static storage size of this info drops by roughly a third. Measured data is as follows:
- Current size = 14193*4 + 16058 + 3 = 72833 bytes.
- New size = 14193*2 + 19879 + 3 = 48268 bytes.
- Reduction = 24565 bytes (~33.7%; the current size is ~50.9% larger than the new size).
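To make the scheme concrete, here is a minimal sketch (my own illustration, not the generated code verbatim) of how one fixed-encoding table entry is decoded: an entry either packs the type signature as 4-bit codes starting from the least significant nibble, or, when its most significant bit is set, holds an offset into IIT_LongEncodingTable. With 16-bit entries the sentinel bit moves from bit 31 to bit 15 and the offset mask shrinks from 0x7FFFFFFF to 0x7FFF.

  #include "llvm/ADT/SmallVector.h"
  #include <cstdint>
  #include <variant>

  // Simplified sketch of the per-entry decode. EncodingTy is uint16_t or
  // uint32_t depending on whether the 16-bit fixed encoding is selected.
  template <typename EncodingTy>
  static std::variant<llvm::SmallVector<unsigned char, 8>, unsigned>
  decodeEntry(EncodingTy TableValue) {
    constexpr unsigned MSB = sizeof(EncodingTy) * 8 - 1;    // 15 or 31
    constexpr EncodingTy Mask = (EncodingTy(1) << MSB) - 1; // 0x7FFF or 0x7FFFFFFF
    if (TableValue >> MSB) {
      // MSB set: the remaining bits are an offset into IIT_LongEncodingTable.
      return static_cast<unsigned>(TableValue & Mask);
    }
    // Otherwise the entry packs the type signature as 4-bit codes,
    // least significant nibble first.
    llvm::SmallVector<unsigned char, 8> Nibbles;
    do {
      Nibbles.push_back(TableValue & 0xF);
      TableValue >>= 4;
    } while (TableValue);
    return Nibbles;
  }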
From f40f7e97534cb1ca8f3f8babc03b2bdf05f05ace Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Fri, 30 Aug 2024 16:05:20 -0700
Subject: [PATCH] [TableGen] Optimize intrinsic info type signature encoding
Add an intrinsic emitter option to change the "fixed encoding" table used
for encoding intrinsic type signatures to use 16-bit entries as opposed
to 32-bit ones.
To better segregate LLVM Core from this encoding detail, add a function
`decodeIITFixedEncoding` to the emitted code to decode the fixed
encoding. This allows the TableGen intrinsic emitter to choose 16- or
32-bit fixed encoding without changing the LLVM code.
With the 16-bit encoding, the total static storage size of this info
drops by roughly a third. Measured data is as follows:
- Current size = 14193*4 + 16058 + 3 = 72833 bytes.
- New size = 14193*2 + 19879 + 3 = 48268 bytes.
- Reduction = 24565 bytes (~33.7%; the current size is ~50.9% larger
  than the new size).
---
llvm/lib/IR/Function.cpp | 24 ++---
llvm/test/TableGen/intrinsic-attrs.td | 17 ++--
llvm/utils/TableGen/IntrinsicEmitter.cpp | 121 +++++++++++++++--------
3 files changed, 95 insertions(+), 67 deletions(-)
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 69520fdb03dc7c..b798eb08bf222a 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -72,6 +72,7 @@
#include <cstdint>
#include <cstring>
#include <string>
+#include <variant>
using namespace llvm;
using ProfileCount = Function::ProfileCount;
@@ -1381,28 +1382,15 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
void Intrinsic::getIntrinsicInfoTableEntries(ID id,
SmallVectorImpl<IITDescriptor> &T){
- // Check to see if the intrinsic's type was expressible by the table.
- unsigned TableVal = IIT_Table[id-1];
-
- // Decode the TableVal into an array of IITValues.
- SmallVector<unsigned char, 8> IITValues;
+ std::variant<SmallVector<unsigned char, 8>, unsigned> Decode =
+ decodeIITFixedEncoding(id);
ArrayRef<unsigned char> IITEntries;
unsigned NextElt = 0;
- if ((TableVal >> 31) != 0) {
- // This is an offset into the IIT_LongEncodingTable.
+ if (unsigned *LongEncodingOffset = std::get_if<1>(&Decode)) {
IITEntries = IIT_LongEncodingTable;
-
- // Strip sentinel bit.
- NextElt = (TableVal << 1) >> 1;
+ NextElt = *LongEncodingOffset;
} else {
- // Decode the TableVal into an array of IITValues. If the entry was encoded
- // into a single word in the table itself, decode it now.
- do {
- IITValues.push_back(TableVal & 0xF);
- TableVal >>= 4;
- } while (TableVal);
-
- IITEntries = IITValues;
+ IITEntries = std::get<0>(Decode);
NextElt = 0;
}
diff --git a/llvm/test/TableGen/intrinsic-attrs.td b/llvm/test/TableGen/intrinsic-attrs.td
index 29e8cb1e89bb01..5913cbb302d132 100644
--- a/llvm/test/TableGen/intrinsic-attrs.td
+++ b/llvm/test/TableGen/intrinsic-attrs.td
@@ -1,6 +1,6 @@
// RUN: llvm-tblgen -gen-intrinsic-impl -I %p/../../include %s | FileCheck %s
-// Get the minimum blurb necessary to process ...
+// Get the minimum blurb necessary to process.
include "llvm/CodeGen/ValueTypes.td"
include "llvm/CodeGen/SDNodeProperties.td"
@@ -33,21 +33,18 @@ class Dereferenceable<AttrIndex idx, int bytes> : IntrinsicProperty {
class Intrinsic<list<LLVMType> ret_types,
list<LLVMType> param_types = [],
- list<IntrinsicProperty> intr_properties = [],
- string name = "",
- list<SDNodeProperty> sd_properties = [],
- bit disable_default_attributes = 0> : SDPatternOperator {
- string LLVMName = name;
+ list<IntrinsicProperty> intr_properties = []>
+ : SDPatternOperator {
+ string LLVMName = "";
string TargetPrefix = "";
list<LLVMType> RetTypes = ret_types;
list<LLVMType> ParamTypes = param_types;
list<IntrinsicProperty> IntrProperties = intr_properties;
- let Properties = sd_properties;
+ let Properties = [];
bit DisableDefaultAttributes = 1;
-
-
+ list<list<int>> TypeSig = [[]];
bit isTarget = 0;
- bit DisableDefaultAttributes = disable_default_attributes;
+ bit DisableDefaultAttributes = 0;
}
// ... this intrinsic.
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 70ccecf7752af7..6f7f136708f30a 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -37,11 +37,17 @@
#include <vector>
using namespace llvm;
-static cl::OptionCategory GenIntrinsicCat("Options for -gen-intrinsic-enums");
+static cl::OptionCategory EnumsCat("Options for -gen-intrinsic-enums");
static cl::opt<std::string>
IntrinsicPrefix("intrinsic-prefix",
cl::desc("Generate intrinsics with this target prefix"),
- cl::value_desc("target prefix"), cl::cat(GenIntrinsicCat));
+ cl::value_desc("target prefix"), cl::cat(EnumsCat));
+
+static cl::OptionCategory ImplCat("Options for -gen-intrinsic-impl");
+static cl::opt<bool> Use16BitFixedEncoding(
+ "iit-16bit-fixed",
+ cl::desc("Use 16-bit fixed encoding for intrinsic info table"),
+ cl::init(false), cl::cat(ImplCat));
namespace {
class IntrinsicEmitter {
@@ -60,7 +66,9 @@ class IntrinsicEmitter {
raw_ostream &OS);
void EmitIntrinsicToOverloadTable(const CodeGenIntrinsicTable &Ints,
raw_ostream &OS);
+ template <bool Use16BitFixedEncoding>
void EmitGenerator(const CodeGenIntrinsicTable &Ints, raw_ostream &OS);
+
void EmitAttributes(const CodeGenIntrinsicTable &Ints, raw_ostream &OS);
void EmitIntrinsicToBuiltinMap(const CodeGenIntrinsicTable &Ints,
bool IsClang, raw_ostream &OS);
@@ -104,7 +112,10 @@ void IntrinsicEmitter::run(raw_ostream &OS, bool Enums) {
EmitIntrinsicToOverloadTable(Ints, OS);
// Emit the intrinsic declaration generator.
- EmitGenerator(Ints, OS);
+ if (Use16BitFixedEncoding)
+ EmitGenerator<true>(Ints, OS);
+ else
+ EmitGenerator<false>(Ints, OS);
// Emit the intrinsic parameter attributes.
EmitAttributes(Ints, OS);
@@ -273,20 +284,40 @@ using TypeSigTy = SmallVector<unsigned char>;
/// Computes type signature of the intrinsic \p Int.
static TypeSigTy ComputeTypeSignature(const CodeGenIntrinsic &Int) {
TypeSigTy TypeSig;
- if (const auto *R = Int.TheDef->getValue("TypeSig")) {
- for (const auto *a : cast<ListInit>(R->getValue())->getValues()) {
- for (const auto *b : cast<ListInit>(a)->getValues())
- TypeSig.emplace_back(cast<IntInit>(b)->getValue());
- }
+ const auto *R = Int.TheDef->getValue("TypeSig");
+ for (const auto *a : cast<ListInit>(R->getValue())->getValues()) {
+ for (const auto *b : cast<ListInit>(a)->getValues())
+ TypeSig.emplace_back(cast<IntInit>(b)->getValue());
}
return TypeSig;
}
+// Pack the type signature into 32-bit fixed encoding word.
+std::optional<unsigned> encodePacked(const TypeSigTy &TypeSig) {
+ if (TypeSig.size() > 8)
+ return std::nullopt;
+
+ unsigned Result = 0;
+ for (unsigned char C : reverse(TypeSig)) {
+ if (C > 15)
+ return std::nullopt;
+ Result = (Result << 4) | C;
+ }
+ return Result;
+}
+
+template <bool Use16BitFixedEncoding>
void IntrinsicEmitter::EmitGenerator(const CodeGenIntrinsicTable &Ints,
raw_ostream &OS) {
- // If we can compute a 32-bit fixed encoding for this intrinsic, do so and
+ using EncodingTy =
+ std::conditional_t<Use16BitFixedEncoding, uint16_t, unsigned>;
+ const unsigned Mask = Use16BitFixedEncoding ? 0x7FFF : 0x7FFFFFFF;
+  const unsigned MSBPosition = Use16BitFixedEncoding ? 15 : 31;
+ StringRef TypeName = Use16BitFixedEncoding ? "uint16_t" : "unsigned";
+
+ // If we can compute a 16/32-bit fixed encoding for this intrinsic, do so and
// capture it in this vector, otherwise store a ~0U.
- std::vector<unsigned> FixedEncodings;
+ std::vector<EncodingTy> FixedEncodings;
SequenceToOffsetTable<TypeSigTy> LongEncodingTable;
FixedEncodings.reserve(Ints.size());
@@ -296,49 +327,37 @@ void IntrinsicEmitter::EmitGenerator(const CodeGenIntrinsicTable &Ints,
// Get the signature for the intrinsic.
TypeSigTy TypeSig = ComputeTypeSignature(Int);
- // Check to see if we can encode it into a 32-bit word. We can only encode
- // 8 nibbles into a 32-bit word.
- if (TypeSig.size() <= 8) {
- // Attempt to pack elements of TypeSig into a 32-bit word, starting from
- // the most significant nibble.
- unsigned Result = 0;
- bool Failed = false;
- for (unsigned char C : reverse(TypeSig)) {
- if (C > 15) {
- Failed = true;
- break;
- }
- Result = (Result << 4) | C;
- }
-
- // If this could be encoded into a 31-bit word, return it.
- if (!Failed && (Result >> 31) == 0) {
- FixedEncodings.push_back(Result);
- continue;
- }
+ // Check to see if we can encode it into a 16/32 bit word.
+ std::optional<unsigned> Result = encodePacked(TypeSig);
+ if (Result && (*Result & Mask) == Result) {
+ FixedEncodings.push_back(static_cast<EncodingTy>(*Result));
+ continue;
}
- // Otherwise, we're going to unique the sequence into the
- // LongEncodingTable, and use its offset in the 32-bit table instead.
LongEncodingTable.add(TypeSig);
    // This is a placeholder that we'll replace after the table is laid out.
- FixedEncodings.push_back(~0U);
+ FixedEncodings.push_back(static_cast<EncodingTy>(~0U));
}
LongEncodingTable.layout();
- OS << R"(// Global intrinsic function declaration type table.
+  // Verify that all offsets will fit in 16/32 bits.
+ if (LongEncodingTable.size() > Mask + 1)
+ PrintFatalError("Offset of long encoding table exceeds encoding bits");
+
+ OS << formatv(R"(// Global intrinsic function declaration type table.
#ifdef GET_INTRINSIC_GENERATOR_GLOBAL
-static constexpr unsigned IIT_Table[] = {
- )";
+static constexpr {0} IIT_Table[{1}] = {{
+ )",
+ TypeName, Ints.size() + 1);
for (auto [Idx, FixedEncoding, Int] : enumerate(FixedEncodings, Ints)) {
if ((Idx & 7) == 7)
OS << "\n ";
// If the entry fit in the table, just emit it.
- if (FixedEncoding != ~0U) {
+ if ((FixedEncoding & Mask) == FixedEncoding) {
OS << "0x" << Twine::utohexstr(FixedEncoding) << ", ";
continue;
}
@@ -347,7 +366,8 @@ static constexpr unsigned IIT_Table[] = {
// Otherwise, emit the offset into the long encoding table. We emit it this
// way so that it is easier to read the offset in the .def file.
- OS << "(1U<<31) | " << LongEncodingTable.get(TypeSig) << ", ";
+    OS << formatv("(1U<<{0}) | {1}, ", MSBPosition,
+ LongEncodingTable.get(TypeSig));
}
OS << "0\n};\n\n";
@@ -357,7 +377,30 @@ static constexpr unsigned IIT_Table[] = {
if (!LongEncodingTable.empty())
LongEncodingTable.emit(
OS, [](raw_ostream &OS, unsigned char C) { OS << (unsigned)C; });
- OS << " 255\n};\n\n";
+ OS << " 255\n};\n";
+
+ // Also emit a function to decode the fixed encoding.
+ OS << formatv(R"(
+// Returns either the decoded fixed encoding, or the offset into the long
+// encoding table for an intrinsic.
+static std::variant<SmallVector<unsigned char, 8>, unsigned>
+decodeIITFixedEncoding(Intrinsic::ID id) {{
+ {0} TableValue = IIT_Table[id-1];
+ if (TableValue >> {1}) {{
+ // This is an offset into the IIT_LongEncodingTable. Clear the MSB.
+ return static_cast<unsigned>(TableValue & {2:x});
+ }
+
+ // Fixed encoding.
+ SmallVector<unsigned char, 8> IITValues;
+ do {{
+ IITValues.push_back(TableValue & 0xF);
+ TableValue >>= 4;
+ } while (TableValue);
+ return IITValues;
+}
+)",
+                TypeName, MSBPosition, Mask);
OS << "#endif\n\n"; // End of GET_INTRINSIC_GENERATOR_GLOBAL
}
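For reference, a hypothetical invocation that exercises the new option (paths are illustrative; the flag only affects -gen-intrinsic-impl output):

  llvm-tblgen -gen-intrinsic-impl -iit-16bit-fixed -I llvm/include llvm/include/llvm/IR/Intrinsics.td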