[llvm] [TableGen] Optimize intrinsic info type signature encoding (PR #106809)
Rahul Joshi via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 30 16:23:00 PDT 2024
https://github.com/jurahul created https://github.com/llvm/llvm-project/pull/106809
Add an intrinsic emitter option to change the "fixed encoding" table used for encoding intrinsic type signatures to use 16-bit entries as opposed to 32-bit ones.
To better segregate LLVM Core from this encoding detail, add a function `decodeIITFixedEncoding` to the emitted code to decode the fixed encoding. This allows the TableGen intrinsic emitter to choose 16- or 32-bit fixed encoding without changing the LLVM code.
With the 16-bit encoding, the total static storage size of this info drops by roughly a third. Measured data is as follows:
- Current size = 14193*4 + 16058 + 3 = 72833 bytes.
- New size = 14193*2 + 19879 + 3 = 48268 bytes.
- Reduction = 24565 bytes (~33.7%; the current size is ~50.9% larger than the new size).
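To make the scheme concrete, here is a minimal sketch (my own illustration, not the generated code verbatim) of how one fixed-encoding table entry is decoded: an entry either packs the type signature as 4-bit codes starting from the least significant nibble, or, when its most significant bit is set, holds an offset into IIT_LongEncodingTable. With 16-bit entries the sentinel bit moves from bit 31 to bit 15 and the offset mask shrinks from 0x7FFFFFFF to 0x7FFF.

  #include "llvm/ADT/SmallVector.h"
  #include <cstdint>
  #include <variant>

  // Simplified sketch of the per-entry decode. EncodingTy is uint16_t or
  // uint32_t depending on whether the 16-bit fixed encoding is selected.
  template <typename EncodingTy>
  static std::variant<llvm::SmallVector<unsigned char, 8>, unsigned>
  decodeEntry(EncodingTy TableValue) {
    constexpr unsigned MSB = sizeof(EncodingTy) * 8 - 1;    // 15 or 31
    constexpr EncodingTy Mask = (EncodingTy(1) << MSB) - 1; // 0x7FFF or 0x7FFFFFFF
    if (TableValue >> MSB) {
      // MSB set: the remaining bits are an offset into IIT_LongEncodingTable.
      return static_cast<unsigned>(TableValue & Mask);
    }
    // Otherwise the entry packs the type signature as 4-bit codes,
    // least significant nibble first.
    llvm::SmallVector<unsigned char, 8> Nibbles;
    do {
      Nibbles.push_back(TableValue & 0xF);
      TableValue >>= 4;
    } while (TableValue);
    return Nibbles;
  }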
From f40f7e97534cb1ca8f3f8babc03b2bdf05f05ace Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Fri, 30 Aug 2024 16:05:20 -0700
Subject: [PATCH] [TableGen] Optimize intrinsic info type signature encoding
Add an intrinsic emitter option to change the "fixed encoding" table used
for encoding intrinsic type signatures to use 16-bit entries as opposed
to 32-bit ones.
To better segregate LLVM Core from this encoding detail, add a function
`decodeIITFixedEncoding` to the emitted code to decode the fixed
encoding. This allows the TableGen intrinsic emitter to choose 16- or
32-bit fixed encoding without changing the LLVM code.
With the 16-bit encoding, the total static storage size of this info
drops by roughly a third. Measured data is as follows:
- Current size = 14193*4 + 16058 + 3 = 72833 bytes.
- New size = 14193*2 + 19879 + 3 = 48268 bytes.
- Reduction = 24565 bytes (~33.7%; the current size is ~50.9% larger
  than the new size).
---
llvm/lib/IR/Function.cpp | 24 ++---
llvm/test/TableGen/intrinsic-attrs.td | 17 ++--
llvm/utils/TableGen/IntrinsicEmitter.cpp | 121 +++++++++++++++--------
3 files changed, 95 insertions(+), 67 deletions(-)
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 69520fdb03dc7c..b798eb08bf222a 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -72,6 +72,7 @@
#include <cstdint>
#include <cstring>
#include <string>
+#include <variant>
using namespace llvm;
using ProfileCount = Function::ProfileCount;
@@ -1381,28 +1382,15 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
void Intrinsic::getIntrinsicInfoTableEntries(ID id,
SmallVectorImpl<IITDescriptor> &T){
- // Check to see if the intrinsic's type was expressible by the table.
- unsigned TableVal = IIT_Table[id-1];
-
- // Decode the TableVal into an array of IITValues.
- SmallVector<unsigned char, 8> IITValues;
+ std::variant<SmallVector<unsigned char, 8>, unsigned> Decode =
+ decodeIITFixedEncoding(id);
ArrayRef<unsigned char> IITEntries;
unsigned NextElt = 0;
- if ((TableVal >> 31) != 0) {
- // This is an offset into the IIT_LongEncodingTable.
+ if (unsigned *LongEncodingOffset = std::get_if<1>(&Decode)) {
IITEntries = IIT_LongEncodingTable;
-
- // Strip sentinel bit.
- NextElt = (TableVal << 1) >> 1;
+ NextElt = *LongEncodingOffset;
} else {
- // Decode the TableVal into an array of IITValues. If the entry was encoded
- // into a single word in the table itself, decode it now.
- do {
- IITValues.push_back(TableVal & 0xF);
- TableVal >>= 4;
- } while (TableVal);
-
- IITEntries = IITValues;
+ IITEntries = std::get<0>(Decode);
NextElt = 0;
}
diff --git a/llvm/test/TableGen/intrinsic-attrs.td b/llvm/test/TableGen/intrinsic-attrs.td
index 29e8cb1e89bb01..5913cbb302d132 100644
--- a/llvm/test/TableGen/intrinsic-attrs.td
+++ b/llvm/test/TableGen/intrinsic-attrs.td
@@ -1,6 +1,6 @@
// RUN: llvm-tblgen -gen-intrinsic-impl -I %p/../../include %s | FileCheck %s
-// Get the minimum blurb necessary to process ...
+// Get the minimum blurb necessary to process.
include "llvm/CodeGen/ValueTypes.td"
include "llvm/CodeGen/SDNodeProperties.td"
@@ -33,21 +33,18 @@ class Dereferenceable<AttrIndex idx, int bytes> : IntrinsicProperty {
class Intrinsic<list<LLVMType> ret_types,
list<LLVMType> param_types = [],
- list<IntrinsicProperty> intr_properties = [],
- string name = "",
- list<SDNodeProperty> sd_properties = [],
- bit disable_default_attributes = 0> : SDPatternOperator {
- string LLVMName = name;
+ list<IntrinsicProperty> intr_properties = []>
+ : SDPatternOperator {
+ string LLVMName = "";
string TargetPrefix = "";
list<LLVMType> RetTypes = ret_types;
list<LLVMType> ParamTypes = param_types;
list<IntrinsicProperty> IntrProperties = intr_properties;
- let Properties = sd_properties;
+ let Properties = [];
bit DisableDefaultAttributes = 1;
-
-
+ list<list<int>> TypeSig = [[]];
bit isTarget = 0;
- bit DisableDefaultAttributes = disable_default_attributes;
+ bit DisableDefaultAttributes = 0;
}
// ... this intrinsic.
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 70ccecf7752af7..6f7f136708f30a 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -37,11 +37,17 @@
#include <vector>
using namespace llvm;
-static cl::OptionCategory GenIntrinsicCat("Options for -gen-intrinsic-enums");
+static cl::OptionCategory EnumsCat("Options for -gen-intrinsic-enums");
static cl::opt<std::string>
IntrinsicPrefix("intrinsic-prefix",
cl::desc("Generate intrinsics with this target prefix"),
- cl::value_desc("target prefix"), cl::cat(GenIntrinsicCat));
+ cl::value_desc("target prefix"), cl::cat(EnumsCat));
+
+static cl::OptionCategory ImplCat("Options for -gen-intrinsic-impl");
+static cl::opt<bool> Use16BitFixedEncoding(
+ "iit-16bit-fixed",
+ cl::desc("Use 16-bit fixed encoding for intrinsic info table"),
+ cl::init(false), cl::cat(ImplCat));
namespace {
class IntrinsicEmitter {
@@ -60,7 +66,9 @@ class IntrinsicEmitter {
raw_ostream &OS);
void EmitIntrinsicToOverloadTable(const CodeGenIntrinsicTable &Ints,
raw_ostream &OS);
+ template <bool Use16BitFixedEncoding>
void EmitGenerator(const CodeGenIntrinsicTable &Ints, raw_ostream &OS);
+
void EmitAttributes(const CodeGenIntrinsicTable &Ints, raw_ostream &OS);
void EmitIntrinsicToBuiltinMap(const CodeGenIntrinsicTable &Ints,
bool IsClang, raw_ostream &OS);
@@ -104,7 +112,10 @@ void IntrinsicEmitter::run(raw_ostream &OS, bool Enums) {
EmitIntrinsicToOverloadTable(Ints, OS);
// Emit the intrinsic declaration generator.
- EmitGenerator(Ints, OS);
+ if (Use16BitFixedEncoding)
+ EmitGenerator<true>(Ints, OS);
+ else
+ EmitGenerator<false>(Ints, OS);
// Emit the intrinsic parameter attributes.
EmitAttributes(Ints, OS);
@@ -273,20 +284,40 @@ using TypeSigTy = SmallVector<unsigned char>;
/// Computes type signature of the intrinsic \p Int.
static TypeSigTy ComputeTypeSignature(const CodeGenIntrinsic &Int) {
TypeSigTy TypeSig;
- if (const auto *R = Int.TheDef->getValue("TypeSig")) {
- for (const auto *a : cast<ListInit>(R->getValue())->getValues()) {
- for (const auto *b : cast<ListInit>(a)->getValues())
- TypeSig.emplace_back(cast<IntInit>(b)->getValue());
- }
+ const auto *R = Int.TheDef->getValue("TypeSig");
+ for (const auto *a : cast<ListInit>(R->getValue())->getValues()) {
+ for (const auto *b : cast<ListInit>(a)->getValues())
+ TypeSig.emplace_back(cast<IntInit>(b)->getValue());
}
return TypeSig;
}
+// Pack the type signature into 32-bit fixed encoding word.
+std::optional<unsigned> encodePacked(const TypeSigTy &TypeSig) {
+ if (TypeSig.size() > 8)
+ return std::nullopt;
+
+ unsigned Result = 0;
+ for (unsigned char C : reverse(TypeSig)) {
+ if (C > 15)
+ return std::nullopt;
+ Result = (Result << 4) | C;
+ }
+ return Result;
+}
+
+template <bool Use16BitFixedEncoding>
void IntrinsicEmitter::EmitGenerator(const CodeGenIntrinsicTable &Ints,
raw_ostream &OS) {
- // If we can compute a 32-bit fixed encoding for this intrinsic, do so and
+ using EncodingTy =
+ std::conditional_t<Use16BitFixedEncoding, uint16_t, unsigned>;
+ const unsigned Mask = Use16BitFixedEncoding ? 0x7FFF : 0x7FFFFFFF;
+  const unsigned MSBPosition = Use16BitFixedEncoding ? 15 : 31;
+ StringRef TypeName = Use16BitFixedEncoding ? "uint16_t" : "unsigned";
+
+ // If we can compute a 16/32-bit fixed encoding for this intrinsic, do so and
// capture it in this vector, otherwise store a ~0U.
- std::vector<unsigned> FixedEncodings;
+ std::vector<EncodingTy> FixedEncodings;
SequenceToOffsetTable<TypeSigTy> LongEncodingTable;
FixedEncodings.reserve(Ints.size());
@@ -296,49 +327,37 @@ void IntrinsicEmitter::EmitGenerator(const CodeGenIntrinsicTable &Ints,
// Get the signature for the intrinsic.
TypeSigTy TypeSig = ComputeTypeSignature(Int);
- // Check to see if we can encode it into a 32-bit word. We can only encode
- // 8 nibbles into a 32-bit word.
- if (TypeSig.size() <= 8) {
- // Attempt to pack elements of TypeSig into a 32-bit word, starting from
- // the most significant nibble.
- unsigned Result = 0;
- bool Failed = false;
- for (unsigned char C : reverse(TypeSig)) {
- if (C > 15) {
- Failed = true;
- break;
- }
- Result = (Result << 4) | C;
- }
-
- // If this could be encoded into a 31-bit word, return it.
- if (!Failed && (Result >> 31) == 0) {
- FixedEncodings.push_back(Result);
- continue;
- }
+ // Check to see if we can encode it into a 16/32 bit word.
+ std::optional<unsigned> Result = encodePacked(TypeSig);
+ if (Result && (*Result & Mask) == Result) {
+ FixedEncodings.push_back(static_cast<EncodingTy>(*Result));
+ continue;
}
- // Otherwise, we're going to unique the sequence into the
- // LongEncodingTable, and use its offset in the 32-bit table instead.
LongEncodingTable.add(TypeSig);
    // This is a placeholder that we'll replace after the table is laid out.
- FixedEncodings.push_back(~0U);
+ FixedEncodings.push_back(static_cast<EncodingTy>(~0U));
}
LongEncodingTable.layout();
- OS << R"(// Global intrinsic function declaration type table.
+  // Verify that all offsets will fit in 16/32 bits.
+ if (LongEncodingTable.size() > Mask + 1)
+ PrintFatalError("Offset of long encoding table exceeds encoding bits");
+
+ OS << formatv(R"(// Global intrinsic function declaration type table.
#ifdef GET_INTRINSIC_GENERATOR_GLOBAL
-static constexpr unsigned IIT_Table[] = {
- )";
+static constexpr {0} IIT_Table[{1}] = {{
+ )",
+ TypeName, Ints.size() + 1);
for (auto [Idx, FixedEncoding, Int] : enumerate(FixedEncodings, Ints)) {
if ((Idx & 7) == 7)
OS << "\n ";
// If the entry fit in the table, just emit it.
- if (FixedEncoding != ~0U) {
+ if ((FixedEncoding & Mask) == FixedEncoding) {
OS << "0x" << Twine::utohexstr(FixedEncoding) << ", ";
continue;
}
@@ -347,7 +366,8 @@ static constexpr unsigned IIT_Table[] = {
// Otherwise, emit the offset into the long encoding table. We emit it this
// way so that it is easier to read the offset in the .def file.
- OS << "(1U<<31) | " << LongEncodingTable.get(TypeSig) << ", ";
+    OS << formatv("(1U<<{0}) | {1}, ", MSBPosition,
+ LongEncodingTable.get(TypeSig));
}
OS << "0\n};\n\n";
@@ -357,7 +377,30 @@ static constexpr unsigned IIT_Table[] = {
if (!LongEncodingTable.empty())
LongEncodingTable.emit(
OS, [](raw_ostream &OS, unsigned char C) { OS << (unsigned)C; });
- OS << " 255\n};\n\n";
+ OS << " 255\n};\n";
+
+ // Also emit a function to decode the fixed encoding.
+ OS << formatv(R"(
+// Returns either the decoded fixed encoding, or the offset into the long
+// encoding table for an intrinsic.
+static std::variant<SmallVector<unsigned char, 8>, unsigned>
+decodeIITFixedEncoding(Intrinsic::ID id) {{
+ {0} TableValue = IIT_Table[id-1];
+ if (TableValue >> {1}) {{
+ // This is an offset into the IIT_LongEncodingTable. Clear the MSB.
+ return static_cast<unsigned>(TableValue & {2:x});
+ }
+
+ // Fixed encoding.
+ SmallVector<unsigned char, 8> IITValues;
+ do {{
+ IITValues.push_back(TableValue & 0xF);
+ TableValue >>= 4;
+ } while (TableValue);
+ return IITValues;
+}
+)",
+                TypeName, MSBPosition, Mask);
OS << "#endif\n\n"; // End of GET_INTRINSIC_GENERATOR_GLOBAL
}
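For reference, a hypothetical invocation that exercises the new option (paths are illustrative; the flag only affects -gen-intrinsic-impl output):

  llvm-tblgen -gen-intrinsic-impl -iit-16bit-fixed -I llvm/include llvm/include/llvm/IR/Intrinsics.td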