[llvm] [TableGen] Optimize intrinsic info type signature encoding (PR #106809)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 2 06:48:28 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-ir
Author: Rahul Joshi (jurahul)
<details>
<summary>Changes</summary>
Change the "fixed encoding" table used for encoding intrinsic type signature to use 16-bit encoding as opposed to 32-bit.
This results in both space and time improvements. For space, the total static storage size of this info reduces by about a third.
Currently measured data is as follows:
- Current size = 14193*4 (Fixed table) + 16058 + 3 (Long Encoding Table) = 72833 bytes.
- New size = 14193*2 (Fixed table) + 19879 + 3 (Long Encoding Table) = 48268 bytes.
- Reduction = 33.7% (24565 bytes saved; equivalently, the old tables are 50.9% larger than the new ones)
For time, with the attached benchmark, we see a 7.3% speedup in the `GetIntrinsicInfoTableEntries` benchmark. The actual output of the benchmark is included in the GitHub PR.
---
Full diff: https://github.com/llvm/llvm-project/pull/106809.diff
4 Files Affected:
- (modified) llvm/benchmarks/CMakeLists.txt (+1)
- (added) llvm/benchmarks/GetIntrinsicInfoTableEntriesBM.cpp (+31)
- (modified) llvm/lib/IR/Function.cpp (+8-6)
- (modified) llvm/utils/TableGen/IntrinsicEmitter.cpp (+46-32)
``````````diff
diff --git a/llvm/benchmarks/CMakeLists.txt b/llvm/benchmarks/CMakeLists.txt
index e3366e6f3ffe19..aa0cb777733441 100644
--- a/llvm/benchmarks/CMakeLists.txt
+++ b/llvm/benchmarks/CMakeLists.txt
@@ -6,3 +6,4 @@ add_benchmark(DummyYAML DummyYAML.cpp PARTIAL_SOURCES_INTENDED)
add_benchmark(xxhash xxhash.cpp PARTIAL_SOURCES_INTENDED)
add_benchmark(GetIntrinsicForClangBuiltin GetIntrinsicForClangBuiltin.cpp PARTIAL_SOURCES_INTENDED)
add_benchmark(FormatVariadicBM FormatVariadicBM.cpp PARTIAL_SOURCES_INTENDED)
+add_benchmark(GetIntrinsicInfoTableEntriesBM GetIntrinsicInfoTableEntriesBM.cpp PARTIAL_SOURCES_INTENDED)
diff --git a/llvm/benchmarks/GetIntrinsicInfoTableEntriesBM.cpp b/llvm/benchmarks/GetIntrinsicInfoTableEntriesBM.cpp
new file mode 100644
index 00000000000000..2854bacc5ab094
--- /dev/null
+++ b/llvm/benchmarks/GetIntrinsicInfoTableEntriesBM.cpp
@@ -0,0 +1,31 @@
+//===- GetIntrinsicInfoTableEntriesBM.cpp - IIT signature benchmark -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "benchmark/benchmark.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Intrinsics.h"
+#include <variant>
+
+using namespace llvm;
+using namespace Intrinsic;
+
+static void BM_GetIntrinsicInfoTableEntries(benchmark::State &state) {
+ SmallVector<IITDescriptor> Table;
+ for (auto _ : state) {
+ for (ID ID = 1; ID < num_intrinsics; ++ID) {
+ // Clear before every call so the vector never grows across intrinsics; its
+ // storage is reused, so iterations after the first do not allocate again.
+ Table.clear();
+ getIntrinsicInfoTableEntries(ID, Table);
+ }
+ }
+}
+
+BENCHMARK(BM_GetIntrinsicInfoTableEntries);
+
+BENCHMARK_MAIN();
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 69520fdb03dc7c..afef8930669e84 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -1381,22 +1381,24 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
void Intrinsic::getIntrinsicInfoTableEntries(ID id,
SmallVectorImpl<IITDescriptor> &T){
+ static_assert(sizeof(IIT_Table[0]) == 2,
+ "Expect 16-bit entries in IIT_Table");
// Check to see if the intrinsic's type was expressible by the table.
- unsigned TableVal = IIT_Table[id-1];
+ uint16_t TableVal = IIT_Table[id - 1];
// Decode the TableVal into an array of IITValues.
- SmallVector<unsigned char, 8> IITValues;
+ SmallVector<unsigned char> IITValues;
ArrayRef<unsigned char> IITEntries;
unsigned NextElt = 0;
- if ((TableVal >> 31) != 0) {
+ if (TableVal >> 15) {
// This is an offset into the IIT_LongEncodingTable.
IITEntries = IIT_LongEncodingTable;
// Strip sentinel bit.
- NextElt = (TableVal << 1) >> 1;
+ NextElt = TableVal & 0x7fff;
} else {
- // Decode the TableVal into an array of IITValues. If the entry was encoded
- // into a single word in the table itself, decode it now.
+ // If the entry was encoded into a single word in the table itself, decode
+ // it from an array of nibbles to an array of bytes.
do {
IITValues.push_back(TableVal & 0xF);
TableVal >>= 4;
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 09eb1ed5e1863b..3dd4767e965364 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -61,6 +61,7 @@ class IntrinsicEmitter {
void EmitIntrinsicToOverloadTable(const CodeGenIntrinsicTable &Ints,
raw_ostream &OS);
void EmitGenerator(const CodeGenIntrinsicTable &Ints, raw_ostream &OS);
+
void EmitAttributes(const CodeGenIntrinsicTable &Ints, raw_ostream &OS);
void EmitIntrinsicToBuiltinMap(const CodeGenIntrinsicTable &Ints,
bool IsClang, raw_ostream &OS);
@@ -282,11 +283,34 @@ static TypeSigTy ComputeTypeSignature(const CodeGenIntrinsic &Int) {
return TypeSig;
}
+// Pack TypeSig into 32 bits, one nibble per element; nullopt if it can't fit.
+static std::optional<unsigned> encodePacked(const TypeSigTy &TypeSig) {
+ if (TypeSig.size() > 8) // Only 8 nibbles fit in a 32-bit word.
+ return std::nullopt;
+
+ unsigned Result = 0;
+ for (unsigned char C : reverse(TypeSig)) { // First element -> low nibble.
+ if (C > 15) // Value does not fit in a nibble.
+ return std::nullopt;
+ Result = (Result << 4) | C;
+ }
+ return Result;
+}
+
void IntrinsicEmitter::EmitGenerator(const CodeGenIntrinsicTable &Ints,
raw_ostream &OS) {
- // If we can compute a 32-bit fixed encoding for this intrinsic, do so and
+ // Note: the code below can be switched to use 32-bit fixed encoding by
+ // flipping the flag below.
+ constexpr bool Use16BitFixedEncoding = true;
+ using EncodingTy =
+ std::conditional_t<Use16BitFixedEncoding, uint16_t, unsigned>;
+ const unsigned Mask = Use16BitFixedEncoding ? 0x7FFF : 0x7FFFFFFF;
+ const unsigned MSBPostion = Use16BitFixedEncoding ? 15 : 31;
+ StringRef TypeName = Use16BitFixedEncoding ? "uint16_t" : "unsigned";
+
+ // If we can compute a 16/32-bit fixed encoding for this intrinsic, do so and
// capture it in this vector, otherwise store a ~0U.
- std::vector<unsigned> FixedEncodings;
+ std::vector<EncodingTy> FixedEncodings;
SequenceToOffsetTable<TypeSigTy> LongEncodingTable;
FixedEncodings.reserve(Ints.size());
@@ -296,69 +320,59 @@ void IntrinsicEmitter::EmitGenerator(const CodeGenIntrinsicTable &Ints,
// Get the signature for the intrinsic.
TypeSigTy TypeSig = ComputeTypeSignature(Int);
- // Check to see if we can encode it into a 32-bit word. We can only encode
- // 8 nibbles into a 32-bit word.
- if (TypeSig.size() <= 8) {
- // Attempt to pack elements of TypeSig into a 32-bit word, starting from
- // the most significant nibble.
- unsigned Result = 0;
- bool Failed = false;
- for (unsigned char C : reverse(TypeSig)) {
- if (C > 15) {
- Failed = true;
- break;
- }
- Result = (Result << 4) | C;
- }
-
- // If this could be encoded into a 31-bit word, return it.
- if (!Failed && (Result >> 31) == 0) {
- FixedEncodings.push_back(Result);
- continue;
- }
+ // Check to see if we can encode it into a 16/32 bit word.
+ std::optional<unsigned> Result = encodePacked(TypeSig);
+ if (Result && (*Result & Mask) == Result) {
+ FixedEncodings.push_back(static_cast<EncodingTy>(*Result));
+ continue;
}
- // Otherwise, we're going to unique the sequence into the
- // LongEncodingTable, and use its offset in the 32-bit table instead.
LongEncodingTable.add(TypeSig);
// This is a placehold that we'll replace after the table is laid out.
- FixedEncodings.push_back(~0U);
+ FixedEncodings.push_back(static_cast<EncodingTy>(~0U));
}
LongEncodingTable.layout();
- OS << R"(// Global intrinsic function declaration type table.
+ OS << formatv(R"(// Global intrinsic function declaration type table.
#ifdef GET_INTRINSIC_GENERATOR_GLOBAL
-static constexpr unsigned IIT_Table[] = {
- )";
+static constexpr {0} IIT_Table[] = {{
+ )",
+ TypeName);
+ unsigned MaxOffset = 0;
for (auto [Idx, FixedEncoding, Int] : enumerate(FixedEncodings, Ints)) {
if ((Idx & 7) == 7)
OS << "\n ";
// If the entry fit in the table, just emit it.
- if (FixedEncoding != ~0U) {
+ if ((FixedEncoding & Mask) == FixedEncoding) {
OS << "0x" << Twine::utohexstr(FixedEncoding) << ", ";
continue;
}
TypeSigTy TypeSig = ComputeTypeSignature(Int);
+ unsigned Offset = LongEncodingTable.get(TypeSig);
+ MaxOffset = std::max(MaxOffset, Offset);
// Otherwise, emit the offset into the long encoding table. We emit it this
// way so that it is easier to read the offset in the .def file.
- OS << "(1U<<31) | " << LongEncodingTable.get(TypeSig) << ", ";
+ OS << formatv("(1U<<{0}) | {1}, ", MSBPostion, Offset);
}
OS << "0\n};\n\n";
+ // verify that all offsets will fit in 16/32 bits.
+ if ((MaxOffset & Mask) != MaxOffset)
+ PrintFatalError("Offset of long encoding table exceeds encoding bits");
+
// Emit the shared table of register lists.
OS << "static constexpr unsigned char IIT_LongEncodingTable[] = {\n";
if (!LongEncodingTable.empty())
LongEncodingTable.emit(
OS, [](raw_ostream &OS, unsigned char C) { OS << (unsigned)C; });
- OS << " 255\n};\n\n";
-
+ OS << " 255\n};\n";
OS << "#endif\n\n"; // End of GET_INTRINSIC_GENERATOR_GLOBAL
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/106809
More information about the llvm-commits
mailing list