[llvm] [Docs] Document IIT encoding flow for intrinsic type signatures (PR #185453)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 9 09:31:58 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-ir
Author: Dharuni R Acharya (DharuniRAcharya)
<details>
<summary>Changes</summary>
This patch adds comments describing how intrinsic type signatures are
encoded into IIT tables at TableGen time and decoded at runtime.
Since this code is frequently encountered while working with intrinsics,
documenting the overall flow makes it easier to understand and navigate.
---
Full diff: https://github.com/llvm/llvm-project/pull/185453.diff
3 Files Affected:
- (modified) llvm/include/llvm/IR/Intrinsics.td (+23)
- (modified) llvm/lib/IR/Intrinsics.cpp (+4)
- (modified) llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp (+11)
``````````diff
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 5b5fffaa48951..9acb5d12c28ca 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -231,6 +231,21 @@ def IntrNoCreateUndefOrPoison : IntrinsicProperty;
//===----------------------------------------------------------------------===//
// IIT constants and utils
//===----------------------------------------------------------------------===//
+// IIT (Intrinsic Information Table) Encoding
+// Each intrinsic's type signature is encoded at build time and decoded at
+// runtime via three cooperating components:
+// Token definitions: IIT_* records below assign a numeric code to each type or
+// type modifier. TypeInfoGen<> then walks each Intrinsic<> record and builds
+// a flat byte sequence 'TypeSig' of IIT_* codes: return types first, then
+// params.
+// Encoding: For each intrinsic, ComputeTypeSignature() in IntrinsicEmitter.cpp
+// gets TypeSig, and encodePacked() encodes it into packed nibbles.
+// The encodings are stored in two generated tables inside IntrinsicImpl.inc
+// by EmitGenerator().
+// Decoding: getIntrinsicInfoTableEntries() in Intrinsics.cpp reads IIT_Table[id-1],
+// checks the MSB to choose the path, unpacks the byte stream, and calls
+// DecodeIITType() to build the IITDescriptor vector consumed by getType() and
+// matchIntrinsicSignature().
// llvm::Intrinsic::IITDescriptor::ArgKind::AK_%
def ArgKind {
@@ -684,6 +699,14 @@ class TypeInfoGen<
list<LLVMType> Types = !foreach(ty, AllTypes,
!if(!isa<LLVMMatchType>(ty), ACTys[MappingRIdxs[ty.Number]], ty));
+ // TypeSig layout:
+ // - If the intrinsic returns multiple values (struct return), the first code
+ // is IIT_STRUCT followed by a count byte, then the element types.
+ // - If it returns void (RetTypes is empty), the first code is IIT_Done (0).
+ // - Otherwise the first code(s) describe the single return type.
+ // - Parameter type codes follow in order.
+ // - The sequence is implicitly terminated: in LongEncodingTable by a 0 byte
+ // (IIT_Done), and in the fixed encoding by the natural end of nibbles.
list<int> TypeSig = !listflatten(!listconcat(
[!cond(
!eq(!size(RetTypes), 0): [IIT_Done.Number],
diff --git a/llvm/lib/IR/Intrinsics.cpp b/llvm/lib/IR/Intrinsics.cpp
index 186bd1edb8c52..0ba9f47afb216 100644
--- a/llvm/lib/IR/Intrinsics.cpp
+++ b/llvm/lib/IR/Intrinsics.cpp
@@ -349,6 +349,9 @@ DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
IITDescriptor::get(IITDescriptor::Pointer, Infos[NextElt++]));
return;
case IIT_ARG: {
+ // IIT_ARG is the primary token for overloaded intrinsics: each "any"-typed
+ // parameter or return type is encoded as IIT_ARG + ArgInfo.
+ // ArgInfo byte: bits[4:0] = argument index, bits[7:5] = argument kind.
unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Argument, ArgInfo));
return;
@@ -433,6 +436,7 @@ DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
#define GET_INTRINSIC_GENERATOR_GLOBAL
#include "llvm/IR/IntrinsicImpl.inc"
+// Decode the IIT encoding for intrinsic \p id into \p T.
void Intrinsic::getIntrinsicInfoTableEntries(
ID id, SmallVectorImpl<IITDescriptor> &T) {
// Note that `FixedEncodingTy` is defined in IntrinsicImpl.inc and can be
diff --git a/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp b/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
index 5a2b2f89d5582..77175c22f7829 100644
--- a/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
@@ -334,6 +334,17 @@ static std::optional<uint32_t> encodePacked(const TypeSigTy &TypeSig) {
return Result;
}
+/// Emit IIT_Table[] and IIT_LongEncodingTable[] into \p OS
+/// (included via GET_INTRINSIC_GENERATOR_GLOBAL in Intrinsics.cpp).
+/// TypeInfoGen<> in Intrinsics.td builds the TypeSig list,
+/// which IntrinsicEmitter.cpp packs using encodePacked().
+/// The MSB of each IIT_Table entry selects one of two encodings:
+/// Fixed (MSB=0): all IIT codes < 16 and nibble-packed value fits in
+/// FixedEncodingTy with MSB clear. Stored directly in IIT_Table[].
+/// Long (MSB=1): any IIT code >= 16, or packed value would set the MSB.
+/// Byte sequence (0-terminated) appended to IIT_LongEncodingTable[], a
+/// SequenceToOffsetTable that deduplicates shared suffixes. IIT_Table[]
+/// stores (offset | MSB_sentinel).
void IntrinsicEmitter::EmitGenerator(const CodeGenIntrinsicTable &Ints,
raw_ostream &OS) {
// Note: the code below can be switched to use 32-bit fixed encoding by
``````````
</details>
https://github.com/llvm/llvm-project/pull/185453
More information about the llvm-commits
mailing list