[llvm] [NFC] [Docs] Document IIT encoding flow for intrinsic type signatures (PR #185453)

Dharuni R Acharya via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 19 06:59:34 PDT 2026


https://github.com/DharuniRAcharya updated https://github.com/llvm/llvm-project/pull/185453

>From 6a143ed86fa37bdac0ff66d385aa4b5b53a56007 Mon Sep 17 00:00:00 2001
From: Dharuni R Acharya <dharuniracharya at gmail.com>
Date: Mon, 9 Mar 2026 16:22:09 +0000
Subject: [PATCH 1/6] [Docs] Document IIT encoding flow for intrinsic type
 signatures

This patch adds comments describing how intrinsic type signatures are
encoded into IIT tables at TableGen time and decoded at runtime.
Since this code is frequently encountered while working with intrinsics,
documenting the overall flow makes it easier to understand and navigate.

Signed-off-by: Dharuni R Acharya <dharuniracharya at gmail.com>
---
 llvm/include/llvm/IR/Intrinsics.td            | 23 +++++++++++++++++++
 llvm/lib/IR/Intrinsics.cpp                    |  4 ++++
 .../utils/TableGen/Basic/IntrinsicEmitter.cpp | 11 +++++++++
 3 files changed, 38 insertions(+)

diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 5b5fffaa48951..9acb5d12c28ca 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -231,6 +231,21 @@ def IntrNoCreateUndefOrPoison : IntrinsicProperty;
 //===----------------------------------------------------------------------===//
 // IIT constants and utils
 //===----------------------------------------------------------------------===//
+// IIT (Intrinsic Information Table) Encoding
+// Each intrinsic's type signature is encoded at build time and decoded at
+// runtime via three cooperating components:
+// Token definitions: IIT_* records below assign a numeric code to each type or 
+// type modifier.
+// TypeInfoGen<> walks each Intrinsic<> record and builds a flat byte
+// sequence 'TypeSig' of IIT_* codes: return types first, then params.
+// Encoding: For each intrinsic ComputeTypeSignature() in IntrinsicEmitter.cpp 
+// gets TypeSig and encodePacked() encodes TypeSig into packed nibbles. 
+// The encodings are stored in two generated tables inside IntrinsicImpl.inc 
+// by EmitGenerator(). 
+// Decoding: getIntrinsicInfoTableEntries() in Intrinsics.cpp reads IIT_Table[id-1], 
+// checks the MSB to choose the path, unpacks the byte stream, and calls 
+// DecodeIITType() to build the IITDescriptor vector consumed by getType() and 
+// matchIntrinsicSignature().
 
 // llvm::Intrinsic::IITDescriptor::ArgKind::AK_%
 def ArgKind {
@@ -684,6 +699,14 @@ class TypeInfoGen<
   list<LLVMType> Types = !foreach(ty, AllTypes,
     !if(!isa<LLVMMatchType>(ty), ACTys[MappingRIdxs[ty.Number]], ty));
 
+  // TypeSig layout:
+  //   - If the intrinsic returns multiple values (struct return), the first code
+  //     is IIT_STRUCT followed by a count byte, then the element types.
+  //   - If it returns a single void, the first code is IIT_Done (0).
+  //   - Otherwise the first code(s) describe the single return type.
+  //   - Parameter type codes follow in order.
+  //   - The sequence is implicitly terminated: in LongEncodingTable by a 0 byte
+  //     (IIT_Done), and in the fixed encoding by the natural end of nibbles.
   list<int> TypeSig = !listflatten(!listconcat(
     [!cond(
       !eq(!size(RetTypes), 0): [IIT_Done.Number],
diff --git a/llvm/lib/IR/Intrinsics.cpp b/llvm/lib/IR/Intrinsics.cpp
index 186bd1edb8c52..0ba9f47afb216 100644
--- a/llvm/lib/IR/Intrinsics.cpp
+++ b/llvm/lib/IR/Intrinsics.cpp
@@ -349,6 +349,9 @@ DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
         IITDescriptor::get(IITDescriptor::Pointer, Infos[NextElt++]));
     return;
   case IIT_ARG: {
+    // IIT_ARG is the primary token for overloaded intrinsics, each "any"-typed
+    // parameter or return type is encoded as IIT_ARG + ArgInfo.
+    // ArgInfo byte: bits[4:0] = argument index, bits[7:5] = argument kind.
     unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
     OutputTable.push_back(IITDescriptor::get(IITDescriptor::Argument, ArgInfo));
     return;
@@ -433,6 +436,7 @@ DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
 #define GET_INTRINSIC_GENERATOR_GLOBAL
 #include "llvm/IR/IntrinsicImpl.inc"
 
+// Decode the IIT encoding for intrinsic \p id into \p T.
 void Intrinsic::getIntrinsicInfoTableEntries(
     ID id, SmallVectorImpl<IITDescriptor> &T) {
   // Note that `FixedEncodingTy` is defined in IntrinsicImpl.inc and can be
diff --git a/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp b/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
index 5a2b2f89d5582..77175c22f7829 100644
--- a/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
@@ -334,6 +334,17 @@ static std::optional<uint32_t> encodePacked(const TypeSigTy &TypeSig) {
   return Result;
 }
 
+/// Emit IIT_Table[] and IIT_LongEncodingTable[] into \p OS
+/// (included via GET_INTRINSIC_GENERATOR_GLOBAL in Intrinsics.cpp).
+/// TypeInfoGen<> in Intrinsics.td builds the TypeSig list, 
+/// which IntrinsicEmitter.cpp packs using encodePacked(). 
+/// Check the MSB of the IIT_Table entry to determine the following:
+///   Fixed (MSB=0): all IIT codes < 16 and nibble-packed value fits in
+///     FixedEncodingTy with MSB clear. Stored directly in IIT_Table[].
+///   Long (MSB=1): any IIT code >= 16, or packed value would set the MSB.
+///     Byte sequence (0-terminated) appended to IIT_LongEncodingTable[], a
+///     SequenceToOffsetTable that deduplicates shared suffixes. IIT_Table[]
+///     stores (offset | MSB_sentinel).
 void IntrinsicEmitter::EmitGenerator(const CodeGenIntrinsicTable &Ints,
                                      raw_ostream &OS) {
   // Note: the code below can be switched to use 32-bit fixed encoding by

>From f86cd5fedfdb05d3ec9d574caa0c9bfe1751eb08 Mon Sep 17 00:00:00 2001
From: Dharuni R Acharya <dharuniracharya at gmail.com>
Date: Mon, 9 Mar 2026 16:34:15 +0000
Subject: [PATCH 2/6] Fix formatting

---
 llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp b/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
index 77175c22f7829..f9fcbd44bf186 100644
--- a/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
@@ -336,8 +336,8 @@ static std::optional<uint32_t> encodePacked(const TypeSigTy &TypeSig) {
 
 /// Emit IIT_Table[] and IIT_LongEncodingTable[] into \p OS
 /// (included via GET_INTRINSIC_GENERATOR_GLOBAL in Intrinsics.cpp).
-/// TypeInfoGen<> in Intrinsics.td builds the TypeSig list, 
-/// which IntrinsicEmitter.cpp packs using encodePacked(). 
+/// TypeInfoGen<> in Intrinsics.td builds the TypeSig list,
+/// which IntrinsicEmitter.cpp packs using encodePacked().
 /// Check the MSB of the IIT_Table entry to determine the following:
 ///   Fixed (MSB=0): all IIT codes < 16 and nibble-packed value fits in
 ///     FixedEncodingTy with MSB clear. Stored directly in IIT_Table[].

>From 696ad7bf9d7c1395faa6f83548f25784c47276bd Mon Sep 17 00:00:00 2001
From: Dharuni R Acharya <dharuniracharya at gmail.com>
Date: Thu, 12 Mar 2026 07:22:47 +0000
Subject: [PATCH 3/6] New Changes

---
 llvm/include/llvm/IR/Intrinsics.td            | 49 +++++++++++++------
 .../utils/TableGen/Basic/IntrinsicEmitter.cpp | 12 +++--
 2 files changed, 42 insertions(+), 19 deletions(-)

diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 9acb5d12c28ca..3be0f28944f13 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -232,20 +232,37 @@ def IntrNoCreateUndefOrPoison : IntrinsicProperty;
 // IIT constants and utils
 //===----------------------------------------------------------------------===//
 // IIT (Intrinsic Information Table) Encoding
-// Each intrinsic's type signature is encoded at build time and decoded at
-// runtime via three cooperating components:
-// Token definitions: IIT_* records below assign a numeric code to each type or 
-// type modifier.
+// Each intrinsic's type signature is encoded at build time (when tblgen
+// converts .td files to .inc) and decoded at runtime (when LLVM compiles IR)
+// via 3 cooperating components:
+//
+// 1. Token definitions: IIT_* records below assign a numeric code to each type 
+// or type modifier. (e.g. IIT_I32 = 4, IIT_I64 = 5; see defs below)
 // TypeInfoGen<> walks each Intrinsic<> record and builds a flat byte
 // sequence 'TypeSig' of IIT_* codes: return types first, then params.
-// Encoding: For each intrinsic ComputeTypeSignature() in IntrinsicEmitter.cpp 
-// gets TypeSig and encodePacked() encodes TypeSig into packed nibbles. 
-// The encodings are stored in two generated tables inside IntrinsicImpl.inc 
-// by EmitGenerator(). 
-// Decoding: getIntrinsicInfoTableEntries() in Intrinsics.cpp reads IIT_Table[id-1], 
-// checks the MSB to choose the path, unpacks the byte stream, and calls 
-// DecodeIITType() to build the IITDescriptor vector consumed by getType() and 
-// matchIntrinsicSignature().
+// For example, an intrinsic returning i64 and taking (i32, i32, i8) as
+// input params gives:
+// TypeSig = [IIT_I64, IIT_I32, IIT_I32, IIT_I8, IIT_Done]
+// TypeSig = [5, 4, 4, 2, 0]
+// 
+// 2. Encoding: For each intrinsic ComputeTypeSignature() in IntrinsicEmitter.cpp
+// gets TypeSig and encodePacked() encodes TypeSig into packed nibbles.
+// The encodings are stored in 2 generated tables inside IntrinsicImpl.inc
+// by EmitGenerator(): (Refer IntrinsicEmitter.cpp for more details).
+//  IIT_Table[]             – one entry per intrinsic ID (uint16_t or uint32_t)
+//  IIT_LongEncodingTable[] – shared byte array for signatures that don't fit
+//                            in a single IIT_Table word.
+// The example above packs LSB-first to 0x2445 (bit 15 = 0) in uint16_t and
+// stores in IIT_Table[].
+// 
+// 3. Decoding: getIntrinsicInfoTableEntries() in Intrinsics.cpp reads
+// IIT_Table[id-1]. 
+// The MSB selects one of the tables accordingly, unpacks the byte stream, and
+// calls DecodeIITType() to build the IITDescriptor vector. Each IITDescriptor
+// entry describes one type position in the signature. For the example above,
+// the result is [Integer(64), Integer(32), Integer(32), Integer(8)], 
+// which is used by getType() to reconstruct the intrinsic's FunctionType and
+// by matchIntrinsicSignature() to verify call sites at runtime.  
 
 // llvm::Intrinsic::IITDescriptor::ArgKind::AK_%
 def ArgKind {
@@ -699,13 +716,13 @@ class TypeInfoGen<
   list<LLVMType> Types = !foreach(ty, AllTypes,
     !if(!isa<LLVMMatchType>(ty), ACTys[MappingRIdxs[ty.Number]], ty));
 
-  // TypeSig layout:
-  //   - If the intrinsic returns multiple values (struct return), the first code
-  //     is IIT_STRUCT followed by a count byte, then the element types.
+  // TypeSig layout (See an example in comments above):
+  //   - If the intrinsic returns multiple values (struct return), the first 
+  //     code is IIT_STRUCT followed by a count byte, then the element types.
   //   - If it returns a single void, the first code is IIT_Done (0).
   //   - Otherwise the first code(s) describe the single return type.
   //   - Parameter type codes follow in order.
-  //   - The sequence is implicitly terminated: in LongEncodingTable by a 0 byte
+  //   - The sequence is implicitly terminated: in LongEncodingTable by 0 byte
   //     (IIT_Done), and in the fixed encoding by the natural end of nibbles.
   list<int> TypeSig = !listflatten(!listconcat(
     [!cond(
diff --git a/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp b/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
index f9fcbd44bf186..1b7e338eaba78 100644
--- a/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
@@ -341,10 +341,16 @@ static std::optional<uint32_t> encodePacked(const TypeSigTy &TypeSig) {
 /// Check the MSB of the IIT_Table entry to determine the following:
 ///   Fixed (MSB=0): all IIT codes < 16 and nibble-packed value fits in
 ///     FixedEncodingTy with MSB clear. Stored directly in IIT_Table[].
-///   Long (MSB=1): any IIT code >= 16, or packed value would set the MSB.
+///   Long (MSB=1): any IIT code >= 16, or packed value would set the MSB i.e.,
+///     (the highest nibble's value is >= 8, which would be misread as a
+///     long-table offset at runtime).
 ///     Byte sequence (0-terminated) appended to IIT_LongEncodingTable[], a
-///     SequenceToOffsetTable that deduplicates shared suffixes. IIT_Table[]
-///     stores (offset | MSB_sentinel).
+///     SequenceToOffsetTable<> that lays all sequences contiguously in one
+///     byte array and assigns each a start offset.
+///     Sequences sharing a common suffix - for example, two intrinsics both
+///     ending in [..., IIT_I32(4), IIT_I32(4), 0] - overlap in the buffer,
+///     so the shared trailing bytes are stored only once, reducing table size.
+///     IIT_Table[] stores (offset | MSB_sentinel).
 void IntrinsicEmitter::EmitGenerator(const CodeGenIntrinsicTable &Ints,
                                      raw_ostream &OS) {
   // Note: the code below can be switched to use 32-bit fixed encoding by

>From c51beab318109b4b300e4fc943667de7525f76c7 Mon Sep 17 00:00:00 2001
From: Dharuni R Acharya <dharuniracharya at gmail.com>
Date: Fri, 13 Mar 2026 15:55:55 +0000
Subject: [PATCH 4/6] Address Comments

---
 llvm/include/llvm/IR/Intrinsics.td            | 86 ++++++++++++-------
 .../utils/TableGen/Basic/IntrinsicEmitter.cpp | 20 +++--
 2 files changed, 67 insertions(+), 39 deletions(-)

diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 3be0f28944f13..002313d3c57b4 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -237,32 +237,64 @@ def IntrNoCreateUndefOrPoison : IntrinsicProperty;
 // via 3 cooperating components:
 //
 // 1. Token definitions: IIT_* records below assign a numeric code to each type 
-// or type modifier. (e.g. IIT_I32 = 4, IIT_I64 = 5; see defs below)
-// TypeInfoGen<> walks each Intrinsic<> record and builds a flat byte
-// sequence 'TypeSig' of IIT_* codes: return types first, then params.
-// For example, an intrinsic returning i64 and taking (i32, i32, i8) as
-// input params gives:
-// TypeSig = [IIT_I64, IIT_I32, IIT_I32, IIT_I8, IIT_Done]
-// TypeSig = [5, 4, 4, 2, 0]
+//    or type modifier. 
+//    TypeInfoGen<> walks each Intrinsic<> record and builds a flat byte
+//    sequence 'TypeSig' of IIT_* codes: return types first, then params.
+//    Each type position in the signature is encoded as a variable-length byte
+//    sequence. The first byte is always one of the IIT_* opcodes defined below
+//    and determines how subsequent bytes (if any) should be interpreted.
+//
+//    - Simple scalar types are self-contained in a single opcode byte:
+//        i32 -> IIT_I32 (= 4), i64 -> IIT_I64 (= 5)
+//    - Vector types encode the opcode for the vector width, immediately
+//      followed by the recursive encoding of the element type:
+//        <4 x i32>  -> [IIT_V4, IIT_I32] (= 10, 4)
+//    - Overloaded positions are encoded as IIT_ARG (= 15) followed by an
+//      ArgInfo byte where bits[7:5] identify the accepted type family.
+//        llvm_anyint_ty at overload-index 0  -> [IIT_ARG, 0x20]
+//    - For constraints that are dependent on types of other arguments, the
+//      ArgInfo byte encodes both which previously-seen type position this must
+//      match (bits[4:0] = argument index) and how it relates to that type
+//      (bits[7:5] = kind: 0=same, 1=extend, 2=trunc, 3=sext, 4=zext, 5=any, 
+//      6=ptr_to, 7=vec_of). 
+//        LLVMMatchType<0>  -> [IIT_ARG, 0x00]
+//    - Multi-value returns are prefixed with IIT_STRUCT (= 21) and a count 
+//      byte encoding N-2 (minimum 2 fields -> 0, maximum 257 fields -> 255), 
+//      followed by the encoding of each field type:
+//        {i32, i64}  -> [IIT_STRUCT, 0, IIT_I32, IIT_I64]
+//
+//     TypeInfoGen<> concatenates type encodings for all return types first,
+//     then all parameter types in declaration order. 
+//     The return section has three forms:
+//      - Void return:         starts with IIT_Done (= 0).
+//      - Single return:       no prefix; the return type encoding starts directly.
+//      - Multi-value return:  starts with IIT_STRUCT + count byte as above.
+//
+//    For example, an intrinsic returning i64 with (i32, i32, i8) parameters:
+//    TypeSig = [IIT_I64(5), IIT_I32(4), IIT_I32(4), IIT_I8(2)]
 // 
-// 2. Encoding: For each intrinsic ComputeTypeSignature() in IntrinsicEmitter.cpp
-// gets TypeSig and encodePacked() encodes TypeSig into packed nibbles.
-// The encodings are stored in 2 generated tables inside IntrinsicImpl.inc
-// by EmitGenerator(): (Refer IntrinsicEmitter.cpp for more details).
-//  IIT_Table[]             – one entry per intrinsic ID (uint16_t or uint32_t)
-//  IIT_LongEncodingTable[] – shared byte array for signatures that don't fit
-//                            in a single IIT_Table word.
-// The example above packs LSB-first to 0x2445 (bit 15 = 0) in uint16_t and
-// stores in IIT_Table[].
+// 2. Encoding Optimization: For each intrinsic, ComputeTypeSignature() in
+//    IntrinsicEmitter.cpp obtains the TypeSig byte sequence.
+//    EmitGenerator() then decides how to store this sequence in the generated
+//    tables in IntrinsicImpl.inc file. If the sequence is short and all IIT
+//    codes are less than 16, encodePacked() compresses it into packed nibbles
+//    and stores it directly in the IIT_Table entry for that intrinsic.
+//    Otherwise the full byte sequence is placed in IIT_LongEncodingTable[],
+//    and IIT_Table[] stores an offset into that table.
+//
+//    The example above (TypeSig [5,4,4,2]) packs to 0x2445 with bit 15 = 0,
+//    so it is inlined directly into IIT_Table[]. (Refer to EmitGenerator() in
+//    IntrinsicEmitter.cpp for the full details.)
 // 
 // 3. Decoding: getIntrinsicInfoTableEntries() in Intrinsics.cpp reads
-// IIT_Table[id-1]. 
-// The MSB selects one of the tables accordingly, unpacks the byte stream, and
-// calls DecodeIITType() to build the IITDescriptor vector. Each IITDescriptor
-// entry describes one type position in the signature. For the example above,
-// the result is [Integer(64), Integer(32), Integer(32), Integer(8)], 
-// which is used by getType() to reconstruct the intrinsic's FunctionType and
-// by matchIntrinsicSignature() to verify call sites at runtime.  
+//    IIT_Table[id-1].
+//    The MSB of that entry tells whether the signature is inlined as nibbles
+//    or stored in IIT_LongEncodingTable[]. The byte stream is unpacked and
+//    passed to DecodeIITType(), which builds the IITDescriptor vector; each
+//    entry describes one type position in the signature. For the example
+//    above, the result is [Integer(64), Integer(32), Integer(32), Integer(8)],
+//    which getType() uses to reconstruct the intrinsic's FunctionType and
+//    matchIntrinsicSignature() uses to verify call sites at runtime.
 
 // llvm::Intrinsic::IITDescriptor::ArgKind::AK_%
 def ArgKind {
@@ -716,14 +748,6 @@ class TypeInfoGen<
   list<LLVMType> Types = !foreach(ty, AllTypes,
     !if(!isa<LLVMMatchType>(ty), ACTys[MappingRIdxs[ty.Number]], ty));
 
-  // TypeSig layout (See an example in comments above):
-  //   - If the intrinsic returns multiple values (struct return), the first 
-  //     code is IIT_STRUCT followed by a count byte, then the element types.
-  //   - If it returns a single void, the first code is IIT_Done (0).
-  //   - Otherwise the first code(s) describe the single return type.
-  //   - Parameter type codes follow in order.
-  //   - The sequence is implicitly terminated: in LongEncodingTable by 0 byte
-  //     (IIT_Done), and in the fixed encoding by the natural end of nibbles.
   list<int> TypeSig = !listflatten(!listconcat(
     [!cond(
       !eq(!size(RetTypes), 0): [IIT_Done.Number],
diff --git a/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp b/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
index 1b7e338eaba78..bc81dec0bbd6d 100644
--- a/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
@@ -336,21 +336,25 @@ static std::optional<uint32_t> encodePacked(const TypeSigTy &TypeSig) {
 
 /// Emit IIT_Table[] and IIT_LongEncodingTable[] into \p OS
 /// (included via GET_INTRINSIC_GENERATOR_GLOBAL in Intrinsics.cpp).
+///
 /// TypeInfoGen<> in Intrinsics.td builds the TypeSig list,
 /// which IntrinsicEmitter.cpp packs using encodePacked().
-/// Check the MSB of the IIT_Table entry to determine the following:
-///   Fixed (MSB=0): all IIT codes < 16 and nibble-packed value fits in
-///     FixedEncodingTy with MSB clear. Stored directly in IIT_Table[].
-///   Long (MSB=1): any IIT code >= 16, or packed value would set the MSB i.e.,
+/// IIT_Table[] is fixed-width (one FixedEncodingTy entry per intrinsic) to
+/// allow O(1) lookup by intrinsic ID. The MSB of each entry distinguishes
+/// two storage paths.
+///
+///   Fixed (MSB=0)  : TypeSig inlined as nibbles directly in IIT_Table[].
+///   Long  (MSB=1)  : any code >= 16, or inlining would set the MSB.
 ///     (the highest nibble's value is >= 8, which would be misread as a
 ///     long-table offset at runtime).
-///     Byte sequence (0-terminated) appended to IIT_LongEncodingTable[], a
-///     SequenceToOffsetTable<> that lays all sequences contiguously in one
-///     byte array and assigns each a start offset.
+///     Raw bytes stored in IIT_LongEncodingTable[]; IIT_Table[] holds
+///     (offset | MSB_sentinel).
+///     SequenceToOffsetTable<> lays all sequences contiguously in one byte
+///     array and assigns each a start offset. It appends an implicit 0-byte
+///     terminator after each sequence.
 ///     Sequences sharing a common suffix - for example, two intrinsics both
 ///     ending in [..., IIT_I32(4), IIT_I32(4), 0] - overlap in the buffer,
 ///     so the shared trailing bytes are stored only once, reducing table size.
-///     IIT_Table[] stores (offset | MSB_sentinel).
 void IntrinsicEmitter::EmitGenerator(const CodeGenIntrinsicTable &Ints,
                                      raw_ostream &OS) {
   // Note: the code below can be switched to use 32-bit fixed encoding by

>From dab3e607f65a4641eeacf95fbdad7bb6b45b3239 Mon Sep 17 00:00:00 2001
From: Dharuni R Acharya <dharuniracharya at gmail.com>
Date: Wed, 18 Mar 2026 16:03:01 +0000
Subject: [PATCH 5/6] Address Comments

---
 llvm/include/llvm/IR/Intrinsics.td             | 16 ++++++++--------
 llvm/lib/IR/Intrinsics.cpp                     |  2 +-
 llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp |  2 +-
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 002313d3c57b4..644059420b2d5 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -232,6 +232,7 @@ def IntrNoCreateUndefOrPoison : IntrinsicProperty;
 // IIT constants and utils
 //===----------------------------------------------------------------------===//
 // IIT (Intrinsic Information Table) Encoding
+//
 // Each intrinsic's type signature is encoded at build time (when tblgen
 // converts .td files to .inc) and decoded at runtime (when LLVM compiles IR)
 // via 3 cooperating components:
@@ -250,14 +251,13 @@ def IntrNoCreateUndefOrPoison : IntrinsicProperty;
 //      followed by the recursive encoding of the element type:
 //        <4 x i32>  -> [IIT_V4, IIT_I32] (= 10, 4)
 //    - Overloaded positions are encoded as IIT_ARG (= 15) followed by an
-//      ArgInfo byte where bits[7:5] identify the accepted type family.
-//        llvm_anyint_ty at overload-index 0  -> [IIT_ARG, 0x20]
-//    - For constraints that are dependent on types of other arguments, the
-//      ArgInfo byte encodes both which previously-seen type position this must
-//      match (bits[4:0] = argument index) and how it relates to that type
-//      (bits[7:5] = kind: 0=same, 1=extend, 2=trunc, 3=sext, 4=zext, 5=any, 
-//      6=ptr_to, 7=vec_of). 
-//        LLVMMatchType<0>  -> [IIT_ARG, 0x00]
+//      ArgInfo byte where bits[2:0] identify the accepted type family 
+//      (1=AnyInt, 2=AnyFloat, 3=AnyVec, 4=AnyPtr) and bits[7:3] hold
+//      the overload slot index:
+//        llvm_anyint_ty at overload-index 0  -> [IIT_ARG, 0x01]
+//    - Positions whose type must match an overloaded slot use IIT_ARG with
+//      bits[2:0] = 7 (MatchType) and bits[7:3] = the slot index to match:
+//        LLVMMatchType<0>  -> [IIT_ARG, 0x07]
 //    - Multi-value returns are prefixed with IIT_STRUCT (= 21) and a count 
 //      byte encoding N-2 (minimum 2 fields -> 0, maximum 257 fields -> 255), 
 //      followed by the encoding of each field type:
diff --git a/llvm/lib/IR/Intrinsics.cpp b/llvm/lib/IR/Intrinsics.cpp
index 0ba9f47afb216..3ba5251769a7e 100644
--- a/llvm/lib/IR/Intrinsics.cpp
+++ b/llvm/lib/IR/Intrinsics.cpp
@@ -351,7 +351,7 @@ DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
   case IIT_ARG: {
     // IIT_ARG is the primary token for overloaded intrinsics, each "any"-typed
     // parameter or return type is encoded as IIT_ARG + ArgInfo.
-    // ArgInfo byte: bits[4:0] = argument index, bits[7:5] = argument kind.
+    // ArgInfo byte: bits[2:0] = argument kind, bits[7:3] = argument index.
     unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
     OutputTable.push_back(IITDescriptor::get(IITDescriptor::Argument, ArgInfo));
     return;
diff --git a/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp b/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
index bc81dec0bbd6d..d126669a8a5b4 100644
--- a/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp
@@ -351,7 +351,7 @@ static std::optional<uint32_t> encodePacked(const TypeSigTy &TypeSig) {
 ///     (offset | MSB_sentinel).
 ///     SequenceToOffsetTable<> lays all sequences contiguously in one byte
 ///     array and assigns each a start offset. It appends an implicit 0-byte
-///     terminator after each sequence.
+///     terminator after each sequence (which is the IIT_Done token).
 ///     Sequences sharing a common suffix - for example, two intrinsics both
 ///     ending in [..., IIT_I32(4), IIT_I32(4), 0] - overlap in the buffer,
 ///     so the shared trailing bytes are stored only once, reducing table size.

>From 23d7b5b9abefd34872ebfef1a15637c7e115e39d Mon Sep 17 00:00:00 2001
From: Dharuni R Acharya <dharuniracharya at gmail.com>
Date: Thu, 19 Mar 2026 13:58:58 +0000
Subject: [PATCH 6/6] Add docs for TypeInfoGen and ResolveArgCode

---
 llvm/include/llvm/IR/Intrinsics.td | 41 ++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 644059420b2d5..766e0c20f77ed 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -706,6 +706,47 @@ def llvm_funcref_ty    : LLVMType<funcref>;
 def llvm_exnref_ty     : LLVMType<exnref>;
 
 //===----------------------------------------------------------------------===//
+// TypeInfoGen overview
+//
+// TypeInfoGen builds the TypeSig byte sequence for a single intrinsic by
+// lowering TableGen "Sig" encodings into the compact IIT_* representation
+// used at runtime.
+//
+// The following intermediate fields are constructed:
+//   AllTypes     -  all ret and param types concatenated in declaration order.
+//   ACTys        -  the subset of AllTypes passing the predicate:
+//                   ty.isAny OR isa<LLVMMatchTypeNextArg>(ty)
+//                   These define the overload slots.
+//   ArgCodes     -  ArgKind for each entry in ACTys (e.g. AnyInteger, 
+//                   AnyFloat, ...).
+//   ACIdxs       -  parallel to AllTypes. ACIdxs[i] is the count of
+//                   predicate-true entries seen before position i in AllTypes.
+//                   For a type that passes the predicate, ACIdxs[i] is its
+//                   index in ACTys. For a type that does not pass, ACIdxs[i]
+//                   is still passed to ResolveArgCode but is ignored there:
+//                   only EncAnyType and EncNextArgA read ACIdx.
+//   MappingRIdxs -  parallel to ACTys. MappingRIdxs[i] is the number of prior
+//                   isAny=true entries, i.e. the overload slot index among
+//                   any-types. Used by EncMatchType to map
+//                   LLVMMatchType<N>.Number (ACTys index) to a slot.
+//
+// With those tables built, TypeSig is assembled by iterating over AllTypes
+// and expanding each type's raw Sig bytes through ResolveArgCode:
+//
+//   ResolveArgCode(Mapping=MappingRIdxs, ArgCodes, ACIdx=ACIdxs[i], ax):
+//     ah = ax & 0xFF00   - identifies which Enc* class produced this value
+//     al = ax & 0x00FF   - type-specific payload
+//     num = Mapping[al]  - overload slot index via MappingRIdxs
+//     EncAnyType  (0x100): ret = (ACIdx << 3) | al
+//                        - assigns this position to its overload slot with 
+//                          given ArgKind
+//     EncMatchType(0x200): ret = (num << 3) | AK_MatchType(7)
+//                        - al = LLVMMatchType<N>.Number, 
+//                          num = MappingRIdxs[al] = slot index
+//     EncSameWidth(0x300): ret = (num << 3) | ArgCodes[num]
+//     EncNextArgA (0x400): ret = ACIdx
+//     EncNextArgN (0x500): ret = num
+//     Otherwise          : ret = al  (already a final IIT_* code).
 
 class MakeIdx<list<int> Set> {
   list<int> IdxsR = !foreach(i, !range(Set),



More information about the llvm-commits mailing list