[llvm] POC for intrinsic with underscore in name (PR #164662)
Rahul Joshi via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 22 10:12:57 PDT 2025
https://github.com/jurahul created https://github.com/llvm/llvm-project/pull/164662
None
>From ac32e6833a498458ac96704701c4b77408d501d3 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Wed, 22 Oct 2025 10:10:34 -0700
Subject: [PATCH] POC for intrinsic with underscore in name
---
llvm/include/llvm/IR/Intrinsics.td | 4 +-
.../TableGen/intrinsic-duplicate-enum-name.td | 9 ++
.../TableGen/Basic/CodeGenIntrinsics.cpp | 96 ++++++++++++++++---
llvm/utils/TableGen/Basic/CodeGenIntrinsics.h | 2 +-
.../GlobalISel/GlobalISelMatchTable.cpp | 4 +-
.../utils/TableGen/SearchableTableEmitter.cpp | 2 +-
6 files changed, 100 insertions(+), 17 deletions(-)
create mode 100644 llvm/test/TableGen/intrinsic-duplicate-enum-name.td
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index e6cce9a4eea1d..46fe02e075008 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1876,8 +1876,8 @@ def int_fptosi_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>
// Clear cache intrinsic, default to ignore (ie. emit nothing)
// maps to void __clear_cache() on supporting platforms
-def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
- [], "llvm.clear_cache">;
+def int_clear__cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
+ []>;
// Intrinsic to detect whether its argument is a constant.
def int_is_constant : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty],
diff --git a/llvm/test/TableGen/intrinsic-duplicate-enum-name.td b/llvm/test/TableGen/intrinsic-duplicate-enum-name.td
new file mode 100644
index 0000000000000..b091f9cd588dc
--- /dev/null
+++ b/llvm/test/TableGen/intrinsic-duplicate-enum-name.td
@@ -0,0 +1,9 @@
+// RUN: not llvm-tblgen -gen-intrinsic-impl -I %p/../../include %s -DTEST_INTRINSICS_SUPPRESS_DEFS 2>&1 | FileCheck %s -DFILE=%s
+
+include "llvm/IR/Intrinsics.td"
+
+def int_x__y_z : Intrinsic<[llvm_anyint_ty], [], []>;
+
+// CHECK: [[FILE]]:[[@LINE+2]]:5: error: `Intrinsic::x_y_z` is already defined
+// CHECK: [[FILE]]:[[@LINE-3]]:5: note: Previous definition here
+def int_x_y_z : Intrinsic<[llvm_anyint_ty], [], []>;
diff --git a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
index be7537c83da3a..651bb88124b9c 100644
--- a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
+++ b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
@@ -90,15 +90,44 @@ void CodeGenIntrinsicTable::CheckDuplicateIntrinsics() const {
[](const CodeGenIntrinsic &Int1, const CodeGenIntrinsic &Int2) {
return Int1.Name == Int2.Name;
});
- if (I == Intrinsics.end())
- return;
+ if (I != Intrinsics.end()) {
+ // Found 2 intrinsics with same name.
+ const CodeGenIntrinsic &First = *I;
+ const CodeGenIntrinsic &Second = *(I + 1);
+ PrintError(Second.TheDef,
+ Twine("Intrinsic `") + First.Name + "` is already defined");
+ PrintFatalNote(First.TheDef, "Previous definition here");
+ }
- // Found a duplicate intrinsics.
- const CodeGenIntrinsic &First = *I;
- const CodeGenIntrinsic &Second = *(I + 1);
- PrintError(Second.TheDef,
- Twine("Intrinsic `") + First.Name + "` is already defined");
- PrintFatalNote(First.TheDef, "Previous definition here");
+ // Now detect intrinsics that may have the same enum name. For that, we first
+ // sort the intrinsics by their enum name.
+ std::vector<const CodeGenIntrinsic *> SortedByEnumName;
+ SortedByEnumName.reserve(size());
+ for (const CodeGenIntrinsic &Int : Intrinsics)
+ SortedByEnumName.push_back(&Int);
+
+ llvm::sort(SortedByEnumName, [](const CodeGenIntrinsic *LHS,
+ const CodeGenIntrinsic *RHS) {
+ // To ensure deterministic sorted order when duplicates are
+ // present, use record ID as a tie-breaker.
+ unsigned LhsID = LHS->TheDef->getID();
+ unsigned RhsID = RHS->TheDef->getID();
+ return std::tie(LHS->EnumName, LhsID) < std::tie(RHS->EnumName, RhsID);
+ });
+ auto J = std::adjacent_find(
+ SortedByEnumName.begin(), SortedByEnumName.end(),
+ [](const CodeGenIntrinsic *Int1, const CodeGenIntrinsic *Int2) {
+ return Int1->EnumName == Int2->EnumName;
+ });
+
+ if (J != SortedByEnumName.end()) {
+ // Found 2 intrinsics with same enum name.
+ const CodeGenIntrinsic *First = *J;
+ const CodeGenIntrinsic *Second = *(J + 1);
+ PrintError(Second->TheDef, Twine("`Intrinsic::") + First->EnumName +
+ "` is already defined");
+ PrintFatalNote(First->TheDef, "Previous definition here");
+ }
}
// For target independent intrinsics, check that their second dotted component
@@ -257,6 +286,24 @@ const CodeGenIntrinsic &CodeGenIntrinsicMap::operator[](const Record *Record) {
return *Iter->second;
}
+// Sanitize the intrinsic name by replacing each _ pair with a single _ and
+// optionally each single _ (in the original input string) with .
+static void sanitizeName(std::string &Name, bool ReplaceSingleUnderscore) {
+ size_t Next = 0;
+ for (size_t I = 0, E = Name.size(); I < E;) {
+ if (Name[I] == '_' && I + 1 < E && Name[I + 1] == '_') {
+ Name[Next++] = '_';
+ I += 2;
+ } else if (ReplaceSingleUnderscore && Name[I] == '_') {
+ Name[Next++] = '.';
+ I++;
+ } else {
+ Name[Next++] = Name[I++];
+ }
+ }
+ Name = Name.substr(0, Next);
+}
+
CodeGenIntrinsic::CodeGenIntrinsic(const Record *R,
const CodeGenIntrinsicContext &Ctx)
: TheDef(R) {
@@ -267,7 +314,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(const Record *R,
PrintFatalError(DefLoc,
"Intrinsic '" + DefName + "' does not start with 'int_'!");
- EnumName = DefName.substr(4);
+ EnumName = DefName.substr(4).str();
// Ignore a missing ClangBuiltinName field.
ClangBuiltinName =
@@ -278,16 +325,43 @@ CodeGenIntrinsic::CodeGenIntrinsic(const Record *R,
TargetPrefix = R->getValueAsString("TargetPrefix");
Name = R->getValueAsString("LLVMName").str();
+ // Note, we only sanitize __ in intrinsic names and not their C++ enum names.
+ // The rationale is that if we sanitize enum names as well (by just replacing
+ // _ pairs with _) we may get conflicting enum names for different record
+ // names which is not desirable. For example:
+ //
+ // Enum Name Enum Name Intrinsic Name
+ // (Sanitized) (Original)
+ //
+ // int_x__y_z x_y_z x__y_z llvm.x_y.z
+ // int_x_y_z x_y_z x_y_z llvm.x.y.z
+ //
+ // So with no enum name sanitization, two different record names will not
+ // conflict in either enum names or intrinsic names. The side-effect is that
+ // intrinsics like int_clear_cache will need to be named int_clear__cache to
+ // have their default name be "llvm.clear_cache" but then their enum name
+ // will change to "Intrinsic::clear__cache".
+
+ // Alternatively, we do sanitize the enum name (which preserves a lot of
+ // existing names), but then detect the cases where 2 different records may
+ // end up generating the same enum name. This can be done by extending
+ // CheckDuplicateIntrinsics() to detect duplicated enum names as well and
+ // fail if that happens.
+ // Note: (Implementing this option).
+
if (Name == "") {
// If an explicit name isn't specified, derive one from the DefName.
- Name = "llvm." + EnumName.str();
- llvm::replace(Name, '_', '.');
+ Name = "llvm." + EnumName;
+ sanitizeName(Name, /*ReplaceSingleUnderscore*/ true);
} else {
// Verify it starts with "llvm.".
if (!StringRef(Name).starts_with("llvm."))
PrintFatalError(DefLoc, "Intrinsic '" + DefName +
"'s name does not start with 'llvm.'!");
}
+ // Sanitize the enum name by just replacing each pair of _ with a single _.
+ // That way, most existing intrinsic names stay the same.
+ sanitizeName(EnumName, /*ReplaceSingleUnderscore*/ false);
// If TargetPrefix is specified, make sure that Name starts with
// "llvm.<targetprefix>.".
diff --git a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h
index 2e86149514f46..d73aa89d076de 100644
--- a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h
+++ b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h
@@ -35,7 +35,7 @@ struct CodeGenIntrinsicContext {
struct CodeGenIntrinsic {
const Record *TheDef; // The actual record defining this intrinsic.
std::string Name; // The name of the LLVM function "llvm.bswap.i32"
- StringRef EnumName; // The name of the enum "bswap_i32"
+ std::string EnumName; // The name of the enum "bswap_i32"
StringRef ClangBuiltinName; // Name of the corresponding GCC builtin, or "".
StringRef MSBuiltinName; // Name of the corresponding MS builtin, or "".
StringRef TargetPrefix; // Target prefix, e.g. "ppc" for t-s intrinsics.
diff --git a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
index 5d49715879280..91a54aa933f6e 100644
--- a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
+++ b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
@@ -1364,7 +1364,7 @@ void IntrinsicIDOperandMatcher::emitPredicateOpcodes(MatchTable &Table,
Table << MatchTable::Opcode("GIM_CheckIntrinsicID")
<< MatchTable::Comment("MI") << MatchTable::ULEB128Value(InsnVarID)
<< MatchTable::Comment("Op") << MatchTable::ULEB128Value(OpIdx)
- << MatchTable::NamedValue(2, "Intrinsic::" + II->EnumName.str())
+ << MatchTable::NamedValue(2, "Intrinsic::" + II->EnumName)
<< MatchTable::LineBreak;
}
@@ -2180,7 +2180,7 @@ void IntrinsicIDRenderer::emitRenderOpcodes(MatchTable &Table,
RuleMatcher &Rule) const {
Table << MatchTable::Opcode("GIR_AddIntrinsicID") << MatchTable::Comment("MI")
<< MatchTable::ULEB128Value(InsnID)
- << MatchTable::NamedValue(2, "Intrinsic::" + II->EnumName.str())
+ << MatchTable::NamedValue(2, "Intrinsic::" + II->EnumName)
<< MatchTable::LineBreak;
}
diff --git a/llvm/utils/TableGen/SearchableTableEmitter.cpp b/llvm/utils/TableGen/SearchableTableEmitter.cpp
index d17d90b452bd7..295be9d54aef6 100644
--- a/llvm/utils/TableGen/SearchableTableEmitter.cpp
+++ b/llvm/utils/TableGen/SearchableTableEmitter.cpp
@@ -137,7 +137,7 @@ class SearchableTableEmitter {
if (const auto *BI = dyn_cast<BitInit>(I))
return BI->getValue() ? "true" : "false";
if (Field.IsIntrinsic)
- return "Intrinsic::" + getIntrinsic(I).EnumName.str();
+ return "Intrinsic::" + getIntrinsic(I).EnumName;
if (Field.IsInstruction)
return I->getAsString();
if (Field.Enum) {
More information about the llvm-commits
mailing list