[llvm] POC for intrinsic with underscore in name (PR #164662)
    Rahul Joshi via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Wed Oct 22 10:50:14 PDT 2025
    
    
  
https://github.com/jurahul updated https://github.com/llvm/llvm-project/pull/164662
>From 7b59746110456be554f2095119d059fa90f54bb6 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Wed, 22 Oct 2025 10:10:34 -0700
Subject: [PATCH] POC for intrinsic with underscore in name
---
 llvm/include/llvm/IR/Intrinsics.td            |  3 +-
 .../TableGen/intrinsic-duplicate-enum-name.td |  9 ++
 .../TableGen/Basic/CodeGenIntrinsics.cpp      | 96 ++++++++++++++++---
 llvm/utils/TableGen/Basic/CodeGenIntrinsics.h |  2 +-
 .../GlobalISel/GlobalISelMatchTable.cpp       |  4 +-
 .../utils/TableGen/SearchableTableEmitter.cpp |  2 +-
 6 files changed, 99 insertions(+), 17 deletions(-)
 create mode 100644 llvm/test/TableGen/intrinsic-duplicate-enum-name.td
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index e6cce9a4eea1d..939e34b80bb8a 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1876,8 +1876,7 @@ def int_fptosi_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>
 
 // Clear cache intrinsic, default to ignore (ie. emit nothing)
 // maps to void __clear_cache() on supporting platforms
-def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
-                                [], "llvm.clear_cache">;
+def int_clear__cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty]>;
 
 // Intrinsic to detect whether its argument is a constant.
 def int_is_constant : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty],
diff --git a/llvm/test/TableGen/intrinsic-duplicate-enum-name.td b/llvm/test/TableGen/intrinsic-duplicate-enum-name.td
new file mode 100644
index 0000000000000..b091f9cd588dc
--- /dev/null
+++ b/llvm/test/TableGen/intrinsic-duplicate-enum-name.td
@@ -0,0 +1,9 @@
+// RUN: not llvm-tblgen -gen-intrinsic-impl -I %p/../../include %s -DTEST_INTRINSICS_SUPPRESS_DEFS 2>&1 | FileCheck %s -DFILE=%s
+
+include "llvm/IR/Intrinsics.td"
+
+def int_x__y_z : Intrinsic<[llvm_anyint_ty], [], []>;
+
+// CHECK: [[FILE]]:[[@LINE+2]]:5: error: `Intrinsic::x_y_z` is already defined
+// CHECK: [[FILE]]:[[@LINE-3]]:5: note: Previous definition here
+def int_x_y_z : Intrinsic<[llvm_anyint_ty], [], []>;
diff --git a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
index be7537c83da3a..cc3b48649f020 100644
--- a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
+++ b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
@@ -90,15 +90,44 @@ void CodeGenIntrinsicTable::CheckDuplicateIntrinsics() const {
       [](const CodeGenIntrinsic &Int1, const CodeGenIntrinsic &Int2) {
         return Int1.Name == Int2.Name;
       });
-  if (I == Intrinsics.end())
-    return;
+  if (I != Intrinsics.end()) {
+    // Found 2 intrinsics with same name.
+    const CodeGenIntrinsic &First = *I;
+    const CodeGenIntrinsic &Second = *(I + 1);
+    PrintError(Second.TheDef,
+               Twine("Intrinsic `") + First.Name + "` is already defined");
+    PrintFatalNote(First.TheDef, "Previous definition here");
+  }
 
-  // Found a duplicate intrinsics.
-  const CodeGenIntrinsic &First = *I;
-  const CodeGenIntrinsic &Second = *(I + 1);
-  PrintError(Second.TheDef,
-             Twine("Intrinsic `") + First.Name + "` is already defined");
-  PrintFatalNote(First.TheDef, "Previous definition here");
+  // Now detect intrinsics that may have the same enum name. For that, we first
+  // sort the intrinsics by their enum name.
+  std::vector<const CodeGenIntrinsic *> SortedByEnumName;
+  SortedByEnumName.reserve(size());
+  for (const CodeGenIntrinsic &Int : Intrinsics)
+    SortedByEnumName.push_back(&Int);
+
+  llvm::sort(SortedByEnumName, [](const CodeGenIntrinsic *LHS,
+                                  const CodeGenIntrinsic *RHS) {
+    // To ensure deterministic sorted order when duplicates are
+    // present, use record ID as a tie-breaker
+    unsigned LhsID = LHS->TheDef->getID();
+    unsigned RhsID = RHS->TheDef->getID();
+    return std::tie(LHS->EnumName, LhsID) < std::tie(RHS->EnumName, RhsID);
+  });
+  auto J = std::adjacent_find(
+      SortedByEnumName.begin(), SortedByEnumName.end(),
+      [](const CodeGenIntrinsic *Int1, const CodeGenIntrinsic *Int2) {
+        return Int1->EnumName == Int2->EnumName;
+      });
+
+  if (J != SortedByEnumName.end()) {
+    // Found 2 intrinsics with same enum name.
+    const CodeGenIntrinsic *First = *J;
+    const CodeGenIntrinsic *Second = *(J + 1);
+    PrintError(Second->TheDef, Twine("`Intrinsic::") + First->EnumName +
+                                   "` is already defined");
+    PrintFatalNote(First->TheDef, "Previous definition here");
+  }
 }
 
 // For target independent intrinsics, check that their second dotted component
@@ -257,6 +286,24 @@ const CodeGenIntrinsic &CodeGenIntrinsicMap::operator[](const Record *Record) {
   return *Iter->second;
 }
 
+// Sanitize the intrinsic name by replacing each _ pair with a single _ and
+// optionally each single _ (in the original input string) with .
+static void sanitizeName(std::string &Name, bool ReplaceSingleUnderscore) {
+  size_t Next = 0;
+  for (size_t I = 0, E = Name.size(); I < E; ++I) {
+    if (Name[I] == '_' && I + 1 < E && Name[I + 1] == '_') {
+      Name[Next++] = '_';
+      // Skip over both the _s.
+      ++I;
+    } else if (ReplaceSingleUnderscore && Name[I] == '_') {
+      Name[Next++] = '.';
+    } else {
+      Name[Next++] = Name[I];
+    }
+  }
+  Name = Name.substr(0, Next);
+}
+
 CodeGenIntrinsic::CodeGenIntrinsic(const Record *R,
                                    const CodeGenIntrinsicContext &Ctx)
     : TheDef(R) {
@@ -267,7 +314,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(const Record *R,
     PrintFatalError(DefLoc,
                     "Intrinsic '" + DefName + "' does not start with 'int_'!");
 
-  EnumName = DefName.substr(4);
+  EnumName = DefName.substr(4).str();
 
   // Ignore a missing ClangBuiltinName field.
   ClangBuiltinName =
@@ -278,16 +325,43 @@ CodeGenIntrinsic::CodeGenIntrinsic(const Record *R,
   TargetPrefix = R->getValueAsString("TargetPrefix");
   Name = R->getValueAsString("LLVMName").str();
 
+  // Note, we only sanitize __ in intrinsic names and not their C++ enum names.
+  // The rationale is that if we sanitize enum names as well (by just replacing
+  // _ pairs with _) we may get conflicting enum names for different record
+  // names which is not desirable. For example:
+  //
+  //                 Enum Name      Enum Name     Intrinsic Name
+  //                 (Sanitized)    (Original)
+  //
+  //  int_x__y_z     x_y_z          x__y_z        llvm.x_y.z
+  //  int_x_y_z      x_y_z          x_y_z         llvm.x.y.z
+  //
+  // So with no enum name sanitization, two different record names will not
+  // conflicts in both enum names and intrinsic names. The side-effect is that
+  // intrinsics like int_clear_cache will need to be named int_clear__cache to
+  // have their default name be "llvm.clear_cache" but then their intrisnic name
+  // will change to "Intrinsic::clear__cache".
+
+  // Alternatively, we do sanitize the enum name (which preserved a lot of
+  // existing names), but then detect the cases where 2 different records may
+  // end up generating the same enum name. This/ can be done by extending
+  // CheckDuplicateIntrinsics() to detect duplicated enum names as well and
+  // fail if that happens.
+  // Note: (Implementing this option).
+
   if (Name == "") {
     // If an explicit name isn't specified, derive one from the DefName.
-    Name = "llvm." + EnumName.str();
-    llvm::replace(Name, '_', '.');
+    Name = "llvm." + EnumName;
+    sanitizeName(Name, /*ReplaceSingleUnderscore*/ true);
   } else {
     // Verify it starts with "llvm.".
     if (!StringRef(Name).starts_with("llvm."))
       PrintFatalError(DefLoc, "Intrinsic '" + DefName +
                                   "'s name does not start with 'llvm.'!");
   }
+  // Sanitize the enum name by just replacing each pair of _ with a single _.
+  // That way, most existing intrinsic names stay the same.
+  sanitizeName(EnumName, /*ReplaceSingleUnderscore*/ false);
 
   // If TargetPrefix is specified, make sure that Name starts with
   // "llvm.<targetprefix>.".
diff --git a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h
index 2e86149514f46..d73aa89d076de 100644
--- a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h
+++ b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h
@@ -35,7 +35,7 @@ struct CodeGenIntrinsicContext {
 struct CodeGenIntrinsic {
   const Record *TheDef; // The actual record defining this intrinsic.
   std::string Name;     // The name of the LLVM function "llvm.bswap.i32"
-  StringRef EnumName;   // The name of the enum "bswap_i32"
+  std::string EnumName; // The name of the enum "bswap_i32"
   StringRef ClangBuiltinName; // Name of the corresponding GCC builtin, or "".
   StringRef MSBuiltinName;    // Name of the corresponding MS builtin, or "".
   StringRef TargetPrefix;     // Target prefix, e.g. "ppc" for t-s intrinsics.
diff --git a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
index 5d49715879280..91a54aa933f6e 100644
--- a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
+++ b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
@@ -1364,7 +1364,7 @@ void IntrinsicIDOperandMatcher::emitPredicateOpcodes(MatchTable &Table,
   Table << MatchTable::Opcode("GIM_CheckIntrinsicID")
         << MatchTable::Comment("MI") << MatchTable::ULEB128Value(InsnVarID)
         << MatchTable::Comment("Op") << MatchTable::ULEB128Value(OpIdx)
-        << MatchTable::NamedValue(2, "Intrinsic::" + II->EnumName.str())
+        << MatchTable::NamedValue(2, "Intrinsic::" + II->EnumName)
         << MatchTable::LineBreak;
 }
 
@@ -2180,7 +2180,7 @@ void IntrinsicIDRenderer::emitRenderOpcodes(MatchTable &Table,
                                             RuleMatcher &Rule) const {
   Table << MatchTable::Opcode("GIR_AddIntrinsicID") << MatchTable::Comment("MI")
         << MatchTable::ULEB128Value(InsnID)
-        << MatchTable::NamedValue(2, "Intrinsic::" + II->EnumName.str())
+        << MatchTable::NamedValue(2, "Intrinsic::" + II->EnumName)
         << MatchTable::LineBreak;
 }
 
diff --git a/llvm/utils/TableGen/SearchableTableEmitter.cpp b/llvm/utils/TableGen/SearchableTableEmitter.cpp
index d17d90b452bd7..295be9d54aef6 100644
--- a/llvm/utils/TableGen/SearchableTableEmitter.cpp
+++ b/llvm/utils/TableGen/SearchableTableEmitter.cpp
@@ -137,7 +137,7 @@ class SearchableTableEmitter {
     if (const auto *BI = dyn_cast<BitInit>(I))
       return BI->getValue() ? "true" : "false";
     if (Field.IsIntrinsic)
-      return "Intrinsic::" + getIntrinsic(I).EnumName.str();
+      return "Intrinsic::" + getIntrinsic(I).EnumName;
     if (Field.IsInstruction)
       return I->getAsString();
     if (Field.Enum) {
    
    
More information about the llvm-commits
mailing list