[llvm] [LLVM] Change `Intrinsic::getBaseName` to return std::string (PR #111613)

Rahul Joshi via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 8 18:45:49 PDT 2024


https://github.com/jurahul created https://github.com/llvm/llvm-project/pull/111613

Change `Intrinsic::getName()` and `Intrinsic::getBaseName()` to return std::string. This is in preparation for reducing the size of the static intrinsic string table as described in https://discourse.llvm.org/t/rfc-compress-intrinsic-name-table/82412

>From 74b30cd4cb6665dc181aaf55ea0b857dcc3c1358 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Tue, 8 Oct 2024 18:41:40 -0700
Subject: [PATCH] [LLVM] Change `Intrinsic::getBaseName` to return std::string

Change `Intrinsic::getName()` and `Intrinsic::getBaseName()` to return
std::string. This is in preparation for reducing the size of the static
intrinsic string table as described in
https://discourse.llvm.org/t/rfc-compress-intrinsic-name-table/82412
---
 llvm/include/llvm-c/Core.h                    | 21 +++--
 llvm/include/llvm/IR/Intrinsics.h             |  4 +-
 llvm/lib/Analysis/IRSimilarityIdentifier.cpp  |  2 +-
 llvm/lib/CodeGen/ReplaceWithVeclib.cpp        |  2 +-
 llvm/lib/IR/Core.cpp                          | 33 +++++---
 llvm/lib/IR/Intrinsics.cpp                    |  4 +-
 .../Scalar/LowerMatrixIntrinsics.cpp          |  4 +-
 llvm/unittests/IR/IntrinsicsTest.cpp          | 78 +++++++++++++++++++
 8 files changed, 122 insertions(+), 26 deletions(-)

diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h
index 28dc270ca368d2..d9eafd01efc709 100644
--- a/llvm/include/llvm-c/Core.h
+++ b/llvm/include/llvm-c/Core.h
@@ -2833,11 +2833,17 @@ LLVMTypeRef LLVMIntrinsicGetType(LLVMContextRef Ctx, unsigned ID,
  */
 const char *LLVMIntrinsicGetName(unsigned ID, size_t *NameLength);
 
+/**
+ * Retrieves the name of an intrinsic. The caller is responsible for freeing the
+ * returned string.
+ *
+ * @see llvm::Intrinsic::getName()
+ */
+char *LLVMIntrinsicCopyName(unsigned ID, size_t *NameLength);
+
 /** Deprecated: Use LLVMIntrinsicCopyOverloadedName2 instead. */
-const char *LLVMIntrinsicCopyOverloadedName(unsigned ID,
-                                            LLVMTypeRef *ParamTypes,
-                                            size_t ParamCount,
-                                            size_t *NameLength);
+char *LLVMIntrinsicCopyOverloadedName(unsigned ID, LLVMTypeRef *ParamTypes,
+                                      size_t ParamCount, size_t *NameLength);
 
 /**
  * Copies the name of an overloaded intrinsic identified by a given list of
@@ -2850,10 +2856,9 @@ const char *LLVMIntrinsicCopyOverloadedName(unsigned ID,
  *
  * @see llvm::Intrinsic::getName()
  */
-const char *LLVMIntrinsicCopyOverloadedName2(LLVMModuleRef Mod, unsigned ID,
-                                             LLVMTypeRef *ParamTypes,
-                                             size_t ParamCount,
-                                             size_t *NameLength);
+char *LLVMIntrinsicCopyOverloadedName2(LLVMModuleRef Mod, unsigned ID,
+                                       LLVMTypeRef *ParamTypes,
+                                       size_t ParamCount, size_t *NameLength);
 
 /**
  * Obtain if the intrinsic identified by the given ID is overloaded.
diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h
index b251036247c5c0..292300e9326f9c 100644
--- a/llvm/include/llvm/IR/Intrinsics.h
+++ b/llvm/include/llvm/IR/Intrinsics.h
@@ -52,11 +52,11 @@ namespace Intrinsic {
   /// Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
   /// Note, this version is for intrinsics with no overloads.  Use the other
   /// version of getName if overloads are required.
-  StringRef getName(ID id);
+  std::string getName(ID id);
 
   /// Return the LLVM name for an intrinsic, without encoded types for
   /// overloading, such as "llvm.ssa.copy".
-  StringRef getBaseName(ID id);
+  std::string getBaseName(ID id);
 
   /// Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx" or
   /// "llvm.ssa.copy.p0s_s.1". Note, this version of getName supports overloads.
diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
index 42e986e6179dd3..3f5aa90a50d54f 100644
--- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -147,7 +147,7 @@ void IRInstructionData::setCalleeName(bool MatchByName) {
           Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT);
     // If there is not an overloaded name, we only need to use this version.
     else
-      CalleeName = Intrinsic::getName(IntrinsicID).str();
+      CalleeName = Intrinsic::getName(IntrinsicID);
 
     return;
   }
diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index 9fbb7b461364b1..34458a780690a1 100644
--- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -130,7 +130,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
   std::string ScalarName =
       Intrinsic::isOverloaded(IID)
           ? Intrinsic::getName(IID, ScalarArgTypes, II->getModule())
-          : Intrinsic::getName(IID).str();
+          : Intrinsic::getName(IID);
 
   // Try to find the mapping for the scalar version of this intrinsic and the
   // exact vector width of the call operands in the TargetLibraryInfo. First,
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp
index ee084e870263d0..410482d3310cce 100644
--- a/llvm/lib/IR/Core.cpp
+++ b/llvm/lib/IR/Core.cpp
@@ -39,12 +39,14 @@
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Mutex.h"
 #include "llvm/Support/Threading.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <cstdlib>
 #include <cstring>
 #include <system_error>
+#include <unordered_map>
 
 using namespace llvm;
 
@@ -2472,10 +2474,24 @@ LLVMValueRef LLVMGetIntrinsicDeclaration(LLVMModuleRef Mod,
 }
 
 const char *LLVMIntrinsicGetName(unsigned ID, size_t *NameLength) {
+  static std::unordered_map<Intrinsic::ID, std::string> IntrinsicNamePool;
+  static sys::Mutex Mutex;
   auto IID = llvm_map_to_intrinsic_id(ID);
-  auto Str = llvm::Intrinsic::getName(IID);
+
+  std::lock_guard<sys::Mutex> Guard(Mutex);
+  auto [Iter, Inserted] = IntrinsicNamePool.try_emplace(IID);
+  if (Inserted)
+    Iter->second = llvm::Intrinsic::getName(IID);
+  const std::string &Name = Iter->second;
+  *NameLength = Name.size();
+  return Name.data();
+}
+
+char *LLVMIntrinsicCopyName(unsigned ID, size_t *NameLength) {
+  auto IID = llvm_map_to_intrinsic_id(ID);
+  std::string Str = llvm::Intrinsic::getName(IID);
   *NameLength = Str.size();
-  return Str.data();
+  return strdup(Str.c_str());
 }
 
 LLVMTypeRef LLVMIntrinsicGetType(LLVMContextRef Ctx, unsigned ID,
@@ -2485,10 +2501,8 @@ LLVMTypeRef LLVMIntrinsicGetType(LLVMContextRef Ctx, unsigned ID,
   return wrap(llvm::Intrinsic::getType(*unwrap(Ctx), IID, Tys));
 }
 
-const char *LLVMIntrinsicCopyOverloadedName(unsigned ID,
-                                            LLVMTypeRef *ParamTypes,
-                                            size_t ParamCount,
-                                            size_t *NameLength) {
+char *LLVMIntrinsicCopyOverloadedName(unsigned ID, LLVMTypeRef *ParamTypes,
+                                      size_t ParamCount, size_t *NameLength) {
   auto IID = llvm_map_to_intrinsic_id(ID);
   ArrayRef<Type*> Tys(unwrap(ParamTypes), ParamCount);
   auto Str = llvm::Intrinsic::getNameNoUnnamedTypes(IID, Tys);
@@ -2496,10 +2510,9 @@ const char *LLVMIntrinsicCopyOverloadedName(unsigned ID,
   return strdup(Str.c_str());
 }
 
-const char *LLVMIntrinsicCopyOverloadedName2(LLVMModuleRef Mod, unsigned ID,
-                                             LLVMTypeRef *ParamTypes,
-                                             size_t ParamCount,
-                                             size_t *NameLength) {
+char *LLVMIntrinsicCopyOverloadedName2(LLVMModuleRef Mod, unsigned ID,
+                                       LLVMTypeRef *ParamTypes,
+                                       size_t ParamCount, size_t *NameLength) {
   auto IID = llvm_map_to_intrinsic_id(ID);
   ArrayRef<Type *> Tys(unwrap(ParamTypes), ParamCount);
   auto Str = llvm::Intrinsic::getName(IID, Tys, unwrap(Mod));
diff --git a/llvm/lib/IR/Intrinsics.cpp b/llvm/lib/IR/Intrinsics.cpp
index ef26b1926b9767..a6c63b716d0534 100644
--- a/llvm/lib/IR/Intrinsics.cpp
+++ b/llvm/lib/IR/Intrinsics.cpp
@@ -44,12 +44,12 @@ static constexpr const char *const IntrinsicNameTable[] = {
 #undef GET_INTRINSIC_NAME_TABLE
 };
 
-StringRef Intrinsic::getBaseName(ID id) {
+std::string Intrinsic::getBaseName(ID id) {
   assert(id < num_intrinsics && "Invalid intrinsic ID!");
   return IntrinsicNameTable[id];
 }
 
-StringRef Intrinsic::getName(ID id) {
+std::string Intrinsic::getName(ID id) {
   assert(id < num_intrinsics && "Invalid intrinsic ID!");
   assert(!Intrinsic::isOverloaded(id) &&
          "This version of getName does not support overloading");
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 0d98e844cf91ea..7bdee8bb54d115 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -2278,8 +2278,8 @@ class LowerMatrixIntrinsics {
           return;
         }
         auto *II = cast<IntrinsicInst>(CI);
-        write(Intrinsic::getBaseName(II->getIntrinsicID())
-                  .drop_front(StringRef("llvm.matrix.").size()));
+        std::string IntName = Intrinsic::getBaseName(II->getIntrinsicID());
+        write(StringRef(IntName).drop_front(StringRef("llvm.matrix.").size()));
         write(".");
         std::string Tmp;
         raw_string_ostream SS(Tmp);
diff --git a/llvm/unittests/IR/IntrinsicsTest.cpp b/llvm/unittests/IR/IntrinsicsTest.cpp
index 0c4af28a2ab57b..8446cd5eee39e3 100644
--- a/llvm/unittests/IR/IntrinsicsTest.cpp
+++ b/llvm/unittests/IR/IntrinsicsTest.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/IR/Intrinsics.h"
+#include "llvm-c/Core.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/IRBuilder.h"
@@ -25,6 +26,7 @@
 #include "llvm/IR/IntrinsicsS390.h"
 #include "llvm/IR/IntrinsicsX86.h"
 #include "llvm/IR/Module.h"
+#include "llvm/Support/Parallel.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
@@ -127,6 +129,82 @@ TEST(IntrinsicNameLookup, MSBuiltinLookup) {
     EXPECT_EQ(ID, getIntrinsicForMSBuiltin(Target, Builtin));
 }
 
+// Test C API to get/copy LLVM intrinsic name.
+TEST(IntrinsicNameLookup, LLVMIntrinsicGetCopyNameSimple) {
+  static constexpr struct {
+    Intrinsic::ID ID;
+    const char *Name;
+  } Tests[] = {{Intrinsic::not_intrinsic, "not_intrinsic"},
+               {Intrinsic::assume, "llvm.assume"},
+               {Intrinsic::coro_free, "llvm.coro.free"},
+               {Intrinsic::aarch64_break, "llvm.aarch64.break"},
+               {Intrinsic::x86_int, "llvm.x86.int"}};
+
+  for (auto [ID, ExpectedName] : Tests) {
+    size_t NameSize = 0;
+    const char *CName = LLVMIntrinsicGetName(ID, &NameSize);
+    StringRef Name(CName, NameSize);
+
+    // Verify we get correct name.
+    EXPECT_EQ(Name, ExpectedName);
+    const char *CName1 = LLVMIntrinsicGetName(ID, &NameSize);
+
+    // Verify we get the same pointer and length the second time.
+    EXPECT_EQ(CName, CName1);
+    EXPECT_EQ(NameSize, Name.size());
+  }
+
+  for (auto [ID, ExpectedName] : Tests) {
+    size_t NameSize = 0;
+    // Now test the copy API.
+    char *CName = LLVMIntrinsicCopyName(ID, &NameSize);
+    StringRef Name(CName, NameSize);
+
+    // Verify we get correct name.
+    EXPECT_EQ(Name, ExpectedName);
+
+    // Verify we get the different pointer and same length the second time.
+    char *CName1 = LLVMIntrinsicCopyName(ID, &NameSize);
+    EXPECT_NE(CName, CName1);
+    EXPECT_EQ(NameSize, Name.size());
+
+    free(CName);
+    free(CName1);
+  }
+}
+
+// Test C API to get/copy LLVM intrinsic name.
+TEST(IntrinsicNameLookup, LLVMIntrinsicGetNameMultiThreaded) {
+  constexpr unsigned NUM_TASKS = 16;
+  constexpr unsigned STEP = 40;
+  std::map<unsigned, StringRef> LookupResults[NUM_TASKS];
+
+  parallelFor(0, NUM_TASKS, [&](size_t Idx) {
+    for (unsigned ID = 0; ID < Intrinsic::num_intrinsics; ID += STEP) {
+      if (LLVMIntrinsicIsOverloaded(ID))
+        continue;
+      size_t NameSize;
+      const char *Name = LLVMIntrinsicGetName(ID, &NameSize);
+      LookupResults[Idx].insert({ID, StringRef(Name, NameSize)});
+    }
+  });
+
+  // Validate data.
+  for (unsigned ID = 0; ID < Intrinsic::num_intrinsics; ID += STEP) {
+    if (LLVMIntrinsicIsOverloaded(ID))
+      continue;
+    size_t NameSize;
+    const char *CName = LLVMIntrinsicGetName(ID, &NameSize);
+    StringRef Name(CName, NameSize);
+    for (unsigned Idx = 0; Idx < NUM_TASKS; ++Idx) {
+      auto Iter = LookupResults[Idx].find(ID);
+      ASSERT_NE(Iter, LookupResults[Idx].end());
+      EXPECT_EQ(Iter->second.size(), Name.size());
+      EXPECT_EQ(Iter->second.data(), Name.data());
+    }
+  }
+}
+
 TEST_F(IntrinsicsTest, InstrProfInheritance) {
   auto isInstrProfInstBase = [](const Instruction &I) {
     return isa<InstrProfInstBase>(I);



More information about the llvm-commits mailing list