[llvm] Switch the intrinsic names to a string table (PR #118929)

Chandler Carruth via llvm-commits llvm-commits at lists.llvm.org
Sat Dec 7 16:54:32 PST 2024


https://github.com/chandlerc updated https://github.com/llvm/llvm-project/pull/118929

>From ced9cf57216efca1a7d9f13061d62270fdd0e426 Mon Sep 17 00:00:00 2001
From: Chandler Carruth <chandlerc at gmail.com>
Date: Thu, 5 Dec 2024 21:12:41 +0000
Subject: [PATCH 1/2] Switch the intrinsic names to a string table

This avoids the need to dynamically relocate each pointer in the table.

To make this work, this PR also moves the binary search of intrinsic
names to an internal function with an adjusted signature, and switches
the unittesting to test against actual intrinsics.
---
 llvm/include/llvm/IR/Intrinsics.h        |  7 ---
 llvm/lib/IR/Intrinsics.cpp               | 65 +++++++++++++++---------
 llvm/unittests/IR/IntrinsicsTest.cpp     | 29 +++++------
 llvm/utils/TableGen/IntrinsicEmitter.cpp | 28 +++++++++-
 4 files changed, 80 insertions(+), 49 deletions(-)

diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h
index 89dfff256e0c43..82f72131b9d2f4 100644
--- a/llvm/include/llvm/IR/Intrinsics.h
+++ b/llvm/include/llvm/IR/Intrinsics.h
@@ -112,13 +112,6 @@ namespace Intrinsic {
   Function *getDeclarationIfExists(Module *M, ID id, ArrayRef<Type *> Tys,
                                    FunctionType *FT = nullptr);
 
-  /// Looks up Name in NameTable via binary search. NameTable must be sorted
-  /// and all entries must start with "llvm.".  If NameTable contains an exact
-  /// match for Name or a prefix of Name followed by a dot, its index in
-  /// NameTable is returned. Otherwise, -1 is returned.
-  int lookupLLVMIntrinsicByName(ArrayRef<const char *> NameTable,
-                                StringRef Name, StringRef Target = "");
-
   /// Map a Clang builtin name to an intrinsic ID.
   ID getIntrinsicForClangBuiltin(StringRef TargetPrefix, StringRef BuiltinName);
 
diff --git a/llvm/lib/IR/Intrinsics.cpp b/llvm/lib/IR/Intrinsics.cpp
index 3130a0bd2955a5..4de76ffb11718e 100644
--- a/llvm/lib/IR/Intrinsics.cpp
+++ b/llvm/lib/IR/Intrinsics.cpp
@@ -34,16 +34,13 @@
 using namespace llvm;
 
 /// Table of string intrinsic names indexed by enum value.
-static constexpr const char *const IntrinsicNameTable[] = {
-    "not_intrinsic",
 #define GET_INTRINSIC_NAME_TABLE
 #include "llvm/IR/IntrinsicImpl.inc"
 #undef GET_INTRINSIC_NAME_TABLE
-};
 
 StringRef Intrinsic::getBaseName(ID id) {
   assert(id < num_intrinsics && "Invalid intrinsic ID!");
-  return IntrinsicNameTable[id];
+  return IntrinsicNameTable + IntrinsicNameOffsetTable[id];
 }
 
 StringRef Intrinsic::getName(ID id) {
@@ -621,9 +618,12 @@ bool Intrinsic::isTargetIntrinsic(Intrinsic::ID IID) {
   return IID > TargetInfos[0].Count;
 }
 
-int llvm::Intrinsic::lookupLLVMIntrinsicByName(ArrayRef<const char *> NameTable,
-                                               StringRef Name,
-                                               StringRef Target) {
+/// Looks up Name in NameTable via binary search. NameTable must be sorted
+/// and all entries must start with "llvm.".  If NameTable contains an exact
+/// match for Name or a prefix of Name followed by a dot, its index in
+/// NameTable is returned. Otherwise, -1 is returned.
+static int lookupLLVMIntrinsicByName(ArrayRef<int> NameOffsetTable,
+                                     StringRef Name, StringRef Target = "") {
   assert(Name.starts_with("llvm.") && "Unexpected intrinsic prefix");
   assert(Name.drop_front(5).starts_with(Target) && "Unexpected target");
 
@@ -638,15 +638,31 @@ int llvm::Intrinsic::lookupLLVMIntrinsicByName(ArrayRef<const char *> NameTable,
   if (!Target.empty())
     CmpEnd += 1 + Target.size(); // skip the .target component.
 
-  const char *const *Low = NameTable.begin();
-  const char *const *High = NameTable.end();
-  const char *const *LastLow = Low;
+  const int *Low = NameOffsetTable.begin();
+  const int *High = NameOffsetTable.end();
+  const int *LastLow = Low;
   while (CmpEnd < Name.size() && High - Low > 0) {
     size_t CmpStart = CmpEnd;
     CmpEnd = Name.find('.', CmpStart + 1);
     CmpEnd = CmpEnd == StringRef::npos ? Name.size() : CmpEnd;
-    auto Cmp = [CmpStart, CmpEnd](const char *LHS, const char *RHS) {
-      return strncmp(LHS + CmpStart, RHS + CmpStart, CmpEnd - CmpStart) < 0;
+    auto Cmp = [CmpStart, CmpEnd](auto LHS, auto RHS) {
+      // `equal_range` requires the comparison to work with either side being an
+      // offset or the value. Detect which kind each side is to set up the
+      // compared strings.
+      const char *LHSStr;
+      if constexpr (std::is_integral_v<decltype(LHS)>) {
+        LHSStr = &IntrinsicNameTable[LHS];
+      } else {
+        LHSStr = LHS;
+      }
+      const char *RHSStr;
+      if constexpr (std::is_integral_v<decltype(RHS)>) {
+        RHSStr = &IntrinsicNameTable[RHS];
+      } else {
+        RHSStr = RHS;
+      }
+      return strncmp(LHSStr + CmpStart, RHSStr + CmpStart, CmpEnd - CmpStart) <
+             0;
     };
     LastLow = Low;
     std::tie(Low, High) = std::equal_range(Low, High, Name.data(), Cmp);
@@ -654,21 +670,21 @@ int llvm::Intrinsic::lookupLLVMIntrinsicByName(ArrayRef<const char *> NameTable,
   if (High - Low > 0)
     LastLow = Low;
 
-  if (LastLow == NameTable.end())
+  if (LastLow == NameOffsetTable.end())
     return -1;
-  StringRef NameFound = *LastLow;
+  StringRef NameFound = &IntrinsicNameTable[*LastLow];
   if (Name == NameFound ||
       (Name.starts_with(NameFound) && Name[NameFound.size()] == '.'))
-    return LastLow - NameTable.begin();
+    return LastLow - NameOffsetTable.begin();
   return -1;
 }
 
-/// Find the segment of \c IntrinsicNameTable for intrinsics with the same
+/// Find the segment of \c IntrinsicNameOffsetTable for intrinsics with the same
 /// target as \c Name, or the generic table if \c Name is not target specific.
 ///
-/// Returns the relevant slice of \c IntrinsicNameTable and the target name.
-static std::pair<ArrayRef<const char *>, StringRef>
-findTargetSubtable(StringRef Name) {
+/// Returns the relevant slice of \c IntrinsicNameOffsetTable and the target
+/// name.
+static std::pair<ArrayRef<int>, StringRef> findTargetSubtable(StringRef Name) {
   assert(Name.starts_with("llvm."));
 
   ArrayRef<IntrinsicTargetInfo> Targets(TargetInfos);
@@ -680,25 +696,26 @@ findTargetSubtable(StringRef Name) {
   // We've either found the target or just fall back to the generic set, which
   // is always first.
   const auto &TI = It != Targets.end() && It->Name == Target ? *It : Targets[0];
-  return {ArrayRef(&IntrinsicNameTable[1] + TI.Offset, TI.Count), TI.Name};
+  return {ArrayRef(&IntrinsicNameOffsetTable[1] + TI.Offset, TI.Count),
+          TI.Name};
 }
 
 /// This does the actual lookup of an intrinsic ID which matches the given
 /// function name.
 Intrinsic::ID Intrinsic::lookupIntrinsicID(StringRef Name) {
-  auto [NameTable, Target] = findTargetSubtable(Name);
-  int Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable, Name, Target);
+  auto [NameOffsetTable, Target] = findTargetSubtable(Name);
+  int Idx = lookupLLVMIntrinsicByName(NameOffsetTable, Name, Target);
   if (Idx == -1)
     return Intrinsic::not_intrinsic;
 
   // Intrinsic IDs correspond to the location in IntrinsicNameTable, but we have
   // an index into a sub-table.
-  int Adjust = NameTable.data() - IntrinsicNameTable;
+  int Adjust = NameOffsetTable.data() - IntrinsicNameOffsetTable;
   Intrinsic::ID ID = static_cast<Intrinsic::ID>(Idx + Adjust);
 
   // If the intrinsic is not overloaded, require an exact match. If it is
   // overloaded, require either exact or prefix match.
-  const auto MatchSize = strlen(NameTable[Idx]);
+  const auto MatchSize = strlen(&IntrinsicNameTable[NameOffsetTable[Idx]]);
   assert(Name.size() >= MatchSize && "Expected either exact or prefix match");
   bool IsExactMatch = Name.size() == MatchSize;
   return IsExactMatch || Intrinsic::isOverloaded(ID) ? ID
diff --git a/llvm/unittests/IR/IntrinsicsTest.cpp b/llvm/unittests/IR/IntrinsicsTest.cpp
index b32a1991448595..ad9af2075e371d 100644
--- a/llvm/unittests/IR/IntrinsicsTest.cpp
+++ b/llvm/unittests/IR/IntrinsicsTest.cpp
@@ -63,24 +63,21 @@ class IntrinsicsTest : public ::testing::Test {
 };
 
 TEST(IntrinsicNameLookup, Basic) {
-  static constexpr const char *const NameTable[] = {
-      "llvm.foo", "llvm.foo.a", "llvm.foo.b", "llvm.foo.b.a", "llvm.foo.c",
-  };
+  using namespace Intrinsic;
+  EXPECT_EQ(Intrinsic::memcpy, lookupIntrinsicID("llvm.memcpy"));
 
-  static constexpr std::pair<const char *, int> Tests[] = {
-      {"llvm.foo", 0},     {"llvm.foo.f64", 0}, {"llvm.foo.b", 2},
-      {"llvm.foo.b.a", 3}, {"llvm.foo.c", 4},   {"llvm.foo.c.f64", 4},
-      {"llvm.bar", -1},
-  };
+  // Partial, either between dots or not the last dot are not intrinsics.
+  EXPECT_EQ(not_intrinsic, lookupIntrinsicID("llvm.mem"));
+  EXPECT_EQ(not_intrinsic, lookupIntrinsicID("llvm.gc"));
 
-  for (const auto &[Name, ExpectedIdx] : Tests) {
-    int Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable, Name);
-    EXPECT_EQ(ExpectedIdx, Idx);
-    if (!StringRef(Name).starts_with("llvm.foo"))
-      continue;
-    Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable, Name, "foo");
-    EXPECT_EQ(ExpectedIdx, Idx);
-  }
+  // Look through intrinsic names with internal dots.
+  EXPECT_EQ(memcpy_inline, lookupIntrinsicID("llvm.memcpy.inline"));
+
+  // Check that overloaded names are mapped to the underlying ID.
+  EXPECT_EQ(memcpy_inline, lookupIntrinsicID("llvm.memcpy.inline.p0.p0.i8"));
+  EXPECT_EQ(memcpy_inline, lookupIntrinsicID("llvm.memcpy.inline.p0.p0.i32"));
+  EXPECT_EQ(memcpy_inline, lookupIntrinsicID("llvm.memcpy.inline.p0.p0.i64"));
+  EXPECT_EQ(memcpy_inline, lookupIntrinsicID("llvm.memcpy.inline.p0.p0.i1024"));
 }
 
 // Tests to verify getIntrinsicForClangBuiltin.
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 070d7522a97be9..d986620c52018e 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -239,13 +239,37 @@ static constexpr IntrinsicTargetInfo TargetInfos[] = {
 
 void IntrinsicEmitter::EmitIntrinsicToNameTable(
     const CodeGenIntrinsicTable &Ints, raw_ostream &OS) {
+  // Built up a table of the intrinsic names.
+  constexpr StringLiteral NotIntrinsic = "not_intrinsic";
+  StringToOffsetTable Table;
+  Table.GetOrAddStringOffset(NotIntrinsic);
+  for (const auto &Int : Ints)
+    Table.GetOrAddStringOffset(Int.Name);
+
   OS << R"(// Intrinsic ID to name table.
 #ifdef GET_INTRINSIC_NAME_TABLE
 // Note that entry #0 is the invalid intrinsic!
+
+)";
+
+  Table.EmitStringLiteralDef(OS, "static constexpr char IntrinsicNameTable[]",
+                             /*Indent=*/"");
+
+  OS << R"(
+static constexpr int IntrinsicNameOffsetTable[] = {
 )";
+
+  OS << formatv("  {}, // {}\n", Table.GetStringOffset(NotIntrinsic),
+                NotIntrinsic);
   for (const auto &Int : Ints)
-    OS << "  \"" << Int.Name << "\",\n";
-  OS << "#endif\n\n";
+    OS << formatv("  {}, // {}\n", Table.GetStringOffset(Int.Name), Int.Name);
+
+  OS << R"(
+}; // IntrinsicNameOffsetTable
+
+#endif
+
+)";
 }
 
 void IntrinsicEmitter::EmitIntrinsicToOverloadTable(

>From 7c1fa257944fa2d861c505a4acbbf881b888047b Mon Sep 17 00:00:00 2001
From: Chandler Carruth <chandlerc at gmail.com>
Date: Sun, 8 Dec 2024 00:44:47 +0000
Subject: [PATCH 2/2] change type

---
 llvm/lib/IR/Intrinsics.cpp               | 11 ++++++-----
 llvm/utils/TableGen/IntrinsicEmitter.cpp |  2 +-
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/IR/Intrinsics.cpp b/llvm/lib/IR/Intrinsics.cpp
index 4de76ffb11718e..ec1184e8d835d6 100644
--- a/llvm/lib/IR/Intrinsics.cpp
+++ b/llvm/lib/IR/Intrinsics.cpp
@@ -622,7 +622,7 @@ bool Intrinsic::isTargetIntrinsic(Intrinsic::ID IID) {
 /// and all entries must start with "llvm.".  If NameTable contains an exact
 /// match for Name or a prefix of Name followed by a dot, its index in
 /// NameTable is returned. Otherwise, -1 is returned.
-static int lookupLLVMIntrinsicByName(ArrayRef<int> NameOffsetTable,
+static int lookupLLVMIntrinsicByName(ArrayRef<unsigned> NameOffsetTable,
                                      StringRef Name, StringRef Target = "") {
   assert(Name.starts_with("llvm.") && "Unexpected intrinsic prefix");
   assert(Name.drop_front(5).starts_with(Target) && "Unexpected target");
@@ -638,9 +638,9 @@ static int lookupLLVMIntrinsicByName(ArrayRef<int> NameOffsetTable,
   if (!Target.empty())
     CmpEnd += 1 + Target.size(); // skip the .target component.
 
-  const int *Low = NameOffsetTable.begin();
-  const int *High = NameOffsetTable.end();
-  const int *LastLow = Low;
+  const unsigned *Low = NameOffsetTable.begin();
+  const unsigned *High = NameOffsetTable.end();
+  const unsigned *LastLow = Low;
   while (CmpEnd < Name.size() && High - Low > 0) {
     size_t CmpStart = CmpEnd;
     CmpEnd = Name.find('.', CmpStart + 1);
@@ -684,7 +684,8 @@ static int lookupLLVMIntrinsicByName(ArrayRef<int> NameOffsetTable,
 ///
 /// Returns the relevant slice of \c IntrinsicNameOffsetTable and the target
 /// name.
-static std::pair<ArrayRef<int>, StringRef> findTargetSubtable(StringRef Name) {
+static std::pair<ArrayRef<unsigned>, StringRef>
+findTargetSubtable(StringRef Name) {
   assert(Name.starts_with("llvm."));
 
   ArrayRef<IntrinsicTargetInfo> Targets(TargetInfos);
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index d986620c52018e..093602c3da8045 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -256,7 +256,7 @@ void IntrinsicEmitter::EmitIntrinsicToNameTable(
                              /*Indent=*/"");
 
   OS << R"(
-static constexpr int IntrinsicNameOffsetTable[] = {
+static constexpr unsigned IntrinsicNameOffsetTable[] = {
 )";
 
   OS << formatv("  {}, // {}\n", Table.GetStringOffset(NotIntrinsic),



More information about the llvm-commits mailing list