[llvm] [TableGen] Rework `EmitIntrinsicToBuiltinMap` (PR #104681)

Rahul Joshi via llvm-commits llvm-commits at lists.llvm.org
Sat Aug 17 11:48:20 PDT 2024


https://github.com/jurahul created https://github.com/llvm/llvm-project/pull/104681

Rework `IntrinsicEmitter::EmitIntrinsicToBuiltinMap` to improve performance and refactor the code.

Performance:
- Current generated code does a linear search on the TargetPrefix, followed by a binary search on the builtin name within that target's builtins.
- Improve the performance of this code in 3 ways:
  - Build a table of lambdas, one per target, and use binary search on the target prefix to lookup the lambda for that target. 
  - Improve the (common) case of when all builtins for a target share a common prefix. Within that target's lambda, check this common prefix first, and then do the binary search on the builtin name with the common prefix removed. This should help both code size by creating a smaller static string table and runtime by reducing the cost of binary search on smaller strings.
  - Do not generate the builtin table when a target has a single builtin (in which case the builtin name is itself the common prefix, so the prefix check alone identifies it).

Refactor:
- Use range based for loops for iterating over maps.
- Use formatv() to simplify the emission code.

>From 7fcce6f69a251cb4041ec8c6bda94f66c37dcd14 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Sat, 17 Aug 2024 06:43:36 -0700
Subject: [PATCH] [TableGen] Rework `EmitIntrinsicToBuiltinMap`

Rework `IntrinsicEmitter::EmitIntrinsicToBuiltinMap` to improve
performance and refactor the code.

Performance:
- Current generated code does a linear search on the TargetPrefix,
  followed by a binary search on the builtin name within that
  target's builtins.
- Improve the performance of this code in 3 ways:
  (a) Build a table of lambdas, one per target, and use binary
      search on the target prefix to lookup the lambda for that
      target.
  (b) Improve the (common) case of when all builtins for a target
      share a common prefix. Within that target's lambda, check this
      common prefix first, and then do the binary search on the
      builtin name with the common prefix removed. This should help
      both code size by creating a smaller static string table and
      runtime by reducing the cost of binary search on smaller
      strings.
  (c) Do not generate the builtin table when a target has a single
      builtin (in which case that will also be the common prefix).

Refactor:
- Use range based for loops for iterating over maps.
- Use formatv() to simplify the emission code.
---
 llvm/include/llvm/ADT/STLExtras.h             |  13 +-
 llvm/include/llvm/IR/Intrinsics.h             |   4 +-
 .../llvm/TableGen/StringToOffsetTable.h       |   3 +-
 llvm/utils/TableGen/IntrinsicEmitter.cpp      | 215 +++++++++++++-----
 4 files changed, 175 insertions(+), 60 deletions(-)

diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h
index a810a1c318c42c..61022cc5a2d289 100644
--- a/llvm/include/llvm/ADT/STLExtras.h
+++ b/llvm/include/llvm/ADT/STLExtras.h
@@ -1847,7 +1847,7 @@ OutputIt replace_copy(R &&Range, OutputIt Out, const T &OldValue,
 /// begin/end explicitly.
 template <typename R, typename T>
 void replace(R &&Range, const T &OldValue, const T &NewValue) {
-  return std::replace(adl_begin(Range), adl_end(Range), OldValue, NewValue);
+  std::replace(adl_begin(Range), adl_end(Range), OldValue, NewValue);
 }
 
 /// Provide wrappers to std::move which take ranges instead of having to
@@ -1982,6 +1982,8 @@ auto upper_bound(R &&Range, T &&Value, Compare C) {
                           std::forward<T>(Value), C);
 }
 
+/// Provide wrappers to std::min_element which take ranges instead of having to
+/// pass begin/end explicitly.
 template <typename R> auto min_element(R &&Range) {
   return std::min_element(adl_begin(Range), adl_end(Range));
 }
@@ -1990,6 +1992,8 @@ template <typename R, typename Compare> auto min_element(R &&Range, Compare C) {
   return std::min_element(adl_begin(Range), adl_end(Range), C);
 }
 
+/// Provide wrappers to std::max_element which take ranges instead of having to
+/// pass begin/end explicitly.
 template <typename R> auto max_element(R &&Range) {
   return std::max_element(adl_begin(Range), adl_end(Range));
 }
@@ -1998,6 +2002,13 @@ template <typename R, typename Compare> auto max_element(R &&Range, Compare C) {
   return std::max_element(adl_begin(Range), adl_end(Range), C);
 }
 
+/// Provide wrappers to std::mismatch which take ranges instead of having to
+/// pass begin/end explicitly.
+template <typename R1, typename R2> auto mismatch(R1 &&Range1, R2 &&Range2) {
+  return std::mismatch(adl_begin(Range1), adl_end(Range1), adl_begin(Range2),
+                       adl_end(Range2));
+}
+
 template <typename R>
 void stable_sort(R &&Range) {
   std::stable_sort(adl_begin(Range), adl_end(Range));
diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h
index f79df522dc8056..4f5801e1d996c4 100644
--- a/llvm/include/llvm/IR/Intrinsics.h
+++ b/llvm/include/llvm/IR/Intrinsics.h
@@ -100,10 +100,10 @@ namespace Intrinsic {
                                 StringRef Name);
 
   /// Map a Clang builtin name to an intrinsic ID.
-  ID getIntrinsicForClangBuiltin(const char *Prefix, StringRef BuiltinName);
+  ID getIntrinsicForClangBuiltin(StringRef TargetPrefix, StringRef BuiltinName);
 
   /// Map a MS builtin name to an intrinsic ID.
-  ID getIntrinsicForMSBuiltin(const char *Prefix, StringRef BuiltinName);
+  ID getIntrinsicForMSBuiltin(StringRef TargetPrefix, StringRef BuiltinName);
 
   /// Returns true if the intrinsic ID is for one of the "Constrained
   /// Floating-Point Intrinsics".
diff --git a/llvm/include/llvm/TableGen/StringToOffsetTable.h b/llvm/include/llvm/TableGen/StringToOffsetTable.h
index 66bcc81c94b594..92422a65f70ab3 100644
--- a/llvm/include/llvm/TableGen/StringToOffsetTable.h
+++ b/llvm/include/llvm/TableGen/StringToOffsetTable.h
@@ -26,7 +26,8 @@ class StringToOffsetTable {
   std::string AggregateString;
 
 public:
-  bool Empty() const { return StringOffset.empty(); }
+  bool empty() const { return StringOffset.empty(); }
+  size_t size() const { return AggregateString.size(); }
 
   unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true) {
     auto IterBool =
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index b1307153e9109b..f06cc59ebc00a0 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/ModRef.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TableGen/Error.h"
@@ -28,6 +29,7 @@
 #include <algorithm>
 #include <array>
 #include <cassert>
+#include <cctype>
 #include <map>
 #include <optional>
 #include <string>
@@ -99,7 +101,7 @@ void IntrinsicEmitter::run(raw_ostream &OS, bool Enums) {
     // Emit the intrinsic parameter attributes.
     EmitAttributes(Ints, OS);
 
-    // Emit code to translate GCC builtins into LLVM intrinsics.
+    // Emit code to translate Clang builtins into LLVM intrinsics.
     EmitIntrinsicToBuiltinMap(Ints, true, OS);
 
     // Emit code to translate MS builtins into LLVM intrinsics.
@@ -595,15 +597,15 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
 void IntrinsicEmitter::EmitIntrinsicToBuiltinMap(
     const CodeGenIntrinsicTable &Ints, bool IsClang, raw_ostream &OS) {
   StringRef CompilerName = IsClang ? "Clang" : "MS";
-  StringRef UpperCompilerName = IsClang ? "CLANG" : "MS";
   // map<TargetPrefix, map<BuiltinName, EnumName>>. Note that we iterate over
   // both maps in the code below. For the inner map, entries need to be emitted
   // in the sorted order of `BuiltinName` because we use std::lower_bound to
   // search these entries. For the outer map, it doesn't need be be sorted, but
   // we use a map to eliminate non-determinism in the emitted code.
-  typedef std::map<StringRef, std::map<StringRef, StringRef>> BIMTy;
+  using BIMTy = std::map<StringRef, std::map<StringRef, StringRef>>;
   BIMTy BuiltinMap;
-  StringToOffsetTable Table;
+  StringToOffsetTable OldTable;
+
   for (const CodeGenIntrinsic &Int : Ints) {
     StringRef BuiltinName = IsClang ? Int.ClangBuiltinName : Int.MSBuiltinName;
     if (BuiltinName.empty())
@@ -615,70 +617,171 @@ void IntrinsicEmitter::EmitIntrinsicToBuiltinMap(
       PrintFatalError(Int.TheDef->getLoc(),
                       "Intrinsic '" + Int.TheDef->getName() + "': duplicate " +
                           CompilerName + " builtin name!");
-    Table.GetOrAddStringOffset(BuiltinName);
-  }
 
-  OS << "// Get the LLVM intrinsic that corresponds to a builtin.\n";
-  OS << "// This is used by the C front-end.  The builtin name is passed\n";
-  OS << "// in as BuiltinName, and a target prefix (e.g. 'ppc') is passed\n";
-  OS << "// in as TargetPrefix. The result is assigned to 'IntrinsicID'.\n";
-  OS << "#ifdef GET_LLVM_INTRINSIC_FOR_" << UpperCompilerName << "_BUILTIN\n";
+    OldTable.GetOrAddStringOffset(BuiltinName);
+  }
 
-  OS << "Intrinsic::ID Intrinsic::getIntrinsicFor" << CompilerName
-     << "Builtin(const char "
-     << "*TargetPrefixStr, StringRef BuiltinNameStr) {\n";
+  // For each target, determine the common prefix for all the builtins for that
+  // target. Populate the string table with the names of all the builtins after
+  // removing this common prefix.
+  DenseMap<StringRef, StringRef> CommonPrefixMap;
+  StringToOffsetTable Table;
+  for (const auto &[TargetPrefix, Map] : BuiltinMap) {
+    // The Map is guaranteed to be non-empty here.
+    StringRef CommonPrefix = Map.begin()->first;
+    for (auto &[BuiltinName, EnumName] : Map) {
+      // Update the common prefix.
+      const char *Mismatch = mismatch(CommonPrefix, BuiltinName).first;
+      CommonPrefix = CommonPrefix.take_front(Mismatch - CommonPrefix.begin());
+      if (CommonPrefix.empty())
+        break;
+    }
+    CommonPrefixMap[TargetPrefix] = CommonPrefix;
+    for (auto &[BuiltinName, EnumName] : Map) {
+      StringRef Suffix = BuiltinName.substr(CommonPrefix.size());
+      if (!Suffix.empty())
+        Table.GetOrAddStringOffset(Suffix);
+    }
+  }
 
-  if (Table.Empty()) {
-    OS << "  return Intrinsic::not_intrinsic;\n";
-    OS << "}\n";
-    OS << "#endif\n\n";
+  std::string PreprocessorGuard =
+      "GET_LLVM_INTRINSIC_FOR_" + CompilerName.upper() + "_BUILTIN";
+
+  OS << formatv(R"(
+// Get the LLVM intrinsic that corresponds to a builtin. This is used by the
+// C front-end. The builtin name is passed in as BuiltinName, and a target
+// prefix (e.g. 'ppc') is passed in as TargetPrefix.
+#ifdef {0}
+
+Intrinsic::ID
+Intrinsic::getIntrinsicFor{1}Builtin(StringRef TargetPrefix, 
+                                      StringRef BuiltinName) {
+  using namespace Intrinsic;
+)",
+                PreprocessorGuard, CompilerName);
+
+  if (BuiltinMap.empty()) {
+    OS << formatv(R"(
+  return not_intrinsic;
+  }
+#endif  // {0}
+)",
+                  PreprocessorGuard);
     return;
   }
 
-  OS << "  static constexpr char BuiltinNames[] = {\n";
-  Table.EmitCharArray(OS);
-  OS << "  };\n\n";
+  if (!Table.empty()) {
+    OS << "  static constexpr char BuiltinNames[] = {\n";
+    Table.EmitCharArray(OS);
+    OS << "  };\n\n";
+
+    OS << R"(
+  struct BuiltinEntry {
+    ID IntrinsicID;
+    unsigned StrTabOffset;
+    const char *getName() const { return &BuiltinNames[StrTabOffset]; }
+     bool operator<(StringRef RHS) const {
+       return strncmp(getName(), RHS.data(), RHS.size()) < 0;
+     }
+  };
 
-  OS << "  struct BuiltinEntry {\n";
-  OS << "    Intrinsic::ID IntrinID;\n";
-  OS << "    unsigned StrTabOffset;\n";
-  OS << "    const char *getName() const {\n";
-  OS << "      return &BuiltinNames[StrTabOffset];\n";
-  OS << "    }\n";
-  OS << "    bool operator<(StringRef RHS) const {\n";
-  OS << "      return strncmp(getName(), RHS.data(), RHS.size()) < 0;\n";
-  OS << "    }\n";
-  OS << "  };\n";
+)";
+  }
 
-  OS << "  StringRef TargetPrefix(TargetPrefixStr);\n\n";
+  auto GetLambdaName = [](StringRef TargetPrefix) -> std::string {
+    return "Get" + TargetPrefix.str() + "Intrinsic";
+  };
 
-  // Note: this could emit significantly better code if we cared.
-  for (auto &I : BuiltinMap) {
-    OS << "  ";
-    if (!I.first.empty())
-      OS << "if (TargetPrefix == \"" << I.first << "\") ";
+  // Emit a per target lambda to find the builtin within that target's builtins.
+  // It will return the intrinsic ID if it finds the builtin, else
+  // returns not_intrinsic.
+  bool HasTargetIndependentBuiltins = false;
+  for (const auto &[TargetPrefix, Map] : BuiltinMap) {
+    HasTargetIndependentBuiltins |= TargetPrefix.empty();
+    if (!TargetPrefix.empty())
+      OS << formatv("  // Lookup builtins for {0}.\n", TargetPrefix);
     else
-      OS << "/* Target Independent Builtins */ ";
-    OS << "{\n";
-
-    // Emit the comparisons for this target prefix.
-    OS << "    static constexpr BuiltinEntry " << I.first << "Names[] = {\n";
-    for (const auto &P : I.second) {
-      OS << "      {Intrinsic::" << P.second << ", "
-         << Table.GetOrAddStringOffset(P.first) << "}, // " << P.first << "\n";
+      OS << "  // Lookup target independent builtins.\n";
+
+    std::string LambdaName = GetLambdaName(TargetPrefix);
+    OS << formatv("  auto {0} = [](StringRef Name) -> ID {", LambdaName);
+    StringRef CommonPrefix = CommonPrefixMap[TargetPrefix];
+    if (!CommonPrefix.empty())
+      OS << formatv(R"(
+    if (!Name.consume_front("{0}"))
+      return not_intrinsic;
+)",
+                    CommonPrefix);
+
+    // We need the array only for > 1 entries. If there is just one entry, it
+    // will be covered by the common prefix check.
+    if (Map.size() > 1) {
+      // Emit the comparisons for this target prefix.
+      OS << "    static constexpr BuiltinEntry Names[] = {\n";
+      for (const auto &[BuiltinName, EnumName] : Map) {
+        StringRef Suffix = BuiltinName.substr(CommonPrefix.size());
+        OS << formatv("      {{{0}, {1}}, // {2}\n", EnumName,
+                      Table.GetOrAddStringOffset(Suffix), BuiltinName);
+      }
+      OS << R"(    }; // Names.
+    auto II = lower_bound(Names, Name);
+    if (II != std::end(Names) && II->getName() == Name)
+      return II->IntrinsicID;
+    return not_intrinsic;
+)";
+    } else {
+      // Single entry case. If the prefix check passed, just return the single
+      // intrinsic ID.
+      const auto &[BuiltinName, EnumName] = *Map.begin();
+      OS << formatv("    return {0}; // {1}\n", EnumName, BuiltinName);
     }
-    OS << "    };\n";
-    OS << "    auto I = std::lower_bound(std::begin(" << I.first << "Names),\n";
-    OS << "                              std::end(" << I.first << "Names),\n";
-    OS << "                              BuiltinNameStr);\n";
-    OS << "    if (I != std::end(" << I.first << "Names) &&\n";
-    OS << "        I->getName() == BuiltinNameStr)\n";
-    OS << "      return I->IntrinID;\n";
-    OS << "  }\n";
+    OS << formatv("  }; // end {0}.\n\n", LambdaName);
   }
-  OS << "  return Intrinsic::not_intrinsic;\n";
-  OS << "}\n";
-  OS << "#endif\n\n";
+
+  // After emitting the lambdas, emit a lookup table for the lambdas (except the
+  // target independent one). Use binary search, similar to the table for
+  // builtin names.
+  OS << R"(
+  struct TargetEntry {
+    StringRef TargetPrefix;
+    function_ref<ID(StringRef)> GetIntrinsicFn;
+    bool operator<(StringRef RHS) const {
+      return TargetPrefix < RHS;
+    };
+  };
+  static const TargetEntry TargetTable[] = {
+)";
+
+  for (const auto &[TargetPrefix, Map] : BuiltinMap) {
+    if (TargetPrefix.empty())
+      continue;
+    OS << formatv(R"(    {{"{0}", {1}},)", TargetPrefix,
+                  GetLambdaName(TargetPrefix))
+       << "\n";
+  }
+  OS << "  };\n";
+
+  // Now for the actual lookup, first check the target independent lambda if
+  // we emitted one.
+  if (HasTargetIndependentBuiltins) {
+    OS << formatv(R"(
+  ID IntrinsicID = {0}(BuiltinName);
+  if (IntrinsicID != not_intrinsic)
+     return IntrinsicID;
+)",
+                  GetLambdaName(""));
+  }
+
+  // If a target independent builtin was not found, lookup the target specific.
+  OS << formatv(R"(
+  auto II = lower_bound(TargetTable, TargetPrefix);
+  if (II != std::end(TargetTable) && II->TargetPrefix == TargetPrefix)
+    return II->GetIntrinsicFn(BuiltinName);
+  return not_intrinsic;
+  }
+  #endif // {0}
+)",
+                PreprocessorGuard);
 }
 
 static void EmitIntrinsicEnums(RecordKeeper &RK, raw_ostream &OS) {



More information about the llvm-commits mailing list