[llvm] [TableGen] Rework `EmitIntrinsicToBuiltinMap` (PR #104681)
Rahul Joshi via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 19 20:43:12 PDT 2024
https://github.com/jurahul updated https://github.com/llvm/llvm-project/pull/104681
>From f5ee88cbbb8c5739402674c54bcac4bae3f17e77 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Sat, 17 Aug 2024 06:43:36 -0700
Subject: [PATCH] [TableGen] Rework `EmitIntrinsicToBuiltinMap`
Rework `IntrinsicEmitter::EmitIntrinsicToBuiltinMap` for improved
performance, and refactor the code.
Performance:
- Current generated code does a linear search on the TargetPrefix,
followed by a binary search on the builtin name within that
target's builtins.
- Improve the performance of this code in 2 ways:
(a) Use binary search on the target prefix to lookup the builtin
table for a target.
(b) Improve the (common) case of when all builtins for a target
share a common prefix. Within that target's lambda, check this
common prefix first, and then do the binary search on the
builtin name with the common prefix removed. This should help
both code size by creating a smaller static string table and
runtime by reducing the cost of binary search on smaller
strings.
Refactor:
- Use range based for loops for iterating over maps.
- Use formatv() and C++ raw string literals to simplify the emission code.
---
llvm/benchmarks/CMakeLists.txt | 2 +
.../GetIntrinsicForClangBuiltin.cpp | 50 ++++
llvm/include/llvm/IR/Intrinsics.h | 4 +-
.../llvm/TableGen/StringToOffsetTable.h | 13 +-
llvm/utils/TableGen/IntrinsicEmitter.cpp | 220 ++++++++++++------
5 files changed, 221 insertions(+), 68 deletions(-)
create mode 100644 llvm/benchmarks/GetIntrinsicForClangBuiltin.cpp
diff --git a/llvm/benchmarks/CMakeLists.txt b/llvm/benchmarks/CMakeLists.txt
index 52d726451ada96..713d4ccd3c5975 100644
--- a/llvm/benchmarks/CMakeLists.txt
+++ b/llvm/benchmarks/CMakeLists.txt
@@ -1,5 +1,7 @@
set(LLVM_LINK_COMPONENTS
+ Core
Support)
add_benchmark(DummyYAML DummyYAML.cpp PARTIAL_SOURCES_INTENDED)
add_benchmark(xxhash xxhash.cpp PARTIAL_SOURCES_INTENDED)
+add_benchmark(GetIntrinsicForClangBuiltin GetIntrinsicForClangBuiltin.cpp PARTIAL_SOURCES_INTENDED)
diff --git a/llvm/benchmarks/GetIntrinsicForClangBuiltin.cpp b/llvm/benchmarks/GetIntrinsicForClangBuiltin.cpp
new file mode 100644
index 00000000000000..fa9c528424c95f
--- /dev/null
+++ b/llvm/benchmarks/GetIntrinsicForClangBuiltin.cpp
@@ -0,0 +1,50 @@
+#include "benchmark/benchmark.h"
+#include "llvm/IR/Intrinsics.h"
+
+using namespace llvm;
+using namespace Intrinsic;
+
+// Benchmark intrinsic lookup from a variety of targets.
+static void BM_GetIntrinsicForClangBuiltin(benchmark::State &state) {
+ static const char *Builtins[] = {
+ "__builtin_adjust_trampoline",
+ "__builtin_trap",
+ "__builtin_arm_ttest",
+ "__builtin_amdgcn_cubetc",
+ "__builtin_amdgcn_udot2",
+ "__builtin_arm_stc",
+ "__builtin_bpf_compare",
+ "__builtin_HEXAGON_A2_max",
+ "__builtin_lasx_xvabsd_b",
+ "__builtin_mips_dlsa",
+ "__nvvm_floor_f",
+ "__builtin_altivec_vslb",
+ "__builtin_r600_read_tgid_x",
+ "__builtin_riscv_aes64im",
+ "__builtin_s390_vcksm",
+ "__builtin_ve_vl_pvfmksge_Mvl",
+ "__builtin_ia32_axor64",
+ "__builtin_bitrev",
+ };
+ static const char *Targets[] = {"", "aarch64", "amdgcn", "mips",
+ "nvvm", "r600", "riscv"};
+
+ for (auto _ : state) {
+ for (auto Builtin : Builtins)
+ for (auto Target : Targets)
+ getIntrinsicForClangBuiltin(Target, Builtin);
+ }
+}
+
+static void
+BM_GetIntrinsicForClangBuiltinHexagonFirst(benchmark::State &state) {
+ // Exercise the worst case by looking for the first builtin for a target
+ // that has a lot of builtins.
+ for (auto _ : state)
+ getIntrinsicForClangBuiltin("hexagon", "__builtin_HEXAGON_A2_abs");
+}
+
+BENCHMARK(BM_GetIntrinsicForClangBuiltin);
+BENCHMARK(BM_GetIntrinsicForClangBuiltinHexagonFirst);
+
+BENCHMARK_MAIN();
diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h
index f79df522dc8056..4f5801e1d996c4 100644
--- a/llvm/include/llvm/IR/Intrinsics.h
+++ b/llvm/include/llvm/IR/Intrinsics.h
@@ -100,10 +100,10 @@ namespace Intrinsic {
StringRef Name);
/// Map a Clang builtin name to an intrinsic ID.
- ID getIntrinsicForClangBuiltin(const char *Prefix, StringRef BuiltinName);
+ ID getIntrinsicForClangBuiltin(StringRef TargetPrefix, StringRef BuiltinName);
/// Map a MS builtin name to an intrinsic ID.
- ID getIntrinsicForMSBuiltin(const char *Prefix, StringRef BuiltinName);
+ ID getIntrinsicForMSBuiltin(StringRef TargetPrefix, StringRef BuiltinName);
/// Returns true if the intrinsic ID is for one of the "Constrained
/// Floating-Point Intrinsics".
diff --git a/llvm/include/llvm/TableGen/StringToOffsetTable.h b/llvm/include/llvm/TableGen/StringToOffsetTable.h
index 66bcc81c94b594..7fb9d02d77c704 100644
--- a/llvm/include/llvm/TableGen/StringToOffsetTable.h
+++ b/llvm/include/llvm/TableGen/StringToOffsetTable.h
@@ -14,6 +14,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
+#include <optional>
namespace llvm {
@@ -26,7 +27,8 @@ class StringToOffsetTable {
std::string AggregateString;
public:
- bool Empty() const { return StringOffset.empty(); }
+ bool empty() const { return StringOffset.empty(); }
+ size_t size() const { return AggregateString.size(); }
unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true) {
auto IterBool =
@@ -41,6 +43,15 @@ class StringToOffsetTable {
return IterBool.first->second;
}
+ // Returns the offset of `Str` in the table if it is present, else returns
+ // std::nullopt.
+ std::optional<unsigned> GetStringOffset(StringRef Str) const {
+ auto II = StringOffset.find(Str);
+ if (II == StringOffset.end())
+ return std::nullopt;
+ return II->second;
+ }
+
void EmitString(raw_ostream &O) {
// Escape the string.
SmallString<256> Str;
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index b1307153e9109b..71a33ce36ef1cb 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/ModRef.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
@@ -28,6 +29,7 @@
#include <algorithm>
#include <array>
#include <cassert>
+#include <cctype>
#include <map>
#include <optional>
#include <string>
@@ -99,7 +101,7 @@ void IntrinsicEmitter::run(raw_ostream &OS, bool Enums) {
// Emit the intrinsic parameter attributes.
EmitAttributes(Ints, OS);
- // Emit code to translate GCC builtins into LLVM intrinsics.
+ // Emit code to translate Clang builtins into LLVM intrinsics.
EmitIntrinsicToBuiltinMap(Ints, true, OS);
// Emit code to translate MS builtins into LLVM intrinsics.
@@ -596,89 +598,177 @@ void IntrinsicEmitter::EmitIntrinsicToBuiltinMap(
const CodeGenIntrinsicTable &Ints, bool IsClang, raw_ostream &OS) {
StringRef CompilerName = IsClang ? "Clang" : "MS";
StringRef UpperCompilerName = IsClang ? "CLANG" : "MS";
- // map<TargetPrefix, map<BuiltinName, EnumName>>. Note that we iterate over
- // both maps in the code below. For the inner map, entries need to be emitted
- // in the sorted order of `BuiltinName` because we use std::lower_bound to
- // search these entries. For the outer map, it doesn't need be be sorted, but
- // we use a map to eliminate non-determinism in the emitted code.
- typedef std::map<StringRef, std::map<StringRef, StringRef>> BIMTy;
- BIMTy BuiltinMap;
- StringToOffsetTable Table;
+
+ // map<TargetPrefix, pair<map<BuiltinName, EnumName>, CommonPrefix>>.
+ // Note that we iterate over both the maps in the code below and both
+ // iterations need to iterate in sorted key order. For the inner map, entries
+ // need to be emitted in the sorted order of `BuiltinName` with `CommonPrefix`
+ // removed, because we use std::lower_bound to search these entries. For the
+ // outer map as well, entries need to be emitted in sorted order of
+ // `TargetPrefix` as we use std::lower_bound to search these entries.
+ using BIMEntryTy =
+ std::pair<std::map<StringRef, StringRef>, std::optional<StringRef>>;
+ std::map<StringRef, BIMEntryTy> BuiltinMap;
+
for (const CodeGenIntrinsic &Int : Ints) {
StringRef BuiltinName = IsClang ? Int.ClangBuiltinName : Int.MSBuiltinName;
if (BuiltinName.empty())
continue;
// Get the map for this target prefix.
- std::map<StringRef, StringRef> &BIM = BuiltinMap[Int.TargetPrefix];
+ auto &[Map, CommonPrefix] = BuiltinMap[Int.TargetPrefix];
- if (!BIM.insert(std::pair(BuiltinName, Int.EnumName)).second)
+ if (!Map.insert({BuiltinName, Int.EnumName}).second)
PrintFatalError(Int.TheDef->getLoc(),
"Intrinsic '" + Int.TheDef->getName() + "': duplicate " +
CompilerName + " builtin name!");
- Table.GetOrAddStringOffset(BuiltinName);
- }
- OS << "// Get the LLVM intrinsic that corresponds to a builtin.\n";
- OS << "// This is used by the C front-end. The builtin name is passed\n";
- OS << "// in as BuiltinName, and a target prefix (e.g. 'ppc') is passed\n";
- OS << "// in as TargetPrefix. The result is assigned to 'IntrinsicID'.\n";
- OS << "#ifdef GET_LLVM_INTRINSIC_FOR_" << UpperCompilerName << "_BUILTIN\n";
+ // Update common prefix.
+ if (!CommonPrefix) {
+ // For the first builtin for this target, initialize the common prefix.
+ CommonPrefix = BuiltinName;
+ continue;
+ }
- OS << "Intrinsic::ID Intrinsic::getIntrinsicFor" << CompilerName
- << "Builtin(const char "
- << "*TargetPrefixStr, StringRef BuiltinNameStr) {\n";
+ // Update the common prefix. Note that this assumes that `take_front` will
+ // never set the `Data` pointer in CommonPrefix to nullptr.
+ const char *Mismatch = mismatch(*CommonPrefix, BuiltinName).first;
+ *CommonPrefix = CommonPrefix->take_front(Mismatch - CommonPrefix->begin());
+ }
- if (Table.Empty()) {
- OS << " return Intrinsic::not_intrinsic;\n";
- OS << "}\n";
- OS << "#endif\n\n";
+ // Populate the string table with the names of all the builtins after
+ // removing this common prefix.
+ StringToOffsetTable Table;
+ for (const auto &[TargetPrefix, Entry] : BuiltinMap) {
+ auto &[Map, CommonPrefix] = Entry;
+ for (auto &[BuiltinName, EnumName] : Map) {
+ StringRef Suffix = BuiltinName.substr(CommonPrefix->size());
+ Table.GetOrAddStringOffset(Suffix);
+ }
+ }
+
+ OS << formatv(R"(
+// Get the LLVM intrinsic that corresponds to a builtin. This is used by the
+// C front-end. The builtin name is passed in as BuiltinName, and a target
+// prefix (e.g. 'ppc') is passed in as TargetPrefix.
+#ifdef GET_LLVM_INTRINSIC_FOR_{0}_BUILTIN
+Intrinsic::ID
+Intrinsic::getIntrinsicFor{1}Builtin(StringRef TargetPrefix,
+ StringRef BuiltinName) {{
+ using namespace Intrinsic;
+)",
+ UpperCompilerName, CompilerName);
+
+ if (BuiltinMap.empty()) {
+ OS << formatv(R"(
+ return not_intrinsic;
+ }
+#endif // GET_LLVM_INTRINSIC_FOR_{0}_BUILTIN
+)",
+ UpperCompilerName);
return;
}
- OS << " static constexpr char BuiltinNames[] = {\n";
- Table.EmitCharArray(OS);
- OS << " };\n\n";
+ if (!Table.empty()) {
+ OS << " static constexpr char BuiltinNames[] = {\n";
+ Table.EmitCharArray(OS);
+ OS << " };\n\n";
+
+ OS << R"(
+ struct BuiltinEntry {
+ constexpr BuiltinEntry(ID IntrinsicID, unsigned Offset)
+ : IntrinsicID(IntrinsicID), Suffix(&BuiltinNames[Offset]) {}
+ ID IntrinsicID;
+ StringRef Suffix;
+ bool operator<(StringRef RHS) const { return Suffix < RHS; }
+ };
- OS << " struct BuiltinEntry {\n";
- OS << " Intrinsic::ID IntrinID;\n";
- OS << " unsigned StrTabOffset;\n";
- OS << " const char *getName() const {\n";
- OS << " return &BuiltinNames[StrTabOffset];\n";
- OS << " }\n";
- OS << " bool operator<(StringRef RHS) const {\n";
- OS << " return strncmp(getName(), RHS.data(), RHS.size()) < 0;\n";
- OS << " }\n";
- OS << " };\n";
+)";
+ }
- OS << " StringRef TargetPrefix(TargetPrefixStr);\n\n";
-
- // Note: this could emit significantly better code if we cared.
- for (auto &I : BuiltinMap) {
- OS << " ";
- if (!I.first.empty())
- OS << "if (TargetPrefix == \"" << I.first << "\") ";
- else
- OS << "/* Target Independent Builtins */ ";
- OS << "{\n";
-
- // Emit the comparisons for this target prefix.
- OS << " static constexpr BuiltinEntry " << I.first << "Names[] = {\n";
- for (const auto &P : I.second) {
- OS << " {Intrinsic::" << P.second << ", "
- << Table.GetOrAddStringOffset(P.first) << "}, // " << P.first << "\n";
+ // Emit a per target table of builtin names.
+ bool HasTargetIndependentBuiltins = false;
+ StringRef TargetIndepndentCommonPrefix;
+ for (const auto &[TargetPrefix, Entry] : BuiltinMap) {
+ const auto &[Map, CommonPrefix] = Entry;
+ if (!TargetPrefix.empty()) {
+ OS << formatv(" // Builtins for {0}.\n", TargetPrefix);
+ } else {
+ OS << " // Target independent builtins.\n";
+ HasTargetIndependentBuiltins = true;
+ TargetIndepndentCommonPrefix = *CommonPrefix;
+ }
+
+ // Emit the builtin table for this target prefix.
+ OS << formatv(" static constexpr BuiltinEntry {0}Names[] = {{\n",
+ TargetPrefix);
+ for (const auto &[BuiltinName, EnumName] : Map) {
+ StringRef Suffix = BuiltinName.substr(CommonPrefix->size());
+ OS << formatv(" {{{0}, {1}}, // {2}\n", EnumName,
+ *Table.GetStringOffset(Suffix), BuiltinName);
}
- OS << " };\n";
- OS << " auto I = std::lower_bound(std::begin(" << I.first << "Names),\n";
- OS << " std::end(" << I.first << "Names),\n";
- OS << " BuiltinNameStr);\n";
- OS << " if (I != std::end(" << I.first << "Names) &&\n";
- OS << " I->getName() == BuiltinNameStr)\n";
- OS << " return I->IntrinID;\n";
- OS << " }\n";
+ OS << formatv(" }; // {0}Names\n\n", TargetPrefix);
}
- OS << " return Intrinsic::not_intrinsic;\n";
- OS << "}\n";
- OS << "#endif\n\n";
+
+ // After emitting the builtin tables for all targets, emit a lookup table for
+ // all targets. We will use binary search, similar to the table for builtin
+ // names to lookup into this table.
+ OS << R"(
+ struct TargetEntry {
+ StringRef TargetPrefix;
+ ArrayRef<BuiltinEntry> Names;
+ StringRef CommonPrefix;
+ bool operator<(StringRef RHS) const {
+ return TargetPrefix < RHS;
+ };
+ };
+ static constexpr TargetEntry TargetTable[] = {
+)";
+
+ for (const auto &[TargetPrefix, Entry] : BuiltinMap) {
+ const auto &[Map, CommonPrefix] = Entry;
+ if (TargetPrefix.empty())
+ continue;
+ OS << formatv(R"( {{"{0}", {0}Names, "{2}"},)", TargetPrefix,
+ TargetPrefix, CommonPrefix)
+ << "\n";
+ }
+ OS << " };\n";
+
+ // Now for the actual lookup, first check the target independent table if
+ // we emitted one.
+ if (HasTargetIndependentBuiltins) {
+ OS << formatv(R"(
+ // Check if it's a target independent builtin.
+ // Copy the builtin name so we can use it in consume_front without clobbering
+ // if for the lookup in the target specific table.
+ StringRef Suffix = BuiltinName;
+ if (Suffix.consume_front("{0}")) {{
+ auto II = lower_bound(Names, Suffix);
+ if (II != std::end(Names) && II->Suffix == Suffix)
+ return II->IntrinsicID;
+ }
+)",
+ TargetIndepndentCommonPrefix);
+ }
+
+ // If a target independent builtin was not found, lookup the target specific.
+ OS << formatv(R"(
+ auto TI = lower_bound(TargetTable, TargetPrefix);
+ if (TI == std::end(TargetTable) || TI->TargetPrefix != TargetPrefix)
+ return not_intrinsic;
+ // This is the last use of BuiltinName, so no need to copy before using it in
+ // consume_front.
+ if (!BuiltinName.consume_front(TI->CommonPrefix))
+ return not_intrinsic;
+ auto II = lower_bound(TI->Names, BuiltinName);
+ if (II == std::end(TI->Names) || II->Suffix != BuiltinName)
+ return not_intrinsic;
+ return II->IntrinsicID;
+}
+#endif // GET_LLVM_INTRINSIC_FOR_{0}_BUILTIN
+
+)",
+ UpperCompilerName);
}
static void EmitIntrinsicEnums(RecordKeeper &RK, raw_ostream &OS) {
More information about the llvm-commits
mailing list