[llvm] [LLVM][TableGen] Check overloaded intrinsic mangling suffix conflicts (PR #110324)
Rahul Joshi via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 27 17:47:31 PDT 2024
https://github.com/jurahul updated https://github.com/llvm/llvm-project/pull/110324
>From 46345fb95b7b99380f4e17fe1ba9cba86e37b8fc Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Thu, 26 Sep 2024 17:38:41 -0700
Subject: [PATCH] [LLVM][TableGen] Check overloaded intrinsic mangling suffix
conflicts
---
.../TableGen/intrinsic-overload-conflict.td | 43 ++++++
llvm/unittests/IR/IntrinsicsTest.cpp | 65 ++++++++-
.../TableGen/Basic/CodeGenIntrinsics.cpp | 127 ++++++++++++++++++
llvm/utils/TableGen/Basic/CodeGenIntrinsics.h | 4 +
llvm/utils/TableGen/IntrinsicEmitter.cpp | 2 +-
5 files changed, 237 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/TableGen/intrinsic-overload-conflict.td
diff --git a/llvm/test/TableGen/intrinsic-overload-conflict.td b/llvm/test/TableGen/intrinsic-overload-conflict.td
new file mode 100644
index 00000000000000..2e3c5f6f087fff
--- /dev/null
+++ b/llvm/test/TableGen/intrinsic-overload-conflict.td
@@ -0,0 +1,43 @@
+// RUN: llvm-tblgen -gen-intrinsic-enums -I %p/../../include %s -DTEST_INTRINSICS_SUPPRESS_DEFS | FileCheck %s
+// RUN: not llvm-tblgen -gen-intrinsic-enums -I %p/../../include %s -DTEST_INTRINSICS_SUPPRESS_DEFS -DCONFLICT 2>&1 | FileCheck %s -DFILE=%s --check-prefix=CHECK-CONFLICT
+
+
+include "llvm/IR/Intrinsics.td"
+// CHECK: foo = 1,
+def int_foo : Intrinsic<[llvm_any_ty]>;
+
+// No conflicts, since .bar is not a vaid mangled type.
+// CHECK: foo_bar,
+def int_foo_bar : Intrinsic<[llvm_i32_ty]>;
+
+// CHECK: foo_bar_f32,
+def int_foo_bar_f32 : Intrinsic<[llvm_i32_ty]>;
+
+#ifdef CONFLICT
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.a3` cannot share prefix `llvm.foo.a3` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.bf16` cannot share prefix `llvm.foo.bf16` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.f32` cannot share prefix `llvm.foo.f32` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.f64` cannot share prefix `llvm.foo.f64` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.f_3` cannot share prefix `llvm.foo.f_3` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.i65` cannot share prefix `llvm.foo.i65` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.i65.bar` cannot share prefix `llvm.foo.i65` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.p3` cannot share prefix `llvm.foo.p3` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.s_3` cannot share prefix `llvm.foo.s_3` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.v4` cannot share prefix `llvm.foo.v4` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: 10 errors
+// CHECK-CONFLICT-NOT: error
+
+// Confcts due to suffix being a posssible mangled type
+def int_foo_f32 : Intrinsic<[llvm_i32_ty]>;
+def int_foo_f64 : Intrinsic<[llvm_i32_ty]>;
+def int_foo_bf16: Intrinsic<[llvm_i32_ty]>;
+def int_foo_p3 : Intrinsic<[llvm_i32_ty]>;
+def int_foo_a3 : Intrinsic<[llvm_i32_ty]>;
+def int_foo_v4 : Intrinsic<[llvm_i32_ty]>;
+def int_foo_func : Intrinsic<[llvm_i32_ty], [], [], "llvm.foo.f_3">;
+def int_foo_struct : Intrinsic<[llvm_i32_ty], [], [], "llvm.foo.s_3">;
+def int_foo_t3 : Intrinsic<[llvm_i32_ty]>;
+def int_foo_i65 : Intrinsic<[llvm_i32_ty]>;
+def int_foo_i65_bar : Intrinsic<[llvm_i32_ty]>;
+
+#endif
diff --git a/llvm/unittests/IR/IntrinsicsTest.cpp b/llvm/unittests/IR/IntrinsicsTest.cpp
index 0c4af28a2ab57b..1f9d17c2e0fe56 100644
--- a/llvm/unittests/IR/IntrinsicsTest.cpp
+++ b/llvm/unittests/IR/IntrinsicsTest.cpp
@@ -63,7 +63,7 @@ class IntrinsicsTest : public ::testing::Test {
};
TEST(IntrinsicNameLookup, Basic) {
- static constexpr const char *const NameTable1[] = {
+ static constexpr const char *const NameTable[] = {
"llvm.foo", "llvm.foo.a", "llvm.foo.b", "llvm.foo.b.a", "llvm.foo.c",
};
@@ -74,15 +74,74 @@ TEST(IntrinsicNameLookup, Basic) {
};
for (const auto &[Name, ExpectedIdx] : Tests) {
- int Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable1, Name);
+ int Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable, Name);
EXPECT_EQ(ExpectedIdx, Idx);
if (!StringRef(Name).starts_with("llvm.foo"))
continue;
- Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable1, Name, "foo");
+ Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable, Name, "foo");
EXPECT_EQ(ExpectedIdx, Idx);
}
}
+// Test case to demonstrate potential conflicts between overloaded and non-
+// overloaded intrinsics. The name match works by essentially dividing then
+// name into . separated components and doing successive search for each
+// component. When a search fails, the lowest component of the matching
+// range for the previous component is returned.
+TEST(IntrinsicNameLookup, OverloadConflict) {
+ // Assume possible mangled type strings are just .f32 and .i32.
+ static constexpr const char *const NameTable[] = {
+ "llvm.foo",
+ "llvm.foo.f32",
+ "llvm.foo.i32",
+ };
+
+ // Here, first we match llvm.foo and our search window is [0,2]. Then we try
+ // to match llvm.foo.f64 and there is no match, so it returns the low of the
+ // last match. So this lookup works as expected.
+ int Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable, "llvm.foo.f64");
+ EXPECT_EQ(Idx, 0);
+
+ // Now imagine if llvm.foo has 2 mangling suffixes, .f32 and .f64. The search
+ // will first match llvm.foo to [0, 2] and then llvm.foo.f32 to [1,1] and then
+ // not find any match for llvm.foo.f32.f64. So it will return the low of the
+ // last match, which is llvm.foo.f32. However, the intent was to match
+ // llvm.foo. So the presence of llvm.foo.f32 eliminated the possibility of
+ // matching with llvm.foo. So it seems if we have an intrinsic llvm.foo,
+ // another one with the same suffix and a single .suffix is not going to
+ // cause problems. If there exists another one with 2 or more suffixes,
+ // .suffix0 and .suffix1, its possible that the mangling suffix for llvm.foo
+ // might match with .suffix0 and then the match will fail to match llvm.foo.
+ // .suffix1 won't be a problem because its the last one so the matcher will
+ // try an exact match (in which case exact name in the table was searched for,
+ // so its expected to match that entry).
+ //
+ // This example leads the the following rule: if we have an overloaded
+ // intrinsic with name `llvm.foo` and another one with same prefix and one or
+ // more suffixes, `llvm.foo[.<suffixN>]+`, then the name search will try to
+ // first match against suffix0, then suffix1 etc. If suffix0 can match a
+ // mangled type, then the search for an `llvm.foo` with a mangling suffix can
+ // match against suffix0, preventing a match with `llvm.foo`. If suffix0
+ // cannot match a mangled type, then that cannot happen, so we do not need to
+ // check for later suffixes. Generalizing, the `llvm.foo[.suffixN]+` will
+ // cause a conflict if the first suffix (.suffix0) can match a mangled type
+ // (and then we do not need to check later suffixes) and will not cause a
+ // conflict if it cannot (and then again, we do not need to check for later
+ // suffixes.)
+ Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable, "llvm.foo.f32.f64");
+ EXPECT_EQ(Idx, 1);
+
+ // Here .bar and .f33 do not conflict with the manging suffixes, so the search
+ // should match against llvm.foo.
+ static constexpr const char *const NameTable1[] = {
+ "llvm.foo",
+ "llvm.foo.bar",
+ "llvm.foo.f33",
+ };
+ Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable1, "llvm.foo.f32.f64");
+ EXPECT_EQ(Idx, 0);
+}
+
// Tests to verify getIntrinsicForClangBuiltin.
TEST(IntrinsicNameLookup, ClangBuiltinLookup) {
using namespace Intrinsic;
diff --git a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
index c3bd7efd8387a8..57a73f9e9e399a 100644
--- a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
+++ b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
@@ -76,6 +76,7 @@ CodeGenIntrinsicTable::CodeGenIntrinsicTable(const RecordKeeper &RC) {
CheckDuplicateIntrinsics();
CheckTargetIndependentIntrinsics();
+ CheckOverloadSuffixConflicts();
}
// Check for duplicate intrinsic names.
@@ -124,6 +125,132 @@ void CodeGenIntrinsicTable::CheckTargetIndependentIntrinsics() const {
}
}
+// Return true if the given Suffix looks like mangled type. Note that this
+// check is conservative, but allows all existing LLVM intrinsic suffixes to be
+// consider as not looking like a mangling suffix.
+static bool DoesSuffixLookLikeMangledType(StringRef Suffix) {
+ // Try to match against possible mangling suffixes for various types.
+ // See getMangledTypeStr() for the mangling suffixes possible. It includes
+ // pointer : p[0-9]+
+ // array : a[0-9]+[.+]
+ // struct: : s_/sl_[.+]
+ // function : f_[.+]
+ // vector : v/nxv[0-9]+[.+]
+ // target type : t[.*]
+ // integer : i[0-9]+
+ // named types : See `NamedTypes` below.
+
+ // Match anything with an _, so match function and struct types.
+ if (Suffix.contains('_'))
+ return true;
+
+ // [a|v][0-9|$][.*] // $ is end of string.
+ if (is_contained("av", Suffix[0]) &&
+ (Suffix.size() == 1 || isDigit(Suffix[1])))
+ return true;
+
+ // nxv[0-9|$][.*]
+ if (Suffix.starts_with("nxv") && (Suffix.size() == 3 || isDigit(Suffix[3])))
+ return true;
+ // t[.*]
+ if (Suffix.starts_with('t'))
+ return false;
+
+ // [p|i][0-9]+
+ if ((Suffix[0] == 'i' || Suffix[0] == 'p') &&
+ all_of(Suffix.drop_front(), isDigit))
+ return true;
+
+ // Match one of the named types.
+ StringLiteral NamedTypes[] = {"isVoid", "Metadata", "f16", "f32",
+ "f64", "f80", "f128", "bf16",
+ "ppcf128", "x86amx"};
+ return is_contained(NamedTypes, Suffix);
+}
+
+// Check for conflicts with overloaded intrinsics. If there exists an overloaded
+// intrinsic with base name `llvm.target.foo`, LLVM will add a mangling suffix
+// to it to encode the overload types. This mangling suffix is 1 or more .
+// prefixed mangled type string as defined in `getMangledTypeStr`. If there
+// exists another intrinsic `llvm.target.foo[.<suffixN>]+`, which has the same
+// prefix as the overloaded intrinsic, its possible that there may be a name
+// conflict with the overloaded intrinsic and either one may interfere with name
+// lookup for the other, leading to wrong intrinsic ID being assigned.
+//
+// The actual name lookup in the intrinsic name table is done by a search
+// on each successive . separted component of the intrinsic name (see
+// `lookupLLVMIntrinsicByName`). Consider first the case where there exists a
+// non-overloaded intrinsic `llvm.target.foo[.suffix]+`. For the non-overloaded
+// intrinsics, the name lookup is an exact match, so the presence of the
+// overloaded intrinsic with the same prefix will not interfere with the
+// search. However, a lookup intended to match the overloaded intrinsic might be
+// affected by the presence of another entry in the name table with the same
+// prefix. See the `OverloadConflict` sub-test in IntrinsicsTest.cpp to
+// demonstrate the cases where there is a conflict and for the exact check
+// (replicated below) for when the conflict can or cannot happen.
+//
+// Since LLVM's name lookup first selects the target specific (or target
+// independent) slice of the name table to look into, intrinsics in 2 different
+// slices cannot conflict with each other. Within a specific slice,
+// if we have an overloaded intrinsic with name `llvm.target.foo` and another
+// one with same prefix and one or more suffixes `llvm.target.foo[.<suffixN>]+`,
+// then the name search will try to first match against suffix0, then suffix1
+// etc. If suffix0 can match a mangled type, then the search for an
+// `llvm.target.foo` with a mangling suffix can match against suffix0,
+// preventing a match with `llvm.target.foo`. If suffix0 cannot match a mangled
+// type, then that cannot happen, so we do not need to check for later suffixes.
+//
+// Generalizing, the `llvm.target.foo[.suffixN]+` will cause a conflict if the
+// first suffix (.suffix0) can match a mangled type (and then we do not need to
+// check later suffixes) and will not cause a conflict if it cannot (and then
+// again, we do not need to check for later suffixes.)
+void CodeGenIntrinsicTable::CheckOverloadSuffixConflicts() const {
+ for (const TargetSet &Set : Targets) {
+ const CodeGenIntrinsic *Overloaded = nullptr;
+ for (const CodeGenIntrinsic &Int : (*this)[Set]) {
+ // If we do not have an overloaded intrinsic to check against, nothing
+ // to do except potentially identifying this as a candidate for checking
+ // against in future iteration.
+ if (!Overloaded) {
+ if (Int.isOverloaded)
+ Overloaded = ∬
+ continue;
+ }
+
+ StringRef Name = Int.Name;
+ StringRef OverloadName = Overloaded->Name;
+ // If we have an overloaded intrinsic to check again, check if its name is
+ // a proper prefix of this intrinsic.
+ if (Name.starts_with(OverloadName) && Name[OverloadName.size()] == '.') {
+ // If yes, verify suffixes and flag an error.
+ StringRef Suffixes = Name.drop_front(OverloadName.size() + 1);
+
+ // Only need to look at the first suffix.
+ StringRef Suffix0 = Suffixes.split('.').first;
+
+ if (!DoesSuffixLookLikeMangledType(Suffix0))
+ continue;
+
+ unsigned SuffixSize = OverloadName.size() + 1 + Suffix0.size();
+ // If suffix looks like mangling suffix, flag it as an error.
+ PrintError(Int.TheDef->getLoc(),
+ "intrinsic `" + Name + "` cannot share prefix `" +
+ Name.take_front(SuffixSize) +
+ "` with another overloaded intrinsic `" + OverloadName +
+ "`");
+ PrintNote(Overloaded->TheDef->getLoc(),
+ "Overloaded intrinsic `" + OverloadName + "` defined here");
+ continue;
+ }
+
+ // If we find an intrinsic that is not a proper prefix, any later
+ // intrinsic is also not going to be a proper prefix, so invalidate the
+ // overloaded to check against.
+ Overloaded = nullptr;
+ }
+ }
+}
+
CodeGenIntrinsic &CodeGenIntrinsicMap::operator[](const Record *Record) {
if (!Record->isSubClassOf("Intrinsic"))
PrintFatalError("Intrinsic defs should be subclass of 'Intrinsic' class");
diff --git a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h
index 1cdeaacd52dcdb..cf6375c77c133a 100644
--- a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h
+++ b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h
@@ -186,6 +186,9 @@ class CodeGenIntrinsicTable {
auto begin() const { return Intrinsics.begin(); }
auto end() const { return Intrinsics.end(); }
CodeGenIntrinsic &operator[](size_t Pos) { return Intrinsics[Pos]; }
+ ArrayRef<CodeGenIntrinsic> operator[](const TargetSet &Set) const {
+ return ArrayRef(&Intrinsics[Set.Offset], Set.Count);
+ }
const CodeGenIntrinsic &operator[](size_t Pos) const {
return Intrinsics[Pos];
}
@@ -193,6 +196,7 @@ class CodeGenIntrinsicTable {
private:
void CheckDuplicateIntrinsics() const;
void CheckTargetIndependentIntrinsics() const;
+ void CheckOverloadSuffixConflicts() const;
};
// This class builds `CodeGenIntrinsic` on demand for a given Def.
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index efa067e60de439..d5e5cf1a3b5685 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -154,7 +154,7 @@ void IntrinsicEmitter::EmitEnumInfo(const CodeGenIntrinsicTable &Ints,
OS << "// Enum values for intrinsics.\n";
bool First = true;
- for (const auto &Int : ArrayRef(&Ints[Set->Offset], Set->Count)) {
+ for (const auto &Int : Ints[*Set]) {
OS << " " << Int.EnumName;
// Assign a value to the first intrinsic in this target set so that all
More information about the llvm-commits
mailing list