[llvm] [LLVM][TableGen] Check overloaded intrinsic mangling suffix conflicts (PR #110324)

Rahul Joshi via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 27 17:47:31 PDT 2024


https://github.com/jurahul updated https://github.com/llvm/llvm-project/pull/110324

>From 46345fb95b7b99380f4e17fe1ba9cba86e37b8fc Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Thu, 26 Sep 2024 17:38:41 -0700
Subject: [PATCH] [LLVM][TableGen] Check overloaded intrinsic mangling suffix
 conflicts

---
 .../TableGen/intrinsic-overload-conflict.td   |  43 ++++++
 llvm/unittests/IR/IntrinsicsTest.cpp          |  65 ++++++++-
 .../TableGen/Basic/CodeGenIntrinsics.cpp      | 127 ++++++++++++++++++
 llvm/utils/TableGen/Basic/CodeGenIntrinsics.h |   4 +
 llvm/utils/TableGen/IntrinsicEmitter.cpp      |   2 +-
 5 files changed, 237 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/TableGen/intrinsic-overload-conflict.td

diff --git a/llvm/test/TableGen/intrinsic-overload-conflict.td b/llvm/test/TableGen/intrinsic-overload-conflict.td
new file mode 100644
index 00000000000000..2e3c5f6f087fff
--- /dev/null
+++ b/llvm/test/TableGen/intrinsic-overload-conflict.td
@@ -0,0 +1,43 @@
+// RUN: llvm-tblgen -gen-intrinsic-enums -I %p/../../include %s -DTEST_INTRINSICS_SUPPRESS_DEFS | FileCheck %s
+// RUN: not llvm-tblgen -gen-intrinsic-enums -I %p/../../include %s -DTEST_INTRINSICS_SUPPRESS_DEFS -DCONFLICT 2>&1 | FileCheck %s -DFILE=%s --check-prefix=CHECK-CONFLICT
+
+
+include "llvm/IR/Intrinsics.td"
+// CHECK: foo = 1,
+def int_foo : Intrinsic<[llvm_any_ty]>;
+
+// No conflicts, since .bar is not a vaid mangled type.
+// CHECK: foo_bar,
+def int_foo_bar : Intrinsic<[llvm_i32_ty]>;
+
+// CHECK: foo_bar_f32,
+def int_foo_bar_f32 : Intrinsic<[llvm_i32_ty]>;
+
+#ifdef CONFLICT
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.a3` cannot share prefix `llvm.foo.a3` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.bf16` cannot share prefix `llvm.foo.bf16` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.f32` cannot share prefix `llvm.foo.f32` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.f64` cannot share prefix `llvm.foo.f64` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.f_3` cannot share prefix `llvm.foo.f_3` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.i65` cannot share prefix `llvm.foo.i65` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.i65.bar` cannot share prefix `llvm.foo.i65` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.p3` cannot share prefix `llvm.foo.p3` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.s_3` cannot share prefix `llvm.foo.s_3` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.v4` cannot share prefix `llvm.foo.v4` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: 10 errors
+// CHECK-CONFLICT-NOT: error
+
+// Confcts due to suffix being a posssible mangled type
+def int_foo_f32 : Intrinsic<[llvm_i32_ty]>;
+def int_foo_f64 : Intrinsic<[llvm_i32_ty]>;
+def int_foo_bf16: Intrinsic<[llvm_i32_ty]>;
+def int_foo_p3 : Intrinsic<[llvm_i32_ty]>;
+def int_foo_a3 : Intrinsic<[llvm_i32_ty]>;
+def int_foo_v4 : Intrinsic<[llvm_i32_ty]>;
+def int_foo_func : Intrinsic<[llvm_i32_ty], [], [], "llvm.foo.f_3">;
+def int_foo_struct : Intrinsic<[llvm_i32_ty], [], [], "llvm.foo.s_3">;
+def int_foo_t3 : Intrinsic<[llvm_i32_ty]>;
+def int_foo_i65 : Intrinsic<[llvm_i32_ty]>;
+def int_foo_i65_bar : Intrinsic<[llvm_i32_ty]>;
+
+#endif
diff --git a/llvm/unittests/IR/IntrinsicsTest.cpp b/llvm/unittests/IR/IntrinsicsTest.cpp
index 0c4af28a2ab57b..1f9d17c2e0fe56 100644
--- a/llvm/unittests/IR/IntrinsicsTest.cpp
+++ b/llvm/unittests/IR/IntrinsicsTest.cpp
@@ -63,7 +63,7 @@ class IntrinsicsTest : public ::testing::Test {
 };
 
 TEST(IntrinsicNameLookup, Basic) {
-  static constexpr const char *const NameTable1[] = {
+  static constexpr const char *const NameTable[] = {
       "llvm.foo", "llvm.foo.a", "llvm.foo.b", "llvm.foo.b.a", "llvm.foo.c",
   };
 
@@ -74,15 +74,74 @@ TEST(IntrinsicNameLookup, Basic) {
   };
 
   for (const auto &[Name, ExpectedIdx] : Tests) {
-    int Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable1, Name);
+    int Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable, Name);
     EXPECT_EQ(ExpectedIdx, Idx);
     if (!StringRef(Name).starts_with("llvm.foo"))
       continue;
-    Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable1, Name, "foo");
+    Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable, Name, "foo");
     EXPECT_EQ(ExpectedIdx, Idx);
   }
 }
 
+// Test case to demonstrate potential conflicts between overloaded and non-
+// overloaded intrinsics. The name match works by essentially dividing then
+// name into . separated components and doing successive search for each
+// component. When a search fails, the lowest component of the matching
+// range for the previous component is returned.
+TEST(IntrinsicNameLookup, OverloadConflict) {
+  // Assume possible mangled type strings are just .f32 and .i32.
+  static constexpr const char *const NameTable[] = {
+      "llvm.foo",
+      "llvm.foo.f32",
+      "llvm.foo.i32",
+  };
+
+  // Here, first we match llvm.foo and our search window is [0,2]. Then we try
+  // to match llvm.foo.f64 and there is no match, so it returns the low of the
+  // last match. So this lookup works as expected.
+  int Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable, "llvm.foo.f64");
+  EXPECT_EQ(Idx, 0);
+
+  // Now imagine if llvm.foo has 2 mangling suffixes, .f32 and .f64. The search
+  // will first match llvm.foo to [0, 2] and then llvm.foo.f32 to [1,1] and then
+  // not find any match for llvm.foo.f32.f64. So it will return the low of the
+  // last match, which is llvm.foo.f32. However, the intent was to match
+  // llvm.foo. So the presence of llvm.foo.f32 eliminated the possibility of
+  // matching with llvm.foo. So it seems if we have an intrinsic llvm.foo,
+  // another one with the same suffix and a single .suffix is not going to
+  // cause problems. If there exists another one with 2 or more suffixes,
+  // .suffix0 and .suffix1, its possible that the mangling suffix for llvm.foo
+  // might match with .suffix0 and then the match will fail to match llvm.foo.
+  // .suffix1 won't be a problem because its the last one so the matcher will
+  // try an exact match (in which case exact name in the table was searched for,
+  // so its expected to match that entry).
+  //
+  // This example leads the the following rule: if we have an overloaded
+  // intrinsic with name `llvm.foo` and another one with same prefix and one or
+  // more suffixes, `llvm.foo[.<suffixN>]+`, then the name search will try to
+  // first match against suffix0, then suffix1 etc. If suffix0 can match a
+  // mangled type, then the search for an `llvm.foo` with a mangling suffix can
+  // match against suffix0, preventing a match with `llvm.foo`. If suffix0
+  // cannot match a mangled type, then that cannot happen, so we do not need to
+  // check for later suffixes. Generalizing, the `llvm.foo[.suffixN]+` will
+  // cause a conflict if the first suffix (.suffix0) can match a mangled type
+  // (and then we do not need to check later suffixes) and will not cause a
+  // conflict if it cannot (and then again, we do not need to check for later
+  // suffixes.)
+  Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable, "llvm.foo.f32.f64");
+  EXPECT_EQ(Idx, 1);
+
+  // Here .bar and .f33 do not conflict with the manging suffixes, so the search
+  // should match against llvm.foo.
+  static constexpr const char *const NameTable1[] = {
+      "llvm.foo",
+      "llvm.foo.bar",
+      "llvm.foo.f33",
+  };
+  Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable1, "llvm.foo.f32.f64");
+  EXPECT_EQ(Idx, 0);
+}
+
 // Tests to verify getIntrinsicForClangBuiltin.
 TEST(IntrinsicNameLookup, ClangBuiltinLookup) {
   using namespace Intrinsic;
diff --git a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
index c3bd7efd8387a8..57a73f9e9e399a 100644
--- a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
+++ b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
@@ -76,6 +76,7 @@ CodeGenIntrinsicTable::CodeGenIntrinsicTable(const RecordKeeper &RC) {
 
   CheckDuplicateIntrinsics();
   CheckTargetIndependentIntrinsics();
+  CheckOverloadSuffixConflicts();
 }
 
 // Check for duplicate intrinsic names.
@@ -124,6 +125,132 @@ void CodeGenIntrinsicTable::CheckTargetIndependentIntrinsics() const {
   }
 }
 
+// Return true if the given Suffix looks like mangled type. Note that this
+// check is conservative, but allows all existing LLVM intrinsic suffixes to be
+// consider as not looking like a mangling suffix.
+static bool DoesSuffixLookLikeMangledType(StringRef Suffix) {
+  // Try to match against possible mangling suffixes for various types.
+  // See getMangledTypeStr() for the mangling suffixes possible. It includes
+  //  pointer       : p[0-9]+
+  //  array         : a[0-9]+[.+]
+  //  struct:       : s_/sl_[.+]
+  //  function      : f_[.+]
+  //  vector        : v/nxv[0-9]+[.+]
+  //  target type   : t[.*]
+  //  integer       : i[0-9]+
+  //  named types   : See `NamedTypes` below.
+
+  // Match anything with an _, so match function and struct types.
+  if (Suffix.contains('_'))
+    return true;
+
+  // [a|v][0-9|$][.*] // $ is end of string.
+  if (is_contained("av", Suffix[0]) &&
+      (Suffix.size() == 1 || isDigit(Suffix[1])))
+    return true;
+
+  // nxv[0-9|$][.*]
+  if (Suffix.starts_with("nxv") && (Suffix.size() == 3 || isDigit(Suffix[3])))
+    return true;
+  // t[.*]
+  if (Suffix.starts_with('t'))
+    return false;
+
+  // [p|i][0-9]+
+  if ((Suffix[0] == 'i' || Suffix[0] == 'p') &&
+      all_of(Suffix.drop_front(), isDigit))
+    return true;
+
+  // Match one of the named types.
+  StringLiteral NamedTypes[] = {"isVoid",  "Metadata", "f16",  "f32",
+                                "f64",     "f80",      "f128", "bf16",
+                                "ppcf128", "x86amx"};
+  return is_contained(NamedTypes, Suffix);
+}
+
+// Check for conflicts with overloaded intrinsics. If there exists an overloaded
+// intrinsic with base name `llvm.target.foo`, LLVM will add a mangling suffix
+// to it to encode the overload types. This mangling suffix is 1 or more .
+// prefixed mangled type string as defined in `getMangledTypeStr`. If there
+// exists another intrinsic `llvm.target.foo[.<suffixN>]+`, which has the same
+// prefix as the overloaded intrinsic, its possible that there may be a name
+// conflict with the overloaded intrinsic and either one may interfere with name
+// lookup for the other, leading to wrong intrinsic ID being assigned.
+//
+// The actual name lookup in the intrinsic name table is done by a search
+// on each successive . separted component of the intrinsic name (see
+// `lookupLLVMIntrinsicByName`). Consider first the case where there exists a
+// non-overloaded intrinsic `llvm.target.foo[.suffix]+`. For the non-overloaded
+// intrinsics, the name lookup is an exact match, so the presence of the
+// overloaded intrinsic with the same prefix will not interfere with the
+// search. However, a lookup intended to match the overloaded intrinsic might be
+// affected by the presence of another entry in the name table with the same
+// prefix. See the `OverloadConflict` sub-test in IntrinsicsTest.cpp to
+// demonstrate the cases where there is a conflict and for the exact check
+// (replicated below) for when the conflict can or cannot happen.
+//
+// Since LLVM's name lookup first selects the target specific (or target
+// independent) slice of the name table to look into, intrinsics in 2 different
+// slices cannot conflict with each other. Within a specific slice,
+// if we have an overloaded intrinsic with name `llvm.target.foo` and another
+// one with same prefix and one or more suffixes `llvm.target.foo[.<suffixN>]+`,
+// then the name search will try to first match against suffix0, then suffix1
+// etc. If suffix0 can match a mangled type, then the search for an
+// `llvm.target.foo` with a mangling suffix can match against suffix0,
+// preventing a match with `llvm.target.foo`. If suffix0 cannot match a mangled
+// type, then that cannot happen, so we do not need to check for later suffixes.
+//
+// Generalizing, the `llvm.target.foo[.suffixN]+` will cause a conflict if the
+// first suffix (.suffix0) can match a mangled type (and then we do not need to
+// check later suffixes) and will not cause a conflict if it cannot (and then
+// again, we do not need to check for later suffixes.)
+void CodeGenIntrinsicTable::CheckOverloadSuffixConflicts() const {
+  for (const TargetSet &Set : Targets) {
+    const CodeGenIntrinsic *Overloaded = nullptr;
+    for (const CodeGenIntrinsic &Int : (*this)[Set]) {
+      // If we do not have an overloaded intrinsic to check against, nothing
+      // to do except potentially identifying this as a candidate for checking
+      // against in future iteration.
+      if (!Overloaded) {
+        if (Int.isOverloaded)
+          Overloaded = ∬
+        continue;
+      }
+
+      StringRef Name = Int.Name;
+      StringRef OverloadName = Overloaded->Name;
+      // If we have an overloaded intrinsic to check again, check if its name is
+      // a proper prefix of this intrinsic.
+      if (Name.starts_with(OverloadName) && Name[OverloadName.size()] == '.') {
+        // If yes, verify suffixes and flag an error.
+        StringRef Suffixes = Name.drop_front(OverloadName.size() + 1);
+
+        // Only need to look at the first suffix.
+        StringRef Suffix0 = Suffixes.split('.').first;
+
+        if (!DoesSuffixLookLikeMangledType(Suffix0))
+          continue;
+
+        unsigned SuffixSize = OverloadName.size() + 1 + Suffix0.size();
+        // If suffix looks like mangling suffix, flag it as an error.
+        PrintError(Int.TheDef->getLoc(),
+                   "intrinsic `" + Name + "` cannot share prefix `" +
+                       Name.take_front(SuffixSize) +
+                       "` with another overloaded intrinsic `" + OverloadName +
+                       "`");
+        PrintNote(Overloaded->TheDef->getLoc(),
+                  "Overloaded intrinsic `" + OverloadName + "` defined here");
+        continue;
+      }
+
+      // If we find an intrinsic that is not a proper prefix, any later
+      // intrinsic is also not going to be a proper prefix, so invalidate the
+      // overloaded to check against.
+      Overloaded = nullptr;
+    }
+  }
+}
+
 CodeGenIntrinsic &CodeGenIntrinsicMap::operator[](const Record *Record) {
   if (!Record->isSubClassOf("Intrinsic"))
     PrintFatalError("Intrinsic defs should be subclass of 'Intrinsic' class");
diff --git a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h
index 1cdeaacd52dcdb..cf6375c77c133a 100644
--- a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h
+++ b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h
@@ -186,6 +186,9 @@ class CodeGenIntrinsicTable {
   auto begin() const { return Intrinsics.begin(); }
   auto end() const { return Intrinsics.end(); }
   CodeGenIntrinsic &operator[](size_t Pos) { return Intrinsics[Pos]; }
+  ArrayRef<CodeGenIntrinsic> operator[](const TargetSet &Set) const {
+    return ArrayRef(&Intrinsics[Set.Offset], Set.Count);
+  }
   const CodeGenIntrinsic &operator[](size_t Pos) const {
     return Intrinsics[Pos];
   }
@@ -193,6 +196,7 @@ class CodeGenIntrinsicTable {
 private:
   void CheckDuplicateIntrinsics() const;
   void CheckTargetIndependentIntrinsics() const;
+  void CheckOverloadSuffixConflicts() const;
 };
 
 // This class builds `CodeGenIntrinsic` on demand for a given Def.
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index efa067e60de439..d5e5cf1a3b5685 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -154,7 +154,7 @@ void IntrinsicEmitter::EmitEnumInfo(const CodeGenIntrinsicTable &Ints,
 
   OS << "// Enum values for intrinsics.\n";
   bool First = true;
-  for (const auto &Int : ArrayRef(&Ints[Set->Offset], Set->Count)) {
+  for (const auto &Int : Ints[*Set]) {
     OS << "    " << Int.EnumName;
 
     // Assign a value to the first intrinsic in this target set so that all



More information about the llvm-commits mailing list