[llvm] [LLVM][TableGen] Check overloaded intrinsic mangling suffix conflicts (PR #110324)

Rahul Joshi via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 14 13:14:12 PDT 2024


https://github.com/jurahul updated https://github.com/llvm/llvm-project/pull/110324

>From 481c35336dea9b891e164da45d9c0654b6461498 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Thu, 26 Sep 2024 17:38:41 -0700
Subject: [PATCH] [LLVM][TableGen] Check overloaded intrinsic mangling suffix
 conflicts

---
 .../TableGen/intrinsic-overload-conflict.td   |  43 ++++++
 llvm/unittests/IR/IntrinsicsTest.cpp          |   6 +-
 .../TableGen/Basic/CodeGenIntrinsics.cpp      | 124 ++++++++++++++++++
 llvm/utils/TableGen/Basic/CodeGenIntrinsics.h |   4 +
 llvm/utils/TableGen/IntrinsicEmitter.cpp      |   2 +-
 5 files changed, 175 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/TableGen/intrinsic-overload-conflict.td

diff --git a/llvm/test/TableGen/intrinsic-overload-conflict.td b/llvm/test/TableGen/intrinsic-overload-conflict.td
new file mode 100644
index 00000000000000..84333119d41f53
--- /dev/null
+++ b/llvm/test/TableGen/intrinsic-overload-conflict.td
@@ -0,0 +1,43 @@
+// RUN: llvm-tblgen -gen-intrinsic-enums -I %p/../../include %s -DTEST_INTRINSICS_SUPPRESS_DEFS | FileCheck %s
+// RUN: not llvm-tblgen -gen-intrinsic-enums -I %p/../../include %s -DTEST_INTRINSICS_SUPPRESS_DEFS -DCONFLICT 2>&1 | FileCheck %s -DFILE=%s --check-prefix=CHECK-CONFLICT
+
+
+include "llvm/IR/Intrinsics.td"
+// CHECK: foo = 1,
+def int_foo : Intrinsic<[llvm_any_ty]>;
+
+// No conflicts, since .bar is not a vaid mangled type.
+// CHECK: foo_bar,
+def int_foo_bar : Intrinsic<[llvm_i32_ty]>;
+
+// CHECK: foo_bar_f32,
+def int_foo_bar_f32 : Intrinsic<[llvm_i32_ty]>;
+
+#ifdef CONFLICT
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.a3` cannot share prefix `llvm.foo.a3` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.bf16` cannot share prefix `llvm.foo.bf16` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.f32` cannot share prefix `llvm.foo.f32` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.f64` cannot share prefix `llvm.foo.f64` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.f_3` cannot share prefix `llvm.foo.f_3` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.i65` cannot share prefix `llvm.foo.i65` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.i65.bar` cannot share prefix `llvm.foo.i65` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.p3` cannot share prefix `llvm.foo.p3` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.s_3` cannot share prefix `llvm.foo.s_3` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: error: intrinsic `llvm.foo.v4` cannot share prefix `llvm.foo.v4` with another overloaded intrinsic `llvm.foo`
+// CHECK-CONFLICT: 10 errors
+// CHECK-CONFLICT-NOT: error
+
+// Conflicts due to suffix being a possible mangled type.
+def int_foo_f32 : Intrinsic<[llvm_i32_ty]>;
+def int_foo_f64 : Intrinsic<[llvm_i32_ty]>;
+def int_foo_bf16: Intrinsic<[llvm_i32_ty]>;
+def int_foo_p3 : Intrinsic<[llvm_i32_ty]>;
+def int_foo_a3 : Intrinsic<[llvm_i32_ty]>;
+def int_foo_v4 : Intrinsic<[llvm_i32_ty]>;
+def int_foo_func : Intrinsic<[llvm_i32_ty], [], [], "llvm.foo.f_3">;
+def int_foo_struct : Intrinsic<[llvm_i32_ty], [], [], "llvm.foo.s_3">;
+def int_foo_t3 : Intrinsic<[llvm_i32_ty]>;
+def int_foo_i65 : Intrinsic<[llvm_i32_ty]>;
+def int_foo_i65_bar : Intrinsic<[llvm_i32_ty]>;
+
+#endif
diff --git a/llvm/unittests/IR/IntrinsicsTest.cpp b/llvm/unittests/IR/IntrinsicsTest.cpp
index 7fe0bd79b80a60..b32a1991448595 100644
--- a/llvm/unittests/IR/IntrinsicsTest.cpp
+++ b/llvm/unittests/IR/IntrinsicsTest.cpp
@@ -63,7 +63,7 @@ class IntrinsicsTest : public ::testing::Test {
 };
 
 TEST(IntrinsicNameLookup, Basic) {
-  static constexpr const char *const NameTable1[] = {
+  static constexpr const char *const NameTable[] = {
       "llvm.foo", "llvm.foo.a", "llvm.foo.b", "llvm.foo.b.a", "llvm.foo.c",
   };
 
@@ -74,11 +74,11 @@ TEST(IntrinsicNameLookup, Basic) {
   };
 
   for (const auto &[Name, ExpectedIdx] : Tests) {
-    int Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable1, Name);
+    int Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable, Name);
     EXPECT_EQ(ExpectedIdx, Idx);
     if (!StringRef(Name).starts_with("llvm.foo"))
       continue;
-    Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable1, Name, "foo");
+    Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable, Name, "foo");
     EXPECT_EQ(ExpectedIdx, Idx);
   }
 }
diff --git a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
index fe3c83ce92e99d..2a246d60de6156 100644
--- a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
+++ b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
@@ -76,6 +76,7 @@ CodeGenIntrinsicTable::CodeGenIntrinsicTable(const RecordKeeper &RC) {
 
   CheckDuplicateIntrinsics();
   CheckTargetIndependentIntrinsics();
+  CheckOverloadSuffixConflicts();
 }
 
 // Check for duplicate intrinsic names.
@@ -124,6 +125,129 @@ void CodeGenIntrinsicTable::CheckTargetIndependentIntrinsics() const {
   }
 }
 
+// Return true if the given Suffix looks like a mangled type. Note that this
+// check is conservative, but allows all existing LLVM intrinsic suffixes to be
+// considered as not looking like a mangling suffix.
+static bool doesSuffixLookLikeMangledType(StringRef Suffix) {
+  // Try to match against possible mangling suffixes for various types.
+  // See getMangledTypeStr() for the mangling suffixes possible. It includes
+  //  pointer       : p[0-9]+
+  //  array         : a[0-9]+.+
+  //  struct:       : s_/sl_.+
+  //  function      : f_.+
+  //  vector        : v/nxv[0-9]+.+
+  //  target type   : t.+
+  //  integer       : i[0-9]+
+  //  named types   : See `NamedTypes` below.
+
+  // Match anything with an _, so match function and struct types.
+  if (Suffix.contains('_'))
+    return true;
+
+  // [av][0-9]+.+, simplified to [av][0-9].+
+  if (Suffix.size() >= 2 && is_contained("av", Suffix[0]) && isDigit(Suffix[1]))
+    return true;
+
+  // nxv[0-9]+.+, simplified to nxv[0-9].+
+  if (Suffix.size() >= 4 && Suffix.starts_with("nxv") && isDigit(Suffix[3]))
+    return true;
+
+  // t.+
+  if (Suffix.size() > 1 && Suffix.starts_with('t'))
+    return false;
+
+  // [pi][0-9]+
+  if (is_contained("pi", Suffix[0]) && all_of(Suffix.drop_front(), isDigit))
+    return true;
+
+  // Match one of the named types.
+  static constexpr StringLiteral NamedTypes[] = {
+      "isVoid", "Metadata", "f16",  "f32",     "f64",
+      "f80",    "f128",     "bf16", "ppcf128", "x86amx"};
+  return is_contained(NamedTypes, Suffix);
+}
+
+// Check for conflicts with overloaded intrinsics. If there exists an overloaded
+// intrinsic with base name `llvm.target.foo`, LLVM will add a mangling suffix
+// to it to encode the overload types. This mangling suffix is 1 or more .
+// prefixed mangled type string as defined in `getMangledTypeStr`. If there
+// exists another intrinsic `llvm.target.foo[.<suffixN>]+`, which has the same
+// prefix as the overloaded intrinsic, its possible that there may be a name
+// conflict with the overloaded intrinsic and either one may interfere with name
+// lookup for the other, leading to wrong intrinsic ID being assigned.
+//
+// The actual name lookup in the intrinsic name table is done by a search
+// on each successive '.' separted component of the intrinsic name (see
+// `lookupLLVMIntrinsicByName`). Consider first the case where there exists a
+// non-overloaded intrinsic `llvm.target.foo[.suffix]+`. For the non-overloaded
+// intrinsics, the name lookup is an exact match, so the presence of the
+// overloaded intrinsic with the same prefix will not interfere with the
+// search. However, a lookup intended to match the overloaded intrinsic might be
+// affected by the presence of another entry in the name table with the same
+// prefix.
+//
+// Since LLVM's name lookup first selects the target specific (or target
+// independent) slice of the name table to look into, intrinsics in 2 different
+// targets cannot conflict with each other. Within a specific target,
+// if we have an overloaded intrinsic with name `llvm.target.foo` and another
+// one with same prefix and one or more suffixes `llvm.target.foo[.<suffixN>]+`,
+// then the name search will try to first match against suffix0, then suffix1
+// etc. If suffix0 can match a mangled type, then the search for an
+// `llvm.target.foo` with a mangling suffix can match against suffix0,
+// preventing a match with `llvm.target.foo`. If suffix0 cannot match a mangled
+// type, then that cannot happen, so we do not need to check for later suffixes.
+//
+// Generalizing, the `llvm.target.foo[.suffixN]+` will cause a conflict if the
+// first suffix (.suffix0) can match a mangled type (and then we do not need to
+// check later suffixes) and will not cause a conflict if it cannot (and then
+// again, we do not need to check for later suffixes).
+void CodeGenIntrinsicTable::CheckOverloadSuffixConflicts() const {
+  for (const TargetSet &Set : Targets) {
+    const CodeGenIntrinsic *Overloaded = nullptr;
+    for (const CodeGenIntrinsic &Int : (*this)[Set]) {
+      // If we do not have an overloaded intrinsic to check against, nothing
+      // to do except potentially identifying this as a candidate for checking
+      // against in future iteration.
+      if (!Overloaded) {
+        if (Int.isOverloaded)
+          Overloaded = ∬
+        continue;
+      }
+
+      StringRef Name = Int.Name;
+      StringRef OverloadName = Overloaded->Name;
+      // If we have an overloaded intrinsic to check again, check if its name is
+      // a proper prefix of this intrinsic.
+      if (Name.starts_with(OverloadName) && Name[OverloadName.size()] == '.') {
+        // If yes, verify suffixes and flag an error.
+        StringRef Suffixes = Name.drop_front(OverloadName.size() + 1);
+
+        // Only need to look at the first suffix.
+        StringRef Suffix0 = Suffixes.split('.').first;
+
+        if (!doesSuffixLookLikeMangledType(Suffix0))
+          continue;
+
+        unsigned SuffixSize = OverloadName.size() + 1 + Suffix0.size();
+        // If suffix looks like mangling suffix, flag it as an error.
+        PrintError(Int.TheDef->getLoc(),
+                   "intrinsic `" + Name + "` cannot share prefix `" +
+                       Name.take_front(SuffixSize) +
+                       "` with another overloaded intrinsic `" + OverloadName +
+                       "`");
+        PrintNote(Overloaded->TheDef->getLoc(),
+                  "Overloaded intrinsic `" + OverloadName + "` defined here");
+        continue;
+      }
+
+      // If we find an intrinsic that is not a proper prefix, any later
+      // intrinsic is also not going to be a proper prefix, so invalidate the
+      // overloaded to check against.
+      Overloaded = nullptr;
+    }
+  }
+}
+
 const CodeGenIntrinsic &CodeGenIntrinsicMap::operator[](const Record *Record) {
   if (!Record->isSubClassOf("Intrinsic"))
     PrintFatalError("Intrinsic defs should be subclass of 'Intrinsic' class");
diff --git a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h
index c2701e1586ee34..8428d09a94009e 100644
--- a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h
+++ b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h
@@ -185,11 +185,15 @@ class CodeGenIntrinsicTable {
   const CodeGenIntrinsic &operator[](size_t Pos) const {
     return Intrinsics[Pos];
   }
+  ArrayRef<CodeGenIntrinsic> operator[](const TargetSet &Set) const {
+    return ArrayRef(&Intrinsics[Set.Offset], Set.Count);
+  }
   ArrayRef<TargetSet> getTargets() const { return Targets; }
 
 private:
   void CheckDuplicateIntrinsics() const;
   void CheckTargetIndependentIntrinsics() const;
+  void CheckOverloadSuffixConflicts() const;
 
   std::vector<CodeGenIntrinsic> Intrinsics;
   std::vector<TargetSet> Targets;
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 6d92c733ea3d6d..1968e7eac21e33 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -155,7 +155,7 @@ void IntrinsicEmitter::EmitEnumInfo(const CodeGenIntrinsicTable &Ints,
 
   OS << "// Enum values for intrinsics.\n";
   bool First = true;
-  for (const auto &Int : ArrayRef(&Ints[Set->Offset], Set->Count)) {
+  for (const auto &Int : Ints[*Set]) {
     OS << "    " << Int.EnumName;
 
     // Assign a value to the first intrinsic in this target set so that all



More information about the llvm-commits mailing list