[llvm] [LLVM][TableGen] Add overloaded intrinsic name conflict checks (PR #109314)

Rahul Joshi via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 19 10:06:42 PDT 2024


https://github.com/jurahul created https://github.com/llvm/llvm-project/pull/109314

None

>From 16a52cbc3065040d94d918b49b7d620d84bc1c92 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Thu, 19 Sep 2024 09:57:24 -0700
Subject: [PATCH] [LLVM][TableGen] Add overloaded intrinsic name conflict
 checks

---
 llvm/include/llvm/IR/Intrinsics.td            |   4 +
 llvm/lib/IR/IntrinsicInst.cpp                 |   2 +
 .../TableGen/Basic/CodeGenIntrinsics.cpp      | 163 ++++++++++++++++++
 llvm/utils/TableGen/Basic/CodeGenIntrinsics.h |   1 +
 4 files changed, 170 insertions(+)

diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 0a74a217a5f010..ff0174cfc9ecc3 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1834,6 +1834,10 @@ def int_is_constant : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty],
                                 [IntrNoMem, IntrWillReturn, IntrConvergent],
                                 "llvm.is.constant">;
 
+def int_is_constant0 : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty],
+                                [IntrNoMem, IntrWillReturn, IntrConvergent],
+                                "llvm.is.constant.my">;
+
 // Introduce a use of the argument without generating any code.
 def int_fake_use : Intrinsic<[], [llvm_vararg_ty]>;
 
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index 7ed82c2ece464a..009b20cf8e635c 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -236,6 +236,8 @@ void DbgAssignIntrinsic::setValue(Value *V) {
              MetadataAsValue::get(getContext(), ValueAsMetadata::get(V)));
 }
 
+// Note: a modified version of this fuction exists in CodeGenIntrinsics.cpp.
+// Any changes here likely need to be reflected there as well.
 int llvm::Intrinsic::lookupLLVMIntrinsicByName(ArrayRef<const char *> NameTable,
                                                StringRef Name) {
   assert(Name.starts_with("llvm.") && "Unexpected intrinsic prefix");
diff --git a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
index e566b7ceedf38e..4c3d5c3b1b2f13 100644
--- a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
+++ b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
@@ -68,6 +68,7 @@ CodeGenIntrinsicTable::CodeGenIntrinsicTable(const RecordKeeper &RC) {
   Targets.back().Count = Intrinsics.size() - Targets.back().Offset;
 
   CheckDuplicateIntrinsics();
+  CheckOverloadConflicts();
 }
 
 // Check for duplicate intrinsic names.
@@ -94,6 +95,168 @@ void CodeGenIntrinsicTable::CheckDuplicateIntrinsics() const {
   PrintFatalNote(First.TheDef, "Previous definition here");
 }
 
+// Note: This is a modified version of `Intrinsic::lookupLLVMIntrinsicByName`
+// in IntrinsicInst.cpp file.
+template <typename T>
+int lookupLLVMIntrinsicByName(ArrayRef<const T *> NameTable, StringRef Name,
+                              function_ref<const char *(const T *)> ToString) {
+  using ToStringTy = function_ref<const char *(const T *)>;
+  assert(Name.starts_with("llvm.") && "Unexpected intrinsic prefix");
+
+  // Do successive binary searches of the dotted name components. For
+  // "llvm.gc.experimental.statepoint.p1i8.p1i32", we will find the range of
+  // intrinsics starting with "llvm.gc", then "llvm.gc.experimental", then
+  // "llvm.gc.experimental.statepoint", and then we will stop as the range is
+  // size 1. During the search, we can skip the prefix that we already know is
+  // identical. By using strncmp we consider names with differing suffixes to
+  // be part of the equal range.
+  size_t CmpEnd = 4; // Skip the "llvm" component.
+
+  struct Compare {
+    Compare(size_t CmpStart, size_t CmpEnd, ToStringTy ToString)
+        : CmpStart(CmpStart), CmpEnd(CmpEnd), ToString(ToString) {}
+    bool operator()(const T *LHS, const char *RHS) {
+      return strncmp(ToString(LHS) + CmpStart, RHS + CmpStart,
+                     CmpEnd - CmpStart) < 0;
+    }
+    bool operator()(const char *LHS, const CodeGenIntrinsic *RHS) {
+      return strncmp(LHS + CmpStart, ToString(RHS) + CmpStart,
+                     CmpEnd - CmpStart) < 0;
+    }
+    const size_t CmpStart, CmpEnd;
+    ToStringTy ToString;
+  };
+
+  auto Low = NameTable.begin();
+  auto High = NameTable.end();
+  auto LastLow = Low;
+  while (CmpEnd < Name.size() && High - Low > 0) {
+    size_t CmpStart = CmpEnd;
+    CmpEnd = Name.find('.', CmpStart + 1);
+    CmpEnd = CmpEnd == StringRef::npos ? Name.size() : CmpEnd;
+    LastLow = Low;
+    Compare Cmp(CmpStart, CmpEnd, ToString);
+    std::tie(Low, High) = std::equal_range(Low, High, Name.data(), Cmp);
+  }
+  if (High - Low > 0)
+    LastLow = Low;
+
+  if (LastLow == NameTable.end())
+    return -1;
+  StringRef NameFound = (*LastLow)->Name;
+  if (Name == NameFound ||
+      (Name.starts_with(NameFound) && Name[NameFound.size()] == '.'))
+    return LastLow - NameTable.begin();
+  return -1;
+}
+
+// Check for conflicts with overloaded intrinsics. If an intrinsic's name has
+// the name of an overloaded intrinsic as a prefix, then it may be ambiguious
+// and confusing as to which of the two a specific call refers to. Hence
+// disallow such cases.
+//
+// As an example, if `llvm.foo` is overloaded, it will appear as
+// `llvm.foo.<mangling_suffix>` in the IR. Now, if another intrinsic is called
+// `llvm.foo.bar`, it may be ambiguious as to whether `.bar` is a mangling
+// suffix for the overloaded `llvm.foo` or if its the `llvm.foo.bar` intrinsic.
+// Note though that `llvm.foobar` is OK. So the prefix check will check if
+// there name of the overloaded intrinsic is a prefix of another one and the
+// next letter after the prefix is a `.`.
+//
+// Also note that the `.bar` suffix in the example above may not be a valid
+// mangling suffix for `llvm.foo`, so we could check that and allow
+// `llvm.foo.bar` as there is no ambiguity. But LLVM's intrinsic name matching
+// does not support this (in llvm::Intrinsic::lookupLLVMIntrinsicByName).
+// However the ambiguity is still there, so we do not allow this case (i.e.,
+// the check is overly strict).
+void CodeGenIntrinsicTable::CheckOverloadConflicts() const {
+  // Collect all overloaded intrinsic names in a vector. `Intrinsics` are
+  // already mostly sorted by name (all target independent ones first, sorted by
+  // their name, followed by target specific ones, sorted by their name).
+  // However we would like to detect cases where an overloaded target
+  // independent one shares a prefix with a target depndent one. So we need to
+  // collect and resort all of them by name.
+  std::vector<const CodeGenIntrinsic *> OverloadedIntrinsics;
+  for (const CodeGenIntrinsic &Int : Intrinsics)
+    if (Int.isOverloaded)
+      OverloadedIntrinsics.push_back(&Int);
+
+  sort(OverloadedIntrinsics,
+       [](const CodeGenIntrinsic *Int1, const CodeGenIntrinsic *Int2) {
+         return Int1->Name < Int2->Name;
+       });
+
+  ArrayRef<const CodeGenIntrinsic *> AR = OverloadedIntrinsics;
+  auto ToString = [](const CodeGenIntrinsic *Int) -> const char * {
+    return Int->Name.c_str();
+  };
+
+  for (const CodeGenIntrinsic &Int : Intrinsics) {
+    int index =
+        lookupLLVMIntrinsicByName<CodeGenIntrinsic>(AR, Int.Name, ToString);
+    if (index == -1 || AR[index] == &Int)
+      continue;
+    const CodeGenIntrinsic &Overloaded = *AR[index];
+
+    // Allow only a single ".xxx" suffix after the matched name, if we know that
+    // the it likely won't match a mangling suffix.
+    StringRef Suffix =
+        StringRef(Int.Name).drop_front(Overloaded.Name.size() + 1);
+    bool IsSuffixOk = [&]() {
+      // Allow only a single "." separated token.
+      if (Suffix.find('.') != StringRef::npos)
+        return false;
+      // Do not allow if suffix can potentially match a mangling suffix.
+      // See getMangledTypeStr() for the mangling suffixes possible. It includes
+      //  pointer       : p[0-9]+
+      //  array         : a[0-9]+[.+]
+      //  struct:       : s_/sl_[.+]
+      //  function      : f_[.+]
+      //  vector        : v/nxv[0-9]+[.+]
+      //  target type   : t[.*]
+      //  integer       : i[0-9]+
+      //  named types   : See `NamedTypes` below.
+
+      // 1. Do not allow anything with characters other that [0-9A-Za-z]. That
+      // means really no _, so that eliminates functions and structs.
+      if (Suffix.find('_') != StringRef::npos)
+        return false;
+
+      // [a|v][0-9|$][.*] // $ is end of string.
+      if (is_contained("av", Suffix[0]) &&
+          (Suffix.size() == 1 || isDigit(Suffix[1])))
+        return false;
+      // nxv[0-9|$][.*]
+      if (Suffix.starts_with("nxv") &&
+          (Suffix.size() == 3 || isDigit(Suffix[3])))
+        return false;
+      // t[.*]
+      if (Suffix.starts_with('t'))
+        return false;
+      // [p|i][0-9]+
+      if ((Suffix[0] == 'i' || Suffix[0] == 'p') &&
+          all_of(Suffix.drop_front(), isDigit))
+        return false;
+      // Match one of the named types.
+      StringLiteral NamedTypes[] = {"isVoid",  "Metadata", "f16",  "f32",
+                                    "f64",     "f80",      "f128", "bf16",
+                                    "ppcf128", "x86amx"};
+      if (is_contained(NamedTypes, Suffix))
+        return false;
+      return true;
+    }();
+    if (IsSuffixOk)
+      continue;
+
+    PrintError(Int.TheDef->getLoc(),
+               Twine("Intrinsic `") + Int.Name + "` cannot share prefix `" +
+                   Overloaded.Name + "` with an overloaded intrinsic");
+    PrintFatalError(Overloaded.TheDef->getLoc(), "Overloaded intrinsic `" +
+                                                     Overloaded.Name +
+                                                     "` defined here");
+  }
+}
+
 CodeGenIntrinsic &CodeGenIntrinsicMap::operator[](const Record *Record) {
   if (!Record->isSubClassOf("Intrinsic"))
     PrintFatalError("Intrinsic defs should be subclass of 'Intrinsic' class");
diff --git a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h
index 2df598da3f2507..10d43633366825 100644
--- a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h
+++ b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h
@@ -192,6 +192,7 @@ class CodeGenIntrinsicTable {
 
 private:
   void CheckDuplicateIntrinsics() const;
+  void CheckOverloadConflicts() const;
 };
 
 // This class builds `CodeGenIntrinsic` on demand for a given Def.



More information about the llvm-commits mailing list