[llvm] [AsmParser] Support calling intrinsics without mangling suffix (PR #89172)

via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 17 23:17:04 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-ir

Author: Nikita Popov (nikic)

<details>
<summary>Changes</summary>

This adds proper support for calling intrinsics without mangling suffix when parsing textual IR. This already worked (mostly by accident) when only a single mangling suffix was in use.

This patch extends support to the case where the intrinsic is used with multiple signatures, and as such multiple different intrinsic declarations have to be inserted. The final IR will have intrinsics with mangling suffix as usual.

Motivated by the discussion at:
https://discourse.llvm.org/t/recent-improvements-to-the-ir-parser/77366

---
Full diff: https://github.com/llvm/llvm-project/pull/89172.diff


7 Files Affected:

- (modified) llvm/include/llvm/IR/Intrinsics.h (+6-1) 
- (modified) llvm/lib/AsmParser/LLParser.cpp (+52-33) 
- (modified) llvm/lib/IR/Function.cpp (+9-6) 
- (modified) llvm/test/Assembler/implicit-intrinsic-declaration-invalid.ll (+4-5) 
- (added) llvm/test/Assembler/implicit-intrinsic-declaration-invalid2.ll (+11) 
- (added) llvm/test/Assembler/implicit-intrinsic-declaration-invalid3.ll (+9) 
- (modified) llvm/test/Assembler/implicit-intrinsic-declaration.ll (+16-10) 


``````````diff
diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h
index 92eae344ce729e..340c1c326d0665 100644
--- a/llvm/include/llvm/IR/Intrinsics.h
+++ b/llvm/include/llvm/IR/Intrinsics.h
@@ -235,7 +235,12 @@ namespace Intrinsic {
   /// specified by the .td file. The overloaded types are pushed into the
   /// AgTys vector.
   ///
-  /// Returns false if the given function is not a valid intrinsic call.
+  /// Returns false if the given ID and function type combination is not a
+  /// valid intrinsic call.
+  bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT,
+                             SmallVectorImpl<Type *> &ArgTys);
+
+  /// Same as previous, but accepts a Function instead of ID and FunctionType.
   bool getIntrinsicSignature(Function *F, SmallVectorImpl<Type *> &ArgTys);
 
   // Checks if the intrinsic name matches with its signature and if not
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 63104129f8c2df..2902bd9fe17c48 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -326,6 +326,42 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
                      ForwardRefComdats.begin()->first + "'");
 
   for (const auto &[Name, Info] : make_early_inc_range(ForwardRefVals)) {
+    if (StringRef(Name).starts_with("llvm.")) {
+      Intrinsic::ID IID = Function::lookupIntrinsicID(Name);
+      if (IID == Intrinsic::not_intrinsic)
+        // Don't do anything for unknown intrinsics.
+        continue;
+
+      // Automatically create declarations for intrinsics. Intrinsics can only
+      // be called directly, so the call function type directly determines the
+      // declaration function type.
+      //
+      // Additionally, automatically add the required mangling suffix to the
+      // intrinsic name. This means that we may replace a single forward
+      // declaration with multiple functions here.
+      for (Use &U : make_early_inc_range(Info.first->uses())) {
+        auto *CB = dyn_cast<CallBase>(U.getUser());
+        if (!CB || !CB->isCallee(&U))
+          return error(Info.second, "intrinsic can only be used as callee");
+
+        SmallVector<Type *> OverloadTys;
+        if (!Intrinsic::getIntrinsicSignature(IID, CB->getFunctionType(),
+                                              OverloadTys))
+          return error(Info.second, "invalid intrinsic signature");
+
+        U.set(Intrinsic::getDeclaration(M, IID, OverloadTys));
+      }
+
+      Info.first->eraseFromParent();
+      ForwardRefVals.erase(Name);
+      continue;
+    }
+
+    // If incomplete IR is allowed, also add declarations for
+    // non-intrinsics.
+    if (!AllowIncompleteIR)
+      continue;
+
     auto GetCommonFunctionType = [](Value *V) -> FunctionType * {
       FunctionType *FTy = nullptr;
       for (Use &U : V->uses()) {
@@ -337,40 +373,23 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
       return FTy;
     };
 
-    auto GetDeclarationType = [&](StringRef Name, Value *V) -> Type * {
-      // Automatically create declarations for intrinsics. Intrinsics can only
-      // be called directly, so the call function type directly determines the
-      // declaration function type.
-      if (Name.starts_with("llvm."))
-        // Don't do anything if the intrinsic is called with different function
-        // types. This would result in a verifier error anyway.
-        return GetCommonFunctionType(V);
-
-      if (AllowIncompleteIR) {
-        // If incomplete IR is allowed, also add declarations for
-        // non-intrinsics. First check whether this global is only used in
-        // calls with the same type, in which case we'll insert a function.
-        if (auto *Ty = GetCommonFunctionType(V))
-          return Ty;
-
-        // Otherwise, fall back to using a dummy i8 type.
-        return Type::getInt8Ty(Context);
-      }
-      return nullptr;
-    };
+    // First check whether this global is only used in calls with the same
+    // type, in which case we'll insert a function. Otherwise, fall back to
+    // using a dummy i8 type.
+    Type *Ty = GetCommonFunctionType(Info.first);
+    if (!Ty)
+      Ty = Type::getInt8Ty(Context);
 
-    if (Type *Ty = GetDeclarationType(Name, Info.first)) {
-      GlobalValue *GV;
-      if (auto *FTy = dyn_cast<FunctionType>(Ty))
-        GV = Function::Create(FTy, GlobalValue::ExternalLinkage, Name, M);
-      else
-        GV = new GlobalVariable(*M, Ty, /*isConstant*/ false,
-                                GlobalValue::ExternalLinkage,
-                                /*Initializer*/ nullptr, Name);
-      Info.first->replaceAllUsesWith(GV);
-      Info.first->eraseFromParent();
-      ForwardRefVals.erase(Name);
-    }
+    GlobalValue *GV;
+    if (auto *FTy = dyn_cast<FunctionType>(Ty))
+      GV = Function::Create(FTy, GlobalValue::ExternalLinkage, Name, M);
+    else
+      GV = new GlobalVariable(*M, Ty, /*isConstant*/ false,
+                              GlobalValue::ExternalLinkage,
+                              /*Initializer*/ nullptr, Name);
+    Info.first->replaceAllUsesWith(GV);
+    Info.first->eraseFromParent();
+    ForwardRefVals.erase(Name);
   }
 
   if (!ForwardRefVals.empty())
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 818a167560de69..10cb6e75ffe69e 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -1742,9 +1742,8 @@ Intrinsic::matchIntrinsicVarArg(bool isVarArg,
   return true;
 }
 
-bool Intrinsic::getIntrinsicSignature(Function *F,
+bool Intrinsic::getIntrinsicSignature(Intrinsic::ID ID, FunctionType *FT,
                                       SmallVectorImpl<Type *> &ArgTys) {
-  Intrinsic::ID ID = F->getIntrinsicID();
   if (!ID)
     return false;
 
@@ -1752,17 +1751,21 @@ bool Intrinsic::getIntrinsicSignature(Function *F,
   getIntrinsicInfoTableEntries(ID, Table);
   ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
 
-  if (Intrinsic::matchIntrinsicSignature(F->getFunctionType(), TableRef,
-                                         ArgTys) !=
+  if (Intrinsic::matchIntrinsicSignature(FT, TableRef, ArgTys) !=
       Intrinsic::MatchIntrinsicTypesResult::MatchIntrinsicTypes_Match) {
     return false;
   }
-  if (Intrinsic::matchIntrinsicVarArg(F->getFunctionType()->isVarArg(),
-                                      TableRef))
+  if (Intrinsic::matchIntrinsicVarArg(FT->isVarArg(), TableRef))
     return false;
   return true;
 }
 
+bool Intrinsic::getIntrinsicSignature(Function *F,
+                                      SmallVectorImpl<Type *> &ArgTys) {
+  return getIntrinsicSignature(F->getIntrinsicID(), F->getFunctionType(),
+                               ArgTys);
+}
+
 std::optional<Function *> Intrinsic::remangleIntrinsicFunction(Function *F) {
   SmallVector<Type *, 4> ArgTys;
   if (!getIntrinsicSignature(F, ArgTys))
diff --git a/llvm/test/Assembler/implicit-intrinsic-declaration-invalid.ll b/llvm/test/Assembler/implicit-intrinsic-declaration-invalid.ll
index 0cb35b390337a0..63566a26dfa023 100644
--- a/llvm/test/Assembler/implicit-intrinsic-declaration-invalid.ll
+++ b/llvm/test/Assembler/implicit-intrinsic-declaration-invalid.ll
@@ -1,11 +1,10 @@
 ; RUN: not llvm-as < %s 2>&1 | FileCheck %s
 
-; Check that intrinsics do not get automatically declared if they are used
-; with different function types.
+; Use of intrinsic without mangling suffix and invalid signature should
+; be rejected.
 
-; CHECK: error: use of undefined value '@llvm.umax'
+; CHECK: error: invalid intrinsic signature
 define void @test() {
-  call i8 @llvm.umax(i8 0, i8 1)
-  call i16 @llvm.umax(i16 0, i16 1)
+  call i8 @llvm.umax(i8 0, i16 1)
   ret void
 }
diff --git a/llvm/test/Assembler/implicit-intrinsic-declaration-invalid2.ll b/llvm/test/Assembler/implicit-intrinsic-declaration-invalid2.ll
new file mode 100644
index 00000000000000..99a3b07fb25644
--- /dev/null
+++ b/llvm/test/Assembler/implicit-intrinsic-declaration-invalid2.ll
@@ -0,0 +1,11 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+
+; Use of intrinsic as non-callee should be rejected.
+
+; CHECK: error: intrinsic can only be used as callee
+define void @test() {
+  call void @foo(ptr @llvm.umax)
+  ret void
+}
+
+declare void @foo(ptr)
diff --git a/llvm/test/Assembler/implicit-intrinsic-declaration-invalid3.ll b/llvm/test/Assembler/implicit-intrinsic-declaration-invalid3.ll
new file mode 100644
index 00000000000000..ad5a96a6d85195
--- /dev/null
+++ b/llvm/test/Assembler/implicit-intrinsic-declaration-invalid3.ll
@@ -0,0 +1,9 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+
+; Use of unknown intrinsic without declaration should be rejected.
+
+; CHECK: error: use of undefined value '@llvm.foobar'
+define void @test() {
+  call i8 @llvm.foobar(i8 0, i16 1)
+  ret void
+}
diff --git a/llvm/test/Assembler/implicit-intrinsic-declaration.ll b/llvm/test/Assembler/implicit-intrinsic-declaration.ll
index 54ec63fc7f3da4..9021662bea4345 100644
--- a/llvm/test/Assembler/implicit-intrinsic-declaration.ll
+++ b/llvm/test/Assembler/implicit-intrinsic-declaration.ll
@@ -2,10 +2,10 @@
 ; RUN: opt -S < %s | FileCheck %s
 ; RUN: opt -S -passes=instcombine < %s | FileCheck %s --check-prefix=INSTCOMBINE
 
-; llvm.umax is intentionally missing the mangling suffix here, to show that
-; this works fine with auto-upgrade.
-define i16 @test(i8 %x, i8 %y) {
-; CHECK-LABEL: define i16 @test(
+; Two of the llvm.umax calls are intentionally missing the mangling suffix here,
+; to show that it will be added automatically.
+define i32 @test(i8 %x, i8 %y) {
+; CHECK-LABEL: define i32 @test(
 ; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[X]], -1
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
@@ -13,21 +13,27 @@ define i16 @test(i8 %x, i8 %y) {
 ; CHECK-NEXT:    [[X_EXT:%.*]] = zext i8 [[X]] to i16
 ; CHECK-NEXT:    [[Y_EXT:%.*]] = zext i8 [[Y]] to i16
 ; CHECK-NEXT:    [[MAX2:%.*]] = call i16 @llvm.umax.i16(i16 [[X_EXT]], i16 [[Y_EXT]])
-; CHECK-NEXT:    ret i16 [[MAX2]]
+; CHECK-NEXT:    [[X_EXT2:%.*]] = zext i8 [[X]] to i32
+; CHECK-NEXT:    [[Y_EXT2:%.*]] = zext i8 [[Y]] to i32
+; CHECK-NEXT:    [[MAX3:%.*]] = call i32 @llvm.umax.i32(i32 [[X_EXT2]], i32 [[Y_EXT2]])
+; CHECK-NEXT:    ret i32 [[MAX3]]
 ;
-; INSTCOMBINE-LABEL: define i16 @test(
+; INSTCOMBINE-LABEL: define i32 @test(
 ; INSTCOMBINE-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) {
 ; INSTCOMBINE-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[X]], -1
 ; INSTCOMBINE-NEXT:    call void @llvm.assume(i1 [[CMP]])
 ; INSTCOMBINE-NEXT:    [[TMP1:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], i8 [[Y]])
-; INSTCOMBINE-NEXT:    [[MAX2:%.*]] = zext i8 [[TMP1]] to i16
-; INSTCOMBINE-NEXT:    ret i16 [[MAX2]]
+; INSTCOMBINE-NEXT:    [[MAX3:%.*]] = zext i8 [[TMP1]] to i32
+; INSTCOMBINE-NEXT:    ret i32 [[MAX3]]
 ;
   %cmp = icmp sgt i8 %x, -1
   call void @llvm.assume(i1 %cmp)
   %max1 = call i8 @llvm.umax(i8 %x, i8 %y)
   %x.ext = zext i8 %x to i16
   %y.ext = zext i8 %y to i16
-  %max2 = call i16 @llvm.umax.i16(i16 %x.ext, i16 %y.ext)
-  ret i16 %max2
+  %max2 = call i16 @llvm.umax(i16 %x.ext, i16 %y.ext)
+  %x.ext2 = zext i8 %x to i32
+  %y.ext2 = zext i8 %y to i32
+  %max3 = call i32 @llvm.umax.i32(i32 %x.ext2, i32 %y.ext2)
+  ret i32 %max3
 }

``````````

</details>


https://github.com/llvm/llvm-project/pull/89172


More information about the llvm-commits mailing list