[clang] 0f0e31c - Update inline builtin handling to honor gnu inline attribute

via cfe-commits cfe-commits at lists.llvm.org
Mon Oct 4 13:26:39 PDT 2021


Author: serge-sans-paille
Date: 2021-10-04T22:26:25+02:00
New Revision: 0f0e31cf511def3e92244e615b2646c1fd0df0cd

URL: https://github.com/llvm/llvm-project/commit/0f0e31cf511def3e92244e615b2646c1fd0df0cd
DIFF: https://github.com/llvm/llvm-project/commit/0f0e31cf511def3e92244e615b2646c1fd0df0cd.diff

LOG: Update inline builtin handling to honor gnu inline attribute

Per the GCC info page:

    If the function is declared 'extern', then this definition of the
    function is used only for inlining.  In no case is the function
    compiled as a standalone function, not even if you take its address
    explicitly.  Such an address becomes an external reference, as if
    you had only declared the function, and had not defined it.

Respect that behavior for inline builtins: keep the original definition, and
generate a copy of the declaration suffixed by '.inline' that's only referenced
in direct call.

This fixes holes in c3717b6858d32d64514a187ede1a77be8ba4e542.

Differential Revision: https://reviews.llvm.org/D111009

Added: 
    clang/test/CodeGen/memcmp-inline-builtin-to-asm.c
    clang/test/CodeGen/memcpy-inline-builtin-no-extern.c

Modified: 
    clang/lib/AST/Decl.cpp
    clang/lib/CodeGen/CGExpr.cpp
    clang/lib/CodeGen/CodeGenFunction.cpp
    clang/test/CodeGen/memcpy-inline-builtin.c
    clang/test/CodeGen/memcpy-nobuiltin.c
    clang/test/CodeGen/memcpy-nobuiltin.inc
    clang/test/CodeGen/pr9614.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index 60ca8633224b..6788f2132fbc 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -3177,7 +3177,8 @@ bool FunctionDecl::isInlineBuiltinDeclaration() const {
 
   const FunctionDecl *Definition;
   return hasBody(Definition) && Definition->isInlineSpecified() &&
-         Definition->hasAttr<AlwaysInlineAttr>();
+         Definition->hasAttr<AlwaysInlineAttr>() &&
+         Definition->hasAttr<GNUInlineAttr>();
 }
 
 bool FunctionDecl::isDestroyingOperatorDelete() const {

diff  --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index a62a4d60830d..6dd511acf105 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -4891,12 +4891,28 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) {
   const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
 
   if (auto builtinID = FD->getBuiltinID()) {
-    // Replaceable builtin provide their own implementation of a builtin. Unless
-    // we are in the builtin implementation itself, don't call the actual
-    // builtin. If we are in the builtin implementation, avoid trivial infinite
+    std::string FDInlineName = (FD->getName() + ".inline").str();
+    // When directing calling an inline builtin, call it through it's mangled
+    // name to make it clear it's not the actual builtin.
+    if (FD->isInlineBuiltinDeclaration() &&
+        CGF.CurFn->getName() != FDInlineName) {
+      llvm::Constant *CalleePtr = EmitFunctionDeclPointer(CGF.CGM, GD);
+      llvm::Function *Fn = llvm::cast<llvm::Function>(CalleePtr);
+      llvm::Module *M = Fn->getParent();
+      llvm::Function *Clone = M->getFunction(FDInlineName);
+      if (!Clone) {
+        Clone = llvm::Function::Create(Fn->getFunctionType(),
+                                       llvm::GlobalValue::InternalLinkage,
+                                       Fn->getAddressSpace(), FDInlineName, M);
+        Clone->addFnAttr(llvm::Attribute::AlwaysInline);
+      }
+      return CGCallee::forDirect(Clone, GD);
+    }
+
+    // Replaceable builtins provide their own implementation of a builtin. If we
+    // are in an inline builtin implementation, avoid trivial infinite
     // recursion.
-    if (!FD->isInlineBuiltinDeclaration() ||
-        CGF.CurFn->getName() == FD->getName())
+    else
       return CGCallee::forBuiltin(builtinID, FD);
   }
 
@@ -4905,6 +4921,7 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) {
       FD->hasAttr<CUDAGlobalAttr>())
     CalleePtr = CGF.CGM.getCUDARuntime().getKernelStub(
         cast<llvm::GlobalValue>(CalleePtr->stripPointerCasts()));
+
   return CGCallee::forDirect(CalleePtr, GD);
 }
 

diff  --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index da9a5303daad..13bc366b6c2f 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -45,6 +45,7 @@
 #include "llvm/Support/CRC.h"
 #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
+
 using namespace clang;
 using namespace CodeGen;
 
@@ -1294,10 +1295,22 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
   FunctionArgList Args;
   QualType ResTy = BuildFunctionArgList(GD, Args);
 
-  // Give a 
diff erent name to inline builtin to avoid conflict with actual
-  // builtins.
-  if (FD->isInlineBuiltinDeclaration() && Fn)
-    Fn->setName(Fn->getName() + ".inline");
+  // When generating code for a builtin with an inline declaration, use a
+  // mangled name to hold the actual body, while keeping an external definition
+  // in case the function pointer is referenced somewhere.
+  if (FD->isInlineBuiltinDeclaration() && Fn) {
+    std::string FDInlineName = (Fn->getName() + ".inline").str();
+    llvm::Module *M = Fn->getParent();
+    llvm::Function *Clone = M->getFunction(FDInlineName);
+    if (!Clone) {
+      Clone = llvm::Function::Create(Fn->getFunctionType(),
+                                     llvm::GlobalValue::InternalLinkage,
+                                     Fn->getAddressSpace(), FDInlineName, M);
+      Clone->addFnAttr(llvm::Attribute::AlwaysInline);
+    }
+    Fn->setLinkage(llvm::GlobalValue::ExternalLinkage);
+    Fn = Clone;
+  }
 
   // Check if we should generate debug info for this function.
   if (FD->hasAttr<NoDebugAttr>()) {

diff  --git a/clang/test/CodeGen/memcmp-inline-builtin-to-asm.c b/clang/test/CodeGen/memcmp-inline-builtin-to-asm.c
new file mode 100644
index 000000000000..7b90f03d45f2
--- /dev/null
+++ b/clang/test/CodeGen/memcmp-inline-builtin-to-asm.c
@@ -0,0 +1,36 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// RUN: %clang_cc1 -triple x86_64 -S -emit-llvm -o - %s | opt -S -verify | FileCheck %s
+//
+// Verifies that clang detects memcmp inline version and uses it instead of the builtin.
+
+typedef unsigned long size_t;
+
+// Clang requires these attributes for a function to be redefined.
+#define AVAILABLE_EXTERNALLY extern inline __attribute__((always_inline)) __attribute__((gnu_inline))
+
+const void *con_unify_unimap_p1;
+
+AVAILABLE_EXTERNALLY
+int memcmp(const void *p, const void *q, unsigned long size) {
+  return __builtin_memcmp(p, q, size);
+}
+
+// CHECK-LABEL: @con_unify_unimap_q1(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[P_ADDR_I:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[Q_ADDR_I:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[SIZE_ADDR_I:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** @con_unify_unimap_p1, align 8
+// CHECK-NEXT:    store i8* [[TMP0]], i8** [[P_ADDR_I]], align 8
+// CHECK-NEXT:    store i8* bitcast (i32 ()* @con_unify_unimap_q1 to i8*), i8** [[Q_ADDR_I]], align 8
+// CHECK-NEXT:    store i64 4, i64* [[SIZE_ADDR_I]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[P_ADDR_I]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[Q_ADDR_I]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[SIZE_ADDR_I]], align 8
+// CHECK-NEXT:    [[CALL_I:%.*]] = call i32 @memcmp(i8* [[TMP1]], i8* [[TMP2]], i64 [[TMP3]]) #[[ATTR3:[0-9]+]]
+// CHECK-NEXT:    ret i32 [[CALL_I]]
+//
+int con_unify_unimap_q1(void) {
+  return memcmp(con_unify_unimap_p1, con_unify_unimap_q1, sizeof(int));
+}

diff  --git a/clang/test/CodeGen/memcpy-inline-builtin-no-extern.c b/clang/test/CodeGen/memcpy-inline-builtin-no-extern.c
new file mode 100644
index 000000000000..bb46e48f847b
--- /dev/null
+++ b/clang/test/CodeGen/memcpy-inline-builtin-no-extern.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple x86_64 -S -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+//
+// Verifies that clang-generated *.inline are flagged as internal.
+
+typedef unsigned long size_t;
+
+#define AVAILABLE_EXTERNALLY extern inline __attribute__((always_inline)) __attribute__((gnu_inline))
+// Clang recognizes an inline builtin and renames it to memcmp.inline to prevent conflict with builtins.
+AVAILABLE_EXTERNALLY int memcmp(const void *a, const void *b, size_t c) {
+  return __builtin_memcmp(a, b, c);
+}
+
+// CHECK: internal{{.*}}memcmp.inline
+int bar(const void *a, const void *b, size_t c) {
+  return memcmp(a, b, c);
+}
+
+// Note that extern has been omitted here.
+#define TRIPLE_INLINE inline __attribute__((always_inline)) __attribute__((gnu_inline))
+
+// Clang recognizes an inline builtin and renames it to memcpy.inline to prevent conflict with builtins.
+TRIPLE_INLINE void *memcpy(void *a, const void *b, size_t c) {
+  return __builtin_memcpy(a, b, c);
+}
+
+// CHECK: internal{{.*}}memcpy.inline
+void *foo(void *a, const void *b, size_t c) {
+  return memcpy(a, b, c);
+}

diff  --git a/clang/test/CodeGen/memcpy-inline-builtin.c b/clang/test/CodeGen/memcpy-inline-builtin.c
index 814ce220195f..e63e77c8ba45 100644
--- a/clang/test/CodeGen/memcpy-inline-builtin.c
+++ b/clang/test/CodeGen/memcpy-inline-builtin.c
@@ -32,13 +32,39 @@ AVAILABLE_EXTERNALLY void *memcpy(void *a, const void *b, size_t c) {
 // CHECK-NEXT:    store i8* [[TMP0]], i8** [[A_ADDR_I]], align 8
 // CHECK-NEXT:    store i8* [[TMP1]], i8** [[B_ADDR_I]], align 8
 // CHECK-NEXT:    store i64 [[TMP2]], i64* [[C_ADDR_I]], align 8
-// CHECK-NEXT:    call void asm sideeffect "# memcpy.inline marker", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR2:[0-9]+]], !srcloc !2
+// CHECK-NEXT:    call void asm sideeffect "# memcpy.inline marker", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR4:[0-9]+]], !srcloc !2
 // CHECK-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[A_ADDR_I]], align 8
 // CHECK-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[B_ADDR_I]], align 8
 // CHECK-NEXT:    [[TMP5:%.*]] = load i64, i64* [[C_ADDR_I]], align 8
-// CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[TMP3]], i8* align 1 [[TMP4]], i64 [[TMP5]], i1 false) #[[ATTR2]]
+// CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[TMP3]], i8* align 1 [[TMP4]], i64 [[TMP5]], i1 false) #[[ATTR4]]
+// CHECK-NEXT:    ret i8* [[TMP3]]
+//
+void *foo(void *a, const void *b, size_t c) {
+  return memcpy(a, b, c);
+}
+
+// CHECK-LABEL: @bar(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[CPY:%.*]] = alloca i8* (i8*, i8*, i64)*, align 8
+// CHECK-NEXT:    store i8* [[A:%.*]], i8** [[A_ADDR]], align 8
+// CHECK-NEXT:    store i8* [[B:%.*]], i8** [[B_ADDR]], align 8
+// CHECK-NEXT:    store i64 [[C:%.*]], i64* [[C_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* [[C_ADDR]], align 8
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[TMP0]], 10
+// CHECK-NEXT:    [[TMP1:%.*]] = zext i1 [[CMP]] to i64
+// CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i8* (i8*, i8*, i64)* @memcpy, i8* (i8*, i8*, i64)* @foo
+// CHECK-NEXT:    store i8* (i8*, i8*, i64)* [[COND]], i8* (i8*, i8*, i64)** [[CPY]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load i8* (i8*, i8*, i64)*, i8* (i8*, i8*, i64)** [[CPY]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[A_ADDR]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = load i64, i64* [[C_ADDR]], align 8
+// CHECK-NEXT:    [[CALL:%.*]] = call i8* [[TMP2]](i8* [[TMP3]], i8* [[TMP4]], i64 [[TMP5]])
 // CHECK-NEXT:    ret void
 //
-void foo(void *a, const void *b, size_t c) {
-  memcpy(a, b, c);
+void bar(void *a, const void *b, size_t c) {
+  void *(*cpy)(void *, const void *, size_t) = c > 10 ? memcpy : foo;
+  cpy(a, b, c);
 }

diff  --git a/clang/test/CodeGen/memcpy-nobuiltin.c b/clang/test/CodeGen/memcpy-nobuiltin.c
index d752df8ba0eb..fe93382393ab 100644
--- a/clang/test/CodeGen/memcpy-nobuiltin.c
+++ b/clang/test/CodeGen/memcpy-nobuiltin.c
@@ -1,11 +1,11 @@
 // RUN: %clang_cc1 -verify -S -emit-llvm -o- %s -isystem %S -DWITH_DECL | FileCheck --check-prefix=CHECK-WITH-DECL %s
 // RUN: %clang_cc1 -verify -S -emit-llvm -o- %s -isystem %S -UWITH_DECL | FileCheck --check-prefix=CHECK-NO-DECL %s
-// RUN: %clang_cc1 -verify -S -emit-llvm -o- %s -isystem %S -DWITH_SELF_REFERENCE_DECL | FileCheck --check-prefix=CHECK-SELF-REF-DECL %s
+// RUN: %clang_cc1 -verify -S -emit-llvm -o- %s -isystem %S -disable-llvm-passes -DWITH_SELF_REFERENCE_DECL | FileCheck --check-prefix=CHECK-SELF-REF-DECL %s
 //
 // CHECK-WITH-DECL-NOT: @llvm.memcpy
 // CHECK-NO-DECL: @llvm.memcpy
 // CHECK-SELF-REF-DECL-LABEL: define {{.*}}i8* @memcpy.inline
-// CHECK-SELF-REF-DECL:       @memcpy(
+// CHECK-SELF-REF-DECL:       @llvm.memcpy.{{.*}}(
 //
 #include <memcpy-nobuiltin.inc>
 void test(void *dest, void const *from, size_t n) {

diff  --git a/clang/test/CodeGen/memcpy-nobuiltin.inc b/clang/test/CodeGen/memcpy-nobuiltin.inc
index d1d034c12899..06ecb108d93c 100644
--- a/clang/test/CodeGen/memcpy-nobuiltin.inc
+++ b/clang/test/CodeGen/memcpy-nobuiltin.inc
@@ -2,7 +2,7 @@
 extern void *memcpy(void *dest, void const *from, size_t n);
 
 #ifdef WITH_DECL
-inline __attribute__((always_inline)) void *memcpy(void *dest, void const *from, size_t n) {
+inline __attribute__((always_inline)) __attribute__((gnu_inline)) void *memcpy(void *dest, void const *from, size_t n) {
   char const *ifrom = from;
   char *idest = dest;
   while (n--)
@@ -11,7 +11,7 @@ inline __attribute__((always_inline)) void *memcpy(void *dest, void const *from,
 }
 #endif
 #ifdef WITH_SELF_REFERENCE_DECL
-inline __attribute__((always_inline)) void *memcpy(void *dest, void const *from, size_t n) {
+inline __attribute__((always_inline)) __attribute__((gnu_inline)) void *memcpy(void *dest, void const *from, size_t n) {
   if (n != 0)
     memcpy(dest, from, n);
   return dest;

diff  --git a/clang/test/CodeGen/pr9614.c b/clang/test/CodeGen/pr9614.c
index 3674ba844aa1..c153283a8377 100644
--- a/clang/test/CodeGen/pr9614.c
+++ b/clang/test/CodeGen/pr9614.c
@@ -32,14 +32,14 @@ void f(void) {
 
 // CHECK-LABEL: define{{.*}} void @f()
 // CHECK: call void @foo()
-// CHECK: call i32 @abs(i32 %0)
+// CHECK: call i32 @abs(i32 0)
 // CHECK: call i8* @strrchr(
 // CHECK: call void @llvm.prefetch.p0i8(
 // CHECK: call i8* @memchr(
 // CHECK: ret void
 
 // CHECK: declare void @foo()
+// CHECK: declare i32 @abs(i32
 // CHECK: declare i8* @strrchr(i8*, i32)
 // CHECK: declare i8* @memchr(
-// CHECK: declare i32 @abs(i32
 // CHECK: declare void @llvm.prefetch.p0i8(


        


More information about the cfe-commits mailing list