[clang] [clang] Add __builtin_start_object_lifetime builtin. (PR #82776)

Haojian Wu via cfe-commits cfe-commits at lists.llvm.org
Tue Apr 2 04:18:03 PDT 2024


https://github.com/hokein updated https://github.com/llvm/llvm-project/pull/82776

>From 7fcd58b750872221aa754e81e17ab9068e144a44 Mon Sep 17 00:00:00 2001
From: Haojian Wu <hokein.wu at gmail.com>
Date: Fri, 23 Feb 2024 10:03:16 +0100
Subject: [PATCH 1/2] [clang] Add __builtin_start_object_lifetime builtin.

This patch implements a clang builtin, `__builtin_start_object_lifetime`.
It has the same semantics as C++23's `std::start_lifetime_as`, but
without the implicit-lifetime type restriction, so it can be used for
implementing `std::start_lifetime_as` in the future.

Due to the current clang lowering, the builtin reuses the existing `__builtin_launder` implementation:
- it is a no-op for the most part;
- with the `-fstrict-vtable-pointers` flag, we update the vptr assumption correctly
  (mark the load/store of the vptr with appropriate invariant group intrinsics)
  to prevent incorrect vptr load folding;
- for now, the builtin is non-constant and cannot be executed in constant evaluation.

CAVEAT:
- this builtin may cause TBAA miscompiles without the `-fno-strict-aliasing`
  flag. These TBAA miscompiles are known issues and may need more LLVM
  IR support to fix; fixing them is orthogonal to the implementation of the
  builtin.

Context: https://discourse.llvm.org/t/extension-for-creating-objects-via-memcpy/76961
---
 clang/include/clang/Basic/Builtins.td         |  6 +++
 clang/lib/CodeGen/CGBuiltin.cpp               |  1 +
 clang/lib/Sema/SemaChecking.cpp               |  2 +
 clang/test/CodeGen/builtins.c                 | 10 ++++
 .../builtin-start-object-life-time.cpp        | 49 +++++++++++++++++++
 clang/test/SemaCXX/builtins.cpp               | 33 ++++++++++++-
 6 files changed, 100 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/CodeGenCXX/builtin-start-object-life-time.cpp

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index f421223ff087de..2c2e0eb58b15a1 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -926,6 +926,12 @@ def Launder : Builtin {
   let Prototype = "void*(void*)";
 }
 
+def StartObjectLifeTime : Builtin {
+  let Spellings = ["__builtin_start_object_lifetime"];
+  let Attributes = [NoThrow, CustomTypeChecking];
+  let Prototype = "void*(void*)";
+}
+
 def IsConstantEvaluated : LangBuiltin<"CXX_LANG"> {
   let Spellings = ["__builtin_is_constant_evaluated"];
   let Attributes = [NoThrow, Constexpr];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 483f9c26859923..6cdb602f8bb07d 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4509,6 +4509,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
 
     return RValue::get(nullptr);
   }
+  case Builtin::BI__builtin_start_object_lifetime:
   case Builtin::BI__builtin_launder: {
     const Expr *Arg = E->getArg(0);
     QualType ArgTy = Arg->getType()->getPointeeType();
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 11401b6f56c0ea..356765609f694b 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -37,6 +37,7 @@
 #include "clang/AST/TypeLoc.h"
 #include "clang/AST/UnresolvedSet.h"
 #include "clang/Basic/AddressSpaces.h"
+#include "clang/Basic/Builtins.h"
 #include "clang/Basic/CharInfo.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/IdentifierTable.h"
@@ -2642,6 +2643,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
     TheCall->setType(Context.IntTy);
     break;
   }
+  case Builtin::BI__builtin_start_object_lifetime:
   case Builtin::BI__builtin_launder:
     return SemaBuiltinLaunder(*this, TheCall);
   case Builtin::BI__sync_fetch_and_add:
diff --git a/clang/test/CodeGen/builtins.c b/clang/test/CodeGen/builtins.c
index 407e0857d22311..00c81c23d0ed02 100644
--- a/clang/test/CodeGen/builtins.c
+++ b/clang/test/CodeGen/builtins.c
@@ -143,6 +143,7 @@ int main(void) {
   P(signbit, (1.0));
 
   R(launder, (&N));
+  R(start_object_lifetime, (&N));
 
   return 0;
 }
@@ -511,6 +512,15 @@ void test_builtin_launder(int *p) {
   int *d = __builtin_launder(p);
 }
 
+/// It should be a NOP in C since there are no vtables.
+// CHECK-LABEL: define{{.*}} void @test_builtin_start_object_lifetime
+void test_builtin_start_object_lifetime(int *p) {
+  // CHECK: [[TMP:%.*]] = load ptr,
+  // CHECK-NOT: @llvm.launder
+  // CHECK: store ptr [[TMP]],
+  int *d = __builtin_start_object_lifetime(p);
+}
+
 // __warn_memset_zero_len should be NOP, see https://sourceware.org/bugzilla/show_bug.cgi?id=25399
 // CHECK-LABEL: define{{.*}} void @test___warn_memset_zero_len
 void test___warn_memset_zero_len(void) {
diff --git a/clang/test/CodeGenCXX/builtin-start-object-life-time.cpp b/clang/test/CodeGenCXX/builtin-start-object-life-time.cpp
new file mode 100644
index 00000000000000..58012f52cc0ef5
--- /dev/null
+++ b/clang/test/CodeGenCXX/builtin-start-object-life-time.cpp
@@ -0,0 +1,49 @@
+// RUN: %clang_cc1 -triple=x86_64-linux-gnu -emit-llvm -fstrict-vtable-pointers -o - %s \
+// RUN: | FileCheck --check-prefixes=CHECK,CHECK-STRICT %s
+// RUN: %clang_cc1 -triple=x86_64-linux-gnu -emit-llvm -o - %s \
+// RUN: | FileCheck --check-prefixes=CHECK,CHECK-NONSTRICT %s
+
+struct TestVirtualFn {
+  virtual void foo();
+};
+// CHECK-LABEL: define{{.*}} void @test_dynamic_class
+extern "C" void test_dynamic_class(TestVirtualFn *p) {
+  // CHECK: store ptr %p, ptr %p.addr
+  // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr %p.addr
+
+  // CHECK-NONSTRICT-NEXT: store ptr [[TMP0]], ptr %d
+
+  // CHECK-STRICT-NEXT: [[TMP2:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[TMP0]])
+  // CHECK-STRICT-NEXT: store ptr [[TMP2]], ptr %d
+
+  // CHECK-NEXT: ret void
+  TestVirtualFn *d = __builtin_start_object_lifetime(p);
+}
+
+// CHECK-LABEL: define{{.*}} void @test_scalar_pointer
+extern "C" void test_scalar_pointer(int *p) {
+  // CHECK: entry
+  // CHECK-NEXT: %p.addr = alloca ptr
+  // CHECK-NEXT: %d = alloca ptr
+  // CHECK-NEXT: store ptr %p, ptr %p.addr, align 8
+  // CHECK-NEXT: [[TMP:%.*]] = load ptr, ptr %p.addr
+  // CHECK-NEXT: store ptr [[TMP]], ptr %d
+  // CHECK-NEXT: ret void
+  int *d = __builtin_start_object_lifetime(p);
+}
+
+struct TestNoInvariant {
+  int x;
+};
+// CHECK-LABEL: define{{.*}} void @test_non_dynamic_class
+extern "C" void test_non_dynamic_class(TestNoInvariant *p) {
+  // CHECK: entry
+  // CHECK-NOT: llvm.launder.invariant.group
+  // CHECK-NEXT: %p.addr = alloca ptr, align 8
+  // CHECK-NEXT: %d = alloca ptr
+  // CHECK-NEXT: store ptr %p, ptr %p.addr
+  // CHECK-NEXT: [[TMP:%.*]] = load ptr, ptr %p.addr
+  // CHECK-NEXT: store ptr [[TMP]], ptr %d
+  // CHECK-NEXT: ret void
+  TestNoInvariant *d = __builtin_start_object_lifetime(p);
+}
diff --git a/clang/test/SemaCXX/builtins.cpp b/clang/test/SemaCXX/builtins.cpp
index 567094c94c171b..4334b5bbf63663 100644
--- a/clang/test/SemaCXX/builtins.cpp
+++ b/clang/test/SemaCXX/builtins.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -fsyntax-only -verify -std=c++11 -fcxx-exceptions
+// RUN: %clang_cc1 %s -fsyntax-only -verify -DCXX11 -std=c++11 -fcxx-exceptions
 // RUN: %clang_cc1 %s -fsyntax-only -verify -std=c++1z -fcxx-exceptions
 typedef const struct __CFString * CFStringRef;
 #define CFSTR __builtin___CFStringMakeConstantString
@@ -156,6 +156,37 @@ void test_noexcept(int *i) {
 #undef TEST_TYPE
 } // end namespace test_launder
 
+namespace test_start_object_lifetime {
+// The builtin is non-constant.
+constexpr int test_non_constexpr(int i) { // expected-error {{constexpr function never produces a constant expression}}
+  __builtin_start_object_lifetime(&i); // expected-note {{subexpression not valid in a constant expression}}
+#ifdef CXX11
+  // expected-warning at -2 {{use of this statement in a constexpr function is a C++14 extension}}
+#endif
+  return 0;
+}
+
+struct Incomplete; // expected-note {{forward declaration}}
+void test_incomplete(Incomplete *i) {
+   // Requires a complete type
+   __builtin_start_object_lifetime(i); // expected-error {{incomplete type 'Incomplete' where a complete type is required}}
+}
+
+// The builtin is type-generic.
+#define TEST_TYPE(Ptr, Type) \
+  static_assert(__is_same(decltype(__builtin_launder(Ptr)), Type), "expected same type")
+void test_type_generic() {
+  char * p;
+  int * i;
+  TEST_TYPE(p, char*);
+  TEST_TYPE(i, int*);
+}
+// The builtin is noexcept.
+void test_noexcept(int *i) {
+  static_assert(noexcept(__builtin_start_object_lifetime(i)), "");
+}
+}
+
 template<typename T> void test_builtin_complex(T v, double d) {
   (void)__builtin_complex(v, d); // expected-error {{different types}} expected-error {{not a real floating}}
   (void)__builtin_complex(d, v); // expected-error {{different types}} expected-error {{not a real floating}}

>From 68b26fe29e3bbc4e6bf23dac5c712777bff11a31 Mon Sep 17 00:00:00 2001
From: Haojian Wu <hokein.wu at gmail.com>
Date: Mon, 1 Apr 2024 22:26:17 +0200
Subject: [PATCH 2/2] - address review comments - add doc for the builtin in
 LanguageExtensions.rst - adjust the existing diagnostics for the new builtin
 - add release note

---
 clang/docs/LanguageExtensions.rst             | 48 +++++++++++++++++++
 clang/docs/ReleaseNotes.rst                   |  3 ++
 clang/include/clang/Basic/Builtins.td         |  2 +-
 .../clang/Basic/DiagnosticSemaKinds.td        |  4 +-
 clang/lib/CodeGen/CGBuiltin.cpp               |  4 ++
 clang/lib/Sema/SemaChecking.cpp               | 16 +++++--
 clang/test/SemaCXX/builtins.cpp               |  7 ++-
 7 files changed, 75 insertions(+), 9 deletions(-)

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 7b23e4d1c2f30c..b16fe25c2ccff6 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -2523,6 +2523,54 @@ implemented directly in terms of :ref:`extended vector support
 <langext-vectors>` instead of builtins, in order to reduce the number of
 builtins that we need to implement.
 
+``__builtin_start_object_lifetime``
+-----------------------------------
+
+The builtin is used to instruct the compiler to explicitly create an object
+in-place and start the object's lifetime without running any initialisation code.
+
+**Syntax**:
+
+.. code-block:: c++
+
+  T* __builtin_start_object_lifetime(T* p)
+
+
+**Example of Use**:
+
+.. code-block:: c++
+
+  struct Foo {};
+
+  // [buffer, buffer+sizeof(Foo)) is a memory region whose bytes represent a
+  // valid object representation of type Foo.
+  Foo* make_foo(char* buffer) {
+    return __builtin_start_object_lifetime(reinterpret_cast<Foo*>(buffer));
+  }
+
+**Description**:
+
+This builtin creates an object at the given memory location and starts
+the lifetime of the object without running any constructor code. It returns a
+pointer to the same memory that the parameter ``p`` points to, and the returned
+result can be legitimately used to access the object of type ``T``.
+
+It can be used to implement C++23's ``std::start_lifetime_as`` API.
+Unlike ``std::start_lifetime_as``, which only works for implicit-lifetime
+types, this builtin doesn't have this restriction; it can also be applied to
+non-implicit-lifetime types.
+
+This builtin is a no-op barrier operation taken by the compiler to address object
+value propagation analysis in an opaque manner appropriately, e.g. suppressing
+certain optimizations.
+
+This builtin cannot be called in a ``constexpr`` context.
+
+NOTE: this builtin is considered experimental at this time. It is known that it
+can cause TBAA miscompile issues when used with the ``-fstrict-aliasing`` flag
+(which is on by default). Until we fix all TBAA issues (which requires more LLVM
+IR support), we suggest using it with ``-fno-strict-aliasing``.
+
 ``__builtin_alloca``
 --------------------
 
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 76eaf0bf11c303..2e83d6ea565dd8 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -193,6 +193,9 @@ Non-comprehensive list of changes in this release
   with support for any unsigned integer type. Like the previous builtins, these
   new builtins are constexpr and may be used in constant expressions.
 
+- Added ``__builtin_start_object_lifetime`` for creating an object in-place and
+  starting its lifetime without running any initialisation code.
+
 New Compiler Flags
 ------------------
 
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 2c2e0eb58b15a1..9c9c9b85c73cb7 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -926,7 +926,7 @@ def Launder : Builtin {
   let Prototype = "void*(void*)";
 }
 
-def StartObjectLifeTime : Builtin {
+def StartObjectLifetime : Builtin {
   let Spellings = ["__builtin_start_object_lifetime"];
   let Attributes = [NoThrow, CustomTypeChecking];
   let Prototype = "void*(void*)";
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index df57f5e6ce11ba..d53b690ceabd63 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12013,9 +12013,9 @@ def warn_noderef_on_non_pointer_or_array : Warning<
 def warn_noderef_to_dereferenceable_pointer : Warning<
   "casting to dereferenceable pointer removes 'noderef' attribute">, InGroup<NoDeref>;
 
-def err_builtin_launder_invalid_arg : Error<
+def err_builtin_launder_or_start_object_lifetime_invalid_arg : Error<
   "%select{non-pointer|function pointer|void pointer}0 argument to "
-  "'__builtin_launder' is not allowed">;
+  "'%select{__builtin_launder|__builtin_start_object_lifetime}1' is not allowed">;
 
 def err_builtin_invalid_arg_type: Error <
   "%ordinal0 argument must be "
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 6cdb602f8bb07d..017ed4e094be6b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4510,10 +4510,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     return RValue::get(nullptr);
   }
   case Builtin::BI__builtin_start_object_lifetime:
+    // FIXME: we need some TBAA fences to prevent strict-aliasing miscompiles.
   case Builtin::BI__builtin_launder: {
     const Expr *Arg = E->getArg(0);
     QualType ArgTy = Arg->getType()->getPointeeType();
     Value *Ptr = EmitScalarExpr(Arg);
+    // Arguments of __builtin_launder and __builtin_start_object_lifetime may
+    // need the LLVM IR launder.invariant.group intrinsic barrier to prevent
+    // aliasing-based optimizations (e.g. -fstrict-vtable-pointers).
     if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
       Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
 
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 356765609f694b..9212089699f395 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2157,7 +2157,13 @@ static ExprResult SemaPointerAuthAuthAndResign(Sema &S, CallExpr *Call) {
   return Call;
 }
 
-static ExprResult SemaBuiltinLaunder(Sema &S, CallExpr *TheCall) {
+// Semantic check for function arguments of __builtin_launder or
+// __builtin_start_object_lifetime.
+static ExprResult SemaBuiltinLaunderOrStartObjectLifetime(Sema &S,
+                                                          CallExpr *TheCall,
+                                                          unsigned BuiltinID) {
+  assert(BuiltinID == Builtin::BI__builtin_launder ||
+         BuiltinID == Builtin::BI__builtin_start_object_lifetime);
   if (checkArgCount(S, TheCall, 1))
     return ExprError();
 
@@ -2188,8 +2194,10 @@ static ExprResult SemaBuiltinLaunder(Sema &S, CallExpr *TheCall) {
     return std::optional<unsigned>{};
   }();
   if (DiagSelect) {
-    S.Diag(TheCall->getBeginLoc(), diag::err_builtin_launder_invalid_arg)
-        << *DiagSelect << TheCall->getSourceRange();
+    S.Diag(TheCall->getBeginLoc(),
+           diag::err_builtin_launder_or_start_object_lifetime_invalid_arg)
+        << *DiagSelect << (BuiltinID == Builtin::BI__builtin_launder ? 0 : 1)
+        << TheCall->getSourceRange();
     return ExprError();
   }
 
@@ -2645,7 +2653,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
   }
   case Builtin::BI__builtin_start_object_lifetime:
   case Builtin::BI__builtin_launder:
-    return SemaBuiltinLaunder(*this, TheCall);
+    return SemaBuiltinLaunderOrStartObjectLifetime(*this, TheCall, BuiltinID);
   case Builtin::BI__sync_fetch_and_add:
   case Builtin::BI__sync_fetch_and_add_1:
   case Builtin::BI__sync_fetch_and_add_2:
diff --git a/clang/test/SemaCXX/builtins.cpp b/clang/test/SemaCXX/builtins.cpp
index 4334b5bbf63663..0c2d70b8cbf6eb 100644
--- a/clang/test/SemaCXX/builtins.cpp
+++ b/clang/test/SemaCXX/builtins.cpp
@@ -167,14 +167,17 @@ constexpr int test_non_constexpr(int i) { // expected-error {{constexpr function
 }
 
 struct Incomplete; // expected-note {{forward declaration}}
-void test_incomplete(Incomplete *i) {
+void test_diag(Incomplete *i) {
    // Requires a complete type
    __builtin_start_object_lifetime(i); // expected-error {{incomplete type 'Incomplete' where a complete type is required}}
+
+  int x;
+   __builtin_start_object_lifetime(x); // expected-error {{non-pointer argument to '__builtin_start_object_lifetime' is not allowed}}
 }
 
 // The builtin is type-generic.
 #define TEST_TYPE(Ptr, Type) \
-  static_assert(__is_same(decltype(__builtin_launder(Ptr)), Type), "expected same type")
+  static_assert(__is_same(decltype(__builtin_start_object_lifetime(Ptr)), Type), "expected same type")
 void test_type_generic() {
   char * p;
   int * i;



More information about the cfe-commits mailing list