[clang] [Clang] Added lifetime markers for indereacly passed Aggregate types (PR #90849)

Thu May 2 05:31:14 PDT 2024

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: None (Lukacma)

<details>
<summary>Changes</summary>

Adding lifetime markers to pointers to temporary stack allocated objects, allows compiler to make more efficient use of the stack. Currently these markers are not added to indirectly called aggregate types so this patch adds them. Motivating example showing how stack space is reused when lifetime markers are added:

No lifetime markers:

https://gcc.godbolt.org/z/7nzj96qbW

Lifetime markers:

https://gcc.godbolt.org/z/K8GqYPPov


---

Patch is 23.42 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/90849.diff


12 Files Affected:

- (modified) clang/include/clang/Basic/TargetInfo.h (+10) 
- (modified) clang/lib/AST/ASTContext.cpp (+2-3) 
- (modified) clang/lib/Basic/Targets/AArch64.cpp (+6-1) 
- (modified) clang/lib/Basic/Targets/AArch64.h (+2) 
- (modified) clang/lib/CodeGen/CGCall.cpp (+10) 
- (modified) clang/lib/CodeGen/Targets/AArch64.cpp (+1-1) 
- (modified) clang/test/CodeGen/aapcs64-align.cpp (+63-1) 
- (added) clang/test/CodeGen/aarch64-bitint-argpass.c (+14) 
- (modified) clang/test/CodeGen/aarch64-byval-temp.c (+43-3) 
- (modified) clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c (+2) 
- (modified) clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp (+2) 
- (modified) clang/test/CodeGen/nofpclass.c (+13-9) 


``````````diff

diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 3ced2e7397a754..488b166a95af7d 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -518,6 +518,16 @@ class TargetInfo : public TransferrableTargetInfo,
   /// getInt128Align() - Returns the alignment of Int128.
   unsigned getInt128Align() const { return Int128Align; }
 
+  /// getBitIntAlign/Width - Return aligned size of '_BitInt' and
+  /// 'unsigned _BitInt' for this target, in bits.
+  unsigned getBitIntWidth(unsigned NumBits) const {
+    return llvm::alignTo(NumBits, getBitIntAlign(NumBits));
+  }
+  virtual unsigned getBitIntAlign(unsigned NumBits) const {
+    return std::clamp<unsigned>(llvm::PowerOf2Ceil(NumBits), getCharWidth(),
+                                getLongLongAlign());
+  }
+
   /// getShortAccumWidth/Align - Return the size of 'signed short _Accum' and
   /// 'unsigned short _Accum' for this target, in bits.
   unsigned getShortAccumWidth() const { return ShortAccumWidth; }
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index cbf4932aff9a6b..f440af50e08a49 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -2263,9 +2263,8 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
   }
   case Type::BitInt: {
     const auto *EIT = cast<BitIntType>(T);
-    Align = std::clamp<unsigned>(llvm::PowerOf2Ceil(EIT->getNumBits()),
-                                 getCharWidth(), Target->getLongLongAlign());
-    Width = llvm::alignTo(EIT->getNumBits(), Align);
+    Align = Target->getBitIntAlign(EIT->getNumBits());
+    Width = Target->getBitIntWidth(EIT->getNumBits());
     break;
   }
   case Type::Record:
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index c8d243a8fb7aea..cc0ae0e2856fd0 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -1473,6 +1473,11 @@ bool AArch64TargetInfo::validatePointerAuthKey(
 
 bool AArch64TargetInfo::hasInt128Type() const { return true; }
 
+unsigned AArch64TargetInfo::getBitIntAlign(unsigned NumBits) const {
+  return std::clamp<unsigned>(llvm::PowerOf2Ceil(NumBits), getCharWidth(),
+                              getInt128Align());
+}
+
 AArch64leTargetInfo::AArch64leTargetInfo(const llvm::Triple &Triple,
                                          const TargetOptions &Opts)
     : AArch64TargetInfo(Triple, Opts) {}
@@ -1674,4 +1679,4 @@ void RenderScript64TargetInfo::getTargetDefines(const LangOptions &Opts,
                                                 MacroBuilder &Builder) const {
   Builder.defineMacro("__RENDERSCRIPT__");
   AArch64leTargetInfo::getTargetDefines(Opts, Builder);
-}
+}
\ No newline at end of file
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index 12fb50286f7511..be6435007a6009 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -202,6 +202,8 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
   bool hasBitIntType() const override { return true; }
 
   bool validateTarget(DiagnosticsEngine &Diags) const override;
+
+  unsigned getBitIntAlign(unsigned NumBits) const override;
 };
 
 class LLVM_LIBRARY_VISIBILITY AArch64leTargetInfo : public AArch64TargetInfo {
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 69548902dc43b9..784232ed8d0714 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5198,6 +5198,16 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
           Val = Builder.CreateFreeze(Val);
         IRCallArgs[FirstIRArg] = Val;
 
+        // Emit lifetime markers for the temporary alloca.
+        llvm::TypeSize ByvalTempElementSize =
+            CGM.getDataLayout().getTypeAllocSize(Addr.getElementType());
+        llvm::Value *LifetimeSize =
+            EmitLifetimeStart(ByvalTempElementSize, Addr.getPointer());
+
+        // Add cleanup code to emit the end lifetime marker after the call.
+        if (LifetimeSize) // In case we disabled lifetime markers.
+          CallLifetimeEndAfterCall.emplace_back(Addr, LifetimeSize);
+
         I->copyInto(*this, Addr);
       } else {
         // We want to avoid creating an unnecessary temporary+copy here;
diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index 4c32f510101f04..7daf416b624fa6 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -302,7 +302,7 @@ AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic,
 
     if (const auto *EIT = Ty->getAs<BitIntType>())
       if (EIT->getNumBits() > 128)
-        return getNaturalAlignIndirect(Ty);
+        return getNaturalAlignIndirect(Ty, false);
 
     return (isPromotableIntegerTypeForABI(Ty) && isDarwinPCS()
                 ? ABIArgInfo::getExtend(Ty)
diff --git a/clang/test/CodeGen/aapcs64-align.cpp b/clang/test/CodeGen/aapcs64-align.cpp
index de231f2123b975..7a8151022852ea 100644
--- a/clang/test/CodeGen/aapcs64-align.cpp
+++ b/clang/test/CodeGen/aapcs64-align.cpp
@@ -1,7 +1,7 @@
 // REQUIRES: arm-registered-target
 // RUN: %clang_cc1 -triple aarch64-none-elf \
 // RUN:   -O2 \
-// RUN:   -emit-llvm -o - %s | FileCheck %s
+// RUN:   -emit-llvm -fexperimental-max-bitint-width=1024 -o - %s | FileCheck %s
 
 extern "C" {
 
@@ -100,4 +100,66 @@ void f5m(int, int, int, int, int, P16);
 // CHECK: declare void @f5(i32 noundef, [2 x i64])
 // CHECK: declare void @f5m(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, [2 x i64])
 
+//BitInt alignment
+struct BITINT129 {
+    char ch;
+    unsigned _BitInt(129) v;
+};
+
+int test_bitint129(){
+  return __builtin_offsetof(struct BITINT129, v);
 }
+// CHECK:  ret i32 16 
+
+struct BITINT127 {
+    char ch;
+    _BitInt(127) v;
+};
+
+int test_bitint127(){
+  return __builtin_offsetof(struct BITINT127, v);
+}
+// CHECK:  ret i32 16 
+
+struct BITINT63 {
+    char ch;
+    _BitInt(63) v;
+};
+
+int test_bitint63(){
+  return __builtin_offsetof(struct BITINT63, v);
+}
+// CHECK:  ret i32 8 
+
+struct BITINT32 {
+    char ch;
+    unsigned _BitInt(32) v;
+};
+
+int test_bitint32(){
+  return __builtin_offsetof(struct BITINT32, v);
+}
+// CHECK:  ret i32 4
+
+struct BITINT9 {
+    char ch;
+    unsigned _BitInt(9) v;
+};
+
+int test_bitint9(){
+  return __builtin_offsetof(struct BITINT9, v);
+}
+// CHECK:  ret i32 2
+
+struct BITINT8 {
+    char ch;
+    unsigned _BitInt(8) v;
+};
+
+int test_bitint8(){
+  return __builtin_offsetof(struct BITINT8, v);
+}
+// CHECK:  ret i32 1
+
+}
+
diff --git a/clang/test/CodeGen/aarch64-bitint-argpass.c b/clang/test/CodeGen/aarch64-bitint-argpass.c
new file mode 100644
index 00000000000000..c7333bac75c1ac
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-bitint-argpass.c
@@ -0,0 +1,14 @@
+// REQUIRES: arm-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-elf \
+// RUN:   -O2 \
+// RUN:   -emit-llvm -fexperimental-max-bitint-width=1024 -o - %s | FileCheck %s
+
+_BitInt(129) v = -1;
+int h(_BitInt(129));
+
+// CHECK: declare i32 @h(ptr noundef)
+int largerthan128() {
+   return h(v);
+}
+
+
diff --git a/clang/test/CodeGen/aarch64-byval-temp.c b/clang/test/CodeGen/aarch64-byval-temp.c
index e9e2586406e5c1..765a298f83ed0e 100644
--- a/clang/test/CodeGen/aarch64-byval-temp.c
+++ b/clang/test/CodeGen/aarch64-byval-temp.c
@@ -1,13 +1,15 @@
-// RUN: %clang_cc1 -emit-llvm -triple arm64-- -o - %s -O0 | FileCheck %s --check-prefix=CHECK-O0
-// RUN: %clang_cc1 -emit-llvm -disable-llvm-optzns -triple arm64-- -o - %s -O3 | FileCheck %s --check-prefix=CHECK-O3
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// RUN: %clang_cc1 -emit-llvm -triple arm64-- -fexperimental-max-bitint-width=1024  -o - %s -O0 | FileCheck %s --check-prefix=CHECK-O0
+// RUN: %clang_cc1 -emit-llvm -disable-llvm-optzns -fexperimental-max-bitint-width=1024  -triple arm64-- -o - %s -O3 | FileCheck %s --check-prefix=CHECK-O3
 
 struct large {
     void* pointers[8];
 };
 
 void pass_large(struct large);
+void pass_large_BitInt(_BitInt(129));
 
-// For arm64, we don't use byval to pass structs but instead we create
+// For arm64, we don't use byval to pass structs and _BitInt(>128) type, but instead we create
 // temporary allocas.
 //
 // Make sure we generate the appropriate lifetime markers for the temporary
@@ -71,3 +73,41 @@ void example(void) {
 // Mark the end of the lifetime of `l`.
 // CHECK-O3-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr %l)
 // CHECK-O3-NEXT: ret void
+
+void example_BitInt(void) {
+    _BitInt(129) l = {0};
+    pass_large_BitInt(l);
+    pass_large_BitInt(l);
+}
+// CHECK-O0-LABEL: define dso_local void @example_BitInt(
+// CHECK-O0-NEXT:  entry:
+// CHECK-O0-NEXT:    [[L:%.*]] = alloca i129, align 16
+// CHECK-O0-NEXT:    [[INDIRECT_ARG_TEMP:%.*]] = alloca i129, align 16
+// CHECK-O0-NEXT:    [[INDIRECT_ARG_TEMP1:%.*]] = alloca i129, align 16
+// CHECK-O0-NEXT:    store i129 0, ptr [[L]], align 16
+// CHECK-O0-NEXT:    [[TMP0:%.*]] = load i129, ptr [[L]], align 16
+// CHECK-O0-NEXT:    store i129 [[TMP0]], ptr [[INDIRECT_ARG_TEMP]], align 16
+// CHECK-O0-NEXT:    call void @pass_large_BitInt(ptr noundef [[INDIRECT_ARG_TEMP]])
+// CHECK-O0-NEXT:    [[TMP1:%.*]] = load i129, ptr [[L]], align 16
+// CHECK-O0-NEXT:    store i129 [[TMP1]], ptr [[INDIRECT_ARG_TEMP1]], align 16
+// CHECK-O0-NEXT:    call void @pass_large_BitInt(ptr noundef [[INDIRECT_ARG_TEMP1]])
+// CHECK-O0-NEXT:    ret void
+//
+// CHECK-O3-LABEL: define dso_local void @example_BitInt(
+// CHECK-O3-NEXT:  entry:
+// CHECK-O3-NEXT:    [[L:%.*]] = alloca i129, align 16
+// CHECK-O3-NEXT:    [[INDIRECT_ARG_TEMP:%.*]] = alloca i129, align 16
+// CHECK-O3-NEXT:    [[INDIRECT_ARG_TEMP1:%.*]] = alloca i129, align 16
+// CHECK-O3-NEXT:    call void @llvm.lifetime.start.p0(i64 32, ptr [[L]]) 
+// CHECK-O3-NEXT:    store i129 0, ptr [[L]], align 16, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-O3-NEXT:    [[TMP0:%.*]] = load i129, ptr [[L]], align 16, !tbaa [[TBAA6]]
+// CHECK-O3-NEXT:    call void @llvm.lifetime.start.p0(i64 32, ptr [[INDIRECT_ARG_TEMP]]) 
+// CHECK-O3-NEXT:    store i129 [[TMP0]], ptr [[INDIRECT_ARG_TEMP]], align 16, !tbaa [[TBAA6]]
+// CHECK-O3-NEXT:    call void @pass_large_BitInt(ptr noundef [[INDIRECT_ARG_TEMP]])
+// CHECK-O3-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr [[INDIRECT_ARG_TEMP]]) 
+// CHECK-O3-NEXT:    [[TMP1:%.*]] = load i129, ptr [[L]], align 16, !tbaa [[TBAA6]]
+// CHECK-O3-NEXT:    call void @llvm.lifetime.start.p0(i64 32, ptr [[INDIRECT_ARG_TEMP1]]) 
+// CHECK-O3-NEXT:    store i129 [[TMP1]], ptr [[INDIRECT_ARG_TEMP1]], align 16, !tbaa [[TBAA6]]
+// CHECK-O3-NEXT:    call void @pass_large_BitInt(ptr noundef [[INDIRECT_ARG_TEMP1]])
+// CHECK-O3-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr [[INDIRECT_ARG_TEMP1]]) 
+// CHECK-O3-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr [[L]]) 
diff --git a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
index 1e6a4500cc886c..b06e2e72053422 100644
--- a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
+++ b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
@@ -88,8 +88,10 @@ typedef svint8_t vec2 __attribute__((arm_sve_vector_bits(N)));
 // CHECK-NEXT: entry:
 // CHECK-NEXT:   [[INDIRECT_ARG_TEMP:%.*]] = alloca <[[#div(VBITS,8)]] x i8>, align 16
 // CHECK-NEXT:   [[X:%.*]] = tail call <[[#div(VBITS,8)]] x i8> @llvm.vector.extract.v[[#div(VBITS,8)]]i8.nxv16i8(<vscale x 16 x i8> [[X_COERCE:%.*]], i64 0)
+// CHECK-NEXT:   call void @llvm.lifetime.start.p0(i64 [[SIZE:[0-9]+]], ptr nonnull [[INDIRECT_ARG_TEMP]]) #[[ATTR6:[0-9]+]]
 // CHECK-NEXT:   store <[[#div(VBITS,8)]] x i8> [[X]], ptr [[INDIRECT_ARG_TEMP]], align 16, [[TBAA6]]
 // CHECK-NEXT:   call void @f3(ptr noundef nonnull [[INDIRECT_ARG_TEMP]]) [[ATTR5:#.*]]
+// CHECK-NEXT:   call void @llvm.lifetime.end.p0(i64 [[SIZE]], ptr nonnull [[INDIRECT_ARG_TEMP]]) #[[ATTR6:[0-9]+]]
 // CHECK-NEXT:   ret void
 
 // CHECK128-LABEL: declare void @f3(<16 x i8> noundef)
diff --git a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
index 8c41fb956145f0..ab53de78ee281b 100644
--- a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
+++ b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
@@ -73,8 +73,10 @@ typedef svint16_t vec2 __attribute__((arm_sve_vector_bits(N)));
 // CHECK128-NEXT:   ret void
 // CHECKWIDE-NEXT:   [[INDIRECT_ARG_TEMP:%.*]] = alloca <[[#div(VBITS, 16)]] x i16>, align 16
 // CHECKWIDE-NEXT:   [[X:%.*]] = tail call <[[#div(VBITS, 16)]] x i16> @llvm.vector.extract.v[[#div(VBITS, 16)]]i16.nxv8i16(<vscale x 8 x i16> [[X_COERCE:%.*]], i64 0)
+// CHECKWIDE-NEXT:   call void @llvm.lifetime.start.p0(i64 [[SIZE:[0-9]+]], ptr nonnull [[INDIRECT_ARG_TEMP]]) #[[ATTR6:[0-9]+]]
 // CHECKWIDE-NEXT:   store <[[#div(VBITS, 16)]] x i16> [[X]], ptr [[INDIRECT_ARG_TEMP]], align 16, [[TBAA6:!tbaa !.*]]
 // CHECKWIDE-NEXT:   call void @_Z1fDv[[#div(VBITS, 16)]]_s(ptr noundef nonnull [[INDIRECT_ARG_TEMP]]) [[ATTR5:#.*]]
+// CHECKWIDE-NEXT:   call void @llvm.lifetime.end.p0(i64 [[SIZE]], ptr nonnull [[INDIRECT_ARG_TEMP]]) #[[ATTR6:[0-9]+]]
 // CHECKWIDE-NEXT:   ret void
 void g(vec2 x) { f(x); } // OK
 #endif
diff --git a/clang/test/CodeGen/nofpclass.c b/clang/test/CodeGen/nofpclass.c
index dd90d02f7759b6..fc4c64f9b921ba 100644
--- a/clang/test/CodeGen/nofpclass.c
+++ b/clang/test/CodeGen/nofpclass.c
@@ -172,7 +172,7 @@ double2 defined_func_v2f64(double2 a, double2 b, double2 c) {
 // CLFINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @call_extern_func
 // CLFINITEONLY-SAME: (float noundef nofpclass(nan inf) [[A:%.*]], double noundef nofpclass(nan inf) [[B:%.*]], half noundef nofpclass(nan inf) [[C:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
 // CLFINITEONLY-NEXT:  entry:
-// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) float @extern_func(float noundef nofpclass(nan inf) [[A]], double noundef nofpclass(nan inf) [[B]], half noundef nofpclass(nan inf) [[C]]) #[[ATTR10:[0-9]+]]
+// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) float @extern_func(float noundef nofpclass(nan inf) [[A]], double noundef nofpclass(nan inf) [[B]], half noundef nofpclass(nan inf) [[C]]) #[[ATTR11:[0-9]+]]
 // CLFINITEONLY-NEXT:    ret float [[CALL]]
 //
 // NONANS: Function Attrs: noinline nounwind optnone
@@ -249,7 +249,7 @@ float call_extern_func(float a, double b, _Float16 c) {
 // CLFINITEONLY-LABEL: define dso_local nofpclass(nan inf) double @call_extern_func_vec
 // CLFINITEONLY-SAME: (double noundef nofpclass(nan inf) [[A_COERCE:%.*]], <2 x double> noundef nofpclass(nan inf) [[B:%.*]], i32 noundef [[C_COERCE:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] {
 // CLFINITEONLY-NEXT:  entry:
-// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) double @extern_func_vec(double noundef nofpclass(nan inf) [[A_COERCE]], <2 x double> noundef nofpclass(nan inf) [[B]], i32 noundef [[C_COERCE]]) #[[ATTR10]]
+// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) double @extern_func_vec(double noundef nofpclass(nan inf) [[A_COERCE]], <2 x double> noundef nofpclass(nan inf) [[B]], i32 noundef [[C_COERCE]]) #[[ATTR11]]
 // CLFINITEONLY-NEXT:    ret double [[CALL]]
 //
 // NONANS: Function Attrs: noinline nounwind optnone
@@ -389,7 +389,7 @@ float2 call_extern_func_vec(float2 a, double2 b, half2 c) {
 // CLFINITEONLY-LABEL: define dso_local nofpclass(nan inf) <2 x float> @defined_complex_func
 // CLFINITEONLY-SAME: (<2 x float> noundef nofpclass(nan inf) [[A_COERCE:%.*]], double noundef nofpclass(nan inf) [[B_COERCE0:%.*]], double noundef nofpclass(nan inf) [[B_COERCE1:%.*]], <2 x half> noundef nofpclass(nan inf) [[C_COERCE:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] {
 // CLFINITEONLY-NEXT:  entry:
-// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) <2 x float> @extern_complex(<2 x float> noundef nofpclass(nan inf) [[A_COERCE]], double noundef nofpclass(nan inf) [[B_COERCE0]], double noundef nofpclass(nan inf) [[B_COERCE1]], <2 x half> noundef nofpclass(nan inf) [[C_COERCE]]) #[[ATTR10]]
+// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) <2 x float> @extern_complex(<2 x float> noundef nofpclass(nan inf) [[A_COERCE]], double noundef nofpclass(nan inf) [[B_COERCE0]], double noundef nofpclass(nan inf) [[B_COERCE1]], <2 x half> noundef nofpclass(nan inf) [[C_COERCE]]) #[[ATTR11]]
 // CLFINITEONLY-NEXT:    ret <2 x float> [[CALL]]
 //
 // NONANS: Function Attrs: noinline nounwind optnone
@@ -927,12 +927,14 @@ _Complex _Float16 defined_complex_func_f16_ret(_Complex _Float16 c) {
 // CLFINITEONLY-NEXT:    [[CF16_REAL:%.*]] = load half, ptr [[CF16]], align 8
 // CLFINITEONLY-NEXT:    [[CF16_IMAGP:%.*]] = getelementptr inbounds i8, ptr [[CF16]], i64 2
 // CLFINITEONLY-NEXT:    [[CF16_IMAG:%.*]] = load half, ptr [[CF16_IMAGP]], align 2
+// CLFINITEONLY-NEXT:    call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[INDIRECT_ARG_TEMP]]) #[[ATTR12:[0-9]+]]
 // CLFINITEONLY-NEXT:    [[INDIRECT_ARG_TEMP_IMAGP:%.*]] = getelementptr inbounds i8, ptr [[INDIRECT_ARG_TEMP]], i64 8
 // CLFINITEONLY-NEXT:    store double [[CF64_COERCE0]], ptr [[INDIRECT_ARG_TEMP]], align 8
 // CLFINITEONLY-NEXT:    store double [[CF64_COERCE1]], ptr [[INDIRECT_ARG_TEMP_IMAGP]], align 8
 // CLFINITEONLY-NEXT:    [[COERCE5_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x half> poison, half [[CF16_REAL]], i64 0
 // CLFINITEONLY-NEXT:    [[COERCE5_SROA_0_2_VEC_INSERT:%.*]] = insertelement <2 x half> [[COERCE5_SROA_0_0_VEC_INSERT]], half [[CF16_IMAG]], i64 1
-// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) float (float, ...) @variadic(float noundef nofpclass(nan inf) [[F32]], double noundef nofpclass(nan inf) [[CONV]], double noundef nofpclass(nan inf) [[F64]], half noundef nofpclass(nan inf) [[F16]], double noundef nofpclass(nan inf) [[V2F32_COERCE]], <2 x double> noundef nofpclass(nan inf) [[V2F64]], i32 noundef [[V2F16_COERCE]], <2 x float> noundef nofpclass(nan inf) [[CF32_COERCE]], ptr noundef nonnull byval({ double, double }) align 8 [[INDIRECT_ARG_TEMP]], <2 x half> noundef nofpclass(nan inf) [[COERCE5_SROA_0_2_VEC_INSERT]]) #[[ATTR10]]
+// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) float (float, ...) @variadic(float noundef nofpclass(nan inf) [[F32]], double noundef nofpclass(nan inf) [[CONV]], double noundef nofpclass(nan inf) [[F64]], half noundef nofpclass(nan inf) [[F16]], double noundef nofpclass(nan inf) [[V2F32_COERCE]], <2 x double> noundef nofpclass(nan inf) [[V2F64]], i32 noundef [[V2F16_COERCE]], <2 x float> noundef nofpclass(nan inf) [[CF32_COERCE]], ptr noundef nonnull byval({ double, double }) align 8 [[INDIRECT_ARG_TEMP]], <2 x half> noundef nofpclass(nan inf) [[COERCE5_SROA_0_2_VEC_INSERT]]) #[[ATTR11]]
+// CLFINITEONLY-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[INDIRECT_ARG_TEMP]]) #[[ATTR12]]
 // CLFINITEONLY-NEXT:    ret float [[CALL]]
 //
 // NONANS: Function Attrs: noinline nounwind optnone
@@ -1178,12 +1180,14 @@ float call_variadic(float f32, double f64, _Float16 f16,
 // CLFINITEONLY-NEXT:    [[CF16_REAL:%.*]] = load half, ptr [[CF16]], align 8
 // CLFINITEONLY-NEXT:    [[CF16_IMAGP:%.*]] = getelementptr inbounds i8, ptr [[CF16]], i64 2
 // CLFINITEONLY-NEXT:    [[CF16_IMAG:%.*]] = load half, ptr [[CF16_IMAGP]], align 2
+// CLFINITEONLY-NEXT:    call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[INDIRECT_ARG_TEMP]]) #[[ATTR12]]
 // CLFINITEONLY-NEXT:    [[INDIRECT_ARG_TEMP_IMAGP:%.*]] = getelementptr inbounds i8, ptr [[INDIRECT_ARG_TEMP]], i64 8
 // CLFINITEONLY-NEXT:    store double [[CF64_COERCE0]], ptr [[INDIRECT_ARG_TEMP]], align 8
 // CLFINITEONLY-NEXT:    store double [[CF64_COERCE1]], ptr [[INDIRECT_ARG_TEMP_IMAGP]], align 8
 // CLFINITEONLY-NEXT:    [[COERCE5_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x half> poison, half [[CF16_REAL]], i64 0
 // CLFINITEONLY-NEXT:    [[COERCE5_SROA_0_2_VEC_INSERT:%.*]] = insertelement <2 x half>...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/90849