[clang] [Clang] Added lifetime markers for indereacly passed Aggregate types (PR #90849)

Thu May 2 05:30:44 PDT 2024

https://github.com/Lukacma created https://github.com/llvm/llvm-project/pull/90849

Adding lifetime markers to pointers to temporary stack allocated objects, allows compiler to make more efficient use of the stack. Currently these markers are not added to indirectly called aggregate types so this patch adds them. Motivating example showing how stack space is reused when lifetime markers are added:

No lifetime markers:

https://gcc.godbolt.org/z/7nzj96qbW

Lifetime markers:

https://gcc.godbolt.org/z/K8GqYPPov


>From dfb8a9de874f233c9d3964569f3d5201fd717c16 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Tue, 30 Apr 2024 12:46:48 +0000
Subject: [PATCH 1/4] [Clang][AArch64] Fixed incorrect _BitInt alignment

---
 clang/include/clang/Basic/TargetInfo.h |  8 ++++
 clang/lib/AST/ASTContext.cpp           |  5 +-
 clang/lib/Basic/Targets/AArch64.cpp    |  6 ++-
 clang/lib/Basic/Targets/AArch64.h      |  3 ++
 clang/test/CodeGen/aapcs64-align.cpp   | 64 +++++++++++++++++++++++++-
 5 files changed, 81 insertions(+), 5 deletions(-)

diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 3ced2e7397a754..1b5efa488b6ded 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -518,6 +518,14 @@ class TargetInfo : public TransferrableTargetInfo,
   /// getInt128Align() - Returns the alignment of Int128.
   unsigned getInt128Align() const { return Int128Align; }
 
+  /// getBitIntAlign/Width - Return aligned size of '_BitInt' and 
+  /// 'unsigned _BitInt' for this target, in bits.
+  unsigned getBitIntWidth(unsigned NumBits) const {
+    return llvm::alignTo(NumBits, getBitIntAlign(NumBits));}
+  virtual unsigned getBitIntAlign(unsigned NumBits) const {
+    return std::clamp<unsigned>(llvm::PowerOf2Ceil(NumBits), getCharWidth(), 
+                                getLongLongAlign());}
+
   /// getShortAccumWidth/Align - Return the size of 'signed short _Accum' and
   /// 'unsigned short _Accum' for this target, in bits.
   unsigned getShortAccumWidth() const { return ShortAccumWidth; }
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index cbf4932aff9a6b..f440af50e08a49 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -2263,9 +2263,8 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
   }
   case Type::BitInt: {
     const auto *EIT = cast<BitIntType>(T);
-    Align = std::clamp<unsigned>(llvm::PowerOf2Ceil(EIT->getNumBits()),
-                                 getCharWidth(), Target->getLongLongAlign());
-    Width = llvm::alignTo(EIT->getNumBits(), Align);
+    Align = Target->getBitIntAlign(EIT->getNumBits());
+    Width = Target->getBitIntWidth(EIT->getNumBits());
     break;
   }
   case Type::Record:
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index c8d243a8fb7aea..9a6c197ff77b58 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -1473,6 +1473,10 @@ bool AArch64TargetInfo::validatePointerAuthKey(
 
 bool AArch64TargetInfo::hasInt128Type() const { return true; }
 
+unsigned AArch64TargetInfo::getBitIntAlign(unsigned NumBits) const{
+        return std::clamp<unsigned>(llvm::PowerOf2Ceil(NumBits), getCharWidth(), 
+                                    getInt128Align());}
+
 AArch64leTargetInfo::AArch64leTargetInfo(const llvm::Triple &Triple,
                                          const TargetOptions &Opts)
     : AArch64TargetInfo(Triple, Opts) {}
@@ -1674,4 +1678,4 @@ void RenderScript64TargetInfo::getTargetDefines(const LangOptions &Opts,
                                                 MacroBuilder &Builder) const {
   Builder.defineMacro("__RENDERSCRIPT__");
   AArch64leTargetInfo::getTargetDefines(Opts, Builder);
-}
+}
\ No newline at end of file
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index 12fb50286f7511..d8cdc814b2a9ae 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -202,6 +202,9 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
   bool hasBitIntType() const override { return true; }
 
   bool validateTarget(DiagnosticsEngine &Diags) const override;
+
+  unsigned getBitIntAlign(unsigned NumBits) const override;
+ 
 };
 
 class LLVM_LIBRARY_VISIBILITY AArch64leTargetInfo : public AArch64TargetInfo {
diff --git a/clang/test/CodeGen/aapcs64-align.cpp b/clang/test/CodeGen/aapcs64-align.cpp
index de231f2123b975..7a8151022852ea 100644
--- a/clang/test/CodeGen/aapcs64-align.cpp
+++ b/clang/test/CodeGen/aapcs64-align.cpp
@@ -1,7 +1,7 @@
 // REQUIRES: arm-registered-target
 // RUN: %clang_cc1 -triple aarch64-none-elf \
 // RUN:   -O2 \
-// RUN:   -emit-llvm -o - %s | FileCheck %s
+// RUN:   -emit-llvm -fexperimental-max-bitint-width=1024 -o - %s | FileCheck %s
 
 extern "C" {
 
@@ -100,4 +100,66 @@ void f5m(int, int, int, int, int, P16);
 // CHECK: declare void @f5(i32 noundef, [2 x i64])
 // CHECK: declare void @f5m(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, [2 x i64])
 
+//BitInt alignment
+struct BITINT129 {
+    char ch;
+    unsigned _BitInt(129) v;
+};
+
+int test_bitint129(){
+  return __builtin_offsetof(struct BITINT129, v);
 }
+// CHECK:  ret i32 16 
+
+struct BITINT127 {
+    char ch;
+    _BitInt(127) v;
+};
+
+int test_bitint127(){
+  return __builtin_offsetof(struct BITINT127, v);
+}
+// CHECK:  ret i32 16 
+
+struct BITINT63 {
+    char ch;
+    _BitInt(63) v;
+};
+
+int test_bitint63(){
+  return __builtin_offsetof(struct BITINT63, v);
+}
+// CHECK:  ret i32 8 
+
+struct BITINT32 {
+    char ch;
+    unsigned _BitInt(32) v;
+};
+
+int test_bitint32(){
+  return __builtin_offsetof(struct BITINT32, v);
+}
+// CHECK:  ret i32 4
+
+struct BITINT9 {
+    char ch;
+    unsigned _BitInt(9) v;
+};
+
+int test_bitint9(){
+  return __builtin_offsetof(struct BITINT9, v);
+}
+// CHECK:  ret i32 2
+
+struct BITINT8 {
+    char ch;
+    unsigned _BitInt(8) v;
+};
+
+int test_bitint8(){
+  return __builtin_offsetof(struct BITINT8, v);
+}
+// CHECK:  ret i32 1
+
+}
+

>From 908693b105980253eadb10b18f8b4c0d52f73415 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Tue, 30 Apr 2024 13:02:05 +0000
Subject: [PATCH 2/4] Fix formatting

---
 clang/include/clang/Basic/TargetInfo.h | 10 ++++++----
 clang/lib/Basic/Targets/AArch64.cpp    |  7 ++++---
 clang/lib/Basic/Targets/AArch64.h      |  1 -
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 1b5efa488b6ded..488b166a95af7d 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -518,13 +518,15 @@ class TargetInfo : public TransferrableTargetInfo,
   /// getInt128Align() - Returns the alignment of Int128.
   unsigned getInt128Align() const { return Int128Align; }
 
-  /// getBitIntAlign/Width - Return aligned size of '_BitInt' and 
+  /// getBitIntAlign/Width - Return aligned size of '_BitInt' and
   /// 'unsigned _BitInt' for this target, in bits.
   unsigned getBitIntWidth(unsigned NumBits) const {
-    return llvm::alignTo(NumBits, getBitIntAlign(NumBits));}
+    return llvm::alignTo(NumBits, getBitIntAlign(NumBits));
+  }
   virtual unsigned getBitIntAlign(unsigned NumBits) const {
-    return std::clamp<unsigned>(llvm::PowerOf2Ceil(NumBits), getCharWidth(), 
-                                getLongLongAlign());}
+    return std::clamp<unsigned>(llvm::PowerOf2Ceil(NumBits), getCharWidth(),
+                                getLongLongAlign());
+  }
 
   /// getShortAccumWidth/Align - Return the size of 'signed short _Accum' and
   /// 'unsigned short _Accum' for this target, in bits.
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 9a6c197ff77b58..cc0ae0e2856fd0 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -1473,9 +1473,10 @@ bool AArch64TargetInfo::validatePointerAuthKey(
 
 bool AArch64TargetInfo::hasInt128Type() const { return true; }
 
-unsigned AArch64TargetInfo::getBitIntAlign(unsigned NumBits) const{
-        return std::clamp<unsigned>(llvm::PowerOf2Ceil(NumBits), getCharWidth(), 
-                                    getInt128Align());}
+unsigned AArch64TargetInfo::getBitIntAlign(unsigned NumBits) const {
+  return std::clamp<unsigned>(llvm::PowerOf2Ceil(NumBits), getCharWidth(),
+                              getInt128Align());
+}
 
 AArch64leTargetInfo::AArch64leTargetInfo(const llvm::Triple &Triple,
                                          const TargetOptions &Opts)
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index d8cdc814b2a9ae..be6435007a6009 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -204,7 +204,6 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
   bool validateTarget(DiagnosticsEngine &Diags) const override;
 
   unsigned getBitIntAlign(unsigned NumBits) const override;
- 
 };
 
 class LLVM_LIBRARY_VISIBILITY AArch64leTargetInfo : public AArch64TargetInfo {

>From 806f5d6c9ac351b21bea2d32894abdb80849cffe Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Wed, 1 May 2024 15:20:05 +0000
Subject: [PATCH 3/4] [Clang] Fix incorrect passing of _BitInt args

---
 clang/lib/CodeGen/Targets/AArch64.cpp       |  2 +-
 clang/test/CodeGen/aarch64-bitint-argpass.c | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/CodeGen/aarch64-bitint-argpass.c

diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index 4c32f510101f04..7daf416b624fa6 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -302,7 +302,7 @@ AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic,
 
     if (const auto *EIT = Ty->getAs<BitIntType>())
       if (EIT->getNumBits() > 128)
-        return getNaturalAlignIndirect(Ty);
+        return getNaturalAlignIndirect(Ty, false);
 
     return (isPromotableIntegerTypeForABI(Ty) && isDarwinPCS()
                 ? ABIArgInfo::getExtend(Ty)
diff --git a/clang/test/CodeGen/aarch64-bitint-argpass.c b/clang/test/CodeGen/aarch64-bitint-argpass.c
new file mode 100644
index 00000000000000..c7333bac75c1ac
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-bitint-argpass.c
@@ -0,0 +1,14 @@
+// REQUIRES: arm-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-elf \
+// RUN:   -O2 \
+// RUN:   -emit-llvm -fexperimental-max-bitint-width=1024 -o - %s | FileCheck %s
+
+_BitInt(129) v = -1;
+int h(_BitInt(129));
+
+// CHECK: declare i32 @h(ptr noundef)
+int largerthan128() {
+   return h(v);
+}
+
+

>From f54e86653e89faa11e05c2ddc2b34a41bc586c7e Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Thu, 2 May 2024 11:33:13 +0000
Subject: [PATCH 4/4] [Clang] Added lifetime markers for temp. allocated
 Aggregate types

---
 clang/lib/CodeGen/CGCall.cpp                  | 10 ++++
 clang/test/CodeGen/aarch64-byval-temp.c       | 46 +++++++++++++++++--
 ...-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c |  2 +
 ...cle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp |  2 +
 clang/test/CodeGen/nofpclass.c                | 22 +++++----
 5 files changed, 70 insertions(+), 12 deletions(-)

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 69548902dc43b9..784232ed8d0714 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5198,6 +5198,16 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
           Val = Builder.CreateFreeze(Val);
         IRCallArgs[FirstIRArg] = Val;
 
+        // Emit lifetime markers for the temporary alloca.
+        llvm::TypeSize ByvalTempElementSize =
+            CGM.getDataLayout().getTypeAllocSize(Addr.getElementType());
+        llvm::Value *LifetimeSize =
+            EmitLifetimeStart(ByvalTempElementSize, Addr.getPointer());
+
+        // Add cleanup code to emit the end lifetime marker after the call.
+        if (LifetimeSize) // In case we disabled lifetime markers.
+          CallLifetimeEndAfterCall.emplace_back(Addr, LifetimeSize);
+
         I->copyInto(*this, Addr);
       } else {
         // We want to avoid creating an unnecessary temporary+copy here;
diff --git a/clang/test/CodeGen/aarch64-byval-temp.c b/clang/test/CodeGen/aarch64-byval-temp.c
index e9e2586406e5c1..765a298f83ed0e 100644
--- a/clang/test/CodeGen/aarch64-byval-temp.c
+++ b/clang/test/CodeGen/aarch64-byval-temp.c
@@ -1,13 +1,15 @@
-// RUN: %clang_cc1 -emit-llvm -triple arm64-- -o - %s -O0 | FileCheck %s --check-prefix=CHECK-O0
-// RUN: %clang_cc1 -emit-llvm -disable-llvm-optzns -triple arm64-- -o - %s -O3 | FileCheck %s --check-prefix=CHECK-O3
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// RUN: %clang_cc1 -emit-llvm -triple arm64-- -fexperimental-max-bitint-width=1024  -o - %s -O0 | FileCheck %s --check-prefix=CHECK-O0
+// RUN: %clang_cc1 -emit-llvm -disable-llvm-optzns -fexperimental-max-bitint-width=1024  -triple arm64-- -o - %s -O3 | FileCheck %s --check-prefix=CHECK-O3
 
 struct large {
     void* pointers[8];
 };
 
 void pass_large(struct large);
+void pass_large_BitInt(_BitInt(129));
 
-// For arm64, we don't use byval to pass structs but instead we create
+// For arm64, we don't use byval to pass structs and _BitInt(>128) type, but instead we create
 // temporary allocas.
 //
 // Make sure we generate the appropriate lifetime markers for the temporary
@@ -71,3 +73,41 @@ void example(void) {
 // Mark the end of the lifetime of `l`.
 // CHECK-O3-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr %l)
 // CHECK-O3-NEXT: ret void
+
+void example_BitInt(void) {
+    _BitInt(129) l = {0};
+    pass_large_BitInt(l);
+    pass_large_BitInt(l);
+}
+// CHECK-O0-LABEL: define dso_local void @example_BitInt(
+// CHECK-O0-NEXT:  entry:
+// CHECK-O0-NEXT:    [[L:%.*]] = alloca i129, align 16
+// CHECK-O0-NEXT:    [[INDIRECT_ARG_TEMP:%.*]] = alloca i129, align 16
+// CHECK-O0-NEXT:    [[INDIRECT_ARG_TEMP1:%.*]] = alloca i129, align 16
+// CHECK-O0-NEXT:    store i129 0, ptr [[L]], align 16
+// CHECK-O0-NEXT:    [[TMP0:%.*]] = load i129, ptr [[L]], align 16
+// CHECK-O0-NEXT:    store i129 [[TMP0]], ptr [[INDIRECT_ARG_TEMP]], align 16
+// CHECK-O0-NEXT:    call void @pass_large_BitInt(ptr noundef [[INDIRECT_ARG_TEMP]])
+// CHECK-O0-NEXT:    [[TMP1:%.*]] = load i129, ptr [[L]], align 16
+// CHECK-O0-NEXT:    store i129 [[TMP1]], ptr [[INDIRECT_ARG_TEMP1]], align 16
+// CHECK-O0-NEXT:    call void @pass_large_BitInt(ptr noundef [[INDIRECT_ARG_TEMP1]])
+// CHECK-O0-NEXT:    ret void
+//
+// CHECK-O3-LABEL: define dso_local void @example_BitInt(
+// CHECK-O3-NEXT:  entry:
+// CHECK-O3-NEXT:    [[L:%.*]] = alloca i129, align 16
+// CHECK-O3-NEXT:    [[INDIRECT_ARG_TEMP:%.*]] = alloca i129, align 16
+// CHECK-O3-NEXT:    [[INDIRECT_ARG_TEMP1:%.*]] = alloca i129, align 16
+// CHECK-O3-NEXT:    call void @llvm.lifetime.start.p0(i64 32, ptr [[L]]) 
+// CHECK-O3-NEXT:    store i129 0, ptr [[L]], align 16, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-O3-NEXT:    [[TMP0:%.*]] = load i129, ptr [[L]], align 16, !tbaa [[TBAA6]]
+// CHECK-O3-NEXT:    call void @llvm.lifetime.start.p0(i64 32, ptr [[INDIRECT_ARG_TEMP]]) 
+// CHECK-O3-NEXT:    store i129 [[TMP0]], ptr [[INDIRECT_ARG_TEMP]], align 16, !tbaa [[TBAA6]]
+// CHECK-O3-NEXT:    call void @pass_large_BitInt(ptr noundef [[INDIRECT_ARG_TEMP]])
+// CHECK-O3-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr [[INDIRECT_ARG_TEMP]]) 
+// CHECK-O3-NEXT:    [[TMP1:%.*]] = load i129, ptr [[L]], align 16, !tbaa [[TBAA6]]
+// CHECK-O3-NEXT:    call void @llvm.lifetime.start.p0(i64 32, ptr [[INDIRECT_ARG_TEMP1]]) 
+// CHECK-O3-NEXT:    store i129 [[TMP1]], ptr [[INDIRECT_ARG_TEMP1]], align 16, !tbaa [[TBAA6]]
+// CHECK-O3-NEXT:    call void @pass_large_BitInt(ptr noundef [[INDIRECT_ARG_TEMP1]])
+// CHECK-O3-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr [[INDIRECT_ARG_TEMP1]]) 
+// CHECK-O3-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr [[L]]) 
diff --git a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
index 1e6a4500cc886c..b06e2e72053422 100644
--- a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
+++ b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
@@ -88,8 +88,10 @@ typedef svint8_t vec2 __attribute__((arm_sve_vector_bits(N)));
 // CHECK-NEXT: entry:
 // CHECK-NEXT:   [[INDIRECT_ARG_TEMP:%.*]] = alloca <[[#div(VBITS,8)]] x i8>, align 16
 // CHECK-NEXT:   [[X:%.*]] = tail call <[[#div(VBITS,8)]] x i8> @llvm.vector.extract.v[[#div(VBITS,8)]]i8.nxv16i8(<vscale x 16 x i8> [[X_COERCE:%.*]], i64 0)
+// CHECK-NEXT:   call void @llvm.lifetime.start.p0(i64 [[SIZE:[0-9]+]], ptr nonnull [[INDIRECT_ARG_TEMP]]) #[[ATTR6:[0-9]+]]
 // CHECK-NEXT:   store <[[#div(VBITS,8)]] x i8> [[X]], ptr [[INDIRECT_ARG_TEMP]], align 16, [[TBAA6]]
 // CHECK-NEXT:   call void @f3(ptr noundef nonnull [[INDIRECT_ARG_TEMP]]) [[ATTR5:#.*]]
+// CHECK-NEXT:   call void @llvm.lifetime.end.p0(i64 [[SIZE]], ptr nonnull [[INDIRECT_ARG_TEMP]]) #[[ATTR6:[0-9]+]]
 // CHECK-NEXT:   ret void
 
 // CHECK128-LABEL: declare void @f3(<16 x i8> noundef)
diff --git a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
index 8c41fb956145f0..ab53de78ee281b 100644
--- a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
+++ b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
@@ -73,8 +73,10 @@ typedef svint16_t vec2 __attribute__((arm_sve_vector_bits(N)));
 // CHECK128-NEXT:   ret void
 // CHECKWIDE-NEXT:   [[INDIRECT_ARG_TEMP:%.*]] = alloca <[[#div(VBITS, 16)]] x i16>, align 16
 // CHECKWIDE-NEXT:   [[X:%.*]] = tail call <[[#div(VBITS, 16)]] x i16> @llvm.vector.extract.v[[#div(VBITS, 16)]]i16.nxv8i16(<vscale x 8 x i16> [[X_COERCE:%.*]], i64 0)
+// CHECKWIDE-NEXT:   call void @llvm.lifetime.start.p0(i64 [[SIZE:[0-9]+]], ptr nonnull [[INDIRECT_ARG_TEMP]]) #[[ATTR6:[0-9]+]]
 // CHECKWIDE-NEXT:   store <[[#div(VBITS, 16)]] x i16> [[X]], ptr [[INDIRECT_ARG_TEMP]], align 16, [[TBAA6:!tbaa !.*]]
 // CHECKWIDE-NEXT:   call void @_Z1fDv[[#div(VBITS, 16)]]_s(ptr noundef nonnull [[INDIRECT_ARG_TEMP]]) [[ATTR5:#.*]]
+// CHECKWIDE-NEXT:   call void @llvm.lifetime.end.p0(i64 [[SIZE]], ptr nonnull [[INDIRECT_ARG_TEMP]]) #[[ATTR6:[0-9]+]]
 // CHECKWIDE-NEXT:   ret void
 void g(vec2 x) { f(x); } // OK
 #endif
diff --git a/clang/test/CodeGen/nofpclass.c b/clang/test/CodeGen/nofpclass.c
index dd90d02f7759b6..fc4c64f9b921ba 100644
--- a/clang/test/CodeGen/nofpclass.c
+++ b/clang/test/CodeGen/nofpclass.c
@@ -172,7 +172,7 @@ double2 defined_func_v2f64(double2 a, double2 b, double2 c) {
 // CLFINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @call_extern_func
 // CLFINITEONLY-SAME: (float noundef nofpclass(nan inf) [[A:%.*]], double noundef nofpclass(nan inf) [[B:%.*]], half noundef nofpclass(nan inf) [[C:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
 // CLFINITEONLY-NEXT:  entry:
-// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) float @extern_func(float noundef nofpclass(nan inf) [[A]], double noundef nofpclass(nan inf) [[B]], half noundef nofpclass(nan inf) [[C]]) #[[ATTR10:[0-9]+]]
+// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) float @extern_func(float noundef nofpclass(nan inf) [[A]], double noundef nofpclass(nan inf) [[B]], half noundef nofpclass(nan inf) [[C]]) #[[ATTR11:[0-9]+]]
 // CLFINITEONLY-NEXT:    ret float [[CALL]]
 //
 // NONANS: Function Attrs: noinline nounwind optnone
@@ -249,7 +249,7 @@ float call_extern_func(float a, double b, _Float16 c) {
 // CLFINITEONLY-LABEL: define dso_local nofpclass(nan inf) double @call_extern_func_vec
 // CLFINITEONLY-SAME: (double noundef nofpclass(nan inf) [[A_COERCE:%.*]], <2 x double> noundef nofpclass(nan inf) [[B:%.*]], i32 noundef [[C_COERCE:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] {
 // CLFINITEONLY-NEXT:  entry:
-// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) double @extern_func_vec(double noundef nofpclass(nan inf) [[A_COERCE]], <2 x double> noundef nofpclass(nan inf) [[B]], i32 noundef [[C_COERCE]]) #[[ATTR10]]
+// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) double @extern_func_vec(double noundef nofpclass(nan inf) [[A_COERCE]], <2 x double> noundef nofpclass(nan inf) [[B]], i32 noundef [[C_COERCE]]) #[[ATTR11]]
 // CLFINITEONLY-NEXT:    ret double [[CALL]]
 //
 // NONANS: Function Attrs: noinline nounwind optnone
@@ -389,7 +389,7 @@ float2 call_extern_func_vec(float2 a, double2 b, half2 c) {
 // CLFINITEONLY-LABEL: define dso_local nofpclass(nan inf) <2 x float> @defined_complex_func
 // CLFINITEONLY-SAME: (<2 x float> noundef nofpclass(nan inf) [[A_COERCE:%.*]], double noundef nofpclass(nan inf) [[B_COERCE0:%.*]], double noundef nofpclass(nan inf) [[B_COERCE1:%.*]], <2 x half> noundef nofpclass(nan inf) [[C_COERCE:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] {
 // CLFINITEONLY-NEXT:  entry:
-// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) <2 x float> @extern_complex(<2 x float> noundef nofpclass(nan inf) [[A_COERCE]], double noundef nofpclass(nan inf) [[B_COERCE0]], double noundef nofpclass(nan inf) [[B_COERCE1]], <2 x half> noundef nofpclass(nan inf) [[C_COERCE]]) #[[ATTR10]]
+// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) <2 x float> @extern_complex(<2 x float> noundef nofpclass(nan inf) [[A_COERCE]], double noundef nofpclass(nan inf) [[B_COERCE0]], double noundef nofpclass(nan inf) [[B_COERCE1]], <2 x half> noundef nofpclass(nan inf) [[C_COERCE]]) #[[ATTR11]]
 // CLFINITEONLY-NEXT:    ret <2 x float> [[CALL]]
 //
 // NONANS: Function Attrs: noinline nounwind optnone
@@ -927,12 +927,14 @@ _Complex _Float16 defined_complex_func_f16_ret(_Complex _Float16 c) {
 // CLFINITEONLY-NEXT:    [[CF16_REAL:%.*]] = load half, ptr [[CF16]], align 8
 // CLFINITEONLY-NEXT:    [[CF16_IMAGP:%.*]] = getelementptr inbounds i8, ptr [[CF16]], i64 2
 // CLFINITEONLY-NEXT:    [[CF16_IMAG:%.*]] = load half, ptr [[CF16_IMAGP]], align 2
+// CLFINITEONLY-NEXT:    call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[INDIRECT_ARG_TEMP]]) #[[ATTR12:[0-9]+]]
 // CLFINITEONLY-NEXT:    [[INDIRECT_ARG_TEMP_IMAGP:%.*]] = getelementptr inbounds i8, ptr [[INDIRECT_ARG_TEMP]], i64 8
 // CLFINITEONLY-NEXT:    store double [[CF64_COERCE0]], ptr [[INDIRECT_ARG_TEMP]], align 8
 // CLFINITEONLY-NEXT:    store double [[CF64_COERCE1]], ptr [[INDIRECT_ARG_TEMP_IMAGP]], align 8
 // CLFINITEONLY-NEXT:    [[COERCE5_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x half> poison, half [[CF16_REAL]], i64 0
 // CLFINITEONLY-NEXT:    [[COERCE5_SROA_0_2_VEC_INSERT:%.*]] = insertelement <2 x half> [[COERCE5_SROA_0_0_VEC_INSERT]], half [[CF16_IMAG]], i64 1
-// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) float (float, ...) @variadic(float noundef nofpclass(nan inf) [[F32]], double noundef nofpclass(nan inf) [[CONV]], double noundef nofpclass(nan inf) [[F64]], half noundef nofpclass(nan inf) [[F16]], double noundef nofpclass(nan inf) [[V2F32_COERCE]], <2 x double> noundef nofpclass(nan inf) [[V2F64]], i32 noundef [[V2F16_COERCE]], <2 x float> noundef nofpclass(nan inf) [[CF32_COERCE]], ptr noundef nonnull byval({ double, double }) align 8 [[INDIRECT_ARG_TEMP]], <2 x half> noundef nofpclass(nan inf) [[COERCE5_SROA_0_2_VEC_INSERT]]) #[[ATTR10]]
+// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) float (float, ...) @variadic(float noundef nofpclass(nan inf) [[F32]], double noundef nofpclass(nan inf) [[CONV]], double noundef nofpclass(nan inf) [[F64]], half noundef nofpclass(nan inf) [[F16]], double noundef nofpclass(nan inf) [[V2F32_COERCE]], <2 x double> noundef nofpclass(nan inf) [[V2F64]], i32 noundef [[V2F16_COERCE]], <2 x float> noundef nofpclass(nan inf) [[CF32_COERCE]], ptr noundef nonnull byval({ double, double }) align 8 [[INDIRECT_ARG_TEMP]], <2 x half> noundef nofpclass(nan inf) [[COERCE5_SROA_0_2_VEC_INSERT]]) #[[ATTR11]]
+// CLFINITEONLY-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[INDIRECT_ARG_TEMP]]) #[[ATTR12]]
 // CLFINITEONLY-NEXT:    ret float [[CALL]]
 //
 // NONANS: Function Attrs: noinline nounwind optnone
@@ -1178,12 +1180,14 @@ float call_variadic(float f32, double f64, _Float16 f16,
 // CLFINITEONLY-NEXT:    [[CF16_REAL:%.*]] = load half, ptr [[CF16]], align 8
 // CLFINITEONLY-NEXT:    [[CF16_IMAGP:%.*]] = getelementptr inbounds i8, ptr [[CF16]], i64 2
 // CLFINITEONLY-NEXT:    [[CF16_IMAG:%.*]] = load half, ptr [[CF16_IMAGP]], align 2
+// CLFINITEONLY-NEXT:    call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[INDIRECT_ARG_TEMP]]) #[[ATTR12]]
 // CLFINITEONLY-NEXT:    [[INDIRECT_ARG_TEMP_IMAGP:%.*]] = getelementptr inbounds i8, ptr [[INDIRECT_ARG_TEMP]], i64 8
 // CLFINITEONLY-NEXT:    store double [[CF64_COERCE0]], ptr [[INDIRECT_ARG_TEMP]], align 8
 // CLFINITEONLY-NEXT:    store double [[CF64_COERCE1]], ptr [[INDIRECT_ARG_TEMP_IMAGP]], align 8
 // CLFINITEONLY-NEXT:    [[COERCE5_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x half> poison, half [[CF16_REAL]], i64 0
 // CLFINITEONLY-NEXT:    [[COERCE5_SROA_0_2_VEC_INSERT:%.*]] = insertelement <2 x half> [[COERCE5_SROA_0_0_VEC_INSERT]], half [[CF16_IMAG]], i64 1
-// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) float (float, ...) [[FPTR]](float noundef nofpclass(nan inf) [[F32]], double noundef nofpclass(nan inf) [[CONV]], double noundef nofpclass(nan inf) [[F64]], half noundef nofpclass(nan inf) [[F16]], double noundef nofpclass(nan inf) [[V2F32_COERCE]], <2 x double> noundef nofpclass(nan inf) [[V2F64]], i32 noundef [[V2F16_COERCE]], <2 x float> noundef nofpclass(nan inf) [[CF32_COERCE]], ptr noundef nonnull byval({ double, double }) align 8 [[INDIRECT_ARG_TEMP]], <2 x half> noundef nofpclass(nan inf) [[COERCE5_SROA_0_2_VEC_INSERT]]) #[[ATTR10]]
+// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) float (float, ...) [[FPTR]](float noundef nofpclass(nan inf) [[F32]], double noundef nofpclass(nan inf) [[CONV]], double noundef nofpclass(nan inf) [[F64]], half noundef nofpclass(nan inf) [[F16]], double noundef nofpclass(nan inf) [[V2F32_COERCE]], <2 x double> noundef nofpclass(nan inf) [[V2F64]], i32 noundef [[V2F16_COERCE]], <2 x float> noundef nofpclass(nan inf) [[CF32_COERCE]], ptr noundef nonnull byval({ double, double }) align 8 [[INDIRECT_ARG_TEMP]], <2 x half> noundef nofpclass(nan inf) [[COERCE5_SROA_0_2_VEC_INSERT]]) #[[ATTR11]]
+// CLFINITEONLY-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[INDIRECT_ARG_TEMP]]) #[[ATTR12]]
 // CLFINITEONLY-NEXT:    ret float [[CALL]]
 //
 // NONANS: Function Attrs: noinline nounwind optnone
@@ -1364,9 +1368,9 @@ extern __m256d extern_m256d(__m256d, ...);
 //
 // CLFINITEONLY: Function Attrs: convergent norecurse nounwind
 // CLFINITEONLY-LABEL: define dso_local nofpclass(nan inf) <4 x double> @call_m256d
-// CLFINITEONLY-SAME: (<4 x double> noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR8:[0-9]+]] {
+// CLFINITEONLY-SAME: (<4 x double> noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR9:[0-9]+]] {
 // CLFINITEONLY-NEXT:  entry:
-// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) <4 x double> (<4 x double>, ...) @extern_m256d(<4 x double> noundef nofpclass(nan inf) [[X]], <4 x double> noundef nofpclass(nan inf) [[X]]) #[[ATTR10]]
+// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) <4 x double> (<4 x double>, ...) @extern_m256d(<4 x double> noundef nofpclass(nan inf) [[X]], <4 x double> noundef nofpclass(nan inf) [[X]]) #[[ATTR11]]
 // CLFINITEONLY-NEXT:    ret <4 x double> [[CALL]]
 //
 // NONANS: Function Attrs: noinline nounwind optnone
@@ -1407,9 +1411,9 @@ __m256d call_m256d(__m256d x) {
 //
 // CLFINITEONLY: Function Attrs: convergent norecurse nounwind
 // CLFINITEONLY-LABEL: define dso_local nofpclass(nan inf) <25 x double> @call_matrix
-// CLFINITEONLY-SAME: (<25 x double> noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR9:[0-9]+]] {
+// CLFINITEONLY-SAME: (<25 x double> noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR10:[0-9]+]] {
 // CLFINITEONLY-NEXT:  entry:
-// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) <25 x double> @extern_matrix(<25 x double> noundef nofpclass(nan inf) [[X]]) #[[ATTR10]]
+// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) <25 x double> @extern_matrix(<25 x double> noundef nofpclass(nan inf) [[X]]) #[[ATTR11]]
 // CLFINITEONLY-NEXT:    ret <25 x double> [[CALL]]
 //
 // NONANS: Function Attrs: noinline nounwind optnone