[clang] [Clang] Access tls_guard via llvm.threadlocal.address (PR #96633)

via cfe-commits cfe-commits at lists.llvm.org
Wed Jul 3 04:06:36 PDT 2024


https://github.com/nikola-tesic-ns updated https://github.com/llvm/llvm-project/pull/96633

>From 41427a3de345517025477257bfd4f614f06cbcfe Mon Sep 17 00:00:00 2001
From: Nikola Tesic <nikola.tesic at nextsilicon.com>
Date: Tue, 25 Jun 2024 15:58:18 +0300
Subject: [PATCH 1/3] [Clang] Access tls_guard via llvm.threadlocal.address

This patch fixes the compiler-generated code in the `__tls_init` function so
that it accesses the TLS guard variable (`__tls_guard`) via the
`llvm.threadlocal.address` intrinsic instead of referencing the global directly.
---
 clang/lib/CodeGen/CGDeclCXX.cpp               | 29 +++++++++++-------
 clang/test/CodeGenCXX/cxx11-thread-local.cpp  |  9 ++++--
 .../static-initializer-branch-weights.cpp     |  3 +-
 clang/test/OpenMP/parallel_copyin_codegen.cpp |  6 ++--
 .../OpenMP/target_has_device_addr_codegen.cpp |  6 ++--
 clang/test/OpenMP/threadprivate_codegen.cpp   | 30 ++++++++++++-------
 6 files changed, 55 insertions(+), 28 deletions(-)

diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp
index e18b339b31d24..0663a083bf3e8 100644
--- a/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -1059,9 +1059,10 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn,
     if (Guard.isValid()) {
       // If we have a guard variable, check whether we've already performed
       // these initializations. This happens for TLS initialization functions.
-      llvm::Value *GuardVal = Builder.CreateLoad(Guard);
-      llvm::Value *Uninit = Builder.CreateIsNull(GuardVal,
-                                                 "guard.uninitialized");
+      llvm::Value *GuardVal = EmitLoadOfScalar(
+          MakeAddrLValue(Guard, getContext().IntTy), SourceLocation());
+      llvm::Value *Uninit =
+          Builder.CreateIsNull(GuardVal, "guard.uninitialized");
       llvm::BasicBlock *InitBlock = createBasicBlock("init");
       ExitBlock = createBasicBlock("exit");
       EmitCXXGuardedInitBranch(Uninit, InitBlock, ExitBlock,
@@ -1070,13 +1071,21 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn,
       // Mark as initialized before initializing anything else. If the
       // initializers use previously-initialized thread_local vars, that's
       // probably supposed to be OK, but the standard doesn't say.
-      Builder.CreateStore(llvm::ConstantInt::get(GuardVal->getType(),1), Guard);
-
-      // The guard variable can't ever change again.
-      EmitInvariantStart(
-          Guard.getPointer(),
-          CharUnits::fromQuantity(
-              CGM.getDataLayout().getTypeAllocSize(GuardVal->getType())));
+      EmitStoreOfScalar(llvm::ConstantInt::get(GuardVal->getType(), 1),
+                        MakeAddrLValue(Guard, getContext().IntTy));
+
+      // Emit invariant start for TLS guard address.
+      if (CGM.getCodeGenOpts().OptimizationLevel > 0) {
+        uint64_t Width =
+            CGM.getDataLayout().getTypeAllocSize(GuardVal->getType());
+        llvm::Value *TLSAddr = Guard.getPointer();
+        // Get the thread-local address via intrinsic.
+        if (auto *GV = dyn_cast<llvm::GlobalValue>(Guard.getPointer()))
+          if (GV->isThreadLocal())
+            TLSAddr = Builder.CreateThreadLocalAddress(Guard.getPointer());
+        Builder.CreateInvariantStart(
+            TLSAddr, llvm::ConstantInt::getSigned(Int64Ty, Width));
+      }
     }
 
     RunCleanupsScope Scope(*this);
diff --git a/clang/test/CodeGenCXX/cxx11-thread-local.cpp b/clang/test/CodeGenCXX/cxx11-thread-local.cpp
index bcc490bc32e6e..e9a0799cf8d9a 100644
--- a/clang/test/CodeGenCXX/cxx11-thread-local.cpp
+++ b/clang/test/CodeGenCXX/cxx11-thread-local.cpp
@@ -358,12 +358,15 @@ void set_anon_i() {
 
 
 // CHECK: define {{.*}}@__tls_init()
-// CHECK: load i8, ptr @__tls_guard
+// CHECK: [[TLS_GUARD_ADDR_1:%.+]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK: load i8, ptr [[TLS_GUARD_ADDR_1]]
 // CHECK: %[[NEED_TLS_INIT:.*]] = icmp eq i8 %{{.*}}, 0
 // CHECK: br i1 %[[NEED_TLS_INIT]],
 // init:
-// CHECK: store i8 1, ptr @__tls_guard
-// CHECK-OPT: call ptr @llvm.invariant.start.p0(i64 1, ptr @__tls_guard)
+// CHECK: [[TLS_GUARD_ADDR_2:%.+]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK: store i8 1, ptr [[TLS_GUARD_ADDR_2]]
+// CHECK-OPT: [[TLS_GUARD_ADDR_3:%.+]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK-OPT: call ptr @llvm.invariant.start.p0(i64 1, ptr [[TLS_GUARD_ADDR_3]])
 // CHECK-NOT: call void @[[V_M_INIT]]()
 // CHECK: call void @[[A_INIT]]()
 // CHECK-NOT: call void @[[V_M_INIT]]()
diff --git a/clang/test/CodeGenCXX/static-initializer-branch-weights.cpp b/clang/test/CodeGenCXX/static-initializer-branch-weights.cpp
index 121b9b2029959..e855f54643eae 100644
--- a/clang/test/CodeGenCXX/static-initializer-branch-weights.cpp
+++ b/clang/test/CodeGenCXX/static-initializer-branch-weights.cpp
@@ -118,7 +118,8 @@ void use_b() {
 }
 
 // CHECK-LABEL: define {{.*}}tls_init()
-// CHECK: load i8, ptr @__tls_guard, align 1
+// CHECK: [[TLS_GUARD_ADDR:%.+]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK: load i8, ptr [[TLS_GUARD_ADDR]], align 1
 // CHECK: icmp eq i8 {{.*}}, 0
 // CHECK: br i1 {{.*}}, !prof ![[WEIGHTS_THREAD_LOCAL]]
 
diff --git a/clang/test/OpenMP/parallel_copyin_codegen.cpp b/clang/test/OpenMP/parallel_copyin_codegen.cpp
index e653a7734161b..aa2ea78c4fe09 100644
--- a/clang/test/OpenMP/parallel_copyin_codegen.cpp
+++ b/clang/test/OpenMP/parallel_copyin_codegen.cpp
@@ -1760,11 +1760,13 @@ void foo() {
 // CHECK16-LABEL: define {{[^@]+}}@__tls_init
 // CHECK16-SAME: () #[[ATTR0]] {
 // CHECK16-NEXT:  entry:
-// CHECK16-NEXT:    [[TMP0:%.*]] = load i8, ptr @__tls_guard, align 1
+// CHECK16-NEXT:    [[TLS_ADR_1:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK16-NEXT:    [[TMP0:%.*]] = load i8, ptr [[TLS_ADR_1]], align 1
 // CHECK16-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0
 // CHECK16-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !prof [[PROF5:![0-9]+]]
 // CHECK16:       init:
-// CHECK16-NEXT:    store i8 1, ptr @__tls_guard, align 1
+// CHECK16-NEXT:    [[TLS_ADR_2:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK16-NEXT:    store i8 1, ptr [[TLS_ADR_2]], align 1
 // CHECK16-NEXT:    call void @__cxx_global_var_init()
 // CHECK16-NEXT:    br label [[EXIT]]
 // CHECK16:       exit:
diff --git a/clang/test/OpenMP/target_has_device_addr_codegen.cpp b/clang/test/OpenMP/target_has_device_addr_codegen.cpp
index ba1b618ed8bdd..71dd92eb9fe48 100644
--- a/clang/test/OpenMP/target_has_device_addr_codegen.cpp
+++ b/clang/test/OpenMP/target_has_device_addr_codegen.cpp
@@ -1304,11 +1304,13 @@ void use_template() {
 // CHECK-LABEL: define {{[^@]+}}@__tls_init
 // CHECK-SAME: () #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr @__tls_guard, align 1
+// CHECK-NEXT:    [[TLS_ADR_1:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[TLS_ADR_1]], align 1
 // CHECK-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0
 // CHECK-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !prof [[PROF18:![0-9]+]]
 // CHECK:       init:
-// CHECK-NEXT:    store i8 1, ptr @__tls_guard, align 1
+// CHECK-NEXT:    [[TLS_ADR_2:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK-NEXT:    store i8 1, ptr [[TLS_ADR_2]], align 1
 // CHECK-NEXT:    call void @__cxx_global_var_init.4()
 // CHECK-NEXT:    br label [[EXIT]]
 // CHECK:       exit:
diff --git a/clang/test/OpenMP/threadprivate_codegen.cpp b/clang/test/OpenMP/threadprivate_codegen.cpp
index b5eb4651d6c33..2dbdfc5eb6095 100644
--- a/clang/test/OpenMP/threadprivate_codegen.cpp
+++ b/clang/test/OpenMP/threadprivate_codegen.cpp
@@ -958,11 +958,13 @@ int foobar() {
 // OMP50-TLS: define {{.*}}void [[ST_S4_ST_DTOR2]](ptr {{.*}})
 
 // CHECK-TLS:      define internal void @__tls_init()
-// CHECK-TLS:      [[GRD:%.*]] = load i8, ptr @__tls_guard
+// CHECK-TLS:      [[TLS_ADR_1:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK-TLS-NEXT: [[GRD:%.*]] = load i8, ptr [[TLS_ADR_1]]
 // CHECK-TLS-NEXT: [[IS_INIT:%.*]] = icmp eq i8 [[GRD]], 0
 // CHECK-TLS-NEXT: br i1 [[IS_INIT]], label %[[INIT_LABEL:[^,]+]], label %[[DONE_LABEL:[^,]+]]{{.*}}
 // CHECK-TLS:      [[INIT_LABEL]]
-// CHECK-TLS-NEXT: store i8 1, ptr @__tls_guard
+// CHECK-TLS-NEXT: [[TLS_ADR_2:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK-TLS-NEXT: store i8 1, ptr [[TLS_ADR_2]]
 // CHECK-TLS:      call void [[GS1_CXX_INIT]]
 // CHECK-TLS-NOT:  call void [[GS2_CXX_INIT]]
 // CHECK-TLS:      call void [[ARR_X_CXX_INIT]]
@@ -3829,11 +3831,13 @@ int foobar() {
 // CHECK-TLS1-LABEL: define {{[^@]+}}@__tls_init
 // CHECK-TLS1-SAME: () #[[ATTR0]] {
 // CHECK-TLS1-NEXT:  entry:
-// CHECK-TLS1-NEXT:    [[TMP0:%.*]] = load i8, ptr @__tls_guard, align 1
+// CHECK-TLS1-NEXT:    [[TLS_ADR_3:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK-TLS1-NEXT:    [[TMP0:%.*]] = load i8, ptr [[TLS_ADR_3]], align 1
 // CHECK-TLS1-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0
 // CHECK-TLS1-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !prof [[PROF3]]
 // CHECK-TLS1:       init:
-// CHECK-TLS1-NEXT:    store i8 1, ptr @__tls_guard, align 1
+// CHECK-TLS1-NEXT:    [[TLS_ADR_4:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK-TLS1-NEXT:    store i8 1, ptr [[TLS_ADR_4]], align 1
 // CHECK-TLS1-NEXT:    call void @__cxx_global_var_init()
 // CHECK-TLS1-NEXT:    call void @__cxx_global_var_init.2()
 // CHECK-TLS1-NEXT:    br label [[EXIT]]
@@ -4366,11 +4370,13 @@ int foobar() {
 // CHECK-TLS2-LABEL: define {{[^@]+}}@__tls_init
 // CHECK-TLS2-SAME: () #[[ATTR6]] {
 // CHECK-TLS2-NEXT:  entry:
-// CHECK-TLS2-NEXT:    [[TMP0:%.*]] = load i8, ptr @__tls_guard, align 1
+// CHECK-TLS2-NEXT:    [[TLS_ADR_5:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK-TLS2-NEXT:    [[TMP0:%.*]] = load i8, ptr [[TLS_ADR_5]], align 1
 // CHECK-TLS2-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0
 // CHECK-TLS2-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !prof [[PROF3]]
 // CHECK-TLS2:       init:
-// CHECK-TLS2-NEXT:    store i8 1, ptr @__tls_guard, align 1
+// CHECK-TLS2-NEXT:    [[TLS_ADR_6:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK-TLS2-NEXT:    store i8 1, ptr [[TLS_ADR_6]], align 1
 // CHECK-TLS2-NEXT:    call void @__cxx_global_var_init()
 // CHECK-TLS2-NEXT:    call void @__cxx_global_var_init.2()
 // CHECK-TLS2-NEXT:    br label [[EXIT]]
@@ -4918,11 +4924,13 @@ int foobar() {
 // CHECK-TLS3-LABEL: define {{[^@]+}}@__tls_init
 // CHECK-TLS3-SAME: () #[[ATTR0]] !dbg [[DBG326:![0-9]+]] {
 // CHECK-TLS3-NEXT:  entry:
-// CHECK-TLS3-NEXT:    [[TMP0:%.*]] = load i8, ptr @__tls_guard, align 1, !dbg [[DBG327:![0-9]+]]
+// CHECK-TLS3-NEXT:    [[TLS_ADR_7:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK-TLS3-NEXT:    [[TMP0:%.*]] = load i8, ptr [[TLS_ADR_7]], align 1, !dbg [[DBG327:![0-9]+]]
 // CHECK-TLS3-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG327]]
 // CHECK-TLS3-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !dbg [[DBG327]], !prof [[PROF206]]
 // CHECK-TLS3:       init:
-// CHECK-TLS3-NEXT:    store i8 1, ptr @__tls_guard, align 1, !dbg [[DBG327]]
+// CHECK-TLS3-NEXT:    [[TLS_ADR_8:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK-TLS3-NEXT:    store i8 1, ptr [[TLS_ADR_8]], align 1, !dbg [[DBG327]]
 // CHECK-TLS3-NEXT:    call void @__cxx_global_var_init(), !dbg [[DBG327]]
 // CHECK-TLS3-NEXT:    call void @__cxx_global_var_init.2(), !dbg [[DBG327]]
 // CHECK-TLS3-NEXT:    br label [[EXIT]], !dbg [[DBG327]]
@@ -5482,11 +5490,13 @@ int foobar() {
 // CHECK-TLS4-LABEL: define {{[^@]+}}@__tls_init
 // CHECK-TLS4-SAME: () #[[ATTR6]] !dbg [[DBG326:![0-9]+]] {
 // CHECK-TLS4-NEXT:  entry:
-// CHECK-TLS4-NEXT:    [[TMP0:%.*]] = load i8, ptr @__tls_guard, align 1, !dbg [[DBG327:![0-9]+]]
+// CHECK-TLS4-NEXT:    [[TLS_ADR_9:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK-TLS4-NEXT:    [[TMP0:%.*]] = load i8, ptr [[TLS_ADR_9]], align 1, !dbg [[DBG327:![0-9]+]]
 // CHECK-TLS4-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG327]]
 // CHECK-TLS4-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !dbg [[DBG327]], !prof [[PROF119]]
 // CHECK-TLS4:       init:
-// CHECK-TLS4-NEXT:    store i8 1, ptr @__tls_guard, align 1, !dbg [[DBG327]]
+// CHECK-TLS4-NEXT:    [[TLS_ADR_10:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK-TLS4-NEXT:    store i8 1, ptr [[TLS_ADR_10]], align 1, !dbg [[DBG327]]
 // CHECK-TLS4-NEXT:    call void @__cxx_global_var_init(), !dbg [[DBG327]]
 // CHECK-TLS4-NEXT:    call void @__cxx_global_var_init.2(), !dbg [[DBG327]]
 // CHECK-TLS4-NEXT:    br label [[EXIT]], !dbg [[DBG327]]

>From 9989b88e0206f130a0870b490b890e1f3ddcdfbf Mon Sep 17 00:00:00 2001
From: Nikola Tesic <nikola.tesic at nextsilicon.com>
Date: Wed, 26 Jun 2024 14:09:02 +0300
Subject: [PATCH 2/3] Fixup1: Use CreateThreadLocalAddress API directly

---
 clang/lib/CodeGen/CGDeclCXX.cpp | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp
index 0663a083bf3e8..54051f146e191 100644
--- a/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -1059,8 +1059,13 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn,
     if (Guard.isValid()) {
       // If we have a guard variable, check whether we've already performed
       // these initializations. This happens for TLS initialization functions.
-      llvm::Value *GuardVal = EmitLoadOfScalar(
-          MakeAddrLValue(Guard, getContext().IntTy), SourceLocation());
+      Address GuardAddr = Guard;
+      if (auto *GV = dyn_cast<llvm::GlobalValue>(Guard.getPointer()))
+        // Get the thread-local address via intrinsic.
+        if (GV->isThreadLocal())
+          GuardAddr = GuardAddr.withPointer(
+              Builder.CreateThreadLocalAddress(GV), NotKnownNonNull);
+      llvm::Value *GuardVal = Builder.CreateLoad(GuardAddr);
       llvm::Value *Uninit =
           Builder.CreateIsNull(GuardVal, "guard.uninitialized");
       llvm::BasicBlock *InitBlock = createBasicBlock("init");
@@ -1071,18 +1076,23 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn,
       // Mark as initialized before initializing anything else. If the
       // initializers use previously-initialized thread_local vars, that's
       // probably supposed to be OK, but the standard doesn't say.
-      EmitStoreOfScalar(llvm::ConstantInt::get(GuardVal->getType(), 1),
-                        MakeAddrLValue(Guard, getContext().IntTy));
+      if (auto *GV = dyn_cast<llvm::GlobalValue>(Guard.getPointer()))
+        // Get the thread-local address via intrinsic.
+        if (GV->isThreadLocal())
+          GuardAddr = GuardAddr.withPointer(
+              Builder.CreateThreadLocalAddress(GV), NotKnownNonNull);
+      Builder.CreateStore(llvm::ConstantInt::get(GuardVal->getType(), 1),
+                          GuardAddr);
 
       // Emit invariant start for TLS guard address.
       if (CGM.getCodeGenOpts().OptimizationLevel > 0) {
         uint64_t Width =
             CGM.getDataLayout().getTypeAllocSize(GuardVal->getType());
         llvm::Value *TLSAddr = Guard.getPointer();
-        // Get the thread-local address via intrinsic.
         if (auto *GV = dyn_cast<llvm::GlobalValue>(Guard.getPointer()))
+          // Get the thread-local address via intrinsic.
           if (GV->isThreadLocal())
-            TLSAddr = Builder.CreateThreadLocalAddress(Guard.getPointer());
+            TLSAddr = Builder.CreateThreadLocalAddress(GV);
         Builder.CreateInvariantStart(
             TLSAddr, llvm::ConstantInt::getSigned(Int64Ty, Width));
       }

>From b6802de028ccb46cfc8df859a06b35e220c5dc29 Mon Sep 17 00:00:00 2001
From: Nikola Tesic <nikola.tesic at nextsilicon.com>
Date: Tue, 2 Jul 2024 18:58:22 +0300
Subject: [PATCH 3/3] Fixup2: Pass IsTLS flag

---
 clang/lib/CodeGen/CGDeclCXX.cpp     | 64 +++++++++++++++--------------
 clang/lib/CodeGen/CodeGenFunction.h | 11 ++---
 clang/lib/CodeGen/ItaniumCXXABI.cpp |  3 +-
 3 files changed, 41 insertions(+), 37 deletions(-)

diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp
index 54051f146e191..b05ae470188fc 100644
--- a/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -154,21 +154,28 @@ static void EmitDeclInvariant(CodeGenFunction &CGF, const VarDecl &D,
       Addr, CGF.getContext().getTypeSizeInChars(D.getType()));
 }
 
-void CodeGenFunction::EmitInvariantStart(llvm::Constant *Addr, CharUnits Size) {
+void CodeGenFunction::EmitInvariantStart(llvm::Constant *Addr, CharUnits Size,
+                                         bool IsTLS) {
   // Do not emit the intrinsic if we're not optimizing.
   if (!CGM.getCodeGenOpts().OptimizationLevel)
     return;
 
   // Grab the llvm.invariant.start intrinsic.
   llvm::Intrinsic::ID InvStartID = llvm::Intrinsic::invariant_start;
+  llvm::Value *AddrPtr = Addr;
+  // Get the thread-local address via intrinsic.
+  if (IsTLS)
+    AddrPtr = Builder.CreateThreadLocalAddress(AddrPtr);
+
   // Overloaded address space type.
-  assert(Addr->getType()->isPointerTy() && "Address must be a pointer");
-  llvm::Type *ObjectPtr[1] = {Addr->getType()};
+  assert(AddrPtr->getType()->isPointerTy() && "Address must be a pointer");
+  llvm::Type *ObjectPtr[1] = {AddrPtr->getType()};
   llvm::Function *InvariantStart = CGM.getIntrinsic(InvStartID, ObjectPtr);
 
   // Emit a call with the size in bytes of the object.
   uint64_t Width = Size.getQuantity();
-  llvm::Value *Args[2] = {llvm::ConstantInt::getSigned(Int64Ty, Width), Addr};
+  llvm::Value *Args[2] = {llvm::ConstantInt::getSigned(Int64Ty, Width),
+                          AddrPtr};
   Builder.CreateCall(InvariantStart, Args);
 }
 
@@ -760,6 +767,7 @@ void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) {
     // If we have a completely empty initializer then we do not want to create
     // the guard variable.
     ConstantAddress GuardAddr = ConstantAddress::invalid();
+    bool IsTLS = false;
     if (!ModuleInits.empty()) {
       // Create the guard var.
       llvm::GlobalVariable *Guard = new llvm::GlobalVariable(
@@ -769,9 +777,10 @@ void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) {
       CharUnits GuardAlign = CharUnits::One();
       Guard->setAlignment(GuardAlign.getAsAlign());
       GuardAddr = ConstantAddress(Guard, Int8Ty, GuardAlign);
+      IsTLS = Guard->isThreadLocal();
     }
-    CodeGenFunction(*this).GenerateCXXGlobalInitFunc(Fn, ModuleInits,
-                                                     GuardAddr);
+    CodeGenFunction(*this).GenerateCXXGlobalInitFunc(Fn, ModuleInits, GuardAddr,
+                                                     IsTLS);
   }
 
   // We allow for the case that a module object is added to a linked binary
@@ -1044,10 +1053,9 @@ void CodeGenFunction::GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn,
   FinishFunction();
 }
 
-void
-CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn,
-                                           ArrayRef<llvm::Function *> Decls,
-                                           ConstantAddress Guard) {
+void CodeGenFunction::GenerateCXXGlobalInitFunc(
+    llvm::Function *Fn, ArrayRef<llvm::Function *> Decls, ConstantAddress Guard,
+    bool IsTLS) {
   {
     auto NL = ApplyDebugLocation::CreateEmpty(*this);
     StartFunction(GlobalDecl(), getContext().VoidTy, Fn,
@@ -1060,11 +1068,11 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn,
       // If we have a guard variable, check whether we've already performed
       // these initializations. This happens for TLS initialization functions.
       Address GuardAddr = Guard;
-      if (auto *GV = dyn_cast<llvm::GlobalValue>(Guard.getPointer()))
-        // Get the thread-local address via intrinsic.
-        if (GV->isThreadLocal())
-          GuardAddr = GuardAddr.withPointer(
-              Builder.CreateThreadLocalAddress(GV), NotKnownNonNull);
+      // Get the thread-local address via intrinsic.
+      if (IsTLS)
+        GuardAddr = GuardAddr.withPointer(
+            Builder.CreateThreadLocalAddress(Guard.getPointer()),
+            NotKnownNonNull);
       llvm::Value *GuardVal = Builder.CreateLoad(GuardAddr);
       llvm::Value *Uninit =
           Builder.CreateIsNull(GuardVal, "guard.uninitialized");
@@ -1076,26 +1084,20 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn,
       // Mark as initialized before initializing anything else. If the
       // initializers use previously-initialized thread_local vars, that's
       // probably supposed to be OK, but the standard doesn't say.
-      if (auto *GV = dyn_cast<llvm::GlobalValue>(Guard.getPointer()))
-        // Get the thread-local address via intrinsic.
-        if (GV->isThreadLocal())
-          GuardAddr = GuardAddr.withPointer(
-              Builder.CreateThreadLocalAddress(GV), NotKnownNonNull);
+      // Get the thread-local address via intrinsic.
+      if (IsTLS)
+        GuardAddr = GuardAddr.withPointer(
+            Builder.CreateThreadLocalAddress(Guard.getPointer()),
+            NotKnownNonNull);
       Builder.CreateStore(llvm::ConstantInt::get(GuardVal->getType(), 1),
                           GuardAddr);
 
       // Emit invariant start for TLS guard address.
-      if (CGM.getCodeGenOpts().OptimizationLevel > 0) {
-        uint64_t Width =
-            CGM.getDataLayout().getTypeAllocSize(GuardVal->getType());
-        llvm::Value *TLSAddr = Guard.getPointer();
-        if (auto *GV = dyn_cast<llvm::GlobalValue>(Guard.getPointer()))
-          // Get the thread-local address via intrinsic.
-          if (GV->isThreadLocal())
-            TLSAddr = Builder.CreateThreadLocalAddress(GV);
-        Builder.CreateInvariantStart(
-            TLSAddr, llvm::ConstantInt::getSigned(Int64Ty, Width));
-      }
+      EmitInvariantStart(
+          Guard.getPointer(),
+          CharUnits::fromQuantity(
+              CGM.getDataLayout().getTypeAllocSize(GuardVal->getType())),
+          IsTLS);
     }
 
     RunCleanupsScope Scope(*this);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index a9c497bde6871..757217f8bf1e3 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -39,6 +39,7 @@
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
+#include "llvm/IR/Constant.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/Support/Debug.h"
@@ -4832,7 +4833,8 @@ class CodeGenFunction : public CodeGenTypeCache {
                                 llvm::GlobalVariable *GV);
 
   // Emit an @llvm.invariant.start call for the given memory region.
-  void EmitInvariantStart(llvm::Constant *Addr, CharUnits Size);
+  void EmitInvariantStart(llvm::Constant *Addr, CharUnits Size,
+                          bool IsTLS = false);
 
   /// EmitCXXGlobalVarDeclInit - Create the initializer for a C++
   /// variable with global storage.
@@ -4881,10 +4883,9 @@ class CodeGenFunction : public CodeGenTypeCache {
 
   /// GenerateCXXGlobalInitFunc - Generates code for initializing global
   /// variables.
-  void
-  GenerateCXXGlobalInitFunc(llvm::Function *Fn,
-                            ArrayRef<llvm::Function *> CXXThreadLocals,
-                            ConstantAddress Guard = ConstantAddress::invalid());
+  void GenerateCXXGlobalInitFunc(
+      llvm::Function *Fn, ArrayRef<llvm::Function *> CXXThreadLocals,
+      ConstantAddress Guard = ConstantAddress::invalid(), bool IsTLS = false);
 
   /// GenerateCXXGlobalCleanUpFunc - Generates code for cleaning up global
   /// variables.
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index 01a735c1437e1..f1a1ffa07a164 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -2933,7 +2933,8 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs(
     Guard->setAlignment(GuardAlign.getAsAlign());
 
     CodeGenFunction(CGM).GenerateCXXGlobalInitFunc(
-        InitFunc, OrderedInits, ConstantAddress(Guard, CGM.Int8Ty, GuardAlign));
+        InitFunc, OrderedInits, ConstantAddress(Guard, CGM.Int8Ty, GuardAlign),
+        Guard->isThreadLocal());
     // On Darwin platforms, use CXX_FAST_TLS calling convention.
     if (CGM.getTarget().getTriple().isOSDarwin()) {
       InitFunc->setCallingConv(llvm::CallingConv::CXX_FAST_TLS);



More information about the cfe-commits mailing list