[clang] [Clang] Access tls_guard via llvm.threadlocal.address (PR #96633)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Jun 26 04:25:03 PDT 2024
https://github.com/nikola-tesic-ns updated https://github.com/llvm/llvm-project/pull/96633
>From 41427a3de345517025477257bfd4f614f06cbcfe Mon Sep 17 00:00:00 2001
From: Nikola Tesic <nikola.tesic at nextsilicon.com>
Date: Tue, 25 Jun 2024 15:58:18 +0300
Subject: [PATCH 1/2] [Clang] Access tls_guard via llvm.threadlocal.address
This patch fixes the compiler-generated code in the `tls_init` function so that
it accesses the TLS variable (`tls_guard`) via the llvm.threadlocal.address intrinsic.
---
clang/lib/CodeGen/CGDeclCXX.cpp | 29 +++++++++++-------
clang/test/CodeGenCXX/cxx11-thread-local.cpp | 9 ++++--
.../static-initializer-branch-weights.cpp | 3 +-
clang/test/OpenMP/parallel_copyin_codegen.cpp | 6 ++--
.../OpenMP/target_has_device_addr_codegen.cpp | 6 ++--
clang/test/OpenMP/threadprivate_codegen.cpp | 30 ++++++++++++-------
6 files changed, 55 insertions(+), 28 deletions(-)
diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp
index e18b339b31d24..0663a083bf3e8 100644
--- a/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -1059,9 +1059,10 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn,
if (Guard.isValid()) {
// If we have a guard variable, check whether we've already performed
// these initializations. This happens for TLS initialization functions.
- llvm::Value *GuardVal = Builder.CreateLoad(Guard);
- llvm::Value *Uninit = Builder.CreateIsNull(GuardVal,
- "guard.uninitialized");
+ llvm::Value *GuardVal = EmitLoadOfScalar(
+ MakeAddrLValue(Guard, getContext().IntTy), SourceLocation());
+ llvm::Value *Uninit =
+ Builder.CreateIsNull(GuardVal, "guard.uninitialized");
llvm::BasicBlock *InitBlock = createBasicBlock("init");
ExitBlock = createBasicBlock("exit");
EmitCXXGuardedInitBranch(Uninit, InitBlock, ExitBlock,
@@ -1070,13 +1071,21 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn,
// Mark as initialized before initializing anything else. If the
// initializers use previously-initialized thread_local vars, that's
// probably supposed to be OK, but the standard doesn't say.
- Builder.CreateStore(llvm::ConstantInt::get(GuardVal->getType(),1), Guard);
-
- // The guard variable can't ever change again.
- EmitInvariantStart(
- Guard.getPointer(),
- CharUnits::fromQuantity(
- CGM.getDataLayout().getTypeAllocSize(GuardVal->getType())));
+ EmitStoreOfScalar(llvm::ConstantInt::get(GuardVal->getType(), 1),
+ MakeAddrLValue(Guard, getContext().IntTy));
+
+ // Emit invariant start for TLS guard address.
+ if (CGM.getCodeGenOpts().OptimizationLevel > 0) {
+ uint64_t Width =
+ CGM.getDataLayout().getTypeAllocSize(GuardVal->getType());
+ llvm::Value *TLSAddr = Guard.getPointer();
+ // Get the thread-local address via intrinsic.
+ if (auto *GV = dyn_cast<llvm::GlobalValue>(Guard.getPointer()))
+ if (GV->isThreadLocal())
+ TLSAddr = Builder.CreateThreadLocalAddress(Guard.getPointer());
+ Builder.CreateInvariantStart(
+ TLSAddr, llvm::ConstantInt::getSigned(Int64Ty, Width));
+ }
}
RunCleanupsScope Scope(*this);
diff --git a/clang/test/CodeGenCXX/cxx11-thread-local.cpp b/clang/test/CodeGenCXX/cxx11-thread-local.cpp
index bcc490bc32e6e..e9a0799cf8d9a 100644
--- a/clang/test/CodeGenCXX/cxx11-thread-local.cpp
+++ b/clang/test/CodeGenCXX/cxx11-thread-local.cpp
@@ -358,12 +358,15 @@ void set_anon_i() {
// CHECK: define {{.*}}@__tls_init()
-// CHECK: load i8, ptr @__tls_guard
+// CHECK: [[TLS_GUARD_ADDR_1:%.+]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK: load i8, ptr [[TLS_GUARD_ADDR_1]]
// CHECK: %[[NEED_TLS_INIT:.*]] = icmp eq i8 %{{.*}}, 0
// CHECK: br i1 %[[NEED_TLS_INIT]],
// init:
-// CHECK: store i8 1, ptr @__tls_guard
-// CHECK-OPT: call ptr @llvm.invariant.start.p0(i64 1, ptr @__tls_guard)
+// CHECK: [[TLS_GUARD_ADDR_2:%.+]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK: store i8 1, ptr [[TLS_GUARD_ADDR_2]]
+// CHECK-OPT: [[TLS_GUARD_ADDR_3:%.+]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK-OPT: call ptr @llvm.invariant.start.p0(i64 1, ptr [[TLS_GUARD_ADDR_3]])
// CHECK-NOT: call void @[[V_M_INIT]]()
// CHECK: call void @[[A_INIT]]()
// CHECK-NOT: call void @[[V_M_INIT]]()
diff --git a/clang/test/CodeGenCXX/static-initializer-branch-weights.cpp b/clang/test/CodeGenCXX/static-initializer-branch-weights.cpp
index 121b9b2029959..e855f54643eae 100644
--- a/clang/test/CodeGenCXX/static-initializer-branch-weights.cpp
+++ b/clang/test/CodeGenCXX/static-initializer-branch-weights.cpp
@@ -118,7 +118,8 @@ void use_b() {
}
// CHECK-LABEL: define {{.*}}tls_init()
-// CHECK: load i8, ptr @__tls_guard, align 1
+// CHECK: [[TLS_GUARD_ADDR:%.+]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK: load i8, ptr [[TLS_GUARD_ADDR]], align 1
// CHECK: icmp eq i8 {{.*}}, 0
// CHECK: br i1 {{.*}}, !prof ![[WEIGHTS_THREAD_LOCAL]]
diff --git a/clang/test/OpenMP/parallel_copyin_codegen.cpp b/clang/test/OpenMP/parallel_copyin_codegen.cpp
index e653a7734161b..aa2ea78c4fe09 100644
--- a/clang/test/OpenMP/parallel_copyin_codegen.cpp
+++ b/clang/test/OpenMP/parallel_copyin_codegen.cpp
@@ -1760,11 +1760,13 @@ void foo() {
// CHECK16-LABEL: define {{[^@]+}}@__tls_init
// CHECK16-SAME: () #[[ATTR0]] {
// CHECK16-NEXT: entry:
-// CHECK16-NEXT: [[TMP0:%.*]] = load i8, ptr @__tls_guard, align 1
+// CHECK16-NEXT: [[TLS_ADR_1:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK16-NEXT: [[TMP0:%.*]] = load i8, ptr [[TLS_ADR_1]], align 1
// CHECK16-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0
// CHECK16-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !prof [[PROF5:![0-9]+]]
// CHECK16: init:
-// CHECK16-NEXT: store i8 1, ptr @__tls_guard, align 1
+// CHECK16-NEXT: [[TLS_ADR_2:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK16-NEXT: store i8 1, ptr [[TLS_ADR_2]], align 1
// CHECK16-NEXT: call void @__cxx_global_var_init()
// CHECK16-NEXT: br label [[EXIT]]
// CHECK16: exit:
diff --git a/clang/test/OpenMP/target_has_device_addr_codegen.cpp b/clang/test/OpenMP/target_has_device_addr_codegen.cpp
index ba1b618ed8bdd..71dd92eb9fe48 100644
--- a/clang/test/OpenMP/target_has_device_addr_codegen.cpp
+++ b/clang/test/OpenMP/target_has_device_addr_codegen.cpp
@@ -1304,11 +1304,13 @@ void use_template() {
// CHECK-LABEL: define {{[^@]+}}@__tls_init
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr @__tls_guard, align 1
+// CHECK-NEXT: [[TLS_ADR_1:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[TLS_ADR_1]], align 1
// CHECK-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0
// CHECK-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !prof [[PROF18:![0-9]+]]
// CHECK: init:
-// CHECK-NEXT: store i8 1, ptr @__tls_guard, align 1
+// CHECK-NEXT: [[TLS_ADR_2:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK-NEXT: store i8 1, ptr [[TLS_ADR_2]], align 1
// CHECK-NEXT: call void @__cxx_global_var_init.4()
// CHECK-NEXT: br label [[EXIT]]
// CHECK: exit:
diff --git a/clang/test/OpenMP/threadprivate_codegen.cpp b/clang/test/OpenMP/threadprivate_codegen.cpp
index b5eb4651d6c33..2dbdfc5eb6095 100644
--- a/clang/test/OpenMP/threadprivate_codegen.cpp
+++ b/clang/test/OpenMP/threadprivate_codegen.cpp
@@ -958,11 +958,13 @@ int foobar() {
// OMP50-TLS: define {{.*}}void [[ST_S4_ST_DTOR2]](ptr {{.*}})
// CHECK-TLS: define internal void @__tls_init()
-// CHECK-TLS: [[GRD:%.*]] = load i8, ptr @__tls_guard
+// CHECK-TLS: [[TLS_ADR_1:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK-TLS-NEXT: [[GRD:%.*]] = load i8, ptr [[TLS_ADR_1]]
// CHECK-TLS-NEXT: [[IS_INIT:%.*]] = icmp eq i8 [[GRD]], 0
// CHECK-TLS-NEXT: br i1 [[IS_INIT]], label %[[INIT_LABEL:[^,]+]], label %[[DONE_LABEL:[^,]+]]{{.*}}
// CHECK-TLS: [[INIT_LABEL]]
-// CHECK-TLS-NEXT: store i8 1, ptr @__tls_guard
+// CHECK-TLS-NEXT: [[TLS_ADR_2:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK-TLS-NEXT: store i8 1, ptr [[TLS_ADR_2]]
// CHECK-TLS: call void [[GS1_CXX_INIT]]
// CHECK-TLS-NOT: call void [[GS2_CXX_INIT]]
// CHECK-TLS: call void [[ARR_X_CXX_INIT]]
@@ -3829,11 +3831,13 @@ int foobar() {
// CHECK-TLS1-LABEL: define {{[^@]+}}@__tls_init
// CHECK-TLS1-SAME: () #[[ATTR0]] {
// CHECK-TLS1-NEXT: entry:
-// CHECK-TLS1-NEXT: [[TMP0:%.*]] = load i8, ptr @__tls_guard, align 1
+// CHECK-TLS1-NEXT: [[TLS_ADR_3:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK-TLS1-NEXT: [[TMP0:%.*]] = load i8, ptr [[TLS_ADR_3]], align 1
// CHECK-TLS1-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0
// CHECK-TLS1-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !prof [[PROF3]]
// CHECK-TLS1: init:
-// CHECK-TLS1-NEXT: store i8 1, ptr @__tls_guard, align 1
+// CHECK-TLS1-NEXT: [[TLS_ADR_4:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK-TLS1-NEXT: store i8 1, ptr [[TLS_ADR_4]], align 1
// CHECK-TLS1-NEXT: call void @__cxx_global_var_init()
// CHECK-TLS1-NEXT: call void @__cxx_global_var_init.2()
// CHECK-TLS1-NEXT: br label [[EXIT]]
@@ -4366,11 +4370,13 @@ int foobar() {
// CHECK-TLS2-LABEL: define {{[^@]+}}@__tls_init
// CHECK-TLS2-SAME: () #[[ATTR6]] {
// CHECK-TLS2-NEXT: entry:
-// CHECK-TLS2-NEXT: [[TMP0:%.*]] = load i8, ptr @__tls_guard, align 1
+// CHECK-TLS2-NEXT: [[TLS_ADR_5:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK-TLS2-NEXT: [[TMP0:%.*]] = load i8, ptr [[TLS_ADR_5]], align 1
// CHECK-TLS2-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0
// CHECK-TLS2-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !prof [[PROF3]]
// CHECK-TLS2: init:
-// CHECK-TLS2-NEXT: store i8 1, ptr @__tls_guard, align 1
+// CHECK-TLS2-NEXT: [[TLS_ADR_6:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK-TLS2-NEXT: store i8 1, ptr [[TLS_ADR_6]], align 1
// CHECK-TLS2-NEXT: call void @__cxx_global_var_init()
// CHECK-TLS2-NEXT: call void @__cxx_global_var_init.2()
// CHECK-TLS2-NEXT: br label [[EXIT]]
@@ -4918,11 +4924,13 @@ int foobar() {
// CHECK-TLS3-LABEL: define {{[^@]+}}@__tls_init
// CHECK-TLS3-SAME: () #[[ATTR0]] !dbg [[DBG326:![0-9]+]] {
// CHECK-TLS3-NEXT: entry:
-// CHECK-TLS3-NEXT: [[TMP0:%.*]] = load i8, ptr @__tls_guard, align 1, !dbg [[DBG327:![0-9]+]]
+// CHECK-TLS3-NEXT: [[TLS_ADR_7:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK-TLS3-NEXT: [[TMP0:%.*]] = load i8, ptr [[TLS_ADR_7]], align 1, !dbg [[DBG327:![0-9]+]]
// CHECK-TLS3-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG327]]
// CHECK-TLS3-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !dbg [[DBG327]], !prof [[PROF206]]
// CHECK-TLS3: init:
-// CHECK-TLS3-NEXT: store i8 1, ptr @__tls_guard, align 1, !dbg [[DBG327]]
+// CHECK-TLS3-NEXT: [[TLS_ADR_8:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK-TLS3-NEXT: store i8 1, ptr [[TLS_ADR_8]], align 1, !dbg [[DBG327]]
// CHECK-TLS3-NEXT: call void @__cxx_global_var_init(), !dbg [[DBG327]]
// CHECK-TLS3-NEXT: call void @__cxx_global_var_init.2(), !dbg [[DBG327]]
// CHECK-TLS3-NEXT: br label [[EXIT]], !dbg [[DBG327]]
@@ -5482,11 +5490,13 @@ int foobar() {
// CHECK-TLS4-LABEL: define {{[^@]+}}@__tls_init
// CHECK-TLS4-SAME: () #[[ATTR6]] !dbg [[DBG326:![0-9]+]] {
// CHECK-TLS4-NEXT: entry:
-// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i8, ptr @__tls_guard, align 1, !dbg [[DBG327:![0-9]+]]
+// CHECK-TLS4-NEXT: [[TLS_ADR_9:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i8, ptr [[TLS_ADR_9]], align 1, !dbg [[DBG327:![0-9]+]]
// CHECK-TLS4-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG327]]
// CHECK-TLS4-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !dbg [[DBG327]], !prof [[PROF119]]
// CHECK-TLS4: init:
-// CHECK-TLS4-NEXT: store i8 1, ptr @__tls_guard, align 1, !dbg [[DBG327]]
+// CHECK-TLS4-NEXT: [[TLS_ADR_10:%.*]] = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @__tls_guard)
+// CHECK-TLS4-NEXT: store i8 1, ptr [[TLS_ADR_10]], align 1, !dbg [[DBG327]]
// CHECK-TLS4-NEXT: call void @__cxx_global_var_init(), !dbg [[DBG327]]
// CHECK-TLS4-NEXT: call void @__cxx_global_var_init.2(), !dbg [[DBG327]]
// CHECK-TLS4-NEXT: br label [[EXIT]], !dbg [[DBG327]]
>From 9989b88e0206f130a0870b490b890e1f3ddcdfbf Mon Sep 17 00:00:00 2001
From: Nikola Tesic <nikola.tesic at nextsilicon.com>
Date: Wed, 26 Jun 2024 14:09:02 +0300
Subject: [PATCH 2/2] Fixup1: Use CreateThreadLocalAddress API directly
---
clang/lib/CodeGen/CGDeclCXX.cpp | 22 ++++++++++++++++------
1 file changed, 16 insertions(+), 6 deletions(-)
diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp
index 0663a083bf3e8..54051f146e191 100644
--- a/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -1059,8 +1059,13 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn,
if (Guard.isValid()) {
// If we have a guard variable, check whether we've already performed
// these initializations. This happens for TLS initialization functions.
- llvm::Value *GuardVal = EmitLoadOfScalar(
- MakeAddrLValue(Guard, getContext().IntTy), SourceLocation());
+ Address GuardAddr = Guard;
+ if (auto *GV = dyn_cast<llvm::GlobalValue>(Guard.getPointer()))
+ // Get the thread-local address via intrinsic.
+ if (GV->isThreadLocal())
+ GuardAddr = GuardAddr.withPointer(
+ Builder.CreateThreadLocalAddress(GV), NotKnownNonNull);
+ llvm::Value *GuardVal = Builder.CreateLoad(GuardAddr);
llvm::Value *Uninit =
Builder.CreateIsNull(GuardVal, "guard.uninitialized");
llvm::BasicBlock *InitBlock = createBasicBlock("init");
@@ -1071,18 +1076,23 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn,
// Mark as initialized before initializing anything else. If the
// initializers use previously-initialized thread_local vars, that's
// probably supposed to be OK, but the standard doesn't say.
- EmitStoreOfScalar(llvm::ConstantInt::get(GuardVal->getType(), 1),
- MakeAddrLValue(Guard, getContext().IntTy));
+ if (auto *GV = dyn_cast<llvm::GlobalValue>(Guard.getPointer()))
+ // Get the thread-local address via intrinsic.
+ if (GV->isThreadLocal())
+ GuardAddr = GuardAddr.withPointer(
+ Builder.CreateThreadLocalAddress(GV), NotKnownNonNull);
+ Builder.CreateStore(llvm::ConstantInt::get(GuardVal->getType(), 1),
+ GuardAddr);
// Emit invariant start for TLS guard address.
if (CGM.getCodeGenOpts().OptimizationLevel > 0) {
uint64_t Width =
CGM.getDataLayout().getTypeAllocSize(GuardVal->getType());
llvm::Value *TLSAddr = Guard.getPointer();
- // Get the thread-local address via intrinsic.
if (auto *GV = dyn_cast<llvm::GlobalValue>(Guard.getPointer()))
+ // Get the thread-local address via intrinsic.
if (GV->isThreadLocal())
- TLSAddr = Builder.CreateThreadLocalAddress(Guard.getPointer());
+ TLSAddr = Builder.CreateThreadLocalAddress(GV);
Builder.CreateInvariantStart(
TLSAddr, llvm::ConstantInt::getSigned(Int64Ty, Width));
}
More information about the cfe-commits mailing list