[clang] [llvm] [X86] Add Support for X86 TLSDESC Relocations (PR #83136)
Phoebe Wang via cfe-commits
cfe-commits at lists.llvm.org
Wed Feb 28 05:22:31 PST 2024
https://github.com/phoebewang updated https://github.com/llvm/llvm-project/pull/83136
>From cdc9ee6c322af0ceed162f3f714bcd0a22e020c3 Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Tue, 27 Feb 2024 22:16:38 +0800
Subject: [PATCH 1/2] [X86] Add Support for X86 TLSDESC Relocations
---
clang/lib/Driver/ToolChains/CommonArgs.cpp | 3 +-
clang/test/Driver/tls-dialect.c | 2 +-
.../lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 14 ++
llvm/lib/Target/X86/X86AsmPrinter.cpp | 2 +
llvm/lib/Target/X86/X86ISelLowering.cpp | 98 +++++++----
llvm/lib/Target/X86/X86MCInstLower.cpp | 30 +++-
llvm/test/CodeGen/X86/tls-desc.ll | 165 ++++++++++++++++++
7 files changed, 273 insertions(+), 41 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/tls-desc.ll
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index faceee85a2f8dc..c66e3ee12e50c4 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -740,7 +740,8 @@ bool tools::isTLSDESCEnabled(const ToolChain &TC,
SupportedArgument = V == "desc" || V == "trad";
EnableTLSDESC = V == "desc";
} else if (Triple.isX86()) {
- SupportedArgument = V == "gnu";
+ SupportedArgument = V == "gnu" || V == "gnu2";
+ EnableTLSDESC = V == "gnu2";
} else {
Unsupported = true;
}
diff --git a/clang/test/Driver/tls-dialect.c b/clang/test/Driver/tls-dialect.c
index f73915b28ec2a3..a808dd81531ce7 100644
--- a/clang/test/Driver/tls-dialect.c
+++ b/clang/test/Driver/tls-dialect.c
@@ -2,6 +2,7 @@
// RUN: %clang -### --target=riscv64-linux -mtls-dialect=trad %s 2>&1 | FileCheck --check-prefix=NODESC %s
// RUN: %clang -### --target=riscv64-linux %s 2>&1 | FileCheck --check-prefix=NODESC %s
// RUN: %clang -### --target=x86_64-linux -mtls-dialect=gnu %s 2>&1 | FileCheck --check-prefix=NODESC %s
+// RUN: %clang -### --target=x86_64-linux -mtls-dialect=gnu2 %s 2>&1 | FileCheck --check-prefix=DESC %s
/// Android supports TLSDESC by default on RISC-V
/// TLSDESC is not on by default in Linux, even on RISC-V, and is covered above
@@ -18,7 +19,6 @@
/// Unsupported argument
// RUN: not %clang -### --target=riscv64-linux -mtls-dialect=gnu2 %s 2>&1 | FileCheck --check-prefix=UNSUPPORTED-ARG %s
-// RUN: not %clang -### --target=x86_64-linux -mtls-dialect=gnu2 %s 2>&1 | FileCheck --check-prefix=UNSUPPORTED-ARG %s
// DESC: "-cc1" {{.*}}"-enable-tlsdesc"
// NODESC-NOT: "-enable-tlsdesc"
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 4442b80861b61a..1877550f8c40bb 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -431,6 +431,20 @@ enum TOF {
/// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @TLSLDM
MO_TLSLDM,
+ /// MO_TLSCALL - On a symbol operand this indicates that the immediate is
+ /// the index of the TLS descriptor function for the symbol. Used in both
+ /// the IA32 and x86-64 local dynamic TLS access model.
+ /// See 'RFC-TLSDESC-x86' for more details.
+ /// SYMBOL_LABEL @TLSCALL
+ MO_TLSCALL,
+ /// MO_TLSDESC - On a symbol operand this indicates that the immediate is
+ /// the index of the TLS descriptor argument for the symbol. When this
+ /// argument is passed to a call getting from index@TLSCALL, the function will
+ /// return the offset for the symbol. Used in both the IA32 and x86-64 local
+ /// dynamic TLS access model.
+ /// See 'RFC-TLSDESC-x86' for more details.
+ /// SYMBOL_LABEL @TLSDESC
+ MO_TLSDESC,
/// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
/// the offset of the GOT entry with the thread-pointer offset for the
/// symbol. Used in the x86-64 initial exec TLS access model.
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 3395a13545e454..d8e111db1cec42 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -271,6 +271,8 @@ void X86AsmPrinter::PrintSymbolOperand(const MachineOperand &MO,
case X86II::MO_TLSGD: O << "@TLSGD"; break;
case X86II::MO_TLSLD: O << "@TLSLD"; break;
case X86II::MO_TLSLDM: O << "@TLSLDM"; break;
+ case X86II::MO_TLSDESC: O << "@TLSDESC"; break;
+ case X86II::MO_TLSCALL: O << "@TLSCALL"; break;
case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break;
case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break;
case X86II::MO_TPOFF: O << "@TPOFF"; break;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a86f13135173b0..88314bcf510e9a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18515,17 +18515,17 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false);
}
-static SDValue
-GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
- SDValue *InGlue, const EVT PtrVT, unsigned ReturnReg,
- unsigned char OperandFlags, bool LocalDynamic = false) {
+static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain,
+ GlobalAddressSDNode *GA, SDValue *InGlue,
+ const EVT PtrVT, unsigned ReturnReg,
+ unsigned char OperandFlags, bool UseTLSDESC = false,
+ bool LocalDynamic = false) {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDLoc dl(GA);
- SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
- GA->getValueType(0),
- GA->getOffset(),
- OperandFlags);
+ SDValue TGA = DAG.getTargetGlobalAddress(
+ GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(),
+ UseTLSDESC ? X86II::MO_TLSDESC : OperandFlags);
X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR
: X86ISD::TLSADDR;
@@ -18543,13 +18543,27 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
MFI.setHasCalls(true);
SDValue Glue = Chain.getValue(1);
- return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
+ SDValue Ret = DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
+
+ if (!UseTLSDESC)
+ return Ret;
+
+ const X86Subtarget &Subtarget = DAG.getSubtarget<X86Subtarget>();
+ MVT VT = Subtarget.isTarget64BitLP64() ? MVT::i64 : MVT::i32;
+ unsigned Seg = Subtarget.is64Bit() ? X86AS::FS : X86AS::GS;
+
+ Value *Ptr = Constant::getNullValue(PointerType::get(*DAG.getContext(), Seg));
+ SDValue Offset =
+ DAG.getLoad(VT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0, dl),
+ MachinePointerInfo(Ptr));
+ return DAG.getNode(ISD::ADD, dl, VT, Ret, Offset);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
-static SDValue
-LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
- const EVT PtrVT) {
+static SDValue LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG,
+ const EVT PtrVT,
+ bool UseTLSDESC) {
SDValue InGlue;
SDLoc dl(GA); // ? function entry point might be better
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
@@ -18557,23 +18571,26 @@ LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
SDLoc(), PtrVT), InGlue);
InGlue = Chain.getValue(1);
- return GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD);
+ return GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD,
+ UseTLSDESC);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit LP64
-static SDValue
-LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
- const EVT PtrVT) {
- return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
- X86::RAX, X86II::MO_TLSGD);
+static SDValue LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG,
+ const EVT PtrVT,
+ bool UseTLSDESC) {
+ return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::RAX,
+ X86II::MO_TLSGD, UseTLSDESC);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit ILP32
-static SDValue
-LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
- const EVT PtrVT) {
- return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
- X86::EAX, X86II::MO_TLSGD);
+static SDValue LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG,
+ const EVT PtrVT,
+ bool UseTLSDESC) {
+ return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::EAX,
+ X86II::MO_TLSGD, UseTLSDESC);
}
static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
@@ -18590,14 +18607,16 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
if (Is64Bit) {
unsigned ReturnReg = Is64BitLP64 ? X86::RAX : X86::EAX;
Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg,
- X86II::MO_TLSLD, /*LocalDynamic=*/true);
+ X86II::MO_TLSLD, /*UseTLSDESC=*/false,
+ /*LocalDynamic=*/true);
} else {
SDValue InGlue;
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InGlue);
InGlue = Chain.getValue(1);
Base = GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX,
- X86II::MO_TLSLDM, /*LocalDynamic=*/true);
+ X86II::MO_TLSLDM, /*UseTLSDESC=*/false,
+ /*LocalDynamic=*/true);
}
// Note: the CleanupLocalDynamicTLSPass will remove redundant computations
@@ -18684,21 +18703,26 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget.isTargetELF()) {
TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
+ bool UseTLSDESC = DAG.getTarget().useTLSDESC();
switch (model) {
- case TLSModel::GeneralDynamic:
- if (Subtarget.is64Bit()) {
- if (Subtarget.isTarget64BitLP64())
- return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT);
- return LowerToTLSGeneralDynamicModelX32(GA, DAG, PtrVT);
- }
- return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT);
- case TLSModel::LocalDynamic:
+ case TLSModel::LocalDynamic:
+ if (!UseTLSDESC)
return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, Subtarget.is64Bit(),
Subtarget.isTarget64BitLP64());
- case TLSModel::InitialExec:
- case TLSModel::LocalExec:
- return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(),
- PositionIndependent);
+ [[fallthrough]];
+ case TLSModel::GeneralDynamic:
+ if (Subtarget.is64Bit()) {
+ if (Subtarget.isTarget64BitLP64()) {
+ // auto PtrVT = getPointerTy(DAG.getDataLayout(), X86AS::FS);
+ return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT, UseTLSDESC);
+ }
+ return LowerToTLSGeneralDynamicModelX32(GA, DAG, PtrVT, UseTLSDESC);
+ }
+ return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT, UseTLSDESC);
+ case TLSModel::InitialExec:
+ case TLSModel::LocalExec:
+ return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(),
+ PositionIndependent);
}
llvm_unreachable("Unknown TLS model.");
}
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index d3b7d97a83caf0..e447e17c2d7d09 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -257,6 +257,12 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case X86II::MO_TLSLDM:
RefKind = MCSymbolRefExpr::VK_TLSLDM;
break;
+ case X86II::MO_TLSDESC:
+ RefKind = MCSymbolRefExpr::VK_TLSDESC;
+ break;
+ case X86II::MO_TLSCALL:
+ RefKind = MCSymbolRefExpr::VK_TLSCALL;
+ break;
case X86II::MO_GOTTPOFF:
RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
break;
@@ -524,13 +530,14 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
bool Is64BitsLP64 = MI.getOpcode() == X86::TLS_addr64 ||
MI.getOpcode() == X86::TLS_base_addr64;
MCContext &Ctx = OutStreamer->getContext();
+ bool isTLSDESC = MI.getOperand(3).getTargetFlags() == X86II::MO_TLSDESC;
MCSymbolRefExpr::VariantKind SRVK;
switch (MI.getOpcode()) {
case X86::TLS_addr32:
case X86::TLS_addr64:
case X86::TLS_addrX32:
- SRVK = MCSymbolRefExpr::VK_TLSGD;
+ SRVK = isTLSDESC ? MCSymbolRefExpr::VK_TLSDESC : MCSymbolRefExpr::VK_TLSGD;
break;
case X86::TLS_base_addr32:
SRVK = MCSymbolRefExpr::VK_TLSLDM;
@@ -554,7 +561,26 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
Ctx.getAsmInfo()->canRelaxRelocations();
- if (Is64Bits) {
+ if (isTLSDESC) {
+ const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(
+ MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)),
+ MCSymbolRefExpr::VK_TLSCALL, Ctx);
+ EmitAndCountInstruction(
+ MCInstBuilder(Is64BitsLP64 ? X86::LEA64r : X86::LEA32r)
+ .addReg(Is64BitsLP64 ? X86::RAX : X86::EAX)
+ .addReg(Is64Bits ? X86::RIP : X86::EBX)
+ .addImm(1)
+ .addReg(0)
+ .addExpr(Sym)
+ .addReg(0));
+ EmitAndCountInstruction(
+ MCInstBuilder(Is64Bits ? X86::CALL64m : X86::CALL32m)
+ .addReg(Is64BitsLP64 ? X86::RAX : X86::EAX)
+ .addImm(1)
+ .addReg(0)
+ .addExpr(Expr)
+ .addReg(0));
+ } else if (Is64Bits) {
bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD;
if (NeedsPadding && Is64BitsLP64)
EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
diff --git a/llvm/test/CodeGen/X86/tls-desc.ll b/llvm/test/CodeGen/X86/tls-desc.ll
new file mode 100644
index 00000000000000..ed363df6f78a43
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tls-desc.ll
@@ -0,0 +1,165 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=i686-unknown-unknown --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnux32 --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X64
+
+@x = thread_local global i32 0, align 4
+@y = internal thread_local global i32 0, align 4
+
+define ptr @f1() nounwind {
+; X86-LABEL: f1:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: calll .L0$pb
+; X86-NEXT: .L0$pb:
+; X86-NEXT: popl %ebx
+; X86-NEXT: .Ltmp0:
+; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %ebx
+; X86-NEXT: leal x@tlsdesc(%ebx), %eax
+; X86-NEXT: calll *x@tlscall(%eax)
+; X86-NEXT: addl %gs:0, %eax
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+;
+; X32-LABEL: f1:
+; X32: # %bb.0:
+; X32-NEXT: pushq %rax
+; X32-NEXT: leal x@tlsdesc(%rip), %eax
+; X32-NEXT: callq *x@tlscall(%eax)
+; X32-NEXT: # kill: def $eax killed $eax def $rax
+; X32-NEXT: addl %fs:0, %eax
+; X32-NEXT: popq %rcx
+; X32-NEXT: retq
+;
+; X64-LABEL: f1:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: leaq x@tlsdesc(%rip), %rax
+; X64-NEXT: callq *x@tlscall(%rax)
+; X64-NEXT: addq %fs:0, %rax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
+ ret ptr %1
+}
+
+define i32 @f2() nounwind {
+; X86-LABEL: f2:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %esi
+; X86-NEXT: calll .L1$pb
+; X86-NEXT: .L1$pb:
+; X86-NEXT: popl %ebx
+; X86-NEXT: .Ltmp1:
+; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.L1$pb), %ebx
+; X86-NEXT: movl %gs:0, %esi
+; X86-NEXT: leal x@tlsdesc(%ebx), %eax
+; X86-NEXT: calll *x@tlscall(%eax)
+; X86-NEXT: movl (%eax,%esi), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+;
+; X32-LABEL: f2:
+; X32: # %bb.0:
+; X32-NEXT: pushq %rbx
+; X32-NEXT: movl %fs:0, %ebx
+; X32-NEXT: leal x@tlsdesc(%rip), %eax
+; X32-NEXT: callq *x@tlscall(%eax)
+; X32-NEXT: movl (%eax,%ebx), %eax
+; X32-NEXT: popq %rbx
+; X32-NEXT: retq
+;
+; X64-LABEL: f2:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbx
+; X64-NEXT: movq %fs:0, %rbx
+; X64-NEXT: leaq x@tlsdesc(%rip), %rax
+; X64-NEXT: callq *x@tlscall(%rax)
+; X64-NEXT: movl (%rax,%rbx), %eax
+; X64-NEXT: popq %rbx
+; X64-NEXT: retq
+ %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
+ %2 = load i32, ptr %1
+ ret i32 %2
+}
+
+define ptr @f3() nounwind {
+; X86-LABEL: f3:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: calll .L2$pb
+; X86-NEXT: .L2$pb:
+; X86-NEXT: popl %ebx
+; X86-NEXT: .Ltmp2:
+; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.L2$pb), %ebx
+; X86-NEXT: leal x@tlsdesc(%ebx), %eax
+; X86-NEXT: calll *x@tlscall(%eax)
+; X86-NEXT: addl %gs:0, %eax
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+;
+; X32-LABEL: f3:
+; X32: # %bb.0:
+; X32-NEXT: pushq %rax
+; X32-NEXT: leal x@tlsdesc(%rip), %eax
+; X32-NEXT: callq *x@tlscall(%eax)
+; X32-NEXT: # kill: def $eax killed $eax def $rax
+; X32-NEXT: addl %fs:0, %eax
+; X32-NEXT: popq %rcx
+; X32-NEXT: retq
+;
+; X64-LABEL: f3:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: leaq x@tlsdesc(%rip), %rax
+; X64-NEXT: callq *x@tlscall(%rax)
+; X64-NEXT: addq %fs:0, %rax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
+ ret ptr %1
+}
+
+define i32 @f4() nounwind {
+; X86-LABEL: f4:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %esi
+; X86-NEXT: calll .L3$pb
+; X86-NEXT: .L3$pb:
+; X86-NEXT: popl %ebx
+; X86-NEXT: .Ltmp3:
+; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp3-.L3$pb), %ebx
+; X86-NEXT: movl %gs:0, %esi
+; X86-NEXT: leal x@tlsdesc(%ebx), %eax
+; X86-NEXT: calll *x@tlscall(%eax)
+; X86-NEXT: movl (%eax,%esi), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+;
+; X32-LABEL: f4:
+; X32: # %bb.0:
+; X32-NEXT: pushq %rbx
+; X32-NEXT: movl %fs:0, %ebx
+; X32-NEXT: leal x@tlsdesc(%rip), %eax
+; X32-NEXT: callq *x@tlscall(%eax)
+; X32-NEXT: movl (%eax,%ebx), %eax
+; X32-NEXT: popq %rbx
+; X32-NEXT: retq
+;
+; X64-LABEL: f4:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbx
+; X64-NEXT: movq %fs:0, %rbx
+; X64-NEXT: leaq x@tlsdesc(%rip), %rax
+; X64-NEXT: callq *x@tlscall(%rax)
+; X64-NEXT: movl (%rax,%rbx), %eax
+; X64-NEXT: popq %rbx
+; X64-NEXT: retq
+ %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
+ %2 = load i32, ptr %1
+ ret i32 %2
+}
>From 6eb75d6c99c926c055d809207dea918dd3ab4bf5 Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Wed, 28 Feb 2024 21:22:00 +0800
Subject: [PATCH 2/2] Address review comments
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 16 +++---
llvm/test/CodeGen/X86/tls-desc.ll | 71 ++++++++++++++++++++++---
2 files changed, 72 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 88314bcf510e9a..1feaad80c8e35a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18515,7 +18515,7 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false);
}
-static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain,
+static SDValue getTLSADDR(SelectionDAG &DAG, SDValue Chain,
GlobalAddressSDNode *GA, SDValue *InGlue,
const EVT PtrVT, unsigned ReturnReg,
unsigned char OperandFlags, bool UseTLSDESC = false,
@@ -18571,7 +18571,7 @@ static SDValue LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA,
SDLoc(), PtrVT), InGlue);
InGlue = Chain.getValue(1);
- return GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD,
+ return getTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD,
UseTLSDESC);
}
@@ -18580,7 +18580,7 @@ static SDValue LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA,
SelectionDAG &DAG,
const EVT PtrVT,
bool UseTLSDESC) {
- return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::RAX,
+ return getTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::RAX,
X86II::MO_TLSGD, UseTLSDESC);
}
@@ -18589,7 +18589,7 @@ static SDValue LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA,
SelectionDAG &DAG,
const EVT PtrVT,
bool UseTLSDESC) {
- return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::EAX,
+ return getTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::EAX,
X86II::MO_TLSGD, UseTLSDESC);
}
@@ -18606,7 +18606,7 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
SDValue Base;
if (Is64Bit) {
unsigned ReturnReg = Is64BitLP64 ? X86::RAX : X86::EAX;
- Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg,
+ Base = getTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg,
X86II::MO_TLSLD, /*UseTLSDESC=*/false,
/*LocalDynamic=*/true);
} else {
@@ -18614,7 +18614,7 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InGlue);
InGlue = Chain.getValue(1);
- Base = GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX,
+ Base = getTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX,
X86II::MO_TLSLDM, /*UseTLSDESC=*/false,
/*LocalDynamic=*/true);
}
@@ -18712,10 +18712,8 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
[[fallthrough]];
case TLSModel::GeneralDynamic:
if (Subtarget.is64Bit()) {
- if (Subtarget.isTarget64BitLP64()) {
- // auto PtrVT = getPointerTy(DAG.getDataLayout(), X86AS::FS);
+ if (Subtarget.isTarget64BitLP64())
return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT, UseTLSDESC);
- }
return LowerToTLSGeneralDynamicModelX32(GA, DAG, PtrVT, UseTLSDESC);
}
return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT, UseTLSDESC);
diff --git a/llvm/test/CodeGen/X86/tls-desc.ll b/llvm/test/CodeGen/X86/tls-desc.ll
index ed363df6f78a43..7df1a5a09ce1ee 100644
--- a/llvm/test/CodeGen/X86/tls-desc.ll
+++ b/llvm/test/CodeGen/X86/tls-desc.ll
@@ -1,46 +1,105 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc < %s -mtriple=i686-unknown-unknown --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=i686 --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnux32 --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64 --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X64
@x = thread_local global i32 0, align 4
@y = internal thread_local global i32 0, align 4
+;; FIXME: GCC's x86-64 port assumes that FLAGS_REG and RAX are changed while all other registers are preserved.
define ptr @f1() nounwind {
; X86-LABEL: f1:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $12, %esp
; X86-NEXT: calll .L0$pb
; X86-NEXT: .L0$pb:
; X86-NEXT: popl %ebx
; X86-NEXT: .Ltmp0:
; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %ebx
+; X86-NEXT: #APP
+; X86-NEXT: #NO_APP
+; X86-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: leal x@tlsdesc(%ebx), %eax
; X86-NEXT: calll *x@tlscall(%eax)
; X86-NEXT: addl %gs:0, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movl (%esp), %ebx # 4-byte Reload
+; X86-NEXT: #APP
+; X86-NEXT: #NO_APP
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X32-LABEL: f1:
; X32: # %bb.0:
+; X32-NEXT: pushq %rbp
+; X32-NEXT: pushq %r15
+; X32-NEXT: pushq %r14
+; X32-NEXT: pushq %r13
+; X32-NEXT: pushq %r12
+; X32-NEXT: pushq %rbx
; X32-NEXT: pushq %rax
+; X32-NEXT: #APP
+; X32-NEXT: #NO_APP
; X32-NEXT: leal x@tlsdesc(%rip), %eax
; X32-NEXT: callq *x@tlscall(%eax)
; X32-NEXT: # kill: def $eax killed $eax def $rax
; X32-NEXT: addl %fs:0, %eax
-; X32-NEXT: popq %rcx
+; X32-NEXT: #APP
+; X32-NEXT: #NO_APP
+; X32-NEXT: addl $8, %esp
+; X32-NEXT: popq %rbx
+; X32-NEXT: popq %r12
+; X32-NEXT: popq %r13
+; X32-NEXT: popq %r14
+; X32-NEXT: popq %r15
+; X32-NEXT: popq %rbp
; X32-NEXT: retq
;
; X64-LABEL: f1:
; X64: # %bb.0:
+; X64-NEXT: pushq %rbp
+; X64-NEXT: pushq %r15
+; X64-NEXT: pushq %r14
+; X64-NEXT: pushq %r13
+; X64-NEXT: pushq %r12
+; X64-NEXT: pushq %rbx
; X64-NEXT: pushq %rax
+; X64-NEXT: #APP
+; X64-NEXT: #NO_APP
; X64-NEXT: leaq x@tlsdesc(%rip), %rax
; X64-NEXT: callq *x@tlscall(%rax)
; X64-NEXT: addq %fs:0, %rax
-; X64-NEXT: popq %rcx
+; X64-NEXT: #APP
+; X64-NEXT: #NO_APP
+; X64-NEXT: addq $8, %rsp
+; X64-NEXT: popq %rbx
+; X64-NEXT: popq %r12
+; X64-NEXT: popq %r13
+; X64-NEXT: popq %r14
+; X64-NEXT: popq %r15
+; X64-NEXT: popq %rbp
; X64-NEXT: retq
- %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
- ret ptr %1
+ %a = call { i32, i32, i32, i32, i32, i32 } asm sideeffect "", "=r,=r,=r,=r,=r,=r,~{dirflag},~{fpsr},~{flags}"()
+ %b = call ptr @llvm.threadlocal.address.p0(ptr @x)
+ %a.0 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 0
+ %a.1 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 1
+ %a.2 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 2
+ %a.3 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 3
+ %a.4 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 4
+ %a.5 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 5
+ call void asm sideeffect "", "r,r,r,r,r,r,~{dirflag},~{fpsr},~{flags}"(i32 %a.0, i32 %a.1, i32 %a.2, i32 %a.3, i32 %a.4, i32 %a.5)
+ ret ptr %b
}
define i32 @f2() nounwind {
More information about the cfe-commits
mailing list