[llvm] [X86] Insert CALLSEQ when lowering GlobalTLSAddress for ELF targets (PR #113706)
Fabian Ritter via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 25 09:25:33 PDT 2024
https://github.com/ritter-x2a created https://github.com/llvm/llvm-project/pull/113706
When lowering a TLS address for an ELF target, we introduce a call to obtain the TLS base address. So far, we do not insert CALLSEQ_START/END markers around this call when it is generated, but use a custom inserter to insert them in a later phase. This is problematic, since the TLS address call can land in a CALLSEQ for another calls before it is wrapped in its own CALLSEQ. That results in nested CALLSEQs, which are illegal and cause errors when expensive checks are enabled, e.g., in issues #45574 and #98042.
This patch instead wraps each TLS address call in a CALLSEQ when it is generated so that instruction selection can avoid nested CALLSEQs. This is an alternative to PR #106965, which instead changes the custom inserter to avoid generating CALLSEQs when the TLS address call is already in a CALLSEQ.
This patch also effectively reverts commit 228978c, which introduced the CustomInserter that so far added the CALLSEQ around TLSAddrs.
Fixes #45574 and #98042.
>From ee73f87a4098a88aebc24025316c856f65084cc9 Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Fri, 25 Oct 2024 10:42:13 -0400
Subject: [PATCH] [X86] Insert CALLSEQ when lowering GlobalTLSAddress for ELF
targets
When lowering a TLS address for an ELF target, we introduce a call to obtain
the TLS base address. So far, we do not insert CALLSEQ_START/END markers around
this call when it is generated, but use a custom inserter to insert them in a
later phase. This is problematic, since the TLS address call can land in a
CALLSEQ for another calls before it is wrapped in its own CALLSEQ. That results
in nested CALLSEQs, which are illegal and cause errors when expensive checks
are enabled, e.g., in issues #45574 and #98042.
This patch instead wraps each TLS address call in a CALLSEQ when it is
generated so that instruction selection can avoid nested CALLSEQs. This is an
alternative to PR #106965, which instead changes the custom inserter to avoid
generating CALLSEQs when the TLS address call is already in a CALLSEQ.
This patch also effectively reverts commit 228978c, which introduced the
CustomInserter that so far added the CALLSEQ around TLSAddrs.
Fixes #45574 and #98042.
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 121 ++++++++----------
llvm/lib/Target/X86/X86ISelLowering.h | 3 -
llvm/lib/Target/X86/X86InstrCompiler.td | 6 +-
.../test/CodeGen/X86/tls-function-argument.ll | 30 +++++
4 files changed, 83 insertions(+), 77 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/tls-function-argument.ll
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a6d77873ec2901..2f35c2435d78c1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18828,10 +18828,11 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false);
}
-static SDValue
-GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
- SDValue *InGlue, const EVT PtrVT, unsigned ReturnReg,
- unsigned char OperandFlags, bool LocalDynamic = false) {
+static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain,
+ GlobalAddressSDNode *GA, const EVT PtrVT,
+ unsigned ReturnReg, unsigned char OperandFlags,
+ bool LoadGlobalBaseReg = false,
+ bool LocalDynamic = false) {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDLoc dl(GA);
@@ -18841,8 +18842,25 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
TGA = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, OperandFlags);
auto UI = TGA->use_begin();
// Reuse existing GetTLSADDR node if we can find it.
- if (UI != TGA->use_end())
- return SDValue(*UI->use_begin()->use_begin(), 0);
+ if (UI != TGA->use_end()) {
+ // TLSDESC uses TGA.
+ auto TLSDescOp = UI;
+ assert(TLSDescOp->getOpcode() == X86ISD::TLSDESC &&
+ "Unexpected TLSDESC DAG");
+ // CALLSEQ_END uses TGA via a chain and glue.
+ auto CallSeqEndOp = TLSDescOp->use_begin();
+ assert(CallSeqEndOp->getOpcode() == ISD::CALLSEQ_END &&
+ "Unexpected TLSDESC DAG");
+ // CopyFromReg uses CALLSEQ_END via a chain and glue.
+ auto CopyFromRegOp = CallSeqEndOp->use_begin();
+ assert(CopyFromRegOp->getOpcode() == ISD::CopyFromReg &&
+ "Unexpected TLSDESC DAG");
+ // The Add generated at the final return of this function uses
+ // CopyFromReg.
+ auto AddOp = CopyFromRegOp->use_begin();
+ assert(AddOp->getOpcode() == ISD::ADD && "Unexpected TLSDESC DAG");
+ return SDValue(*AddOp, 0);
+ }
} else {
TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0),
GA->getOffset(), OperandFlags);
@@ -18852,13 +18870,20 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
: LocalDynamic ? X86ISD::TLSBASEADDR
: X86ISD::TLSADDR;
- if (InGlue) {
- SDValue Ops[] = { Chain, TGA, *InGlue };
+ Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
+ if (LoadGlobalBaseReg) {
+ SDValue InGlue;
+ Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
+ DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT),
+ InGlue);
+ InGlue = Chain.getValue(1);
+ SDValue Ops[] = {Chain, TGA, InGlue};
Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
} else {
- SDValue Ops[] = { Chain, TGA };
+ SDValue Ops[] = {Chain, TGA};
Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
}
+ Chain = DAG.getCALLSEQ_END(Chain, 0, 0, Chain.getValue(1), dl);
// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
MFI.setAdjustsStack(true);
@@ -18884,30 +18909,24 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
static SDValue
LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT) {
- SDValue InGlue;
- SDLoc dl(GA); // ? function entry point might be better
- SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
- DAG.getNode(X86ISD::GlobalBaseReg,
- SDLoc(), PtrVT), InGlue);
- InGlue = Chain.getValue(1);
-
- return GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD);
+ return GetTLSADDR(DAG, DAG.getEntryNode(), GA, PtrVT, X86::EAX,
+ X86II::MO_TLSGD, /*LoadGlobalBaseReg=*/true);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit LP64
static SDValue
LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT) {
- return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
- X86::RAX, X86II::MO_TLSGD);
+ return GetTLSADDR(DAG, DAG.getEntryNode(), GA, PtrVT, X86::RAX,
+ X86II::MO_TLSGD);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit ILP32
static SDValue
LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT) {
- return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
- X86::EAX, X86II::MO_TLSGD);
+ return GetTLSADDR(DAG, DAG.getEntryNode(), GA, PtrVT, X86::EAX,
+ X86II::MO_TLSGD);
}
static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
@@ -18916,22 +18935,20 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
SDLoc dl(GA);
// Get the start address of the TLS block for this module.
- X86MachineFunctionInfo *MFI = DAG.getMachineFunction()
- .getInfo<X86MachineFunctionInfo>();
+ X86MachineFunctionInfo *MFI =
+ DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
MFI->incNumLocalDynamicTLSAccesses();
SDValue Base;
if (Is64Bit) {
unsigned ReturnReg = Is64BitLP64 ? X86::RAX : X86::EAX;
- Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg,
- X86II::MO_TLSLD, /*LocalDynamic=*/true);
+ Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, PtrVT, ReturnReg,
+ X86II::MO_TLSLD, /*LoadGlobalBaseReg=*/false,
+ /*LocalDynamic=*/true);
} else {
- SDValue InGlue;
- SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
- DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InGlue);
- InGlue = Chain.getValue(1);
- Base = GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX,
- X86II::MO_TLSLDM, /*LocalDynamic=*/true);
+ Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, PtrVT, X86::EAX,
+ X86II::MO_TLSLDM, /*LoadGlobalBaseReg=*/true,
+ /*LocalDynamic=*/true);
}
// Note: the CleanupLocalDynamicTLSPass will remove redundant computations
@@ -36002,36 +36019,6 @@ X86TargetLowering::EmitLoweredCatchRet(MachineInstr &MI,
return BB;
}
-MachineBasicBlock *
-X86TargetLowering::EmitLoweredTLSAddr(MachineInstr &MI,
- MachineBasicBlock *BB) const {
- // So, here we replace TLSADDR with the sequence:
- // adjust_stackdown -> TLSADDR -> adjust_stackup.
- // We need this because TLSADDR is lowered into calls
- // inside MC, therefore without the two markers shrink-wrapping
- // may push the prologue/epilogue pass them.
- const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
- const MIMetadata MIMD(MI);
- MachineFunction &MF = *BB->getParent();
-
- // Emit CALLSEQ_START right before the instruction.
- MF.getFrameInfo().setAdjustsStack(true);
- unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
- MachineInstrBuilder CallseqStart =
- BuildMI(MF, MIMD, TII.get(AdjStackDown)).addImm(0).addImm(0).addImm(0);
- BB->insert(MachineBasicBlock::iterator(MI), CallseqStart);
-
- // Emit CALLSEQ_END right after the instruction.
- // We don't call erase from parent because we want to keep the
- // original instruction around.
- unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
- MachineInstrBuilder CallseqEnd =
- BuildMI(MF, MIMD, TII.get(AdjStackUp)).addImm(0).addImm(0);
- BB->insertAfter(MachineBasicBlock::iterator(MI), CallseqEnd);
-
- return BB;
-}
-
MachineBasicBlock *
X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI,
MachineBasicBlock *BB) const {
@@ -37030,16 +37017,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return X86::TMM0 + Imm;
};
switch (MI.getOpcode()) {
- default: llvm_unreachable("Unexpected instr type to insert");
- case X86::TLS_addr32:
- case X86::TLS_addr64:
- case X86::TLS_addrX32:
- case X86::TLS_base_addr32:
- case X86::TLS_base_addr64:
- case X86::TLS_base_addrX32:
- case X86::TLS_desc32:
- case X86::TLS_desc64:
- return EmitLoweredTLSAddr(MI, BB);
+ default:
+ llvm_unreachable("Unexpected instr type to insert");
case X86::INDIRECT_THUNK_CALL32:
case X86::INDIRECT_THUNK_CALL64:
case X86::INDIRECT_THUNK_TCRETURN32:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 14ada1721fd40e..2db25d6dda061a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1848,9 +1848,6 @@ namespace llvm {
MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
MachineBasicBlock *BB) const;
- MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
- MachineBasicBlock *BB) const;
-
MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
MachineBasicBlock *BB) const;
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index a05c3f028442c0..51cee2eacb9686 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -478,7 +478,7 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS, DF],
- usesCustomInserter = 1, Uses = [ESP, SSP] in {
+ Uses = [ESP, SSP] in {
def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
"# TLS_addr32",
[(X86tlsaddr tls32addr:$sym)]>,
@@ -498,7 +498,7 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS, DF],
- usesCustomInserter = 1, Uses = [RSP, SSP] in {
+ Uses = [RSP, SSP] in {
def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
"# TLS_addr64",
[(X86tlsaddr tls64addr:$sym)]>,
@@ -520,7 +520,7 @@ def TLS_base_addrX32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
// TLSDESC only clobbers EAX and EFLAGS. ESP is marked as a use to prevent
// stack-pointer assignments that appear immediately before calls from
// potentially appearing dead.
-let Defs = [EAX, EFLAGS], usesCustomInserter = 1, Uses = [RSP, SSP] in {
+let Defs = [EAX, EFLAGS], Uses = [RSP, SSP] in {
def TLS_desc32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
"# TLS_desc32", [(X86tlsdesc tls32addr:$sym)]>;
def TLS_desc64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
diff --git a/llvm/test/CodeGen/X86/tls-function-argument.ll b/llvm/test/CodeGen/X86/tls-function-argument.ll
new file mode 100644
index 00000000000000..9b6ab529db3ea3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tls-function-argument.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=x86_64 -verify-machineinstrs -relocation-model=pic < %s | FileCheck %s
+
+; Passing a pointer to thread-local storage to a function can be problematic
+; since computing such addresses requires a function call that is introduced
+; very late in instruction selection. We need to ensure that we don't introduce
+; nested call sequence markers if this function call happens in a call sequence.
+
+ at TLS = internal thread_local global i64 zeroinitializer, align 8
+declare void @bar(ptr)
+define internal void @foo() {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset %rbx, -16
+; CHECK-NEXT: leaq TLS at TLSLD(%rip), %rdi
+; CHECK-NEXT: callq __tls_get_addr at PLT
+; CHECK-NEXT: leaq TLS at DTPOFF(%rax), %rbx
+; CHECK-NEXT: movq %rbx, %rdi
+; CHECK-NEXT: callq bar at PLT
+; CHECK-NEXT: movq %rbx, %rdi
+; CHECK-NEXT: callq bar at PLT
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+ call void @bar(ptr @TLS)
+ call void @bar(ptr @TLS)
+ ret void
+}
More information about the llvm-commits
mailing list