[llvm-branch-commits] [llvm] [X86] Avoid generating nested CALLSEQ for TLS pointer function arguments (PR #106965)

Fabian Ritter via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Oct 10 03:16:10 PDT 2024


https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/106965

>From 58e48285971bedaa3d2cb878ddd290569b936e10 Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Mon, 2 Sep 2024 05:37:33 -0400
Subject: [PATCH] [X86] Avoid generating nested CALLSEQ for TLS pointer
 function arguments

When a pointer to thread-local storage is passed in a function call,
ISel first lowers the call and wraps the resulting code in CALLSEQ
markers. Afterwards, to compute the pointer to TLS, a call to retrieve
the TLS base address is generated and then wrapped in a set of CALLSEQ
markers. If the latter call is inserted into the call sequence of the
former call, this leads to nested call frames, which are illegal and
lead to errors in the machine verifier.

This patch avoids surrounding the call to compute the TLS base address
in CALLSEQ markers if it is already surrounded by such markers. It
relies on zero-sized call frames being represented in the call frame
size info stored in the MachineBBs.

Fixes #45574 and #98042.
---
 llvm/include/llvm/CodeGen/MachineFrameInfo.h  | 25 ++++++++++++++--
 llvm/lib/CodeGen/FinalizeISel.cpp             |  6 ++++
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  9 ++++++
 .../test/CodeGen/X86/tls-function-argument.ll | 30 +++++++++++++++++++
 4 files changed, 68 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/tls-function-argument.ll

diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index cdfdbc27fc6e41..9b53bf0dfb84d9 100644
--- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -26,6 +26,7 @@ class MachineFunction;
 class MachineBasicBlock;
 class BitVector;
 class AllocaInst;
+class MachineFrameSizeInfo;
 class TargetInstrInfo;
 
 /// The CalleeSavedInfo class tracks the information need to locate where a
@@ -283,6 +284,10 @@ class MachineFrameInfo {
   /// It is only valid during and after prolog/epilog code insertion.
   uint64_t MaxCallFrameSize = ~UINT64_C(0);
 
+  /// Call frame sizes for the MachineFunction's MachineBasicBlocks. This is set
+  /// by the MachineFrameSizeInfo constructor and cleared by its destructor.
+  MachineFrameSizeInfo *SizeInfo = nullptr;
+
   /// The number of bytes of callee saved registers that the target wants to
   /// report for the current function in the CodeView S_FRAMEPROC record.
   unsigned CVBytesOfCalleeSavedRegisters = 0;
@@ -676,6 +681,13 @@ class MachineFrameInfo {
   }
   void setMaxCallFrameSize(uint64_t S) { MaxCallFrameSize = S; }
 
+  /// Return an object that can be queried for call frame sizes at specific
+  /// locations in the MachineFunction. Constructing a MachineFrameSizeInfo
+  /// object for the MachineFunction automatically makes it available via this
+  /// field during the object's lifetime.
+  MachineFrameSizeInfo *getSizeInfo() const { return SizeInfo; }
+  void setSizeInfo(MachineFrameSizeInfo *SI) { SizeInfo = SI; }
+
   /// Returns how many bytes of callee-saved registers the target pushed in the
   /// prologue. Only used for debug info.
   unsigned getCVBytesOfCalleeSavedRegisters() const {
@@ -851,7 +863,11 @@ class MachineFrameInfo {
 /// MachineBasicBlocks of a MachineFunction based on call frame setup and
 /// destroy pseudo instructions. Usually, no call frame is open at block
 /// boundaries, except if a call sequence has been split into multiple blocks.
-/// Computing this information is deferred until it is queried.
+///
+/// Computing this information is deferred until it is queried. Upon
+/// construction, a MachineFrameSizeInfo object registers itself in the
+/// MachineFunction's MachineFrameInfo (and it unregisters when destructed).
+/// While registered, it can be retrieved via MachineFrameInfo::getSizeInfo().
 ///
 /// This class assumes that call frame instructions are placed properly, i.e.,
 /// every program path hits a frame destroy of equal size after hitting a frame
@@ -859,7 +875,12 @@ class MachineFrameInfo {
 /// frame sequences are not allowed.
 class MachineFrameSizeInfo {
 public:
-  MachineFrameSizeInfo(MachineFunction &MF) : MF(MF) {}
+  MachineFrameSizeInfo(MachineFunction &MF) : MF(MF) {
+    assert(MF.getFrameInfo().getSizeInfo() == nullptr);
+    MF.getFrameInfo().setSizeInfo(this);
+  }
+
+  ~MachineFrameSizeInfo() { MF.getFrameInfo().setSizeInfo(nullptr); }
 
   /// Get the call frame size just before MI. Contains no value if MI is not in
   /// a call sequence. Zero-sized call frames are possible.
diff --git a/llvm/lib/CodeGen/FinalizeISel.cpp b/llvm/lib/CodeGen/FinalizeISel.cpp
index 477512dc6b0320..a04e0af86cddd5 100644
--- a/llvm/lib/CodeGen/FinalizeISel.cpp
+++ b/llvm/lib/CodeGen/FinalizeISel.cpp
@@ -47,6 +47,12 @@ static std::pair<bool, bool> runImpl(MachineFunction &MF) {
   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
   const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
 
+  // Pseudo-Lowering might require the sizes of call frames, so compute them
+  // (lazily). The MachineFrameSizeInfo registers itself in MF's
+  // MachineFrameInfo for the SizeInfo's lifetime and does not need to be passed
+  // explicitly.
+  const MachineFrameSizeInfo MFSI(MF);
+
   // Iterate through each instruction in the function, looking for pseudos.
   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
     MachineBasicBlock *MBB = &*I;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index aca4d29549e9e0..4dd41a0b3287f7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35971,6 +35971,15 @@ X86TargetLowering::EmitLoweredTLSAddr(MachineInstr &MI,
   // inside MC, therefore without the two markers shrink-wrapping
   // may push the prologue/epilogue pass them.
   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+
+  // Do not introduce CALLSEQ markers if we are already in a call sequence.
+  // Nested call sequences are not allowed and cause errors in the machine
+  // verifier.
+  MachineFrameSizeInfo *MFSI = MI.getMF()->getFrameInfo().getSizeInfo();
+  assert(MFSI && "Call frame size information needs to be available!");
+  if (MFSI->getCallFrameSizeAt(MI).has_value())
+    return BB;
+
   const MIMetadata MIMD(MI);
   MachineFunction &MF = *BB->getParent();
 
diff --git a/llvm/test/CodeGen/X86/tls-function-argument.ll b/llvm/test/CodeGen/X86/tls-function-argument.ll
new file mode 100644
index 00000000000000..9b6ab529db3ea3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tls-function-argument.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=x86_64 -verify-machineinstrs -relocation-model=pic < %s | FileCheck %s
+
+; Passing a pointer to thread-local storage to a function can be problematic
+; since computing such addresses requires a function call that is introduced
+; very late in instruction selection. We need to ensure that we don't introduce
+; nested call sequence markers if this function call happens in a call sequence.
+
+ at TLS = internal thread_local global i64 zeroinitializer, align 8
+declare void @bar(ptr)
+define internal void @foo() {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbx, -16
+; CHECK-NEXT:    leaq TLS at TLSLD(%rip), %rdi
+; CHECK-NEXT:    callq __tls_get_addr at PLT
+; CHECK-NEXT:    leaq TLS at DTPOFF(%rax), %rbx
+; CHECK-NEXT:    movq %rbx, %rdi
+; CHECK-NEXT:    callq bar at PLT
+; CHECK-NEXT:    movq %rbx, %rdi
+; CHECK-NEXT:    callq bar at PLT
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+  call void @bar(ptr @TLS)
+  call void @bar(ptr @TLS)
+  ret void
+}



More information about the llvm-branch-commits mailing list