[llvm] [Sparc] Remove bogus stack adjustment for LD/GD TLS (PR #149890)

Jessica Clarke via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 21 14:55:28 PDT 2025


https://github.com/jrtc27 updated https://github.com/llvm/llvm-project/pull/149890

>From f95d2b243668c63511cc26b5876b30cbfaff3ad2 Mon Sep 17 00:00:00 2001
From: Jessica Clarke <jrtc27 at jrtc27.com>
Date: Mon, 21 Jul 2025 20:58:16 +0100
Subject: [PATCH 1/3] [NFC][Sparc] Pre-commit a test showing inefficient and
 broken LD/GD TLS

---
 llvm/test/CodeGen/SPARC/tls-sp.ll | 111 ++++++++++++++++++++++++++++++
 1 file changed, 111 insertions(+)
 create mode 100644 llvm/test/CodeGen/SPARC/tls-sp.ll

diff --git a/llvm/test/CodeGen/SPARC/tls-sp.ll b/llvm/test/CodeGen/SPARC/tls-sp.ll
new file mode 100644
index 0000000000000..928d1dcbe8300
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/tls-sp.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=sparc -relocation-model=pic < %s | FileCheck --check-prefix=SPARC %s
+; RUN: llc -mtriple=sparc64 -relocation-model=pic < %s | FileCheck --check-prefix=SPARC64 %s
+
+;; TODO: Fix the code generation for these functions.
+
+@x = external thread_local global i8
+
+;; Test that we don't over-allocate stack space when calling __tls_get_addr
+;; with the call frame pseudos able to be eliminated.
+define ptr @no_alloca() nounwind {
+; SPARC-LABEL: no_alloca:
+; SPARC:       ! %bb.0: ! %entry
+; SPARC-NEXT:    save %sp, -96, %sp
+; SPARC-NEXT:  .Ltmp0:
+; SPARC-NEXT:    call .Ltmp1
+; SPARC-NEXT:  .Ltmp2:
+; SPARC-NEXT:    sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.Ltmp0)), %i0
+; SPARC-NEXT:  .Ltmp1:
+; SPARC-NEXT:    or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.Ltmp0)), %i0
+; SPARC-NEXT:    add %i0, %o7, %i0
+; SPARC-NEXT:    sethi %tgd_hi22(x), %i1
+; SPARC-NEXT:    add %i1, %tgd_lo10(x), %i1
+; SPARC-NEXT:    add %i0, %i1, %o0, %tgd_add(x)
+; SPARC-NEXT:    call __tls_get_addr, %tgd_call(x)
+; SPARC-NEXT:    nop
+; SPARC-NEXT:    ret
+; SPARC-NEXT:    restore %g0, %o0, %o0
+;
+; SPARC64-LABEL: no_alloca:
+; SPARC64:       ! %bb.0: ! %entry
+; SPARC64-NEXT:    save %sp, -144, %sp
+; SPARC64-NEXT:  .Ltmp0:
+; SPARC64-NEXT:    rd %pc, %o7
+; SPARC64-NEXT:  .Ltmp2:
+; SPARC64-NEXT:    sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.Ltmp0)), %i0
+; SPARC64-NEXT:  .Ltmp1:
+; SPARC64-NEXT:    or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.Ltmp0)), %i0
+; SPARC64-NEXT:    add %i0, %o7, %i0
+; SPARC64-NEXT:    sethi %tgd_hi22(x), %i1
+; SPARC64-NEXT:    add %i1, %tgd_lo10(x), %i1
+; SPARC64-NEXT:    add %i0, %i1, %o0, %tgd_add(x)
+; SPARC64-NEXT:    call __tls_get_addr, %tgd_call(x)
+; SPARC64-NEXT:    nop
+; SPARC64-NEXT:    ret
+; SPARC64-NEXT:    restore %g0, %o0, %o0
+entry:
+  %0 = call ptr @llvm.threadlocal.address.p0(ptr @x)
+  ret ptr %0
+}
+
+;; Test that %sp is valid for the call to __tls_get_addr. We store to a dynamic
+;; alloca in order to prevent eliminating any call frame pseudos from the call.
+define ptr @dynamic_alloca(i64 %n) nounwind {
+; SPARC-LABEL: dynamic_alloca:
+; SPARC:       ! %bb.0: ! %entry
+; SPARC-NEXT:    save %sp, -96, %sp
+; SPARC-NEXT:  .Ltmp3:
+; SPARC-NEXT:    call .Ltmp4
+; SPARC-NEXT:  .Ltmp5:
+; SPARC-NEXT:    sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp5-.Ltmp3)), %i0
+; SPARC-NEXT:  .Ltmp4:
+; SPARC-NEXT:    or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp4-.Ltmp3)), %i0
+; SPARC-NEXT:    add %i0, %o7, %i0
+; SPARC-NEXT:    add %sp, -1, %sp
+; SPARC-NEXT:    sethi %tgd_hi22(x), %i2
+; SPARC-NEXT:    add %i2, %tgd_lo10(x), %i2
+; SPARC-NEXT:    add %i0, %i2, %o0, %tgd_add(x)
+; SPARC-NEXT:    call __tls_get_addr, %tgd_call(x)
+; SPARC-NEXT:    nop
+; SPARC-NEXT:    add %sp, 1, %sp
+; SPARC-NEXT:    add %i1, 7, %i0
+; SPARC-NEXT:    and %i0, -8, %i0
+; SPARC-NEXT:    sub %sp, %i0, %i0
+; SPARC-NEXT:    add %i0, -8, %sp
+; SPARC-NEXT:    mov 1, %i1
+; SPARC-NEXT:    stb %i1, [%i0+88]
+; SPARC-NEXT:    ret
+; SPARC-NEXT:    restore %g0, %o0, %o0
+;
+; SPARC64-LABEL: dynamic_alloca:
+; SPARC64:       ! %bb.0: ! %entry
+; SPARC64-NEXT:    save %sp, -128, %sp
+; SPARC64-NEXT:  .Ltmp3:
+; SPARC64-NEXT:    rd %pc, %o7
+; SPARC64-NEXT:  .Ltmp5:
+; SPARC64-NEXT:    sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp5-.Ltmp3)), %i1
+; SPARC64-NEXT:  .Ltmp4:
+; SPARC64-NEXT:    or %i1, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp4-.Ltmp3)), %i1
+; SPARC64-NEXT:    add %i1, %o7, %i1
+; SPARC64-NEXT:    add %sp, -1, %sp
+; SPARC64-NEXT:    sethi %tgd_hi22(x), %i2
+; SPARC64-NEXT:    add %i2, %tgd_lo10(x), %i2
+; SPARC64-NEXT:    add %i1, %i2, %o0, %tgd_add(x)
+; SPARC64-NEXT:    call __tls_get_addr, %tgd_call(x)
+; SPARC64-NEXT:    nop
+; SPARC64-NEXT:    add %sp, 1, %sp
+; SPARC64-NEXT:    add %i0, 15, %i0
+; SPARC64-NEXT:    and %i0, -16, %i0
+; SPARC64-NEXT:    sub %sp, %i0, %i0
+; SPARC64-NEXT:    mov %i0, %sp
+; SPARC64-NEXT:    mov 1, %i1
+; SPARC64-NEXT:    stb %i1, [%i0+2175]
+; SPARC64-NEXT:    ret
+; SPARC64-NEXT:    restore %g0, %o0, %o0
+entry:
+  %0 = call ptr @llvm.threadlocal.address.p0(ptr @x)
+  %1 = alloca i8, i64 %n
+  store i8 1, ptr %1
+  ret ptr %0
+}

>From 383b78d8eac0a95e4f19e08182b5ea9e4ea14b54 Mon Sep 17 00:00:00 2001
From: Jessica Clarke <jrtc27 at jrtc27.com>
Date: Mon, 21 Jul 2025 20:59:30 +0100
Subject: [PATCH 2/3] [Sparc] Remove bogus stack adjustment for LD/GD TLS

The second argument to getCALLSEQ_START/getCALLSEQ_END is the number of
bytes to adjust the stack by for the duration of the call. In most cases, PEI is able to eliminate the
corresponding call frame pseudos, folding them into the initial stack
frame allocation (rounded up to stack alignment), where it just ends up
allocating more space than needed. However, in the rare case where this
cannot be done, e.g. due to the use of a dynamic alloca, the 1 byte
stack adjustment persists and results in a misaligned stack for the
duration of the call. This has been the case ever since TLS support was
added in cb1dca602c43 ("[Sparc] Add support for TLS in sparc."), and I
can only assume that 1 was passed in the mistaken belief that it is the
number of arguments (as there is 1 register argument for the call), not
the number of bytes for on-stack arguments.

Fixes: https://github.com/llvm/llvm-project/issues/149808
---
 llvm/lib/Target/Sparc/SparcISelLowering.cpp | 4 ++--
 llvm/test/CodeGen/SPARC/tls-sp.ll           | 6 +-----
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 21dbe8f585b3e..7a28e49fc6481 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -2221,7 +2221,7 @@ SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op,
     SDValue Chain = DAG.getEntryNode();
     SDValue InGlue;
 
-    Chain = DAG.getCALLSEQ_START(Chain, 1, 0, DL);
+    Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
     Chain = DAG.getCopyToReg(Chain, DL, SP::O0, Argument, InGlue);
     InGlue = Chain.getValue(1);
     SDValue Callee = DAG.getTargetExternalSymbol("__tls_get_addr", PtrVT);
@@ -2239,7 +2239,7 @@ SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                      InGlue};
     Chain = DAG.getNode(SPISD::TLS_CALL, DL, NodeTys, Ops);
     InGlue = Chain.getValue(1);
-    Chain = DAG.getCALLSEQ_END(Chain, 1, 0, InGlue, DL);
+    Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InGlue, DL);
     InGlue = Chain.getValue(1);
     SDValue Ret = DAG.getCopyFromReg(Chain, DL, SP::O0, PtrVT, InGlue);
 
diff --git a/llvm/test/CodeGen/SPARC/tls-sp.ll b/llvm/test/CodeGen/SPARC/tls-sp.ll
index 928d1dcbe8300..11325e5f38770 100644
--- a/llvm/test/CodeGen/SPARC/tls-sp.ll
+++ b/llvm/test/CodeGen/SPARC/tls-sp.ll
@@ -29,7 +29,7 @@ define ptr @no_alloca() nounwind {
 ;
 ; SPARC64-LABEL: no_alloca:
 ; SPARC64:       ! %bb.0: ! %entry
-; SPARC64-NEXT:    save %sp, -144, %sp
+; SPARC64-NEXT:    save %sp, -128, %sp
 ; SPARC64-NEXT:  .Ltmp0:
 ; SPARC64-NEXT:    rd %pc, %o7
 ; SPARC64-NEXT:  .Ltmp2:
@@ -62,13 +62,11 @@ define ptr @dynamic_alloca(i64 %n) nounwind {
 ; SPARC-NEXT:  .Ltmp4:
 ; SPARC-NEXT:    or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp4-.Ltmp3)), %i0
 ; SPARC-NEXT:    add %i0, %o7, %i0
-; SPARC-NEXT:    add %sp, -1, %sp
 ; SPARC-NEXT:    sethi %tgd_hi22(x), %i2
 ; SPARC-NEXT:    add %i2, %tgd_lo10(x), %i2
 ; SPARC-NEXT:    add %i0, %i2, %o0, %tgd_add(x)
 ; SPARC-NEXT:    call __tls_get_addr, %tgd_call(x)
 ; SPARC-NEXT:    nop
-; SPARC-NEXT:    add %sp, 1, %sp
 ; SPARC-NEXT:    add %i1, 7, %i0
 ; SPARC-NEXT:    and %i0, -8, %i0
 ; SPARC-NEXT:    sub %sp, %i0, %i0
@@ -88,13 +86,11 @@ define ptr @dynamic_alloca(i64 %n) nounwind {
 ; SPARC64-NEXT:  .Ltmp4:
 ; SPARC64-NEXT:    or %i1, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp4-.Ltmp3)), %i1
 ; SPARC64-NEXT:    add %i1, %o7, %i1
-; SPARC64-NEXT:    add %sp, -1, %sp
 ; SPARC64-NEXT:    sethi %tgd_hi22(x), %i2
 ; SPARC64-NEXT:    add %i2, %tgd_lo10(x), %i2
 ; SPARC64-NEXT:    add %i1, %i2, %o0, %tgd_add(x)
 ; SPARC64-NEXT:    call __tls_get_addr, %tgd_call(x)
 ; SPARC64-NEXT:    nop
-; SPARC64-NEXT:    add %sp, 1, %sp
 ; SPARC64-NEXT:    add %i0, 15, %i0
 ; SPARC64-NEXT:    and %i0, -16, %i0
 ; SPARC64-NEXT:    sub %sp, %i0, %i0

>From da8b9253b3a14b025d506579e1731cfad46a416b Mon Sep 17 00:00:00 2001
From: Jessica Clarke <jrtc27 at jrtc27.com>
Date: Mon, 21 Jul 2025 22:55:14 +0100
Subject: [PATCH 3/3] Remove fixed TODO

---
 llvm/test/CodeGen/SPARC/tls-sp.ll | 2 --
 1 file changed, 2 deletions(-)

diff --git a/llvm/test/CodeGen/SPARC/tls-sp.ll b/llvm/test/CodeGen/SPARC/tls-sp.ll
index 11325e5f38770..de9af01398d23 100644
--- a/llvm/test/CodeGen/SPARC/tls-sp.ll
+++ b/llvm/test/CodeGen/SPARC/tls-sp.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -mtriple=sparc -relocation-model=pic < %s | FileCheck --check-prefix=SPARC %s
 ; RUN: llc -mtriple=sparc64 -relocation-model=pic < %s | FileCheck --check-prefix=SPARC64 %s
 
-;; TODO: Fix the code generation for these functions.
-
 @x = external thread_local global i8
 
 ;; Test that we don't over-allocate stack space when calling __tls_get_addr



More information about the llvm-commits mailing list