[llvm] [PowerPC] Do not merge TLS constants within PPCMergeStringPool.cpp (PR #94059)

Amy Kwan via llvm-commits llvm-commits at lists.llvm.org
Fri May 31 15:18:25 PDT 2024


https://github.com/amy-kwan created https://github.com/llvm/llvm-project/pull/94059

This patch prevents thread-local constants from being merged within PPCMergeStringPool.cpp.

The PPCMergeStringPool pass primarily merges non-thread-local constants together, and thread-local constants should not be mixed together with other (non-thread-local) constants. In the event that thread-local and other non-thread-local constants are pooled together, the llvm.threadlocal.address intrinsic can fail as it expects its argument to be a thread-local global value, but the merged string structure created by the PPCMergeStringPool pass is not thread-local as a whole.

>From 5d63733d2266abe38ee57744e263f5ea2ddb7768 Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1 at ibm.com>
Date: Fri, 31 May 2024 17:17:07 -0500
Subject: [PATCH] [PowerPC] Do not merge TLS constants within
 PPCMergeStringPool.cpp

This patch prevents thread-local constants from being merged within PPCMergeStringPool.cpp.

The PPCMergeStringPool pass primarily merges non-thread-local constants
together, and thread-local constants should not be mixed together with other
(non-thread-local) constants. In the event that thread-local and other
non-thread-local constants are pooled together, the llvm.threadlocal.address
intrinsic can fail as it expects its argument to be a thread-local global value,
but the merged string structure created by the PPCMergeStringPool pass is not
thread-local as a whole.
---
 .../lib/Target/PowerPC/PPCMergeStringPool.cpp |   5 +
 .../PowerPC/mergeable-string-pool-tls.ll      | 260 ++++++++++++++++++
 2 files changed, 265 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/mergeable-string-pool-tls.ll

diff --git a/llvm/lib/Target/PowerPC/PPCMergeStringPool.cpp b/llvm/lib/Target/PowerPC/PPCMergeStringPool.cpp
index 309938accdf4c..dc7d2910aaa7e 100644
--- a/llvm/lib/Target/PowerPC/PPCMergeStringPool.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMergeStringPool.cpp
@@ -215,6 +215,11 @@ void PPCMergeStringPool::collectCandidateConstants(Module &M) {
         Global.getLinkage() != GlobalValue::InternalLinkage)
       continue;
 
+    // Do not pool thread-local constants, as the pooled strings can contain
+    // non-thread-local constants, and these should not be mixed together.
+    if (Global.isThreadLocal())
+      continue;
+
     LLVM_DEBUG(dbgs() << "Constant data of Global: ");
     LLVM_DEBUG(ConstData->dump());
     LLVM_DEBUG(dbgs() << "\n\n");
diff --git a/llvm/test/CodeGen/PowerPC/mergeable-string-pool-tls.ll b/llvm/test/CodeGen/PowerPC/mergeable-string-pool-tls.ll
new file mode 100644
index 0000000000000..3abe148dbb00d
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/mergeable-string-pool-tls.ll
@@ -0,0 +1,260 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr8 \
+; RUN:     -ppc-asm-full-reg-names < %s | FileCheck %s \
+; RUN:     --check-prefix=CHECK64
+; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr8 \
+; RUN:     -ppc-asm-full-reg-names < %s | FileCheck %s \
+; RUN:     --check-prefix=CHECK32
+; RUN: llc -verify-machineinstrs -mtriple powerpc64le-unknown-linux -mcpu=pwr8 \
+; RUN:     -ppc-asm-full-reg-names < %s | FileCheck %s \
+; RUN:     --check-prefix=LINUX64LE
+; RUN: llc -verify-machineinstrs -mtriple powerpc64-unknown-linux -mcpu=pwr8 \
+; RUN:     -ppc-asm-full-reg-names < %s | FileCheck %s \
+; RUN:     --check-prefix=LINUX64BE
+
+ at .str = private unnamed_addr constant [47 x i8] c"TLS variable 1, 2 and non-TLS var: %s, %s, %s\0A\00", align 1
+ at a = internal thread_local constant [5 x i8] c"tls1\00", align 1
+ at b = internal thread_local constant [5 x i8] c"tls2\00", align 1
+ at c = internal constant [15 x i8] c"Regular global\00", align 1
+ at d = internal constant [10 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10], align 4
+ at e = internal constant [4 x float] [float 0x4055F33340000000, float 0x4056333340000000, float 0x40567999A0000000, float 0x4056B33340000000], align 4
+
+declare noundef signext i32 @printf(ptr nocapture noundef readonly, ...) local_unnamed_addr #0
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
+declare void @callee(ptr noundef) local_unnamed_addr #3
+declare void @callee2(ptr noundef) local_unnamed_addr #3
+
+define void @print_tls_func() {
+; CHECK64-LABEL: print_tls_func:
+; CHECK64:       # %bb.0: # %entry
+; CHECK64-NEXT:    mflr r0
+; CHECK64-NEXT:    stdu r1, -112(r1)
+; CHECK64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; CHECK64-NEXT:    std r0, 128(r1)
+; CHECK64-NEXT:    ld r6, L..C1(r2) # @__ModuleStringPool
+; CHECK64-NEXT:    bla .__tls_get_mod[PR]
+; CHECK64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @a
+; CHECK64-NEXT:    ld r5, L..C3(r2) # target-flags(ppc-tlsld) @b
+; CHECK64-NEXT:    add r4, r3, r4
+; CHECK64-NEXT:    add r5, r3, r5
+; CHECK64-NEXT:    addi r3, r6, 71
+; CHECK64-NEXT:    addi r6, r6, 56
+; CHECK64-NEXT:    bl .printf[PR]
+; CHECK64-NEXT:    nop
+; CHECK64-NEXT:    addi r1, r1, 112
+; CHECK64-NEXT:    ld r0, 16(r1)
+; CHECK64-NEXT:    mtlr r0
+; CHECK64-NEXT:    blr
+;
+; CHECK32-LABEL: print_tls_func:
+; CHECK32:       # %bb.0: # %entry
+; CHECK32-NEXT:    mflr r0
+; CHECK32-NEXT:    stwu r1, -64(r1)
+; CHECK32-NEXT:    lwz r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; CHECK32-NEXT:    stw r0, 72(r1)
+; CHECK32-NEXT:    lwz r6, L..C1(r2) # @__ModuleStringPool
+; CHECK32-NEXT:    bla .__tls_get_mod[PR]
+; CHECK32-NEXT:    lwz r4, L..C2(r2) # target-flags(ppc-tlsld) @a
+; CHECK32-NEXT:    lwz r5, L..C3(r2) # target-flags(ppc-tlsld) @b
+; CHECK32-NEXT:    add r4, r3, r4
+; CHECK32-NEXT:    add r5, r3, r5
+; CHECK32-NEXT:    addi r3, r6, 71
+; CHECK32-NEXT:    addi r6, r6, 56
+; CHECK32-NEXT:    bl .printf[PR]
+; CHECK32-NEXT:    nop
+; CHECK32-NEXT:    addi r1, r1, 64
+; CHECK32-NEXT:    lwz r0, 8(r1)
+; CHECK32-NEXT:    mtlr r0
+; CHECK32-NEXT:    blr
+;
+; LINUX64LE-LABEL: print_tls_func:
+; LINUX64LE:       # %bb.0: # %entry
+; LINUX64LE-NEXT:    mflr r0
+; LINUX64LE-NEXT:    stdu r1, -96(r1)
+; LINUX64LE-NEXT:    std r0, 112(r1)
+; LINUX64LE-NEXT:    .cfi_def_cfa_offset 96
+; LINUX64LE-NEXT:    .cfi_offset lr, 16
+; LINUX64LE-NEXT:    addis r3, r13, a at tprel@ha
+; LINUX64LE-NEXT:    addi r4, r3, a at tprel@l
+; LINUX64LE-NEXT:    addis r3, r13, b at tprel@ha
+; LINUX64LE-NEXT:    addi r5, r3, b at tprel@l
+; LINUX64LE-NEXT:    addis r3, r2, .L__ModuleStringPool at toc@ha
+; LINUX64LE-NEXT:    addi r6, r3, .L__ModuleStringPool at toc@l
+; LINUX64LE-NEXT:    addi r3, r6, 71
+; LINUX64LE-NEXT:    addi r6, r6, 56
+; LINUX64LE-NEXT:    bl printf
+; LINUX64LE-NEXT:    nop
+; LINUX64LE-NEXT:    addi r1, r1, 96
+; LINUX64LE-NEXT:    ld r0, 16(r1)
+; LINUX64LE-NEXT:    mtlr r0
+; LINUX64LE-NEXT:    blr
+;
+; LINUX64BE-LABEL: print_tls_func:
+; LINUX64BE:       # %bb.0: # %entry
+; LINUX64BE-NEXT:    mflr r0
+; LINUX64BE-NEXT:    stdu r1, -128(r1)
+; LINUX64BE-NEXT:    std r0, 144(r1)
+; LINUX64BE-NEXT:    .cfi_def_cfa_offset 128
+; LINUX64BE-NEXT:    .cfi_offset lr, 16
+; LINUX64BE-NEXT:    .cfi_offset r30, -16
+; LINUX64BE-NEXT:    addis r3, r2, a at got@tlsld at ha
+; LINUX64BE-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
+; LINUX64BE-NEXT:    addi r3, r3, a at got@tlsld at l
+; LINUX64BE-NEXT:    bl __tls_get_addr(a at tlsld)
+; LINUX64BE-NEXT:    nop
+; LINUX64BE-NEXT:    addis r3, r3, a at dtprel@ha
+; LINUX64BE-NEXT:    addi r30, r3, a at dtprel@l
+; LINUX64BE-NEXT:    addis r3, r2, b at got@tlsld at ha
+; LINUX64BE-NEXT:    addi r3, r3, b at got@tlsld at l
+; LINUX64BE-NEXT:    bl __tls_get_addr(b at tlsld)
+; LINUX64BE-NEXT:    nop
+; LINUX64BE-NEXT:    addis r3, r3, b at dtprel@ha
+; LINUX64BE-NEXT:    addi r5, r3, b at dtprel@l
+; LINUX64BE-NEXT:    addis r3, r2, .L__ModuleStringPool at toc@ha
+; LINUX64BE-NEXT:    addi r4, r3, .L__ModuleStringPool at toc@l
+; LINUX64BE-NEXT:    addi r3, r4, 71
+; LINUX64BE-NEXT:    addi r6, r4, 56
+; LINUX64BE-NEXT:    mr r4, r30
+; LINUX64BE-NEXT:    bl printf
+; LINUX64BE-NEXT:    nop
+; LINUX64BE-NEXT:    ld r30, 112(r1) # 8-byte Folded Reload
+; LINUX64BE-NEXT:    addi r1, r1, 128
+; LINUX64BE-NEXT:    ld r0, 16(r1)
+; LINUX64BE-NEXT:    mtlr r0
+; LINUX64BE-NEXT:    blr
+entry:
+  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @a)
+  %1 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @b)
+  %call = tail call signext i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str, ptr noundef nonnull %0, ptr noundef nonnull %1, ptr noundef nonnull @c)
+  ret void
+}
+
+define void @test_func() {
+; CHECK64-LABEL: test_func:
+; CHECK64:       # %bb.0: # %entry
+; CHECK64-NEXT:    mflr r0
+; CHECK64-NEXT:    stdu r1, -112(r1)
+; CHECK64-NEXT:    ld r3, L..C1(r2) # @__ModuleStringPool
+; CHECK64-NEXT:    std r0, 128(r1)
+; CHECK64-NEXT:    addi r3, r3, 16
+; CHECK64-NEXT:    bl .callee[PR]
+; CHECK64-NEXT:    nop
+; CHECK64-NEXT:    addi r1, r1, 112
+; CHECK64-NEXT:    ld r0, 16(r1)
+; CHECK64-NEXT:    mtlr r0
+; CHECK64-NEXT:    blr
+;
+; CHECK32-LABEL: test_func:
+; CHECK32:       # %bb.0: # %entry
+; CHECK32-NEXT:    mflr r0
+; CHECK32-NEXT:    stwu r1, -64(r1)
+; CHECK32-NEXT:    lwz r3, L..C1(r2) # @__ModuleStringPool
+; CHECK32-NEXT:    stw r0, 72(r1)
+; CHECK32-NEXT:    addi r3, r3, 16
+; CHECK32-NEXT:    bl .callee[PR]
+; CHECK32-NEXT:    nop
+; CHECK32-NEXT:    addi r1, r1, 64
+; CHECK32-NEXT:    lwz r0, 8(r1)
+; CHECK32-NEXT:    mtlr r0
+; CHECK32-NEXT:    blr
+;
+; LINUX64LE-LABEL: test_func:
+; LINUX64LE:       # %bb.0: # %entry
+; LINUX64LE-NEXT:    mflr r0
+; LINUX64LE-NEXT:    stdu r1, -32(r1)
+; LINUX64LE-NEXT:    std r0, 48(r1)
+; LINUX64LE-NEXT:    .cfi_def_cfa_offset 32
+; LINUX64LE-NEXT:    .cfi_offset lr, 16
+; LINUX64LE-NEXT:    addis r3, r2, .L__ModuleStringPool at toc@ha
+; LINUX64LE-NEXT:    addi r3, r3, .L__ModuleStringPool at toc@l
+; LINUX64LE-NEXT:    addi r3, r3, 16
+; LINUX64LE-NEXT:    bl callee
+; LINUX64LE-NEXT:    nop
+; LINUX64LE-NEXT:    addi r1, r1, 32
+; LINUX64LE-NEXT:    ld r0, 16(r1)
+; LINUX64LE-NEXT:    mtlr r0
+; LINUX64LE-NEXT:    blr
+;
+; LINUX64BE-LABEL: test_func:
+; LINUX64BE:       # %bb.0: # %entry
+; LINUX64BE-NEXT:    mflr r0
+; LINUX64BE-NEXT:    stdu r1, -112(r1)
+; LINUX64BE-NEXT:    std r0, 128(r1)
+; LINUX64BE-NEXT:    .cfi_def_cfa_offset 112
+; LINUX64BE-NEXT:    .cfi_offset lr, 16
+; LINUX64BE-NEXT:    addis r3, r2, .L__ModuleStringPool at toc@ha
+; LINUX64BE-NEXT:    addi r3, r3, .L__ModuleStringPool at toc@l
+; LINUX64BE-NEXT:    addi r3, r3, 16
+; LINUX64BE-NEXT:    bl callee
+; LINUX64BE-NEXT:    nop
+; LINUX64BE-NEXT:    addi r1, r1, 112
+; LINUX64BE-NEXT:    ld r0, 16(r1)
+; LINUX64BE-NEXT:    mtlr r0
+; LINUX64BE-NEXT:    blr
+entry:
+  tail call void @callee(ptr noundef nonnull @d) #4
+  ret void
+}
+
+define void @test_func2() {
+; CHECK64-LABEL: test_func2:
+; CHECK64:       # %bb.0: # %entry
+; CHECK64-NEXT:    mflr r0
+; CHECK64-NEXT:    stdu r1, -112(r1)
+; CHECK64-NEXT:    ld r3, L..C1(r2) # @__ModuleStringPool
+; CHECK64-NEXT:    std r0, 128(r1)
+; CHECK64-NEXT:    bl .callee2[PR]
+; CHECK64-NEXT:    nop
+; CHECK64-NEXT:    addi r1, r1, 112
+; CHECK64-NEXT:    ld r0, 16(r1)
+; CHECK64-NEXT:    mtlr r0
+; CHECK64-NEXT:    blr
+;
+; CHECK32-LABEL: test_func2:
+; CHECK32:       # %bb.0: # %entry
+; CHECK32-NEXT:    mflr r0
+; CHECK32-NEXT:    stwu r1, -64(r1)
+; CHECK32-NEXT:    lwz r3, L..C1(r2) # @__ModuleStringPool
+; CHECK32-NEXT:    stw r0, 72(r1)
+; CHECK32-NEXT:    bl .callee2[PR]
+; CHECK32-NEXT:    nop
+; CHECK32-NEXT:    addi r1, r1, 64
+; CHECK32-NEXT:    lwz r0, 8(r1)
+; CHECK32-NEXT:    mtlr r0
+; CHECK32-NEXT:    blr
+;
+; LINUX64LE-LABEL: test_func2:
+; LINUX64LE:       # %bb.0: # %entry
+; LINUX64LE-NEXT:    mflr r0
+; LINUX64LE-NEXT:    stdu r1, -32(r1)
+; LINUX64LE-NEXT:    std r0, 48(r1)
+; LINUX64LE-NEXT:    .cfi_def_cfa_offset 32
+; LINUX64LE-NEXT:    .cfi_offset lr, 16
+; LINUX64LE-NEXT:    addis r3, r2, .L__ModuleStringPool at toc@ha
+; LINUX64LE-NEXT:    addi r3, r3, .L__ModuleStringPool at toc@l
+; LINUX64LE-NEXT:    bl callee2
+; LINUX64LE-NEXT:    nop
+; LINUX64LE-NEXT:    addi r1, r1, 32
+; LINUX64LE-NEXT:    ld r0, 16(r1)
+; LINUX64LE-NEXT:    mtlr r0
+; LINUX64LE-NEXT:    blr
+;
+; LINUX64BE-LABEL: test_func2:
+; LINUX64BE:       # %bb.0: # %entry
+; LINUX64BE-NEXT:    mflr r0
+; LINUX64BE-NEXT:    stdu r1, -112(r1)
+; LINUX64BE-NEXT:    std r0, 128(r1)
+; LINUX64BE-NEXT:    .cfi_def_cfa_offset 112
+; LINUX64BE-NEXT:    .cfi_offset lr, 16
+; LINUX64BE-NEXT:    addis r3, r2, .L__ModuleStringPool at toc@ha
+; LINUX64BE-NEXT:    addi r3, r3, .L__ModuleStringPool at toc@l
+; LINUX64BE-NEXT:    bl callee2
+; LINUX64BE-NEXT:    nop
+; LINUX64BE-NEXT:    addi r1, r1, 112
+; LINUX64BE-NEXT:    ld r0, 16(r1)
+; LINUX64BE-NEXT:    mtlr r0
+; LINUX64BE-NEXT:    blr
+entry:
+  tail call void @callee2(ptr noundef nonnull @e) #4
+  ret void
+}



More information about the llvm-commits mailing list