[llvm] [PowerPC][NFC] Add base test case for small-local-dynamic-tls on AIX (PR #84711)

Felix via llvm-commits llvm-commits at lists.llvm.org
Sun Mar 10 20:40:53 PDT 2024


https://github.com/orcguru created https://github.com/llvm/llvm-project/pull/84711

None

>From 2af2a4c0ef58c17a98a50b784fa2add25cd54ab4 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Sun, 10 Mar 2024 23:38:27 -0400
Subject: [PATCH] [PowerPC][NFC] Add base test case for small-local-dynamic-tls
 on AIX

---
 .../aix-small-local-dynamic-tls-char.ll       |  336 ++++++
 .../aix-small-local-dynamic-tls-double.ll     |  339 ++++++
 .../aix-small-local-dynamic-tls-float.ll      |  339 ++++++
 .../aix-small-local-dynamic-tls-int.ll        |  375 ++++++
 ...aix-small-local-dynamic-tls-largeaccess.ll |  363 ++++++
 .../aix-small-local-dynamic-tls-longlong.ll   |  368 ++++++
 .../aix-small-local-dynamic-tls-short.ll      |  336 ++++++
 .../CodeGen/PowerPC/aix-tls-ld-ldst-O0.ll     | 1031 +++++++++++++++++
 8 files changed, 3487 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-char.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-double.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-float.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-int.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-longlong.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-short.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-tls-ld-ldst-O0.ll

diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-char.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-char.ll
new file mode 100644
index 00000000000000..6fb8683330a303
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-char.ll
@@ -0,0 +1,336 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
+; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN:      < %s | FileCheck %s \
+; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
+
+ at ThreadLocalVarInit = thread_local(localdynamic) global i8 1, align 1
+ at VarInit = local_unnamed_addr global i8 87, align 1
+ at IThreadLocalVarInit = internal thread_local(localdynamic) global i8 1, align 1
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
+ at c = thread_local(localdynamic) global [87 x i8] zeroinitializer, align 1
+
+define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @c
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r3, 1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C1 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r3, 1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @c)
+  %arrayidx = getelementptr inbounds [87 x i8], ptr %0, i64 0, i64 1
+  ret ptr %arrayidx
+}
+
+define void @storeITLInit(i8 noundef zeroext %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeITLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stbx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeITLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C2 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2 at l(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stbx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit)
+  store i8 %x, ptr %0, align 1
+  ret void
+}
+
+define void @storeTLInit(i8 noundef zeroext %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeTLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stbx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeTLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C3 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3 at l(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stbx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalVarInit)
+  store i8 %x, ptr %0, align 1
+  ret void
+}
+
+define zeroext i8 @loadITLInit() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lbzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lbzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit)
+  %1 = load i8, ptr %0, align 1
+  ret i8 %1
+}
+
+define zeroext i8 @loadITLInit2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lbzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # @VarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lbz r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    clrldi r3, r3, 56
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lbzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C4 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4 at l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lbz r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    clrldi r3, r3, 56
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit)
+  %1 = load i8, ptr %0, align 1
+  %2 = load i8, ptr @VarInit, align 1
+  %add = add i8 %2, %1
+  ret i8 %add
+}
+
+define zeroext i8 @loadTLInit() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lbzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lbzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalVarInit)
+  %1 = load i8, ptr %0, align 1
+  ret i8 %1
+}
+
+define zeroext i8 @loadTLInit2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lbzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # @VarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lbz r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    clrldi r3, r3, 56
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lbzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C4 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4 at l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lbz r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    clrldi r3, r3, 56
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalVarInit)
+  %1 = load i8, ptr %0, align 1
+  %2 = load i8, ptr @VarInit, align 1
+  %add = add i8 %2, %1
+  ret i8 %add
+}
+
+define void @loadStore1(i8 noundef zeroext %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadStore1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lbzx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r5, r5, 9
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stbx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadStore1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lbzx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r5, r5, 9
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stbx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit)
+  %1 = load i8, ptr %0, align 1
+  %add = add i8 %1, 9
+  store i8 %add, ptr %0, align 1
+  ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-double.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-double.ll
new file mode 100644
index 00000000000000..3dfc0aa6d4aff1
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-double.ll
@@ -0,0 +1,339 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
+; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN:      < %s | FileCheck %s \
+; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
+
+ at ThreadLocalVarInit = thread_local(localdynamic) global double 1.000000e+00, align 8
+ at VarInit = local_unnamed_addr global double 8.700000e+01, align 8
+ at IThreadLocalVarInit = internal thread_local(localdynamic) global double 1.000000e+00, align 8
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
+ at f = thread_local(localdynamic) global [87 x double] zeroinitializer, align 8
+
+define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @f
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r3, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C1 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r3, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @f)
+  %arrayidx = getelementptr inbounds [87 x double], ptr %0, i64 0, i64 6
+  ret ptr %arrayidx
+}
+
+define void @storeITLInit(double noundef %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeITLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfdx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeITLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfdx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
+  store double %x, ptr %0, align 8
+  ret void
+}
+
+define void @storeTLInit(double noundef %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeTLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfdx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeTLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfdx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
+  store double %x, ptr %0, align 8
+  ret void
+}
+
+define double @loadITLInit() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfdx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfdx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
+  %1 = load double, ptr %0, align 8
+  ret double %1
+}
+
+define double @loadITLInit2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfdx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C4(r2) # @VarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfd f1, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xsadddp f1, f0, f1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfdx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C4 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C4 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfd f1, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xsadddp f1, f0, f1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
+  %1 = load double, ptr %0, align 8
+  %2 = load double, ptr @VarInit, align 8
+  %add = fadd double %1, %2
+  ret double %add
+}
+
+define double @loadTLInit() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfdx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfdx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
+  %1 = load double, ptr %0, align 8
+  ret double %1
+}
+
+define double @loadTLInit2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfdx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C4(r2) # @VarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfd f1, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xsadddp f1, f0, f1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfdx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C4 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C4 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfd f1, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xsadddp f1, f0, f1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
+  %1 = load double, ptr %0, align 8
+  %2 = load double, ptr @VarInit, align 8
+  %add = fadd double %1, %2
+  ret double %add
+}
+
+define void @loadStore1(double noundef %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadStore1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    vspltisw v2, 1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    vspltisw v3, 8
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xvcvsxwdp vs0, vs34
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfdx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xsadddp f0, f1, f0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xvcvsxwdp vs1, vs35
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xsadddp f0, f0, f1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfdx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadStore1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    vspltisw v2, 1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    vspltisw v3, 8
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xvcvsxwdp vs0, vs34
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfdx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xsadddp f0, f1, f0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xvcvsxwdp vs1, vs35
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xsadddp f0, f0, f1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfdx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
+  %1 = load double, ptr %0, align 8
+  %inc = fadd double %1, 1.000000e+00
+  %add = fadd double %inc, 8.000000e+00
+  store double %add, ptr %0, align 8
+  ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-float.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-float.ll
new file mode 100644
index 00000000000000..69a4baa1d19bb8
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-float.ll
@@ -0,0 +1,339 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
+; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN:      < %s | FileCheck %s \
+; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
+
+ at ThreadLocalVarInit = thread_local(localdynamic) global float 1.000000e+00, align 4
+ at VarInit = local_unnamed_addr global float 8.700000e+01, align 4
+ at IThreadLocalVarInit = internal thread_local(localdynamic) global float 1.000000e+00, align 4
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
+ at e = thread_local(localdynamic) global [87 x float] zeroinitializer, align 4
+
+define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @e
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r3, 16
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C1 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r3, 16
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @e)
+  %arrayidx = getelementptr inbounds [87 x float], ptr %0, i64 0, i64 4
+  ret ptr %arrayidx
+}
+
+define void @storeITLInit(float noundef %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeITLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfsx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeITLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfsx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
+  store float %x, ptr %0, align 4
+  ret void
+}
+
+define void @storeTLInit(float noundef %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeTLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfsx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeTLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfsx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
+  store float %x, ptr %0, align 4
+  ret void
+}
+
+define float @loadITLInit() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfsx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfsx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
+  %1 = load float, ptr %0, align 4
+  ret float %1
+}
+
+define float @loadITLInit2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfsx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C4(r2) # @VarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfs f1, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    fadds f1, f0, f1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfsx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C4 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C4 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfs f1, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    fadds f1, f0, f1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
+  %1 = load float, ptr %0, align 4
+  %2 = load float, ptr @VarInit, align 4
+  %add = fadd float %1, %2
+  ret float %add
+}
+
+define float @loadTLInit() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfsx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfsx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
+  %1 = load float, ptr %0, align 4
+  ret float %1
+}
+
+define float @loadTLInit2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfsx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C4(r2) # @VarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfs f1, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    fadds f1, f0, f1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfsx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C4 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C4 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfs f1, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    fadds f1, f0, f1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
+  %1 = load float, ptr %0, align 4
+  %2 = load float, ptr @VarInit, align 4
+  %add = fadd float %1, %2
+  ret float %add
+}
+
+define void @loadStore1(float noundef %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadStore1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    vspltisw v2, 1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    vspltisw v3, 8
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xvcvsxwdp vs0, vs34
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfsx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    fadds f0, f1, f0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xvcvsxwdp vs1, vs35
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    fadds f0, f0, f1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfsx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadStore1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    vspltisw v2, 1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    vspltisw v3, 8
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xvcvsxwdp vs0, vs34
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfsx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    fadds f0, f1, f0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xvcvsxwdp vs1, vs35
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    fadds f0, f0, f1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfsx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
+  %1 = load float, ptr %0, align 4
+  %inc = fadd float %1, 1.000000e+00
+  %add = fadd float %inc, 8.000000e+00
+  store float %add, ptr %0, align 4
+  ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-int.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-int.ll
new file mode 100644
index 00000000000000..ff397169208f46
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-int.ll
@@ -0,0 +1,375 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
+; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN:      < %s | FileCheck %s \
+; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
+
+ at ThreadLocalVarInit = thread_local(localdynamic) global i32 1, align 4
+ at VarInit = local_unnamed_addr global i32 87, align 4
+ at IThreadLocalVarInit = internal thread_local(localdynamic) global i32 1, align 4
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
+%struct.anon = type { i32 }
+ at ThreadLocalStruct = thread_local(localdynamic) global %struct.anon zeroinitializer, align 1
+ at a = thread_local(localdynamic) global [87 x i32] zeroinitializer, align 4
+
+define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @a
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r3, 12
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C1 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r3, 12
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @a)
+  %arrayidx = getelementptr inbounds [87 x i32], ptr %0, i64 0, i64 3
+  ret ptr %arrayidx
+}
+
+define signext i32 @testUnaligned() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testUnaligned:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @ThreadLocalStruct
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testUnaligned:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalStruct)
+  %x = getelementptr inbounds %struct.anon, ptr %0, i32 0, i32 0
+  %1 = load i32, ptr %x, align 1
+  ret i32 %1
+}
+
+define void @storeITLInit(i32 noundef signext %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeITLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stwx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeITLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C3 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3 at l(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stwx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
+  store i32 %x, ptr %0, align 4
+  ret void
+}
+
+define void @storeTLInit(i32 noundef signext %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeTLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stwx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeTLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C4 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4 at l(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stwx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
+  store i32 %x, ptr %0, align 4
+  ret void
+}
+
+define signext i32 @loadITLInit() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define signext i32 @loadITLInit2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C5(r2) # @VarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwz r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    extsw r3, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C5 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C5 at l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwz r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    extsw r3, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
+  %1 = load i32, ptr %0, align 4
+  %2 = load i32, ptr @VarInit, align 4
+  %add = add nsw i32 %2, %1
+  ret i32 %add
+}
+
+define signext i32 @loadTLInit() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C4 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define signext i32 @loadTLInit2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C5(r2) # @VarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwz r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    extsw r3, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C4 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C5 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C5 at l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwz r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    extsw r3, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
+  %1 = load i32, ptr %0, align 4
+  %2 = load i32, ptr @VarInit, align 4
+  %add = add nsw i32 %2, %1
+  ret i32 %add
+}
+
+define void @loadStore1(i32 noundef signext %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadStore1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwzx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r5, r5, 9
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stwx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadStore1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwzx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r5, r5, 9
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stwx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
+  %1 = load i32, ptr %0, align 4
+  %add = add nsw i32 %1, 9
+  store i32 %add, ptr %0, align 4
+  ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll
new file mode 100644
index 00000000000000..643ad01a9ae3f2
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll
@@ -0,0 +1,363 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
+; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN:      < %s | FileCheck %s \
+; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
+
+; Test disassembly of object.
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff -xcoff-traceback-table=false \
+; RUN:      --code-model=large -filetype=obj -o %t.o < %s
+; RUN: llvm-objdump -D -r --symbol-description %t.o | FileCheck -D#NFA=2 --check-prefix=DIS %s
+
+ at mySmallLocalDynamicTLSv1 = thread_local(localdynamic) global [8187 x i32] zeroinitializer, align 4
+ at mySmallLocalDynamicTLS2 = thread_local(localdynamic) global [4000 x i32] zeroinitializer, align 4
+ at mySmallLocalDynamicTLS3 = thread_local(localdynamic) global [4000 x i32] zeroinitializer, align 4
+ at mySmallLocalDynamicTLS4 = thread_local(localdynamic) global [4000 x i32] zeroinitializer, align 4
+ at mySmallLocalDynamicTLS5 = thread_local(localdynamic) global [4000 x i32] zeroinitializer, align 4
+ at mySmallLocalDynamicTLSv2 = thread_local(localdynamic) global [9000 x i32] zeroinitializer, align 4
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
+
+; All accesses use a "faster" local-dynamic sequence directly off the thread pointer.
+define signext i32 @StoreArrays1() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: StoreArrays1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r6, L..C1(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS2
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r7, L..C2(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r8, L..C3(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r9, L..C4(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS5
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r5, L..C5(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLSv1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r6, r3, r6
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r7, r3, r7
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r8, r3, r8
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r9, r3, r9
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stwux r4, r3, r5
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 24(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 2
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 320(r6)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 324(r7)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 88
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 328(r8)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 332(r9)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 102
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: StoreArrays1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C2 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r8, L..C3 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r9, L..C4 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r7, L..C2 at l(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r8, L..C3 at l(r8)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r9, L..C4 at l(r9)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r5, L..C1 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C5 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r6, L..C5 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r7, r3, r7
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r8, r3, r8
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r9, r3, r9
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r6, r3, r6
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stwux r4, r3, r5
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, 24(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 2
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 320(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 324(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 88
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, 328(r8)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 332(r9)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 102
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLSv1)
+  store i32 1, ptr %0, align 4
+  %arrayidx1 = getelementptr inbounds [8187 x i32], ptr %0, i64 0, i64 6
+  store i32 4, ptr %arrayidx1, align 4
+  %1 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLS2)
+  %arrayidx2 = getelementptr inbounds [4000 x i32], ptr %1, i64 0, i64 80
+  store i32 2, ptr %arrayidx2, align 4
+  %2 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLS3)
+  %arrayidx3 = getelementptr inbounds [4000 x i32], ptr %2, i64 0, i64 81
+  store i32 3, ptr %arrayidx3, align 4
+  %3 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLS4)
+  %arrayidx4 = getelementptr inbounds [4000 x i32], ptr %3, i64 0, i64 82
+  store i32 4, ptr %arrayidx4, align 4
+  %4 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLS5)
+  %arrayidx5 = getelementptr inbounds [4000 x i32], ptr %4, i64 0, i64 83
+  store i32 88, ptr %arrayidx5, align 4
+  %5 = load i32, ptr %0, align 4
+  %6 = load i32, ptr %arrayidx1, align 4
+  %7 = load i32, ptr %arrayidx2, align 4
+  %8 = load i32, ptr %arrayidx3, align 4
+  %9 = load i32, ptr %arrayidx4, align 4
+  %add = add i32 %5, 88
+  %add9 = add i32 %add, %6
+  %add11 = add i32 %add9, %7
+  %add13 = add i32 %add11, %8
+  %add15 = add i32 %add13, %9
+  ret i32 %add15
+}
+
+; Example of one access using the regular local-dynamic access from the TOC.
+define signext i32 @StoreArrays2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: StoreArrays2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r6, L..C1(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS2
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r7, L..C2(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r8, L..C3(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r9, L..C4(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS5
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r5, L..C6(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLSv2
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r6, r3, r6
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r7, r3, r7
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r8, r3, r8
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r9, r3, r9
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stwux r4, r3, r5
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 24(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 2
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 320(r6)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 324(r7)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 88
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 328(r8)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 332(r9)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 102
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: StoreArrays2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C6 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C2 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r8, L..C3 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r9, L..C4 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r7, L..C2 at l(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r8, L..C3 at l(r8)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r9, L..C4 at l(r9)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r5, L..C6 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C5 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r6, L..C5 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r7, r3, r7
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r8, r3, r8
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r9, r3, r9
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r6, r3, r6
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stwux r4, r3, r5
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, 24(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 2
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 320(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 324(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 88
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, 328(r8)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 332(r9)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 102
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLSv2)
+  store i32 1, ptr %0, align 4
+  %arrayidx1 = getelementptr inbounds [9000 x i32], ptr %0, i64 0, i64 6
+  store i32 4, ptr %arrayidx1, align 4
+  %1 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLS2)
+  %arrayidx2 = getelementptr inbounds [4000 x i32], ptr %1, i64 0, i64 80
+  store i32 2, ptr %arrayidx2, align 4
+  %2 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLS3)
+  %arrayidx3 = getelementptr inbounds [4000 x i32], ptr %2, i64 0, i64 81
+  store i32 3, ptr %arrayidx3, align 4
+  %3 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLS4)
+  %arrayidx4 = getelementptr inbounds [4000 x i32], ptr %3, i64 0, i64 82
+  store i32 4, ptr %arrayidx4, align 4
+  %4 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLS5)
+  %arrayidx5 = getelementptr inbounds [4000 x i32], ptr %4, i64 0, i64 83
+  store i32 88, ptr %arrayidx5, align 4
+  %5 = load i32, ptr %0, align 4
+  %6 = load i32, ptr %arrayidx1, align 4
+  %7 = load i32, ptr %arrayidx2, align 4
+  %8 = load i32, ptr %arrayidx3, align 4
+  %9 = load i32, ptr %arrayidx4, align 4
+  %add = add i32 %5, 88
+  %add9 = add i32 %add, %6
+  %add11 = add i32 %add9, %7
+  %add13 = add i32 %add11, %8
+  %add15 = add i32 %add13, %9
+  ret i32 %add15
+}
+
+; DIS:      file format aix5coff64-rs6000
+; DIS:      Disassembly of section .text:
+; DIS:      0000000000000000 (idx: [[#NFA+5]]) .StoreArrays1:
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                mflr 0
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stdu 1, -48(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+15]]) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 6, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+17]]) mySmallLocalDynamicTLSv1[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 0, 64(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 7, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+19]]) mySmallLocalDynamicTLS3[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 0(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+15]]) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 8, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+21]]) mySmallLocalDynamicTLS4[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 9, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+23]]) mySmallLocalDynamicTLS5[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 7, 16(7)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+19]]) mySmallLocalDynamicTLS3[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 8, 24(8)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+21]]) mySmallLocalDynamicTLS4[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 9, 32(9)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+23]]) mySmallLocalDynamicTLS5[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0x0
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA              (idx: [[#NFA+1]]) .__tls_get_mod[PR]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 5, 8(6)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+17]]) mySmallLocalDynamicTLSv1[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 6, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+25]]) mySmallLocalDynamicTLS2[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 1
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 6, 40(6)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+25]]) mySmallLocalDynamicTLS2[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 7, 3, 7
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 8, 3, 8
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 9, 3, 9
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 6, 3, 6
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stwux 4, 3, 5
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 4
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stw 4, 24(3)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 3, 2
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stw 3, 320(6)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 3, 3
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stw 3, 324(7)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 3, 88
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stw 4, 328(8)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stw 3, 332(9)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 3, 102
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addi 1, 1, 48
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 0, 16(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                mtlr 0
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                blr
+
+; DIS:      0000000000000090 (idx: [[#NFA+7]]) .StoreArrays2:
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                mflr 0
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stdu 1, -48(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+15]]) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 6, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+27]]) mySmallLocalDynamicTLSv2[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 0, 64(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 7, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+19]]) mySmallLocalDynamicTLS3[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 0(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+15]]) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 8, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+21]]) mySmallLocalDynamicTLS4[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 9, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+23]]) mySmallLocalDynamicTLS5[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 7, 16(7)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+19]]) mySmallLocalDynamicTLS3[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 8, 24(8)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+21]]) mySmallLocalDynamicTLS4[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 9, 32(9)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+23]]) mySmallLocalDynamicTLS5[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0x0
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA              (idx: [[#NFA+1]]) .__tls_get_mod[PR]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 5, 48(6)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+27]]) mySmallLocalDynamicTLSv2[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 6, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU	        (idx: [[#NFA+25]]) mySmallLocalDynamicTLS2[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 1
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 6, 40(6)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+25]]) mySmallLocalDynamicTLS2[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 7, 3, 7
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 8, 3, 8
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 9, 3, 9
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 6, 3, 6
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stwux 4, 3, 5
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 4
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stw 4, 24(3)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 3, 2
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stw 3, 320(6)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 3, 3
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stw 3, 324(7)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 3, 88
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stw 4, 328(8)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stw 3, 332(9)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 3, 102
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addi 1, 1, 48
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 0, 16(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                mtlr 0
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                blr
+
+
+; DIS:      Disassembly of section .data:
+; DIS:      0000000000000120 (idx: [[#NFA+9]]) StoreArrays1[DS]:
+; DIS-NEXT:      120: 00 00 00 00
+; DIS-NEXT: 0000000000000120:  R_POS	(idx: [[#NFA+5]]) .StoreArrays1
+; DIS-NEXT:      124: 00 00 00 00
+; DIS-NEXT:      128: 00 00 00 00
+; DIS-NEXT: 0000000000000128:  R_POS        (idx: [[#NFA+13]]) TOC[TC0]
+; DIS-NEXT:      12c: 00 00 01 50
+
+; DIS:      0000000000000138 (idx: [[#NFA+11]]) StoreArrays2[DS]:
+; DIS-NEXT:      138: 00 00 00 00
+; DIS-NEXT: 0000000000000138:  R_POS	(idx: [[#NFA+7]]) .StoreArrays2
+; DIS-NEXT:      13c: 00 00 00 90
+; DIS-NEXT:      140: 00 00 00 00
+; DIS-NEXT: 0000000000000140:  R_POS        (idx: [[#NFA+13]]) TOC[TC0]
+; DIS-NEXT:      144: 00 00 01 50
+
+; DIS:      0000000000000180 (idx: [[#NFA+27]]) mySmallLocalDynamicTLSv2[TE]:
+; DIS-NEXT:      180: 00 00 00 00
+; DIS-NEXT: 0000000000000180:  R_TLS_LD     (idx: [[#NFA+39]]) mySmallLocalDynamicTLSv2[TL]
+; DIS-NEXT:      184: 00 01 79 ec
+
+; DIS:      Disassembly of section .tdata:
+; DIS:      0000000000000000 (idx: [[#NFA+29]]) mySmallLocalDynamicTLSv1[TL]:
+; DIS:      0000000000007fec (idx: [[#NFA+31]]) mySmallLocalDynamicTLS2[TL]:
+; DIS:      000000000000be6c (idx: [[#NFA+33]]) mySmallLocalDynamicTLS3[TL]:
+; DIS:      000000000000fcec (idx: [[#NFA+35]]) mySmallLocalDynamicTLS4[TL]:
+; DIS:      0000000000013b6c (idx: [[#NFA+37]]) mySmallLocalDynamicTLS5[TL]:
+; DIS:      00000000000179ec (idx: [[#NFA+39]]) mySmallLocalDynamicTLSv2[TL]:
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-longlong.ll
new file mode 100644
index 00000000000000..f9157124fb3ad9
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-longlong.ll
@@ -0,0 +1,368 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
+; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN:      < %s | FileCheck %s \
+; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
+
+ at ThreadLocalVarInit = thread_local(localdynamic) global i64 1, align 8
+ at VarInit = local_unnamed_addr global i64 87, align 8
+ at IThreadLocalVarInit = internal thread_local(localdynamic) global i64 1, align 8
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
+%struct.anon = type { i64 }
+ at ThreadLocalStruct = thread_local(localdynamic) global %struct.anon zeroinitializer, align 1
+ at d = thread_local(localdynamic) global [87 x i64] zeroinitializer, align 8
+
+define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @d
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C1 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @d)
+  ret ptr %0
+}
+
+define i64 @testUnaligned() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testUnaligned:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @ThreadLocalStruct
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testUnaligned:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalStruct)
+  %x = getelementptr inbounds %struct.anon, ptr %0, i32 0, i32 0
+  %1 = load i64, ptr %x, align 1
+  ret i64 %1
+}
+
+define void @storeITLInit(i64 noundef %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeITLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeITLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C3 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3 at l(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
+  store i64 %x, ptr %0, align 8
+  ret void
+}
+
+define void @storeTLInit(i64 noundef %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeTLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeTLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C4 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4 at l(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
+  store i64 %x, ptr %0, align 8
+  ret void
+}
+
+define i64 @loadITLInit() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
+  %1 = load i64, ptr %0, align 8
+  ret i64 %1
+}
+
+define i64 @loadITLInit2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C5(r2) # @VarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C5 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C5 at l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
+  %1 = load i64, ptr %0, align 8
+  %2 = load i64, ptr @VarInit, align 8
+  %add = add nsw i64 %2, %1
+  ret i64 %add
+}
+
+define i64 @loadTLInit() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C4 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
+  %1 = load i64, ptr %0, align 8
+  ret i64 %1
+}
+
+define i64 @loadTLInit2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C5(r2) # @VarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C4 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C5 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C5 at l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
+  %1 = load i64, ptr %0, align 8
+  %2 = load i64, ptr @VarInit, align 8
+  %add = add nsw i64 %2, %1
+  ret i64 %add
+}
+
+define void @loadStore1(i64 noundef %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadStore1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r5, r5, 9
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadStore1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r5, r5, 9
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
+  %1 = load i64, ptr %0, align 8
+  %add = add nsw i64 %1, 9
+  store i64 %add, ptr %0, align 8
+  ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-short.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-short.ll
new file mode 100644
index 00000000000000..7482ca430c9a5b
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-short.ll
@@ -0,0 +1,336 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
+; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN:      < %s | FileCheck %s \
+; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
+
+ at ThreadLocalVarInit = thread_local(localdynamic) global i16 1, align 2
+ at VarInit = local_unnamed_addr global i16 87, align 2
+ at IThreadLocalVarInit = internal thread_local(localdynamic) global i16 1, align 2
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
+ at b = thread_local(localdynamic) global [87 x i16] zeroinitializer, align 2
+
+define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @b
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r3, 4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C1 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r3, 4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @b)
+  %arrayidx = getelementptr inbounds [87 x i16], ptr %0, i64 0, i64 2
+  ret ptr %arrayidx
+}
+
+define void @storeITLInit(i16 noundef signext %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeITLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    sthx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeITLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C2 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2 at l(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    sthx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @IThreadLocalVarInit)
+  store i16 %x, ptr %0, align 2
+  ret void
+}
+
+define void @storeTLInit(i16 noundef signext %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeTLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    sthx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeTLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C3 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3 at l(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    sthx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @ThreadLocalVarInit)
+  store i16 %x, ptr %0, align 2
+  ret void
+}
+
+define signext i16 @loadITLInit() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lhax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lhax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @IThreadLocalVarInit)
+  %1 = load i16, ptr %0, align 2
+  ret i16 %1
+}
+
+define signext i16 @loadITLInit2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lhzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # @VarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lhz r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    extsh r3, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lhzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C4 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4 at l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lhz r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    extsh r3, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @IThreadLocalVarInit)
+  %1 = load i16, ptr %0, align 2
+  %2 = load i16, ptr @VarInit, align 2
+  %add = add i16 %2, %1
+  ret i16 %add
+}
+
+define signext i16 @loadTLInit() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lhax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lhax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @ThreadLocalVarInit)
+  %1 = load i16, ptr %0, align 2
+  ret i16 %1
+}
+
+define signext i16 @loadTLInit2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lhzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # @VarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lhz r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    extsh r3, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lhzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C4 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4 at l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lhz r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    extsh r3, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @ThreadLocalVarInit)
+  %1 = load i16, ptr %0, align 2
+  %2 = load i16, ptr @VarInit, align 2
+  %add = add i16 %2, %1
+  ret i16 %add
+}
+
+define void @loadStore1(i16 noundef signext %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadStore1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lhzx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r5, r5, 9
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    sthx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadStore1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2 at l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lhzx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r5, r5, 9
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    sthx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @IThreadLocalVarInit)
+  %1 = load i16, ptr %0, align 2
+  %add = add i16 %1, 9
+  store i16 %add, ptr %0, align 2
+  ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-ld-ldst-O0.ll b/llvm/test/CodeGen/PowerPC/aix-tls-ld-ldst-O0.ll
new file mode 100644
index 00000000000000..53aa47072de810
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-ld-ldst-O0.ll
@@ -0,0 +1,1031 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc  -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s \
+; RUN:      --check-prefix=SMALL64-O0
+; RUN: llc  -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \
+; RUN:      | FileCheck %s --check-prefix=LARGE64-O0
+; RUN: llc  -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s \
+; RUN:      --check-prefix=SMALL32-O0
+; RUN: llc  -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc-ibm-aix-xcoff --code-model=large < %s \
+; RUN:      | FileCheck %s --check-prefix=LARGE32-O0
+; RUN: llc  -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
+; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64
+; RUN: llc  -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN:      < %s | FileCheck %s \
+; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
+
+ at TLInt = internal thread_local(localdynamic) global i32 0, align 4
+ at TLLongLong = internal thread_local(localdynamic) global i64 0, align 8
+ at TLDouble = internal thread_local(localdynamic) global double 0.000000e+00, align 8
+ at TLFloat = internal thread_local(localdynamic) global float 0.000000e+00, align 4
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
+
+define void @storeInt(i32 noundef %x) {
+; SMALL64-O0-LABEL: storeInt:
+; SMALL64-O0:       # %bb.0: # %entry
+; SMALL64-O0-NEXT:    mflr r0
+; SMALL64-O0-NEXT:    stdu r1, -64(r1)
+; SMALL64-O0-NEXT:    std r0, 80(r1)
+; SMALL64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; SMALL64-O0-NEXT:    stw r3, 60(r1) # 4-byte Folded Spill
+; SMALL64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-O0-NEXT:    mr r4, r3
+; SMALL64-O0-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
+; SMALL64-O0-NEXT:    ld r5, L..C1(r2) # target-flags(ppc-tlsld) @TLInt
+; SMALL64-O0-NEXT:    add r4, r4, r5
+; SMALL64-O0-NEXT:    stw r3, 0(r4)
+; SMALL64-O0-NEXT:    addi r1, r1, 64
+; SMALL64-O0-NEXT:    ld r0, 16(r1)
+; SMALL64-O0-NEXT:    mtlr r0
+; SMALL64-O0-NEXT:    blr
+;
+; LARGE64-O0-LABEL: storeInt:
+; LARGE64-O0:       # %bb.0: # %entry
+; LARGE64-O0-NEXT:    mflr r0
+; LARGE64-O0-NEXT:    stdu r1, -64(r1)
+; LARGE64-O0-NEXT:    std r0, 80(r1)
+; LARGE64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; LARGE64-O0-NEXT:    stw r3, 60(r1) # 4-byte Folded Spill
+; LARGE64-O0-NEXT:    addis r3, L..C0 at u(r2)
+; LARGE64-O0-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
+; LARGE64-O0-NEXT:    addis r3, L..C1 at u(r2)
+; LARGE64-O0-NEXT:    ld r3, L..C1 at l(r3)
+; LARGE64-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-O0-NEXT:    ld r5, 48(r1) # 8-byte Folded Reload
+; LARGE64-O0-NEXT:    mr r4, r3
+; LARGE64-O0-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
+; LARGE64-O0-NEXT:    ld r5, L..C0 at l(r5)
+; LARGE64-O0-NEXT:    add r4, r4, r5
+; LARGE64-O0-NEXT:    stw r3, 0(r4)
+; LARGE64-O0-NEXT:    addi r1, r1, 64
+; LARGE64-O0-NEXT:    ld r0, 16(r1)
+; LARGE64-O0-NEXT:    mtlr r0
+; LARGE64-O0-NEXT:    blr
+;
+; SMALL32-O0-LABEL: storeInt:
+; SMALL32-O0:       # %bb.0: # %entry
+; SMALL32-O0-NEXT:    mflr r0
+; SMALL32-O0-NEXT:    stwu r1, -32(r1)
+; SMALL32-O0-NEXT:    stw r0, 40(r1)
+; SMALL32-O0-NEXT:    stw r3, 28(r1) # 4-byte Folded Spill
+; SMALL32-O0-NEXT:    lwz r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL32-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-O0-NEXT:    mr r4, r3
+; SMALL32-O0-NEXT:    lwz r3, 28(r1) # 4-byte Folded Reload
+; SMALL32-O0-NEXT:    lwz r5, L..C1(r2) # target-flags(ppc-tlsld) @TLInt
+; SMALL32-O0-NEXT:    stwx r3, r4, r5
+; SMALL32-O0-NEXT:    addi r1, r1, 32
+; SMALL32-O0-NEXT:    lwz r0, 8(r1)
+; SMALL32-O0-NEXT:    mtlr r0
+; SMALL32-O0-NEXT:    blr
+;
+; LARGE32-O0-LABEL: storeInt:
+; LARGE32-O0:       # %bb.0: # %entry
+; LARGE32-O0-NEXT:    mflr r0
+; LARGE32-O0-NEXT:    stwu r1, -32(r1)
+; LARGE32-O0-NEXT:    stw r0, 40(r1)
+; LARGE32-O0-NEXT:    stw r3, 28(r1) # 4-byte Folded Spill
+; LARGE32-O0-NEXT:    addis r3, L..C0 at u(r2)
+; LARGE32-O0-NEXT:    stw r3, 24(r1) # 4-byte Folded Spill
+; LARGE32-O0-NEXT:    addis r3, L..C1 at u(r2)
+; LARGE32-O0-NEXT:    lwz r3, L..C1 at l(r3)
+; LARGE32-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-O0-NEXT:    lwz r5, 24(r1) # 4-byte Folded Reload
+; LARGE32-O0-NEXT:    mr r4, r3
+; LARGE32-O0-NEXT:    lwz r3, 28(r1) # 4-byte Folded Reload
+; LARGE32-O0-NEXT:    lwz r5, L..C0 at l(r5)
+; LARGE32-O0-NEXT:    stwx r3, r4, r5
+; LARGE32-O0-NEXT:    addi r1, r1, 32
+; LARGE32-O0-NEXT:    lwz r0, 8(r1)
+; LARGE32-O0-NEXT:    mtlr r0
+; LARGE32-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeInt:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 60(r1) # 4-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r4, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r5, L..C1(r2) # target-flags(ppc-tlsld) @TLInt
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r4, r4, r5
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeInt:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 60(r1) # 4-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C1 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r5, 48(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r4, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r5, L..C0 at l(r5)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r4, r4, r5
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLInt)
+  store i32 %x, ptr %0, align 4
+  ret void
+}
+
+define void @storeLongLong(i64 noundef %x) {
+; SMALL64-O0-LABEL: storeLongLong:
+; SMALL64-O0:       # %bb.0: # %entry
+; SMALL64-O0-NEXT:    mflr r0
+; SMALL64-O0-NEXT:    stdu r1, -64(r1)
+; SMALL64-O0-NEXT:    std r0, 80(r1)
+; SMALL64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; SMALL64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-O0-NEXT:    mr r4, r3
+; SMALL64-O0-NEXT:    ld r3, 56(r1) # 8-byte Folded Reload
+; SMALL64-O0-NEXT:    ld r5, L..C2(r2) # target-flags(ppc-tlsld) @TLLongLong
+; SMALL64-O0-NEXT:    add r4, r4, r5
+; SMALL64-O0-NEXT:    std r3, 0(r4)
+; SMALL64-O0-NEXT:    addi r1, r1, 64
+; SMALL64-O0-NEXT:    ld r0, 16(r1)
+; SMALL64-O0-NEXT:    mtlr r0
+; SMALL64-O0-NEXT:    blr
+;
+; LARGE64-O0-LABEL: storeLongLong:
+; LARGE64-O0:       # %bb.0: # %entry
+; LARGE64-O0-NEXT:    mflr r0
+; LARGE64-O0-NEXT:    stdu r1, -64(r1)
+; LARGE64-O0-NEXT:    std r0, 80(r1)
+; LARGE64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; LARGE64-O0-NEXT:    addis r3, L..C2 at u(r2)
+; LARGE64-O0-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
+; LARGE64-O0-NEXT:    addis r3, L..C1 at u(r2)
+; LARGE64-O0-NEXT:    ld r3, L..C1 at l(r3)
+; LARGE64-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-O0-NEXT:    ld r5, 48(r1) # 8-byte Folded Reload
+; LARGE64-O0-NEXT:    mr r4, r3
+; LARGE64-O0-NEXT:    ld r3, 56(r1) # 8-byte Folded Reload
+; LARGE64-O0-NEXT:    ld r5, L..C2 at l(r5)
+; LARGE64-O0-NEXT:    add r4, r4, r5
+; LARGE64-O0-NEXT:    std r3, 0(r4)
+; LARGE64-O0-NEXT:    addi r1, r1, 64
+; LARGE64-O0-NEXT:    ld r0, 16(r1)
+; LARGE64-O0-NEXT:    mtlr r0
+; LARGE64-O0-NEXT:    blr
+;
+; SMALL32-O0-LABEL: storeLongLong:
+; SMALL32-O0:       # %bb.0: # %entry
+; SMALL32-O0-NEXT:    mflr r0
+; SMALL32-O0-NEXT:    stwu r1, -32(r1)
+; SMALL32-O0-NEXT:    stw r0, 40(r1)
+; SMALL32-O0-NEXT:    stw r4, 28(r1) # 4-byte Folded Spill
+; SMALL32-O0-NEXT:    stw r3, 24(r1) # 4-byte Folded Spill
+; SMALL32-O0-NEXT:    lwz r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL32-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-O0-NEXT:    lwz r5, 24(r1) # 4-byte Folded Reload
+; SMALL32-O0-NEXT:    lwz r4, 28(r1) # 4-byte Folded Reload
+; SMALL32-O0-NEXT:    lwz r6, L..C2(r2) # target-flags(ppc-tlsld) @TLLongLong
+; SMALL32-O0-NEXT:    stwux r5, r3, r6
+; SMALL32-O0-NEXT:    stw r4, 4(r3)
+; SMALL32-O0-NEXT:    addi r1, r1, 32
+; SMALL32-O0-NEXT:    lwz r0, 8(r1)
+; SMALL32-O0-NEXT:    mtlr r0
+; SMALL32-O0-NEXT:    blr
+;
+; LARGE32-O0-LABEL: storeLongLong:
+; LARGE32-O0:       # %bb.0: # %entry
+; LARGE32-O0-NEXT:    mflr r0
+; LARGE32-O0-NEXT:    stwu r1, -32(r1)
+; LARGE32-O0-NEXT:    stw r0, 40(r1)
+; LARGE32-O0-NEXT:    stw r4, 28(r1) # 4-byte Folded Spill
+; LARGE32-O0-NEXT:    stw r3, 24(r1) # 4-byte Folded Spill
+; LARGE32-O0-NEXT:    addis r6, L..C2 at u(r2)
+; LARGE32-O0-NEXT:    addis r3, L..C1 at u(r2)
+; LARGE32-O0-NEXT:    lwz r3, L..C1 at l(r3)
+; LARGE32-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-O0-NEXT:    lwz r5, 24(r1) # 4-byte Folded Reload
+; LARGE32-O0-NEXT:    lwz r4, 28(r1) # 4-byte Folded Reload
+; LARGE32-O0-NEXT:    lwz r6, L..C2 at l(r6)
+; LARGE32-O0-NEXT:    stwux r5, r3, r6
+; LARGE32-O0-NEXT:    stw r4, 4(r3)
+; LARGE32-O0-NEXT:    addi r1, r1, 32
+; LARGE32-O0-NEXT:    lwz r0, 8(r1)
+; LARGE32-O0-NEXT:    mtlr r0
+; LARGE32-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeLongLong:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r4, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, 56(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r5, L..C2(r2) # target-flags(ppc-tlsld) @TLLongLong
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r4, r4, r5
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r3, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeLongLong:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C2 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C1 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r5, 48(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r4, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, 56(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r5, L..C2 at l(r5)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r4, r4, r5
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLLongLong)
+  store i64 %x, ptr %0, align 8
+  ret void
+}
+
+define void @storeDouble(double noundef %x) {
+; SMALL64-O0-LABEL: storeDouble:
+; SMALL64-O0:       # %bb.0: # %entry
+; SMALL64-O0-NEXT:    mflr r0
+; SMALL64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL64-O0-NEXT:    std r0, 64(r1)
+; SMALL64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-O0-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @TLDouble
+; SMALL64-O0-NEXT:    add r3, r3, r4
+; SMALL64-O0-NEXT:    stxsdx f1, 0, r3
+; SMALL64-O0-NEXT:    addi r1, r1, 48
+; SMALL64-O0-NEXT:    ld r0, 16(r1)
+; SMALL64-O0-NEXT:    mtlr r0
+; SMALL64-O0-NEXT:    blr
+;
+; LARGE64-O0-LABEL: storeDouble:
+; LARGE64-O0:       # %bb.0: # %entry
+; LARGE64-O0-NEXT:    mflr r0
+; LARGE64-O0-NEXT:    stdu r1, -64(r1)
+; LARGE64-O0-NEXT:    std r0, 80(r1)
+; LARGE64-O0-NEXT:    addis r3, L..C3 at u(r2)
+; LARGE64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; LARGE64-O0-NEXT:    addis r3, L..C1 at u(r2)
+; LARGE64-O0-NEXT:    ld r3, L..C1 at l(r3)
+; LARGE64-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; LARGE64-O0-NEXT:    ld r4, L..C3 at l(r4)
+; LARGE64-O0-NEXT:    add r3, r3, r4
+; LARGE64-O0-NEXT:    stxsdx f1, 0, r3
+; LARGE64-O0-NEXT:    addi r1, r1, 64
+; LARGE64-O0-NEXT:    ld r0, 16(r1)
+; LARGE64-O0-NEXT:    mtlr r0
+; LARGE64-O0-NEXT:    blr
+;
+; SMALL32-O0-LABEL: storeDouble:
+; SMALL32-O0:       # %bb.0: # %entry
+; SMALL32-O0-NEXT:    mflr r0
+; SMALL32-O0-NEXT:    stwu r1, -32(r1)
+; SMALL32-O0-NEXT:    stw r0, 40(r1)
+; SMALL32-O0-NEXT:    lwz r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL32-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-O0-NEXT:    lwz r4, L..C3(r2) # target-flags(ppc-tlsld) @TLDouble
+; SMALL32-O0-NEXT:    stfdx f1, r3, r4
+; SMALL32-O0-NEXT:    addi r1, r1, 32
+; SMALL32-O0-NEXT:    lwz r0, 8(r1)
+; SMALL32-O0-NEXT:    mtlr r0
+; SMALL32-O0-NEXT:    blr
+;
+; LARGE32-O0-LABEL: storeDouble:
+; LARGE32-O0:       # %bb.0: # %entry
+; LARGE32-O0-NEXT:    mflr r0
+; LARGE32-O0-NEXT:    stwu r1, -32(r1)
+; LARGE32-O0-NEXT:    stw r0, 40(r1)
+; LARGE32-O0-NEXT:    addis r3, L..C3 at u(r2)
+; LARGE32-O0-NEXT:    stw r3, 28(r1) # 4-byte Folded Spill
+; LARGE32-O0-NEXT:    addis r3, L..C1 at u(r2)
+; LARGE32-O0-NEXT:    lwz r3, L..C1 at l(r3)
+; LARGE32-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-O0-NEXT:    lwz r4, 28(r1) # 4-byte Folded Reload
+; LARGE32-O0-NEXT:    lwz r4, L..C3 at l(r4)
+; LARGE32-O0-NEXT:    stfdx f1, r3, r4
+; LARGE32-O0-NEXT:    addi r1, r1, 32
+; LARGE32-O0-NEXT:    lwz r0, 8(r1)
+; LARGE32-O0-NEXT:    mtlr r0
+; LARGE32-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeDouble:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @TLDouble
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stxsdx f1, 0, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeDouble:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C3 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C1 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3 at l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stxsdx f1, 0, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLDouble)
+  store double %x, ptr %0, align 8
+  ret void
+}
+
+define void @storeFloat(float noundef %x) {
+; SMALL64-O0-LABEL: storeFloat:
+; SMALL64-O0:       # %bb.0: # %entry
+; SMALL64-O0-NEXT:    mflr r0
+; SMALL64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL64-O0-NEXT:    std r0, 64(r1)
+; SMALL64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-O0-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @TLFloat
+; SMALL64-O0-NEXT:    add r3, r3, r4
+; SMALL64-O0-NEXT:    stfs f1, 0(r3)
+; SMALL64-O0-NEXT:    addi r1, r1, 48
+; SMALL64-O0-NEXT:    ld r0, 16(r1)
+; SMALL64-O0-NEXT:    mtlr r0
+; SMALL64-O0-NEXT:    blr
+;
+; LARGE64-O0-LABEL: storeFloat:
+; LARGE64-O0:       # %bb.0: # %entry
+; LARGE64-O0-NEXT:    mflr r0
+; LARGE64-O0-NEXT:    stdu r1, -64(r1)
+; LARGE64-O0-NEXT:    std r0, 80(r1)
+; LARGE64-O0-NEXT:    addis r3, L..C4 at u(r2)
+; LARGE64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; LARGE64-O0-NEXT:    addis r3, L..C1 at u(r2)
+; LARGE64-O0-NEXT:    ld r3, L..C1 at l(r3)
+; LARGE64-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; LARGE64-O0-NEXT:    ld r4, L..C4 at l(r4)
+; LARGE64-O0-NEXT:    add r3, r3, r4
+; LARGE64-O0-NEXT:    stfs f1, 0(r3)
+; LARGE64-O0-NEXT:    addi r1, r1, 64
+; LARGE64-O0-NEXT:    ld r0, 16(r1)
+; LARGE64-O0-NEXT:    mtlr r0
+; LARGE64-O0-NEXT:    blr
+;
+; SMALL32-O0-LABEL: storeFloat:
+; SMALL32-O0:       # %bb.0: # %entry
+; SMALL32-O0-NEXT:    mflr r0
+; SMALL32-O0-NEXT:    stwu r1, -32(r1)
+; SMALL32-O0-NEXT:    stw r0, 40(r1)
+; SMALL32-O0-NEXT:    lwz r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL32-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-O0-NEXT:    lwz r4, L..C4(r2) # target-flags(ppc-tlsld) @TLFloat
+; SMALL32-O0-NEXT:    stfsx f1, r3, r4
+; SMALL32-O0-NEXT:    addi r1, r1, 32
+; SMALL32-O0-NEXT:    lwz r0, 8(r1)
+; SMALL32-O0-NEXT:    mtlr r0
+; SMALL32-O0-NEXT:    blr
+;
+; LARGE32-O0-LABEL: storeFloat:
+; LARGE32-O0:       # %bb.0: # %entry
+; LARGE32-O0-NEXT:    mflr r0
+; LARGE32-O0-NEXT:    stwu r1, -32(r1)
+; LARGE32-O0-NEXT:    stw r0, 40(r1)
+; LARGE32-O0-NEXT:    addis r3, L..C4 at u(r2)
+; LARGE32-O0-NEXT:    stw r3, 28(r1) # 4-byte Folded Spill
+; LARGE32-O0-NEXT:    addis r3, L..C1 at u(r2)
+; LARGE32-O0-NEXT:    lwz r3, L..C1 at l(r3)
+; LARGE32-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-O0-NEXT:    lwz r4, 28(r1) # 4-byte Folded Reload
+; LARGE32-O0-NEXT:    lwz r4, L..C4 at l(r4)
+; LARGE32-O0-NEXT:    stfsx f1, r3, r4
+; LARGE32-O0-NEXT:    addi r1, r1, 32
+; LARGE32-O0-NEXT:    lwz r0, 8(r1)
+; LARGE32-O0-NEXT:    mtlr r0
+; LARGE32-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeFloat:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @TLFloat
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfs f1, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeFloat:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C4 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C1 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4 at l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfs f1, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLFloat)
+  store float %x, ptr %0, align 4
+  ret void
+}
+
+define i32 @loadInt() {
+; SMALL64-O0-LABEL: loadInt:
+; SMALL64-O0:       # %bb.0: # %entry
+; SMALL64-O0-NEXT:    mflr r0
+; SMALL64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL64-O0-NEXT:    std r0, 64(r1)
+; SMALL64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-O0-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @TLInt
+; SMALL64-O0-NEXT:    add r3, r3, r4
+; SMALL64-O0-NEXT:    lwz r3, 0(r3)
+; SMALL64-O0-NEXT:    addi r1, r1, 48
+; SMALL64-O0-NEXT:    ld r0, 16(r1)
+; SMALL64-O0-NEXT:    mtlr r0
+; SMALL64-O0-NEXT:    blr
+;
+; LARGE64-O0-LABEL: loadInt:
+; LARGE64-O0:       # %bb.0: # %entry
+; LARGE64-O0-NEXT:    mflr r0
+; LARGE64-O0-NEXT:    stdu r1, -64(r1)
+; LARGE64-O0-NEXT:    std r0, 80(r1)
+; LARGE64-O0-NEXT:    addis r3, L..C0 at u(r2)
+; LARGE64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; LARGE64-O0-NEXT:    addis r3, L..C1 at u(r2)
+; LARGE64-O0-NEXT:    ld r3, L..C1 at l(r3)
+; LARGE64-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; LARGE64-O0-NEXT:    ld r4, L..C0 at l(r4)
+; LARGE64-O0-NEXT:    add r3, r3, r4
+; LARGE64-O0-NEXT:    lwz r3, 0(r3)
+; LARGE64-O0-NEXT:    addi r1, r1, 64
+; LARGE64-O0-NEXT:    ld r0, 16(r1)
+; LARGE64-O0-NEXT:    mtlr r0
+; LARGE64-O0-NEXT:    blr
+;
+; SMALL32-O0-LABEL: loadInt:
+; SMALL32-O0:       # %bb.0: # %entry
+; SMALL32-O0-NEXT:    mflr r0
+; SMALL32-O0-NEXT:    stwu r1, -32(r1)
+; SMALL32-O0-NEXT:    stw r0, 40(r1)
+; SMALL32-O0-NEXT:    lwz r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL32-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-O0-NEXT:    lwz r4, L..C1(r2) # target-flags(ppc-tlsld) @TLInt
+; SMALL32-O0-NEXT:    lwzx r3, r3, r4
+; SMALL32-O0-NEXT:    addi r1, r1, 32
+; SMALL32-O0-NEXT:    lwz r0, 8(r1)
+; SMALL32-O0-NEXT:    mtlr r0
+; SMALL32-O0-NEXT:    blr
+;
+; LARGE32-O0-LABEL: loadInt:
+; LARGE32-O0:       # %bb.0: # %entry
+; LARGE32-O0-NEXT:    mflr r0
+; LARGE32-O0-NEXT:    stwu r1, -32(r1)
+; LARGE32-O0-NEXT:    stw r0, 40(r1)
+; LARGE32-O0-NEXT:    addis r3, L..C0 at u(r2)
+; LARGE32-O0-NEXT:    stw r3, 28(r1) # 4-byte Folded Spill
+; LARGE32-O0-NEXT:    addis r3, L..C1 at u(r2)
+; LARGE32-O0-NEXT:    lwz r3, L..C1 at l(r3)
+; LARGE32-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-O0-NEXT:    lwz r4, 28(r1) # 4-byte Folded Reload
+; LARGE32-O0-NEXT:    lwz r4, L..C0 at l(r4)
+; LARGE32-O0-NEXT:    lwzx r3, r3, r4
+; LARGE32-O0-NEXT:    addi r1, r1, 32
+; LARGE32-O0-NEXT:    lwz r0, 8(r1)
+; LARGE32-O0-NEXT:    mtlr r0
+; LARGE32-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadInt:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @TLInt
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwz r3, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadInt:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C1 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C0 at l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwz r3, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLInt)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define i32 @loadLongLong() {
+; SMALL64-O0-LABEL: loadLongLong:
+; SMALL64-O0:       # %bb.0: # %entry
+; SMALL64-O0-NEXT:    mflr r0
+; SMALL64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL64-O0-NEXT:    std r0, 64(r1)
+; SMALL64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-O0-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @TLLongLong
+; SMALL64-O0-NEXT:    add r3, r3, r4
+; SMALL64-O0-NEXT:    ld r3, 0(r3)
+; SMALL64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; SMALL64-O0-NEXT:    clrldi r3, r3, 32
+; SMALL64-O0-NEXT:    addi r1, r1, 48
+; SMALL64-O0-NEXT:    ld r0, 16(r1)
+; SMALL64-O0-NEXT:    mtlr r0
+; SMALL64-O0-NEXT:    blr
+;
+; LARGE64-O0-LABEL: loadLongLong:
+; LARGE64-O0:       # %bb.0: # %entry
+; LARGE64-O0-NEXT:    mflr r0
+; LARGE64-O0-NEXT:    stdu r1, -64(r1)
+; LARGE64-O0-NEXT:    std r0, 80(r1)
+; LARGE64-O0-NEXT:    addis r3, L..C2 at u(r2)
+; LARGE64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; LARGE64-O0-NEXT:    addis r3, L..C1 at u(r2)
+; LARGE64-O0-NEXT:    ld r3, L..C1 at l(r3)
+; LARGE64-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; LARGE64-O0-NEXT:    ld r4, L..C2 at l(r4)
+; LARGE64-O0-NEXT:    add r3, r3, r4
+; LARGE64-O0-NEXT:    ld r3, 0(r3)
+; LARGE64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; LARGE64-O0-NEXT:    clrldi r3, r3, 32
+; LARGE64-O0-NEXT:    addi r1, r1, 64
+; LARGE64-O0-NEXT:    ld r0, 16(r1)
+; LARGE64-O0-NEXT:    mtlr r0
+; LARGE64-O0-NEXT:    blr
+;
+; SMALL32-O0-LABEL: loadLongLong:
+; SMALL32-O0:       # %bb.0: # %entry
+; SMALL32-O0-NEXT:    mflr r0
+; SMALL32-O0-NEXT:    stwu r1, -32(r1)
+; SMALL32-O0-NEXT:    stw r0, 40(r1)
+; SMALL32-O0-NEXT:    lwz r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL32-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-O0-NEXT:    lwz r4, L..C2(r2) # target-flags(ppc-tlsld) @TLLongLong
+; SMALL32-O0-NEXT:    add r3, r3, r4
+; SMALL32-O0-NEXT:    lwz r3, 4(r3)
+; SMALL32-O0-NEXT:    addi r1, r1, 32
+; SMALL32-O0-NEXT:    lwz r0, 8(r1)
+; SMALL32-O0-NEXT:    mtlr r0
+; SMALL32-O0-NEXT:    blr
+;
+; LARGE32-O0-LABEL: loadLongLong:
+; LARGE32-O0:       # %bb.0: # %entry
+; LARGE32-O0-NEXT:    mflr r0
+; LARGE32-O0-NEXT:    stwu r1, -32(r1)
+; LARGE32-O0-NEXT:    stw r0, 40(r1)
+; LARGE32-O0-NEXT:    addis r3, L..C2 at u(r2)
+; LARGE32-O0-NEXT:    stw r3, 28(r1) # 4-byte Folded Spill
+; LARGE32-O0-NEXT:    addis r3, L..C1 at u(r2)
+; LARGE32-O0-NEXT:    lwz r3, L..C1 at l(r3)
+; LARGE32-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-O0-NEXT:    lwz r4, 28(r1) # 4-byte Folded Reload
+; LARGE32-O0-NEXT:    lwz r4, L..C2 at l(r4)
+; LARGE32-O0-NEXT:    add r3, r3, r4
+; LARGE32-O0-NEXT:    lwz r3, 4(r3)
+; LARGE32-O0-NEXT:    addi r1, r1, 32
+; LARGE32-O0-NEXT:    lwz r0, 8(r1)
+; LARGE32-O0-NEXT:    mtlr r0
+; LARGE32-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadLongLong:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @TLLongLong
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    clrldi r3, r3, 32
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadLongLong:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C2 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C1 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2 at l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    clrldi r3, r3, 32
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLLongLong)
+  %1 = load i64, ptr %0, align 8
+  %conv = trunc i64 %1 to i32
+  ret i32 %conv
+}
+
+define i32 @loadDouble() {
+; SMALL64-O0-LABEL: loadDouble:
+; SMALL64-O0:       # %bb.0: # %entry
+; SMALL64-O0-NEXT:    mflr r0
+; SMALL64-O0-NEXT:    stdu r1, -64(r1)
+; SMALL64-O0-NEXT:    std r0, 80(r1)
+; SMALL64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-O0-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @TLDouble
+; SMALL64-O0-NEXT:    lfdx f0, r3, r4
+; SMALL64-O0-NEXT:    xscvdpsxws f0, f0
+; SMALL64-O0-NEXT:    addi r3, r1, 52
+; SMALL64-O0-NEXT:    stfiwx f0, 0, r3
+; SMALL64-O0-NEXT:    lwz r3, 52(r1)
+; SMALL64-O0-NEXT:    clrldi r3, r3, 32
+; SMALL64-O0-NEXT:    addi r1, r1, 64
+; SMALL64-O0-NEXT:    ld r0, 16(r1)
+; SMALL64-O0-NEXT:    mtlr r0
+; SMALL64-O0-NEXT:    blr
+;
+; LARGE64-O0-LABEL: loadDouble:
+; LARGE64-O0:       # %bb.0: # %entry
+; LARGE64-O0-NEXT:    mflr r0
+; LARGE64-O0-NEXT:    stdu r1, -80(r1)
+; LARGE64-O0-NEXT:    std r0, 96(r1)
+; LARGE64-O0-NEXT:    addis r3, L..C3 at u(r2)
+; LARGE64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; LARGE64-O0-NEXT:    addis r3, L..C1 at u(r2)
+; LARGE64-O0-NEXT:    ld r3, L..C1 at l(r3)
+; LARGE64-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; LARGE64-O0-NEXT:    ld r4, L..C3 at l(r4)
+; LARGE64-O0-NEXT:    lfdx f0, r3, r4
+; LARGE64-O0-NEXT:    xscvdpsxws f0, f0
+; LARGE64-O0-NEXT:    addi r3, r1, 68
+; LARGE64-O0-NEXT:    stfiwx f0, 0, r3
+; LARGE64-O0-NEXT:    lwz r3, 68(r1)
+; LARGE64-O0-NEXT:    clrldi r3, r3, 32
+; LARGE64-O0-NEXT:    addi r1, r1, 80
+; LARGE64-O0-NEXT:    ld r0, 16(r1)
+; LARGE64-O0-NEXT:    mtlr r0
+; LARGE64-O0-NEXT:    blr
+;
+; SMALL32-O0-LABEL: loadDouble:
+; SMALL32-O0:       # %bb.0: # %entry
+; SMALL32-O0-NEXT:    mflr r0
+; SMALL32-O0-NEXT:    stwu r1, -32(r1)
+; SMALL32-O0-NEXT:    stw r0, 40(r1)
+; SMALL32-O0-NEXT:    lwz r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL32-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-O0-NEXT:    lwz r4, L..C3(r2) # target-flags(ppc-tlsld) @TLDouble
+; SMALL32-O0-NEXT:    lfdx f0, r3, r4
+; SMALL32-O0-NEXT:    xscvdpsxws f0, f0
+; SMALL32-O0-NEXT:    addi r3, r1, 28
+; SMALL32-O0-NEXT:    stfiwx f0, 0, r3
+; SMALL32-O0-NEXT:    lwz r3, 28(r1)
+; SMALL32-O0-NEXT:    addi r1, r1, 32
+; SMALL32-O0-NEXT:    lwz r0, 8(r1)
+; SMALL32-O0-NEXT:    mtlr r0
+; SMALL32-O0-NEXT:    blr
+;
+; LARGE32-O0-LABEL: loadDouble:
+; LARGE32-O0:       # %bb.0: # %entry
+; LARGE32-O0-NEXT:    mflr r0
+; LARGE32-O0-NEXT:    stwu r1, -32(r1)
+; LARGE32-O0-NEXT:    stw r0, 40(r1)
+; LARGE32-O0-NEXT:    addis r3, L..C3 at u(r2)
+; LARGE32-O0-NEXT:    stw r3, 24(r1) # 4-byte Folded Spill
+; LARGE32-O0-NEXT:    addis r3, L..C1 at u(r2)
+; LARGE32-O0-NEXT:    lwz r3, L..C1 at l(r3)
+; LARGE32-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-O0-NEXT:    lwz r4, 24(r1) # 4-byte Folded Reload
+; LARGE32-O0-NEXT:    lwz r4, L..C3 at l(r4)
+; LARGE32-O0-NEXT:    lfdx f0, r3, r4
+; LARGE32-O0-NEXT:    xscvdpsxws f0, f0
+; LARGE32-O0-NEXT:    addi r3, r1, 28
+; LARGE32-O0-NEXT:    stfiwx f0, 0, r3
+; LARGE32-O0-NEXT:    lwz r3, 28(r1)
+; LARGE32-O0-NEXT:    addi r1, r1, 32
+; LARGE32-O0-NEXT:    lwz r0, 8(r1)
+; LARGE32-O0-NEXT:    mtlr r0
+; LARGE32-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadDouble:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @TLDouble
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfdx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xscvdpsxws f0, f0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r1, 52
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfiwx f0, 0, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwz r3, 52(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    clrldi r3, r3, 32
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadDouble:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 96(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C3 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C1 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3 at l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfdx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xscvdpsxws f0, f0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r1, 68
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfiwx f0, 0, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwz r3, 68(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    clrldi r3, r3, 32
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 80
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLDouble)
+  %1 = load double, ptr %0, align 8
+  %conv = fptosi double %1 to i32
+  ret i32 %conv
+}
+
+define i32 @loadFloat() {
+; SMALL64-O0-LABEL: loadFloat:
+; SMALL64-O0:       # %bb.0: # %entry
+; SMALL64-O0-NEXT:    mflr r0
+; SMALL64-O0-NEXT:    stdu r1, -64(r1)
+; SMALL64-O0-NEXT:    std r0, 80(r1)
+; SMALL64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-O0-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @TLFloat
+; SMALL64-O0-NEXT:    add r3, r3, r4
+; SMALL64-O0-NEXT:    lfs f0, 0(r3)
+; SMALL64-O0-NEXT:    fctiwz f0, f0
+; SMALL64-O0-NEXT:    stfd f0, 56(r1)
+; SMALL64-O0-NEXT:    lwa r3, 60(r1)
+; SMALL64-O0-NEXT:    clrldi r3, r3, 32
+; SMALL64-O0-NEXT:    addi r1, r1, 64
+; SMALL64-O0-NEXT:    ld r0, 16(r1)
+; SMALL64-O0-NEXT:    mtlr r0
+; SMALL64-O0-NEXT:    blr
+;
+; LARGE64-O0-LABEL: loadFloat:
+; LARGE64-O0:       # %bb.0: # %entry
+; LARGE64-O0-NEXT:    mflr r0
+; LARGE64-O0-NEXT:    stdu r1, -64(r1)
+; LARGE64-O0-NEXT:    std r0, 80(r1)
+; LARGE64-O0-NEXT:    addis r3, L..C4 at u(r2)
+; LARGE64-O0-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
+; LARGE64-O0-NEXT:    addis r3, L..C1 at u(r2)
+; LARGE64-O0-NEXT:    ld r3, L..C1 at l(r3)
+; LARGE64-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-O0-NEXT:    ld r4, 48(r1) # 8-byte Folded Reload
+; LARGE64-O0-NEXT:    ld r4, L..C4 at l(r4)
+; LARGE64-O0-NEXT:    add r3, r3, r4
+; LARGE64-O0-NEXT:    lfs f0, 0(r3)
+; LARGE64-O0-NEXT:    fctiwz f0, f0
+; LARGE64-O0-NEXT:    stfd f0, 56(r1)
+; LARGE64-O0-NEXT:    lwa r3, 60(r1)
+; LARGE64-O0-NEXT:    clrldi r3, r3, 32
+; LARGE64-O0-NEXT:    addi r1, r1, 64
+; LARGE64-O0-NEXT:    ld r0, 16(r1)
+; LARGE64-O0-NEXT:    mtlr r0
+; LARGE64-O0-NEXT:    blr
+;
+; SMALL32-O0-LABEL: loadFloat:
+; SMALL32-O0:       # %bb.0: # %entry
+; SMALL32-O0-NEXT:    mflr r0
+; SMALL32-O0-NEXT:    stwu r1, -32(r1)
+; SMALL32-O0-NEXT:    stw r0, 40(r1)
+; SMALL32-O0-NEXT:    lwz r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL32-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-O0-NEXT:    lwz r4, L..C4(r2) # target-flags(ppc-tlsld) @TLFloat
+; SMALL32-O0-NEXT:    lfsx f0, r3, r4
+; SMALL32-O0-NEXT:    xscvdpsxws f0, f0
+; SMALL32-O0-NEXT:    addi r3, r1, 28
+; SMALL32-O0-NEXT:    stfiwx f0, 0, r3
+; SMALL32-O0-NEXT:    lwz r3, 28(r1)
+; SMALL32-O0-NEXT:    addi r1, r1, 32
+; SMALL32-O0-NEXT:    lwz r0, 8(r1)
+; SMALL32-O0-NEXT:    mtlr r0
+; SMALL32-O0-NEXT:    blr
+;
+; LARGE32-O0-LABEL: loadFloat:
+; LARGE32-O0:       # %bb.0: # %entry
+; LARGE32-O0-NEXT:    mflr r0
+; LARGE32-O0-NEXT:    stwu r1, -32(r1)
+; LARGE32-O0-NEXT:    stw r0, 40(r1)
+; LARGE32-O0-NEXT:    addis r3, L..C4 at u(r2)
+; LARGE32-O0-NEXT:    stw r3, 24(r1) # 4-byte Folded Spill
+; LARGE32-O0-NEXT:    addis r3, L..C1 at u(r2)
+; LARGE32-O0-NEXT:    lwz r3, L..C1 at l(r3)
+; LARGE32-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-O0-NEXT:    lwz r4, 24(r1) # 4-byte Folded Reload
+; LARGE32-O0-NEXT:    lwz r4, L..C4 at l(r4)
+; LARGE32-O0-NEXT:    lfsx f0, r3, r4
+; LARGE32-O0-NEXT:    xscvdpsxws f0, f0
+; LARGE32-O0-NEXT:    addi r3, r1, 28
+; LARGE32-O0-NEXT:    stfiwx f0, 0, r3
+; LARGE32-O0-NEXT:    lwz r3, 28(r1)
+; LARGE32-O0-NEXT:    addi r1, r1, 32
+; LARGE32-O0-NEXT:    lwz r0, 8(r1)
+; LARGE32-O0-NEXT:    mtlr r0
+; LARGE32-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadFloat:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @TLFloat
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfs f0, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    fctiwz f0, f0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfd f0, 56(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwa r3, 60(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    clrldi r3, r3, 32
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadFloat:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C4 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C1 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, 48(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4 at l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfs f0, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    fctiwz f0, f0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfd f0, 56(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwa r3, 60(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    clrldi r3, r3, 32
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLFloat)
+  %1 = load float, ptr %0, align 4
+  %conv = fptosi float %1 to i32
+  ret i32 %conv
+}
+
+; TOC Entry Checks.
+
+; SMALL64-O0-LABEL: .toc
+; SMALL64-O0-LABEL:L..C0:
+; SMALL64-O0-NEXT:	.tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL64-O0-NEXT:	.rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL64-O0-LABEL:L..C1:
+; SMALL64-O0-NEXT:	.tc TLInt[TC],TLInt[UL]@ld
+; SMALL64-O0-LABEL:L..C2:
+; SMALL64-O0-NEXT:	.tc TLLongLong[TC],TLLongLong[UL]@ld
+; SMALL64-O0-LABEL:L..C3:
+; SMALL64-O0-NEXT:	.tc TLDouble[TC],TLDouble[UL]@ld
+; SMALL64-O0-LABEL:L..C4:
+; SMALL64-O0-NEXT:	.tc TLFloat[TC],TLFloat[UL]@ld
+
+; LARGE64-O0-LABEL: .toc
+; LARGE64-O0-LABEL:L..C0:
+; LARGE64-O0-NEXT:	.tc TLInt[TE],TLInt[UL]@ld
+; LARGE64-O0-LABEL:L..C1:
+; LARGE64-O0-NEXT:	.tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE64-O0-NEXT:	.rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE64-O0-LABEL:L..C2:
+; LARGE64-O0-NEXT:	.tc TLLongLong[TE],TLLongLong[UL]@ld
+; LARGE64-O0-LABEL:L..C3:
+; LARGE64-O0-NEXT:	.tc TLDouble[TE],TLDouble[UL]@ld
+; LARGE64-O0-LABEL:L..C4:
+; LARGE64-O0-NEXT:	.tc TLFloat[TE],TLFloat[UL]@ld
+
+; SMALL32-O0-LABEL: .toc
+; SMALL32-O0-LABEL:L..C0:
+; SMALL32-O0-NEXT:	.tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL32-O0-NEXT:	.rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL32-O0-LABEL:L..C1:
+; SMALL32-O0-NEXT:	.tc TLInt[TC],TLInt[UL]@ld
+; SMALL32-O0-LABEL:L..C2:
+; SMALL32-O0-NEXT:	.tc TLLongLong[TC],TLLongLong[UL]@ld
+; SMALL32-O0-LABEL:L..C3:
+; SMALL32-O0-NEXT:	.tc TLDouble[TC],TLDouble[UL]@ld
+; SMALL32-O0-LABEL:L..C4:
+; SMALL32-O0-NEXT:	.tc TLFloat[TC],TLFloat[UL]@ld
+
+; LARGE32-O0-LABEL: .toc
+; LARGE32-O0-LABEL:L..C0:
+; LARGE32-O0-NEXT:	.tc TLInt[TE],TLInt[UL]@ld
+; LARGE32-O0-LABEL:L..C1:
+; LARGE32-O0-NEXT:	.tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE32-O0-NEXT:	.rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE32-O0-LABEL:L..C2:
+; LARGE32-O0-NEXT:	.tc TLLongLong[TE],TLLongLong[UL]@ld
+; LARGE32-O0-LABEL:L..C3:
+; LARGE32-O0-NEXT:	.tc TLDouble[TE],TLDouble[UL]@ld
+; LARGE32-O0-LABEL:L..C4:
+; LARGE32-O0-NEXT:	.tc TLFloat[TE],TLFloat[UL]@ld



More information about the llvm-commits mailing list