[llvm] [PowerPC][NFC] Add base test case for small-local-dynamic-tls on AIX (PR #84711)

Felix via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 21 20:53:00 PDT 2024


https://github.com/orcguru updated https://github.com/llvm/llvm-project/pull/84711

>From 2af2a4c0ef58c17a98a50b784fa2add25cd54ab4 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Sun, 10 Mar 2024 23:38:27 -0400
Subject: [PATCH 1/4] [PowerPC][NFC] Add base test case for
 small-local-dynamic-tls on AIX

---
 .../aix-small-local-dynamic-tls-char.ll       |  336 ++++++
 .../aix-small-local-dynamic-tls-double.ll     |  339 ++++++
 .../aix-small-local-dynamic-tls-float.ll      |  339 ++++++
 .../aix-small-local-dynamic-tls-int.ll        |  375 ++++++
 ...aix-small-local-dynamic-tls-largeaccess.ll |  363 ++++++
 .../aix-small-local-dynamic-tls-longlong.ll   |  368 ++++++
 .../aix-small-local-dynamic-tls-short.ll      |  336 ++++++
 .../CodeGen/PowerPC/aix-tls-ld-ldst-O0.ll     | 1031 +++++++++++++++++
 8 files changed, 3487 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-char.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-double.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-float.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-int.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-longlong.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-short.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-tls-ld-ldst-O0.ll

diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-char.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-char.ll
new file mode 100644
index 00000000000000..6fb8683330a303
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-char.ll
@@ -0,0 +1,336 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
+; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN:      < %s | FileCheck %s \
+; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
+
+@ThreadLocalVarInit = thread_local(localdynamic) global i8 1, align 1 ; local-dynamic TLS byte, non-internal linkage
+@VarInit = local_unnamed_addr global i8 87, align 1 ; ordinary (non-TLS) global read by loadITLInit2/loadTLInit2
+@IThreadLocalVarInit = internal thread_local(localdynamic) global i8 1, align 1 ; local-dynamic TLS byte, internal linkage
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
+@c = thread_local(localdynamic) global [87 x i8] zeroinitializer, align 1 ; local-dynamic TLS array addressed by AddrTest1
+
+define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @c
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r3, 1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C1@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r3, 1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; returns the address of element 1 of the local-dynamic TLS array @c
+  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @c) ; per-thread base address of @c
+  %arrayidx = getelementptr inbounds [87 x i8], ptr %0, i64 0, i64 1 ; index 1 of i8 -> the "addi r3, r3, 1" in the checks
+  ret ptr %arrayidx
+}
+
+define void @storeITLInit(i8 noundef zeroext %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeITLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stbx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeITLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stbx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; stores %x into the internal local-dynamic TLS byte
+  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit) ; per-thread address of the variable
+  store i8 %x, ptr %0, align 1 ; checks expect an indexed byte store (stbx) of the saved argument
+  ret void
+}
+
+define void @storeTLInit(i8 noundef zeroext %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeTLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stbx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeTLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stbx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; stores %x into the non-internal local-dynamic TLS byte
+  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalVarInit) ; per-thread address of the variable
+  store i8 %x, ptr %0, align 1 ; checks expect an indexed byte store (stbx) of the saved argument
+  ret void
+}
+
+define zeroext i8 @loadITLInit() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lbzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lbzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; returns the internal local-dynamic TLS byte
+  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit) ; per-thread address of the variable
+  %1 = load i8, ptr %0, align 1 ; checks expect an indexed zero-extending byte load (lbzx)
+  ret i8 %1
+}
+
+define zeroext i8 @loadITLInit2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lbzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # @VarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lbz r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    clrldi r3, r3, 56
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lbzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C4@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lbz r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    clrldi r3, r3, 56
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; returns the internal TLS byte plus the ordinary global @VarInit
+  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit) ; per-thread address of the variable
+  %1 = load i8, ptr %0, align 1 ; TLS operand (lbzx in the checks)
+  %2 = load i8, ptr @VarInit, align 1 ; non-TLS operand, reached through its own TOC entry (L..C4)
+  %add = add i8 %2, %1 ; i8 sum; the checks clear the upper 56 bits (clrldi) for the zeroext return
+  ret i8 %add
+}
+
+define zeroext i8 @loadTLInit() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lbzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lbzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; returns the non-internal local-dynamic TLS byte
+  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalVarInit) ; per-thread address of the variable
+  %1 = load i8, ptr %0, align 1 ; checks expect an indexed zero-extending byte load (lbzx)
+  ret i8 %1
+}
+
+define zeroext i8 @loadTLInit2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lbzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # @VarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lbz r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    clrldi r3, r3, 56
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lbzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C4@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lbz r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    clrldi r3, r3, 56
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; returns the non-internal TLS byte plus the ordinary global @VarInit
+  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalVarInit) ; per-thread address of the variable
+  %1 = load i8, ptr %0, align 1 ; TLS operand (lbzx in the checks)
+  %2 = load i8, ptr @VarInit, align 1 ; non-TLS operand, reached through its own TOC entry (L..C4)
+  %add = add i8 %2, %1 ; i8 sum; the checks clear the upper 56 bits (clrldi) for the zeroext return
+  ret i8 %add
+}
+
+define void @loadStore1(i8 noundef zeroext %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadStore1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lbzx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r5, r5, 9
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stbx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadStore1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lbzx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r5, r5, 9
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stbx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; read-modify-write of the internal TLS byte (adds 9); the %x argument is not used by the body
+  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit) ; one address computation shared by load and store
+  %1 = load i8, ptr %0, align 1
+  %add = add i8 %1, 9
+  store i8 %add, ptr %0, align 1 ; checks expect a single .__tls_get_mod call feeding both lbzx and stbx
+  ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-double.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-double.ll
new file mode 100644
index 00000000000000..3dfc0aa6d4aff1
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-double.ll
@@ -0,0 +1,339 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
+; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN:      < %s | FileCheck %s \
+; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
+
+@ThreadLocalVarInit = thread_local(localdynamic) global double 1.000000e+00, align 8 ; local-dynamic TLS double, non-internal linkage
+@VarInit = local_unnamed_addr global double 8.700000e+01, align 8 ; ordinary (non-TLS) global read by loadITLInit2/loadTLInit2
+@IThreadLocalVarInit = internal thread_local(localdynamic) global double 1.000000e+00, align 8 ; local-dynamic TLS double, internal linkage
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
+@f = thread_local(localdynamic) global [87 x double] zeroinitializer, align 8 ; local-dynamic TLS array addressed by AddrTest1
+
+define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @f
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r3, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C1@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r3, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; returns the address of element 6 of the local-dynamic TLS array @f
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @f) ; per-thread base address of @f
+  %arrayidx = getelementptr inbounds [87 x double], ptr %0, i64 0, i64 6 ; index 6 of 8-byte doubles -> the "addi r3, r3, 48" in the checks
+  ret ptr %arrayidx
+}
+
+define void @storeITLInit(double noundef %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeITLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfdx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeITLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfdx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; stores %x into the internal local-dynamic TLS double
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) ; per-thread address of the variable
+  store double %x, ptr %0, align 8 ; checks expect an indexed FP store (stfdx) of f1
+  ret void
+}
+
+define void @storeTLInit(double noundef %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeTLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfdx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeTLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfdx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; stores %x into the non-internal local-dynamic TLS double
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) ; per-thread address of the variable
+  store double %x, ptr %0, align 8 ; checks expect an indexed FP store (stfdx) of f1
+  ret void
+}
+
+define double @loadITLInit() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfdx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfdx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; returns the internal local-dynamic TLS double
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) ; per-thread address of the variable
+  %1 = load double, ptr %0, align 8 ; checks expect an indexed FP load (lfdx) into f1
+  ret double %1
+}
+
+define double @loadITLInit2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfdx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C4(r2) # @VarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfd f1, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xsadddp f1, f0, f1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfdx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C4@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C4@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfd f1, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xsadddp f1, f0, f1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; returns the internal TLS double plus the ordinary global @VarInit
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) ; per-thread address of the variable
+  %1 = load double, ptr %0, align 8 ; TLS operand (lfdx in the checks)
+  %2 = load double, ptr @VarInit, align 8 ; non-TLS operand, reached through its own TOC entry (L..C4)
+  %add = fadd double %1, %2 ; checks expect a scalar double add (xsadddp)
+  ret double %add
+}
+
+define double @loadTLInit() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfdx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfdx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; returns the non-internal local-dynamic TLS double
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) ; per-thread address of the variable
+  %1 = load double, ptr %0, align 8 ; checks expect an indexed FP load (lfdx) into f1
+  ret double %1
+}
+
+define double @loadTLInit2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfdx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C4(r2) # @VarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfd f1, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xsadddp f1, f0, f1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfdx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C4@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C4@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfd f1, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xsadddp f1, f0, f1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; returns the sum of the value read through the TLS address and the value of @VarInit
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit) ; address of @ThreadLocalVarInit for the calling thread
+  %1 = load double, ptr %0, align 8
+  %2 = load double, ptr @VarInit, align 8 ; @VarInit is read directly, without the TLS intrinsic
+  %add = fadd double %1, %2
+  ret double %add
+}
+
+define void @loadStore1(double noundef %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadStore1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    vspltisw v2, 1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    vspltisw v3, 8
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xvcvsxwdp vs0, vs34
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfdx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xsadddp f0, f1, f0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xvcvsxwdp vs1, vs35
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xsadddp f0, f0, f1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfdx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadStore1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    vspltisw v2, 1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    vspltisw v3, 8
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xvcvsxwdp vs0, vs34
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfdx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xsadddp f0, f1, f0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xvcvsxwdp vs1, vs35
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xsadddp f0, f0, f1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfdx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; read-modify-write through one TLS address; the %x argument is not referenced in the body
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit) ; address of @IThreadLocalVarInit for the calling thread
+  %1 = load double, ptr %0, align 8
+  %inc = fadd double %1, 1.000000e+00
+  %add = fadd double %inc, 8.000000e+00 ; two separate fadds (1.0 then 8.0), matching the two xsadddp in the checks
+  store double %add, ptr %0, align 8 ; written back through the same pointer %0 that was loaded from
+  ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-float.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-float.ll
new file mode 100644
index 00000000000000..69a4baa1d19bb8
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-float.ll
@@ -0,0 +1,339 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
+; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN:      < %s | FileCheck %s \
+; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
+
+@ThreadLocalVarInit = thread_local(localdynamic) global float 1.000000e+00, align 4 ; externally visible local-dynamic TLS float
+@VarInit = local_unnamed_addr global float 8.700000e+01, align 4 ; ordinary (non-thread_local) global
+@IThreadLocalVarInit = internal thread_local(localdynamic) global float 1.000000e+00, align 4 ; internal-linkage local-dynamic TLS float
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
+@e = thread_local(localdynamic) global [87 x float] zeroinitializer, align 4 ; local-dynamic TLS array indexed by AddrTest1
+
+define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @e
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r3, 16
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C1@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r3, 16
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; address test: returns a pointer to element 4 of the thread-local array @e
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @e) ; base address of @e for the calling thread
+  %arrayidx = getelementptr inbounds [87 x float], ptr %0, i64 0, i64 4 ; element 4 of 4-byte floats, the +16 in the expected "addi r3, r3, 16"
+  ret ptr %arrayidx
+}
+
+define void @storeITLInit(float noundef %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeITLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfsx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeITLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfsx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; store test: writes %x to the internal-linkage TLS variable @IThreadLocalVarInit
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) ; address of @IThreadLocalVarInit for the calling thread
+  store float %x, ptr %0, align 4
+  ret void
+}
+
+define void @storeTLInit(float noundef %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeTLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfsx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeTLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfsx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; store test: writes %x to the externally visible TLS variable @ThreadLocalVarInit
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) ; address of @ThreadLocalVarInit for the calling thread
+  store float %x, ptr %0, align 4
+  ret void
+}
+
+define float @loadITLInit() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfsx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfsx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; load test: returns the value of the internal-linkage TLS variable @IThreadLocalVarInit
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) ; address of @IThreadLocalVarInit for the calling thread
+  %1 = load float, ptr %0, align 4
+  ret float %1
+}
+
+define float @loadITLInit2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfsx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C4(r2) # @VarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfs f1, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    fadds f1, f0, f1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfsx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C4@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C4@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfs f1, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    fadds f1, f0, f1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; returns the sum of the internal TLS variable and the non-TLS global @VarInit
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) ; address of @IThreadLocalVarInit for the calling thread
+  %1 = load float, ptr %0, align 4
+  %2 = load float, ptr @VarInit, align 4 ; @VarInit is read directly, without the TLS intrinsic
+  %add = fadd float %1, %2
+  ret float %add
+}
+
+define float @loadTLInit() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfsx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfsx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; load test: returns the value of the externally visible TLS variable @ThreadLocalVarInit
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) ; address of @ThreadLocalVarInit for the calling thread
+  %1 = load float, ptr %0, align 4
+  ret float %1
+}
+
+define float @loadTLInit2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfsx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C4(r2) # @VarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfs f1, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    fadds f1, f0, f1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfsx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C4@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C4@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfs f1, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    fadds f1, f0, f1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; returns the sum of the TLS variable @ThreadLocalVarInit and the non-TLS global @VarInit
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) ; address of @ThreadLocalVarInit for the calling thread
+  %1 = load float, ptr %0, align 4
+  %2 = load float, ptr @VarInit, align 4 ; @VarInit is read directly, without the TLS intrinsic
+  %add = fadd float %1, %2
+  ret float %add
+}
+
+define void @loadStore1(float noundef %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadStore1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    vspltisw v2, 1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    vspltisw v3, 8
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xvcvsxwdp vs0, vs34
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfsx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    fadds f0, f1, f0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xvcvsxwdp vs1, vs35
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    fadds f0, f0, f1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfsx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadStore1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    vspltisw v2, 1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    vspltisw v3, 8
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xvcvsxwdp vs0, vs34
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfsx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    fadds f0, f1, f0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xvcvsxwdp vs1, vs35
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    fadds f0, f0, f1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfsx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; read-modify-write of the internal TLS variable; the %x argument is not referenced in the body
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) ; address of @IThreadLocalVarInit for the calling thread
+  %1 = load float, ptr %0, align 4
+  %inc = fadd float %1, 1.000000e+00
+  %add = fadd float %inc, 8.000000e+00 ; two separate fadds (1.0 then 8.0), matching the two fadds in the checks
+  store float %add, ptr %0, align 4 ; written back through the same pointer %0 that was loaded from
+  ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-int.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-int.ll
new file mode 100644
index 00000000000000..ff397169208f46
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-int.ll
@@ -0,0 +1,375 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
+; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN:      < %s | FileCheck %s \
+; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
+
+@ThreadLocalVarInit = thread_local(localdynamic) global i32 1, align 4 ; externally visible local-dynamic TLS i32
+@VarInit = local_unnamed_addr global i32 87, align 4 ; ordinary (non-thread_local) global
+@IThreadLocalVarInit = internal thread_local(localdynamic) global i32 1, align 4 ; internal-linkage local-dynamic TLS i32
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
+%struct.anon = type { i32 }
+@ThreadLocalStruct = thread_local(localdynamic) global %struct.anon zeroinitializer, align 1 ; align 1 TLS struct read by testUnaligned
+@a = thread_local(localdynamic) global [87 x i32] zeroinitializer, align 4 ; local-dynamic TLS array indexed by AddrTest1
+
+define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @a
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r3, 12
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C1@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r3, 12
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; address test: returns a pointer to element 3 of the thread-local array @a
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @a) ; base address of @a for the calling thread
+  %arrayidx = getelementptr inbounds [87 x i32], ptr %0, i64 0, i64 3 ; element 3 of 4-byte i32s, the +12 in the expected "addi r3, r3, 12"
+  ret ptr %arrayidx
+}
+
+define signext i32 @testUnaligned() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testUnaligned:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @ThreadLocalStruct
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testUnaligned:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; loads the i32 field of the align 1 TLS struct and returns it sign-extended (signext return)
+  %0 = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalStruct) ; address of @ThreadLocalStruct for the calling thread
+  %x = getelementptr inbounds %struct.anon, ptr %0, i32 0, i32 0 ; field 0, the struct's only member
+  %1 = load i32, ptr %x, align 1 ; 4-byte load that only promises 1-byte alignment
+  ret i32 %1
+}
+
+define void @storeITLInit(i32 noundef signext %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeITLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stwx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeITLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stwx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; store test: writes %x to the internal-linkage TLS variable @IThreadLocalVarInit
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit) ; address of @IThreadLocalVarInit for the calling thread
+  store i32 %x, ptr %0, align 4
+  ret void
+}
+
+define void @storeTLInit(i32 noundef signext %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeTLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stwx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeTLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C4@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stwx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry: ; store test: writes %x to the externally visible TLS variable @ThreadLocalVarInit
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit) ; address of @ThreadLocalVarInit for the calling thread
+  store i32 %x, ptr %0, align 4
+  ret void
+}
+
+define signext i32 @loadITLInit() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define signext i32 @loadITLInit2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C5(r2) # @VarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwz r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    extsw r3, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C5@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C5@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwz r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    extsw r3, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
+  %1 = load i32, ptr %0, align 4
+  %2 = load i32, ptr @VarInit, align 4
+  %add = add nsw i32 %2, %1
+  ret i32 %add
+}
+
+define signext i32 @loadTLInit() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C4@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define signext i32 @loadTLInit2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C5(r2) # @VarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwz r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    extsw r3, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C4@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C5@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C5@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwz r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    extsw r3, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
+  %1 = load i32, ptr %0, align 4
+  %2 = load i32, ptr @VarInit, align 4
+  %add = add nsw i32 %2, %1
+  ret i32 %add
+}
+
+define void @loadStore1(i32 noundef signext %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadStore1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwzx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r5, r5, 9
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stwx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadStore1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwzx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r5, r5, 9
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stwx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
+  %1 = load i32, ptr %0, align 4
+  %add = add nsw i32 %1, 9
+  store i32 %add, ptr %0, align 4
+  ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll
new file mode 100644
index 00000000000000..643ad01a9ae3f2
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll
@@ -0,0 +1,363 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
+; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN:      < %s | FileCheck %s \
+; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
+
+; Test disassembly of object.
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff -xcoff-traceback-table=false \
+; RUN:      --code-model=large -filetype=obj -o %t.o < %s
+; RUN: llvm-objdump -D -r --symbol-description %t.o | FileCheck -D#NFA=2 --check-prefix=DIS %s
+
+@mySmallLocalDynamicTLSv1 = thread_local(localdynamic) global [8187 x i32] zeroinitializer, align 4
+@mySmallLocalDynamicTLS2 = thread_local(localdynamic) global [4000 x i32] zeroinitializer, align 4
+@mySmallLocalDynamicTLS3 = thread_local(localdynamic) global [4000 x i32] zeroinitializer, align 4
+@mySmallLocalDynamicTLS4 = thread_local(localdynamic) global [4000 x i32] zeroinitializer, align 4
+@mySmallLocalDynamicTLS5 = thread_local(localdynamic) global [4000 x i32] zeroinitializer, align 4
+@mySmallLocalDynamicTLSv2 = thread_local(localdynamic) global [9000 x i32] zeroinitializer, align 4
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
+
+; Goal: all accesses use a "faster" local-dynamic sequence directly off the thread pointer (base test: the checks below still load each variable's offset from the TOC).
+define signext i32 @StoreArrays1() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: StoreArrays1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r6, L..C1(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS2
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r7, L..C2(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r8, L..C3(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r9, L..C4(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS5
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r5, L..C5(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLSv1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r6, r3, r6
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r7, r3, r7
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r8, r3, r8
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r9, r3, r9
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stwux r4, r3, r5
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 24(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 2
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 320(r6)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 324(r7)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 88
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 328(r8)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 332(r9)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 102
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: StoreArrays1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r8, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r9, L..C4@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r7, L..C2@l(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r8, L..C3@l(r8)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r9, L..C4@l(r9)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r5, L..C1@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C5@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r6, L..C5@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r7, r3, r7
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r8, r3, r8
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r9, r3, r9
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r6, r3, r6
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stwux r4, r3, r5
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, 24(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 2
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 320(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 324(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 88
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, 328(r8)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 332(r9)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 102
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLSv1)
+  store i32 1, ptr %0, align 4
+  %arrayidx1 = getelementptr inbounds [8187 x i32], ptr %0, i64 0, i64 6
+  store i32 4, ptr %arrayidx1, align 4
+  %1 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLS2)
+  %arrayidx2 = getelementptr inbounds [4000 x i32], ptr %1, i64 0, i64 80
+  store i32 2, ptr %arrayidx2, align 4
+  %2 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLS3)
+  %arrayidx3 = getelementptr inbounds [4000 x i32], ptr %2, i64 0, i64 81
+  store i32 3, ptr %arrayidx3, align 4
+  %3 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLS4)
+  %arrayidx4 = getelementptr inbounds [4000 x i32], ptr %3, i64 0, i64 82
+  store i32 4, ptr %arrayidx4, align 4
+  %4 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLS5)
+  %arrayidx5 = getelementptr inbounds [4000 x i32], ptr %4, i64 0, i64 83
+  store i32 88, ptr %arrayidx5, align 4
+  %5 = load i32, ptr %0, align 4
+  %6 = load i32, ptr %arrayidx1, align 4
+  %7 = load i32, ptr %arrayidx2, align 4
+  %8 = load i32, ptr %arrayidx3, align 4
+  %9 = load i32, ptr %arrayidx4, align 4
+  %add = add i32 %5, 88
+  %add9 = add i32 %add, %6
+  %add11 = add i32 %add9, %7
+  %add13 = add i32 %add11, %8
+  %add15 = add i32 %add13, %9
+  ret i32 %add15
+}
+
+; Example of one access using the regular local-dynamic access from the TOC.
+define signext i32 @StoreArrays2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: StoreArrays2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r6, L..C1(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS2
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r7, L..C2(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r8, L..C3(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r9, L..C4(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS5
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r5, L..C6(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLSv2
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r6, r3, r6
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r7, r3, r7
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r8, r3, r8
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r9, r3, r9
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stwux r4, r3, r5
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 24(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 2
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 320(r6)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 324(r7)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 88
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 328(r8)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 332(r9)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 102
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: StoreArrays2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C6@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r8, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r9, L..C4@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r7, L..C2@l(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r8, L..C3@l(r8)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r9, L..C4@l(r9)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r5, L..C6@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C5@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r6, L..C5@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r7, r3, r7
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r8, r3, r8
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r9, r3, r9
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r6, r3, r6
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stwux r4, r3, r5
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, 24(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 2
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 320(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 324(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 88
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, 328(r8)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 332(r9)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 102
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLSv2)
+  store i32 1, ptr %0, align 4
+  %arrayidx1 = getelementptr inbounds [9000 x i32], ptr %0, i64 0, i64 6
+  store i32 4, ptr %arrayidx1, align 4
+  %1 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLS2)
+  %arrayidx2 = getelementptr inbounds [4000 x i32], ptr %1, i64 0, i64 80
+  store i32 2, ptr %arrayidx2, align 4
+  %2 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLS3)
+  %arrayidx3 = getelementptr inbounds [4000 x i32], ptr %2, i64 0, i64 81
+  store i32 3, ptr %arrayidx3, align 4
+  %3 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLS4)
+  %arrayidx4 = getelementptr inbounds [4000 x i32], ptr %3, i64 0, i64 82
+  store i32 4, ptr %arrayidx4, align 4
+  %4 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLS5)
+  %arrayidx5 = getelementptr inbounds [4000 x i32], ptr %4, i64 0, i64 83
+  store i32 88, ptr %arrayidx5, align 4
+  %5 = load i32, ptr %0, align 4
+  %6 = load i32, ptr %arrayidx1, align 4
+  %7 = load i32, ptr %arrayidx2, align 4
+  %8 = load i32, ptr %arrayidx3, align 4
+  %9 = load i32, ptr %arrayidx4, align 4
+  %add = add i32 %5, 88
+  %add9 = add i32 %add, %6
+  %add11 = add i32 %add9, %7
+  %add13 = add i32 %add11, %8
+  %add15 = add i32 %add13, %9
+  ret i32 %add15
+}
+
+; DIS:      file format aix5coff64-rs6000
+; DIS:      Disassembly of section .text:
+; DIS:      0000000000000000 (idx: [[#NFA+5]]) .StoreArrays1:
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                mflr 0
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stdu 1, -48(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+15]]) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 6, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+17]]) mySmallLocalDynamicTLSv1[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 0, 64(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 7, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+19]]) mySmallLocalDynamicTLS3[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 0(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+15]]) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 8, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+21]]) mySmallLocalDynamicTLS4[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 9, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+23]]) mySmallLocalDynamicTLS5[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 7, 16(7)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+19]]) mySmallLocalDynamicTLS3[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 8, 24(8)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+21]]) mySmallLocalDynamicTLS4[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 9, 32(9)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+23]]) mySmallLocalDynamicTLS5[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0x0
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA              (idx: [[#NFA+1]]) .__tls_get_mod[PR]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 5, 8(6)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+17]]) mySmallLocalDynamicTLSv1[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 6, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+25]]) mySmallLocalDynamicTLS2[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 1
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 6, 40(6)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+25]]) mySmallLocalDynamicTLS2[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 7, 3, 7
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 8, 3, 8
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 9, 3, 9
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 6, 3, 6
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stwux 4, 3, 5
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 4
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stw 4, 24(3)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 3, 2
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stw 3, 320(6)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 3, 3
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stw 3, 324(7)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 3, 88
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stw 4, 328(8)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stw 3, 332(9)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 3, 102
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addi 1, 1, 48
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 0, 16(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                mtlr 0
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                blr
+
+; DIS:      0000000000000090 (idx: [[#NFA+7]]) .StoreArrays2:
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                mflr 0
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stdu 1, -48(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+15]]) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 6, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+27]]) mySmallLocalDynamicTLSv2[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 0, 64(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 7, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+19]]) mySmallLocalDynamicTLS3[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 0(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+15]]) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 8, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+21]]) mySmallLocalDynamicTLS4[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 9, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+23]]) mySmallLocalDynamicTLS5[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 7, 16(7)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+19]]) mySmallLocalDynamicTLS3[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 8, 24(8)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+21]]) mySmallLocalDynamicTLS4[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 9, 32(9)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+23]]) mySmallLocalDynamicTLS5[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0x0
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA              (idx: [[#NFA+1]]) .__tls_get_mod[PR]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 5, 48(6)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+27]]) mySmallLocalDynamicTLSv2[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 6, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU	        (idx: [[#NFA+25]]) mySmallLocalDynamicTLS2[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 1
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 6, 40(6)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+25]]) mySmallLocalDynamicTLS2[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 7, 3, 7
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 8, 3, 8
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 9, 3, 9
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 6, 3, 6
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stwux 4, 3, 5
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 4
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stw 4, 24(3)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 3, 2
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stw 3, 320(6)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 3, 3
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stw 3, 324(7)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 3, 88
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stw 4, 328(8)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stw 3, 332(9)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 3, 102
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addi 1, 1, 48
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 0, 16(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                mtlr 0
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                blr
+
+
+; DIS:      Disassembly of section .data:
+; DIS:      0000000000000120 (idx: [[#NFA+9]]) StoreArrays1[DS]:
+; DIS-NEXT:      120: 00 00 00 00
+; DIS-NEXT: 0000000000000120:  R_POS	(idx: [[#NFA+5]]) .StoreArrays1
+; DIS-NEXT:      124: 00 00 00 00
+; DIS-NEXT:      128: 00 00 00 00
+; DIS-NEXT: 0000000000000128:  R_POS        (idx: [[#NFA+13]]) TOC[TC0]
+; DIS-NEXT:      12c: 00 00 01 50
+
+; DIS:      0000000000000138 (idx: [[#NFA+11]]) StoreArrays2[DS]:
+; DIS-NEXT:      138: 00 00 00 00
+; DIS-NEXT: 0000000000000138:  R_POS	(idx: [[#NFA+7]]) .StoreArrays2
+; DIS-NEXT:      13c: 00 00 00 90
+; DIS-NEXT:      140: 00 00 00 00
+; DIS-NEXT: 0000000000000140:  R_POS        (idx: [[#NFA+13]]) TOC[TC0]
+; DIS-NEXT:      144: 00 00 01 50
+
+; DIS:      0000000000000180 (idx: [[#NFA+27]]) mySmallLocalDynamicTLSv2[TE]:
+; DIS-NEXT:      180: 00 00 00 00
+; DIS-NEXT: 0000000000000180:  R_TLS_LD     (idx: [[#NFA+39]]) mySmallLocalDynamicTLSv2[TL]
+; DIS-NEXT:      184: 00 01 79 ec
+
+; DIS:      Disassembly of section .tdata:
+; DIS:      0000000000000000 (idx: [[#NFA+29]]) mySmallLocalDynamicTLSv1[TL]:
+; DIS:      0000000000007fec (idx: [[#NFA+31]]) mySmallLocalDynamicTLS2[TL]:
+; DIS:      000000000000be6c (idx: [[#NFA+33]]) mySmallLocalDynamicTLS3[TL]:
+; DIS:      000000000000fcec (idx: [[#NFA+35]]) mySmallLocalDynamicTLS4[TL]:
+; DIS:      0000000000013b6c (idx: [[#NFA+37]]) mySmallLocalDynamicTLS5[TL]:
+; DIS:      00000000000179ec (idx: [[#NFA+39]]) mySmallLocalDynamicTLSv2[TL]:
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-longlong.ll
new file mode 100644
index 00000000000000..f9157124fb3ad9
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-longlong.ll
@@ -0,0 +1,368 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
+; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN:      < %s | FileCheck %s \
+; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
+
+@ThreadLocalVarInit = thread_local(localdynamic) global i64 1, align 8 ; local-dynamic TLS i64
+@VarInit = local_unnamed_addr global i64 87, align 8 ; ordinary (non-TLS) global
+@IThreadLocalVarInit = internal thread_local(localdynamic) global i64 1, align 8 ; internal-linkage TLS i64
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
+%struct.anon = type { i64 }
+@ThreadLocalStruct = thread_local(localdynamic) global %struct.anon zeroinitializer, align 1 ; align-1 TLS struct
+@d = thread_local(localdynamic) global [87 x i64] zeroinitializer, align 8 ; TLS array
+
+define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @d
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C1@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @d) ; address of TLS array @d
+  ret ptr %0
+}
+
+define i64 @testUnaligned() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testUnaligned:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @ThreadLocalStruct
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testUnaligned:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalStruct)
+  %x = getelementptr inbounds %struct.anon, ptr %0, i32 0, i32 0
+  %1 = load i64, ptr %x, align 1 ; align-1 i64 load of the TLS struct's first field
+  ret i64 %1
+}
+
+define void @storeITLInit(i64 noundef %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeITLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeITLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
+  store i64 %x, ptr %0, align 8 ; store %x into the internal TLS variable
+  ret void
+}
+
+define void @storeTLInit(i64 noundef %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeTLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeTLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C4@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
+  store i64 %x, ptr %0, align 8 ; store %x into TLS variable @ThreadLocalVarInit
+  ret void
+}
+
+define i64 @loadITLInit() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
+  %1 = load i64, ptr %0, align 8 ; load the internal TLS variable
+  ret i64 %1
+}
+
+define i64 @loadITLInit2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C5(r2) # @VarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C5@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C5@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
+  %1 = load i64, ptr %0, align 8
+  %2 = load i64, ptr @VarInit, align 8
+  %add = add nsw i64 %2, %1 ; non-TLS global plus internal TLS value
+  ret i64 %add
+}
+
+define i64 @loadTLInit() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C4@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
+  %1 = load i64, ptr %0, align 8 ; load TLS variable @ThreadLocalVarInit
+  ret i64 %1
+}
+
+define i64 @loadTLInit2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C5(r2) # @VarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C4@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C5@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C5@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
+  %1 = load i64, ptr %0, align 8
+  %2 = load i64, ptr @VarInit, align 8
+  %add = add nsw i64 %2, %1 ; non-TLS global plus TLS value
+  ret i64 %add
+}
+
+define void @loadStore1(i64 noundef %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadStore1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r5, r5, 9
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadStore1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r5, r5, 9
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
+  %1 = load i64, ptr %0, align 8
+  %add = add nsw i64 %1, 9 ; %x is unused; the TLS value is incremented by the constant 9
+  store i64 %add, ptr %0, align 8
+  ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-short.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-short.ll
new file mode 100644
index 00000000000000..7482ca430c9a5b
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-short.ll
@@ -0,0 +1,336 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
+; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN:      < %s | FileCheck %s \
+; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
+
+@ThreadLocalVarInit = thread_local(localdynamic) global i16 1, align 2 ; local-dynamic TLS i16
+@VarInit = local_unnamed_addr global i16 87, align 2 ; ordinary (non-TLS) global
+@IThreadLocalVarInit = internal thread_local(localdynamic) global i16 1, align 2 ; internal-linkage TLS i16
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
+@b = thread_local(localdynamic) global [87 x i16] zeroinitializer, align 2 ; TLS array
+
+define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @b
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r3, 4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C1@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r3, 4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @b)
+  %arrayidx = getelementptr inbounds [87 x i16], ptr %0, i64 0, i64 2 ; element 2 of i16 array = byte offset 4
+  ret ptr %arrayidx
+}
+
+define void @storeITLInit(i16 noundef signext %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeITLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    sthx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeITLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    sthx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @IThreadLocalVarInit)
+  store i16 %x, ptr %0, align 2 ; store %x into the internal TLS variable
+  ret void
+}
+
+define void @storeTLInit(i16 noundef signext %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeTLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    sthx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeTLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    sthx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @ThreadLocalVarInit)
+  store i16 %x, ptr %0, align 2 ; store %x into TLS variable @ThreadLocalVarInit
+  ret void
+}
+
+define signext i16 @loadITLInit() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lhax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lhax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @IThreadLocalVarInit)
+  %1 = load i16, ptr %0, align 2 ; load the internal TLS variable (returned signext)
+  ret i16 %1
+}
+
+define signext i16 @loadITLInit2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lhzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # @VarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lhz r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    extsh r3, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lhzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C4@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lhz r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    extsh r3, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @IThreadLocalVarInit)
+  %1 = load i16, ptr %0, align 2
+  %2 = load i16, ptr @VarInit, align 2
+  %add = add i16 %2, %1 ; non-TLS global plus internal TLS value
+  ret i16 %add
+}
+
+define signext i16 @loadTLInit() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lhax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lhax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @ThreadLocalVarInit)
+  %1 = load i16, ptr %0, align 2 ; load TLS variable @ThreadLocalVarInit (returned signext)
+  ret i16 %1
+}
+
+define signext i16 @loadTLInit2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lhzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # @VarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lhz r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    extsh r3, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lhzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C4@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lhz r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    extsh r3, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @ThreadLocalVarInit)
+  %1 = load i16, ptr %0, align 2
+  %2 = load i16, ptr @VarInit, align 2
+  %add = add i16 %2, %1
+  ret i16 %add
+}
+
+define void @loadStore1(i16 noundef signext %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadStore1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lhzx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r5, r5, 9
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    sthx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadStore1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lhzx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r5, r5, 9
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    sthx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @IThreadLocalVarInit)
+  %1 = load i16, ptr %0, align 2
+  %add = add i16 %1, 9
+  store i16 %add, ptr %0, align 2
+  ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-ld-ldst-O0.ll b/llvm/test/CodeGen/PowerPC/aix-tls-ld-ldst-O0.ll
new file mode 100644
index 00000000000000..53aa47072de810
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-ld-ldst-O0.ll
@@ -0,0 +1,1031 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc  -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s \
+; RUN:      --check-prefix=SMALL64-O0
+; RUN: llc  -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \
+; RUN:      | FileCheck %s --check-prefix=LARGE64-O0
+; RUN: llc  -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s \
+; RUN:      --check-prefix=SMALL32-O0
+; RUN: llc  -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc-ibm-aix-xcoff --code-model=large < %s \
+; RUN:      | FileCheck %s --check-prefix=LARGE32-O0
+; RUN: llc  -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
+; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64
+; RUN: llc  -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN:      < %s | FileCheck %s \
+; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
+
+@TLInt = internal thread_local(localdynamic) global i32 0, align 4
+@TLLongLong = internal thread_local(localdynamic) global i64 0, align 8
+@TLDouble = internal thread_local(localdynamic) global double 0.000000e+00, align 8
+@TLFloat = internal thread_local(localdynamic) global float 0.000000e+00, align 4
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
+
+define void @storeInt(i32 noundef %x) {
+; SMALL64-O0-LABEL: storeInt:
+; SMALL64-O0:       # %bb.0: # %entry
+; SMALL64-O0-NEXT:    mflr r0
+; SMALL64-O0-NEXT:    stdu r1, -64(r1)
+; SMALL64-O0-NEXT:    std r0, 80(r1)
+; SMALL64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; SMALL64-O0-NEXT:    stw r3, 60(r1) # 4-byte Folded Spill
+; SMALL64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-O0-NEXT:    mr r4, r3
+; SMALL64-O0-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
+; SMALL64-O0-NEXT:    ld r5, L..C1(r2) # target-flags(ppc-tlsld) @TLInt
+; SMALL64-O0-NEXT:    add r4, r4, r5
+; SMALL64-O0-NEXT:    stw r3, 0(r4)
+; SMALL64-O0-NEXT:    addi r1, r1, 64
+; SMALL64-O0-NEXT:    ld r0, 16(r1)
+; SMALL64-O0-NEXT:    mtlr r0
+; SMALL64-O0-NEXT:    blr
+;
+; LARGE64-O0-LABEL: storeInt:
+; LARGE64-O0:       # %bb.0: # %entry
+; LARGE64-O0-NEXT:    mflr r0
+; LARGE64-O0-NEXT:    stdu r1, -64(r1)
+; LARGE64-O0-NEXT:    std r0, 80(r1)
+; LARGE64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; LARGE64-O0-NEXT:    stw r3, 60(r1) # 4-byte Folded Spill
+; LARGE64-O0-NEXT:    addis r3, L..C0@u(r2)
+; LARGE64-O0-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
+; LARGE64-O0-NEXT:    addis r3, L..C1@u(r2)
+; LARGE64-O0-NEXT:    ld r3, L..C1@l(r3)
+; LARGE64-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-O0-NEXT:    ld r5, 48(r1) # 8-byte Folded Reload
+; LARGE64-O0-NEXT:    mr r4, r3
+; LARGE64-O0-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
+; LARGE64-O0-NEXT:    ld r5, L..C0@l(r5)
+; LARGE64-O0-NEXT:    add r4, r4, r5
+; LARGE64-O0-NEXT:    stw r3, 0(r4)
+; LARGE64-O0-NEXT:    addi r1, r1, 64
+; LARGE64-O0-NEXT:    ld r0, 16(r1)
+; LARGE64-O0-NEXT:    mtlr r0
+; LARGE64-O0-NEXT:    blr
+;
+; SMALL32-O0-LABEL: storeInt:
+; SMALL32-O0:       # %bb.0: # %entry
+; SMALL32-O0-NEXT:    mflr r0
+; SMALL32-O0-NEXT:    stwu r1, -32(r1)
+; SMALL32-O0-NEXT:    stw r0, 40(r1)
+; SMALL32-O0-NEXT:    stw r3, 28(r1) # 4-byte Folded Spill
+; SMALL32-O0-NEXT:    lwz r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL32-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-O0-NEXT:    mr r4, r3
+; SMALL32-O0-NEXT:    lwz r3, 28(r1) # 4-byte Folded Reload
+; SMALL32-O0-NEXT:    lwz r5, L..C1(r2) # target-flags(ppc-tlsld) @TLInt
+; SMALL32-O0-NEXT:    stwx r3, r4, r5
+; SMALL32-O0-NEXT:    addi r1, r1, 32
+; SMALL32-O0-NEXT:    lwz r0, 8(r1)
+; SMALL32-O0-NEXT:    mtlr r0
+; SMALL32-O0-NEXT:    blr
+;
+; LARGE32-O0-LABEL: storeInt:
+; LARGE32-O0:       # %bb.0: # %entry
+; LARGE32-O0-NEXT:    mflr r0
+; LARGE32-O0-NEXT:    stwu r1, -32(r1)
+; LARGE32-O0-NEXT:    stw r0, 40(r1)
+; LARGE32-O0-NEXT:    stw r3, 28(r1) # 4-byte Folded Spill
+; LARGE32-O0-NEXT:    addis r3, L..C0@u(r2)
+; LARGE32-O0-NEXT:    stw r3, 24(r1) # 4-byte Folded Spill
+; LARGE32-O0-NEXT:    addis r3, L..C1@u(r2)
+; LARGE32-O0-NEXT:    lwz r3, L..C1@l(r3)
+; LARGE32-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-O0-NEXT:    lwz r5, 24(r1) # 4-byte Folded Reload
+; LARGE32-O0-NEXT:    mr r4, r3
+; LARGE32-O0-NEXT:    lwz r3, 28(r1) # 4-byte Folded Reload
+; LARGE32-O0-NEXT:    lwz r5, L..C0@l(r5)
+; LARGE32-O0-NEXT:    stwx r3, r4, r5
+; LARGE32-O0-NEXT:    addi r1, r1, 32
+; LARGE32-O0-NEXT:    lwz r0, 8(r1)
+; LARGE32-O0-NEXT:    mtlr r0
+; LARGE32-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeInt:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 60(r1) # 4-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r4, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r5, L..C1(r2) # target-flags(ppc-tlsld) @TLInt
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r4, r4, r5
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeInt:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 60(r1) # 4-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r5, 48(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r4, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r5, L..C0@l(r5)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r4, r4, r5
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLInt)
+  store i32 %x, ptr %0, align 4
+  ret void
+}
+
+define void @storeLongLong(i64 noundef %x) {
+; SMALL64-O0-LABEL: storeLongLong:
+; SMALL64-O0:       # %bb.0: # %entry
+; SMALL64-O0-NEXT:    mflr r0
+; SMALL64-O0-NEXT:    stdu r1, -64(r1)
+; SMALL64-O0-NEXT:    std r0, 80(r1)
+; SMALL64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; SMALL64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-O0-NEXT:    mr r4, r3
+; SMALL64-O0-NEXT:    ld r3, 56(r1) # 8-byte Folded Reload
+; SMALL64-O0-NEXT:    ld r5, L..C2(r2) # target-flags(ppc-tlsld) @TLLongLong
+; SMALL64-O0-NEXT:    add r4, r4, r5
+; SMALL64-O0-NEXT:    std r3, 0(r4)
+; SMALL64-O0-NEXT:    addi r1, r1, 64
+; SMALL64-O0-NEXT:    ld r0, 16(r1)
+; SMALL64-O0-NEXT:    mtlr r0
+; SMALL64-O0-NEXT:    blr
+;
+; LARGE64-O0-LABEL: storeLongLong:
+; LARGE64-O0:       # %bb.0: # %entry
+; LARGE64-O0-NEXT:    mflr r0
+; LARGE64-O0-NEXT:    stdu r1, -64(r1)
+; LARGE64-O0-NEXT:    std r0, 80(r1)
+; LARGE64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; LARGE64-O0-NEXT:    addis r3, L..C2@u(r2)
+; LARGE64-O0-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
+; LARGE64-O0-NEXT:    addis r3, L..C1@u(r2)
+; LARGE64-O0-NEXT:    ld r3, L..C1@l(r3)
+; LARGE64-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-O0-NEXT:    ld r5, 48(r1) # 8-byte Folded Reload
+; LARGE64-O0-NEXT:    mr r4, r3
+; LARGE64-O0-NEXT:    ld r3, 56(r1) # 8-byte Folded Reload
+; LARGE64-O0-NEXT:    ld r5, L..C2@l(r5)
+; LARGE64-O0-NEXT:    add r4, r4, r5
+; LARGE64-O0-NEXT:    std r3, 0(r4)
+; LARGE64-O0-NEXT:    addi r1, r1, 64
+; LARGE64-O0-NEXT:    ld r0, 16(r1)
+; LARGE64-O0-NEXT:    mtlr r0
+; LARGE64-O0-NEXT:    blr
+;
+; SMALL32-O0-LABEL: storeLongLong:
+; SMALL32-O0:       # %bb.0: # %entry
+; SMALL32-O0-NEXT:    mflr r0
+; SMALL32-O0-NEXT:    stwu r1, -32(r1)
+; SMALL32-O0-NEXT:    stw r0, 40(r1)
+; SMALL32-O0-NEXT:    stw r4, 28(r1) # 4-byte Folded Spill
+; SMALL32-O0-NEXT:    stw r3, 24(r1) # 4-byte Folded Spill
+; SMALL32-O0-NEXT:    lwz r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL32-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-O0-NEXT:    lwz r5, 24(r1) # 4-byte Folded Reload
+; SMALL32-O0-NEXT:    lwz r4, 28(r1) # 4-byte Folded Reload
+; SMALL32-O0-NEXT:    lwz r6, L..C2(r2) # target-flags(ppc-tlsld) @TLLongLong
+; SMALL32-O0-NEXT:    stwux r5, r3, r6
+; SMALL32-O0-NEXT:    stw r4, 4(r3)
+; SMALL32-O0-NEXT:    addi r1, r1, 32
+; SMALL32-O0-NEXT:    lwz r0, 8(r1)
+; SMALL32-O0-NEXT:    mtlr r0
+; SMALL32-O0-NEXT:    blr
+;
+; LARGE32-O0-LABEL: storeLongLong:
+; LARGE32-O0:       # %bb.0: # %entry
+; LARGE32-O0-NEXT:    mflr r0
+; LARGE32-O0-NEXT:    stwu r1, -32(r1)
+; LARGE32-O0-NEXT:    stw r0, 40(r1)
+; LARGE32-O0-NEXT:    stw r4, 28(r1) # 4-byte Folded Spill
+; LARGE32-O0-NEXT:    stw r3, 24(r1) # 4-byte Folded Spill
+; LARGE32-O0-NEXT:    addis r6, L..C2@u(r2)
+; LARGE32-O0-NEXT:    addis r3, L..C1@u(r2)
+; LARGE32-O0-NEXT:    lwz r3, L..C1@l(r3)
+; LARGE32-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-O0-NEXT:    lwz r5, 24(r1) # 4-byte Folded Reload
+; LARGE32-O0-NEXT:    lwz r4, 28(r1) # 4-byte Folded Reload
+; LARGE32-O0-NEXT:    lwz r6, L..C2@l(r6)
+; LARGE32-O0-NEXT:    stwux r5, r3, r6
+; LARGE32-O0-NEXT:    stw r4, 4(r3)
+; LARGE32-O0-NEXT:    addi r1, r1, 32
+; LARGE32-O0-NEXT:    lwz r0, 8(r1)
+; LARGE32-O0-NEXT:    mtlr r0
+; LARGE32-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeLongLong:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r4, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, 56(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r5, L..C2(r2) # target-flags(ppc-tlsld) @TLLongLong
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r4, r4, r5
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r3, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeLongLong:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r5, 48(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r4, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, 56(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r5, L..C2@l(r5)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r4, r4, r5
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLLongLong)
+  store i64 %x, ptr %0, align 8
+  ret void
+}
+
+define void @storeDouble(double noundef %x) {
+; SMALL64-O0-LABEL: storeDouble:
+; SMALL64-O0:       # %bb.0: # %entry
+; SMALL64-O0-NEXT:    mflr r0
+; SMALL64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL64-O0-NEXT:    std r0, 64(r1)
+; SMALL64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-O0-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @TLDouble
+; SMALL64-O0-NEXT:    add r3, r3, r4
+; SMALL64-O0-NEXT:    stxsdx f1, 0, r3
+; SMALL64-O0-NEXT:    addi r1, r1, 48
+; SMALL64-O0-NEXT:    ld r0, 16(r1)
+; SMALL64-O0-NEXT:    mtlr r0
+; SMALL64-O0-NEXT:    blr
+;
+; LARGE64-O0-LABEL: storeDouble:
+; LARGE64-O0:       # %bb.0: # %entry
+; LARGE64-O0-NEXT:    mflr r0
+; LARGE64-O0-NEXT:    stdu r1, -64(r1)
+; LARGE64-O0-NEXT:    std r0, 80(r1)
+; LARGE64-O0-NEXT:    addis r3, L..C3@u(r2)
+; LARGE64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; LARGE64-O0-NEXT:    addis r3, L..C1@u(r2)
+; LARGE64-O0-NEXT:    ld r3, L..C1@l(r3)
+; LARGE64-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; LARGE64-O0-NEXT:    ld r4, L..C3@l(r4)
+; LARGE64-O0-NEXT:    add r3, r3, r4
+; LARGE64-O0-NEXT:    stxsdx f1, 0, r3
+; LARGE64-O0-NEXT:    addi r1, r1, 64
+; LARGE64-O0-NEXT:    ld r0, 16(r1)
+; LARGE64-O0-NEXT:    mtlr r0
+; LARGE64-O0-NEXT:    blr
+;
+; SMALL32-O0-LABEL: storeDouble:
+; SMALL32-O0:       # %bb.0: # %entry
+; SMALL32-O0-NEXT:    mflr r0
+; SMALL32-O0-NEXT:    stwu r1, -32(r1)
+; SMALL32-O0-NEXT:    stw r0, 40(r1)
+; SMALL32-O0-NEXT:    lwz r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL32-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-O0-NEXT:    lwz r4, L..C3(r2) # target-flags(ppc-tlsld) @TLDouble
+; SMALL32-O0-NEXT:    stfdx f1, r3, r4
+; SMALL32-O0-NEXT:    addi r1, r1, 32
+; SMALL32-O0-NEXT:    lwz r0, 8(r1)
+; SMALL32-O0-NEXT:    mtlr r0
+; SMALL32-O0-NEXT:    blr
+;
+; LARGE32-O0-LABEL: storeDouble:
+; LARGE32-O0:       # %bb.0: # %entry
+; LARGE32-O0-NEXT:    mflr r0
+; LARGE32-O0-NEXT:    stwu r1, -32(r1)
+; LARGE32-O0-NEXT:    stw r0, 40(r1)
+; LARGE32-O0-NEXT:    addis r3, L..C3@u(r2)
+; LARGE32-O0-NEXT:    stw r3, 28(r1) # 4-byte Folded Spill
+; LARGE32-O0-NEXT:    addis r3, L..C1@u(r2)
+; LARGE32-O0-NEXT:    lwz r3, L..C1@l(r3)
+; LARGE32-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-O0-NEXT:    lwz r4, 28(r1) # 4-byte Folded Reload
+; LARGE32-O0-NEXT:    lwz r4, L..C3@l(r4)
+; LARGE32-O0-NEXT:    stfdx f1, r3, r4
+; LARGE32-O0-NEXT:    addi r1, r1, 32
+; LARGE32-O0-NEXT:    lwz r0, 8(r1)
+; LARGE32-O0-NEXT:    mtlr r0
+; LARGE32-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeDouble:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @TLDouble
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stxsdx f1, 0, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeDouble:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stxsdx f1, 0, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLDouble)
+  store double %x, ptr %0, align 8
+  ret void
+}
+
+define void @storeFloat(float noundef %x) {
+; SMALL64-O0-LABEL: storeFloat:
+; SMALL64-O0:       # %bb.0: # %entry
+; SMALL64-O0-NEXT:    mflr r0
+; SMALL64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL64-O0-NEXT:    std r0, 64(r1)
+; SMALL64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-O0-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @TLFloat
+; SMALL64-O0-NEXT:    add r3, r3, r4
+; SMALL64-O0-NEXT:    stfs f1, 0(r3)
+; SMALL64-O0-NEXT:    addi r1, r1, 48
+; SMALL64-O0-NEXT:    ld r0, 16(r1)
+; SMALL64-O0-NEXT:    mtlr r0
+; SMALL64-O0-NEXT:    blr
+;
+; LARGE64-O0-LABEL: storeFloat:
+; LARGE64-O0:       # %bb.0: # %entry
+; LARGE64-O0-NEXT:    mflr r0
+; LARGE64-O0-NEXT:    stdu r1, -64(r1)
+; LARGE64-O0-NEXT:    std r0, 80(r1)
+; LARGE64-O0-NEXT:    addis r3, L..C4@u(r2)
+; LARGE64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; LARGE64-O0-NEXT:    addis r3, L..C1@u(r2)
+; LARGE64-O0-NEXT:    ld r3, L..C1@l(r3)
+; LARGE64-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; LARGE64-O0-NEXT:    ld r4, L..C4@l(r4)
+; LARGE64-O0-NEXT:    add r3, r3, r4
+; LARGE64-O0-NEXT:    stfs f1, 0(r3)
+; LARGE64-O0-NEXT:    addi r1, r1, 64
+; LARGE64-O0-NEXT:    ld r0, 16(r1)
+; LARGE64-O0-NEXT:    mtlr r0
+; LARGE64-O0-NEXT:    blr
+;
+; SMALL32-O0-LABEL: storeFloat:
+; SMALL32-O0:       # %bb.0: # %entry
+; SMALL32-O0-NEXT:    mflr r0
+; SMALL32-O0-NEXT:    stwu r1, -32(r1)
+; SMALL32-O0-NEXT:    stw r0, 40(r1)
+; SMALL32-O0-NEXT:    lwz r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL32-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-O0-NEXT:    lwz r4, L..C4(r2) # target-flags(ppc-tlsld) @TLFloat
+; SMALL32-O0-NEXT:    stfsx f1, r3, r4
+; SMALL32-O0-NEXT:    addi r1, r1, 32
+; SMALL32-O0-NEXT:    lwz r0, 8(r1)
+; SMALL32-O0-NEXT:    mtlr r0
+; SMALL32-O0-NEXT:    blr
+;
+; LARGE32-O0-LABEL: storeFloat:
+; LARGE32-O0:       # %bb.0: # %entry
+; LARGE32-O0-NEXT:    mflr r0
+; LARGE32-O0-NEXT:    stwu r1, -32(r1)
+; LARGE32-O0-NEXT:    stw r0, 40(r1)
+; LARGE32-O0-NEXT:    addis r3, L..C4@u(r2)
+; LARGE32-O0-NEXT:    stw r3, 28(r1) # 4-byte Folded Spill
+; LARGE32-O0-NEXT:    addis r3, L..C1@u(r2)
+; LARGE32-O0-NEXT:    lwz r3, L..C1@l(r3)
+; LARGE32-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-O0-NEXT:    lwz r4, 28(r1) # 4-byte Folded Reload
+; LARGE32-O0-NEXT:    lwz r4, L..C4@l(r4)
+; LARGE32-O0-NEXT:    stfsx f1, r3, r4
+; LARGE32-O0-NEXT:    addi r1, r1, 32
+; LARGE32-O0-NEXT:    lwz r0, 8(r1)
+; LARGE32-O0-NEXT:    mtlr r0
+; LARGE32-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeFloat:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @TLFloat
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfs f1, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeFloat:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C4@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfs f1, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLFloat)
+  store float %x, ptr %0, align 4
+  ret void
+}
+
+define i32 @loadInt() {
+; SMALL64-O0-LABEL: loadInt:
+; SMALL64-O0:       # %bb.0: # %entry
+; SMALL64-O0-NEXT:    mflr r0
+; SMALL64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL64-O0-NEXT:    std r0, 64(r1)
+; SMALL64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-O0-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @TLInt
+; SMALL64-O0-NEXT:    add r3, r3, r4
+; SMALL64-O0-NEXT:    lwz r3, 0(r3)
+; SMALL64-O0-NEXT:    addi r1, r1, 48
+; SMALL64-O0-NEXT:    ld r0, 16(r1)
+; SMALL64-O0-NEXT:    mtlr r0
+; SMALL64-O0-NEXT:    blr
+;
+; LARGE64-O0-LABEL: loadInt:
+; LARGE64-O0:       # %bb.0: # %entry
+; LARGE64-O0-NEXT:    mflr r0
+; LARGE64-O0-NEXT:    stdu r1, -64(r1)
+; LARGE64-O0-NEXT:    std r0, 80(r1)
+; LARGE64-O0-NEXT:    addis r3, L..C0 at u(r2)
+; LARGE64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; LARGE64-O0-NEXT:    addis r3, L..C1 at u(r2)
+; LARGE64-O0-NEXT:    ld r3, L..C1 at l(r3)
+; LARGE64-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; LARGE64-O0-NEXT:    ld r4, L..C0 at l(r4)
+; LARGE64-O0-NEXT:    add r3, r3, r4
+; LARGE64-O0-NEXT:    lwz r3, 0(r3)
+; LARGE64-O0-NEXT:    addi r1, r1, 64
+; LARGE64-O0-NEXT:    ld r0, 16(r1)
+; LARGE64-O0-NEXT:    mtlr r0
+; LARGE64-O0-NEXT:    blr
+;
+; SMALL32-O0-LABEL: loadInt:
+; SMALL32-O0:       # %bb.0: # %entry
+; SMALL32-O0-NEXT:    mflr r0
+; SMALL32-O0-NEXT:    stwu r1, -32(r1)
+; SMALL32-O0-NEXT:    stw r0, 40(r1)
+; SMALL32-O0-NEXT:    lwz r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL32-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-O0-NEXT:    lwz r4, L..C1(r2) # target-flags(ppc-tlsld) @TLInt
+; SMALL32-O0-NEXT:    lwzx r3, r3, r4
+; SMALL32-O0-NEXT:    addi r1, r1, 32
+; SMALL32-O0-NEXT:    lwz r0, 8(r1)
+; SMALL32-O0-NEXT:    mtlr r0
+; SMALL32-O0-NEXT:    blr
+;
+; LARGE32-O0-LABEL: loadInt:
+; LARGE32-O0:       # %bb.0: # %entry
+; LARGE32-O0-NEXT:    mflr r0
+; LARGE32-O0-NEXT:    stwu r1, -32(r1)
+; LARGE32-O0-NEXT:    stw r0, 40(r1)
+; LARGE32-O0-NEXT:    addis r3, L..C0 at u(r2)
+; LARGE32-O0-NEXT:    stw r3, 28(r1) # 4-byte Folded Spill
+; LARGE32-O0-NEXT:    addis r3, L..C1 at u(r2)
+; LARGE32-O0-NEXT:    lwz r3, L..C1 at l(r3)
+; LARGE32-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-O0-NEXT:    lwz r4, 28(r1) # 4-byte Folded Reload
+; LARGE32-O0-NEXT:    lwz r4, L..C0 at l(r4)
+; LARGE32-O0-NEXT:    lwzx r3, r3, r4
+; LARGE32-O0-NEXT:    addi r1, r1, 32
+; LARGE32-O0-NEXT:    lwz r0, 8(r1)
+; LARGE32-O0-NEXT:    mtlr r0
+; LARGE32-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadInt:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @TLInt
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwz r3, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadInt:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C0@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwz r3, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLInt)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define i32 @loadLongLong() {
+; SMALL64-O0-LABEL: loadLongLong:
+; SMALL64-O0:       # %bb.0: # %entry
+; SMALL64-O0-NEXT:    mflr r0
+; SMALL64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL64-O0-NEXT:    std r0, 64(r1)
+; SMALL64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-O0-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @TLLongLong
+; SMALL64-O0-NEXT:    add r3, r3, r4
+; SMALL64-O0-NEXT:    ld r3, 0(r3)
+; SMALL64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; SMALL64-O0-NEXT:    clrldi r3, r3, 32
+; SMALL64-O0-NEXT:    addi r1, r1, 48
+; SMALL64-O0-NEXT:    ld r0, 16(r1)
+; SMALL64-O0-NEXT:    mtlr r0
+; SMALL64-O0-NEXT:    blr
+;
+; LARGE64-O0-LABEL: loadLongLong:
+; LARGE64-O0:       # %bb.0: # %entry
+; LARGE64-O0-NEXT:    mflr r0
+; LARGE64-O0-NEXT:    stdu r1, -64(r1)
+; LARGE64-O0-NEXT:    std r0, 80(r1)
+; LARGE64-O0-NEXT:    addis r3, L..C2@u(r2)
+; LARGE64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; LARGE64-O0-NEXT:    addis r3, L..C1@u(r2)
+; LARGE64-O0-NEXT:    ld r3, L..C1@l(r3)
+; LARGE64-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; LARGE64-O0-NEXT:    ld r4, L..C2@l(r4)
+; LARGE64-O0-NEXT:    add r3, r3, r4
+; LARGE64-O0-NEXT:    ld r3, 0(r3)
+; LARGE64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; LARGE64-O0-NEXT:    clrldi r3, r3, 32
+; LARGE64-O0-NEXT:    addi r1, r1, 64
+; LARGE64-O0-NEXT:    ld r0, 16(r1)
+; LARGE64-O0-NEXT:    mtlr r0
+; LARGE64-O0-NEXT:    blr
+;
+; SMALL32-O0-LABEL: loadLongLong:
+; SMALL32-O0:       # %bb.0: # %entry
+; SMALL32-O0-NEXT:    mflr r0
+; SMALL32-O0-NEXT:    stwu r1, -32(r1)
+; SMALL32-O0-NEXT:    stw r0, 40(r1)
+; SMALL32-O0-NEXT:    lwz r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL32-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-O0-NEXT:    lwz r4, L..C2(r2) # target-flags(ppc-tlsld) @TLLongLong
+; SMALL32-O0-NEXT:    add r3, r3, r4
+; SMALL32-O0-NEXT:    lwz r3, 4(r3)
+; SMALL32-O0-NEXT:    addi r1, r1, 32
+; SMALL32-O0-NEXT:    lwz r0, 8(r1)
+; SMALL32-O0-NEXT:    mtlr r0
+; SMALL32-O0-NEXT:    blr
+;
+; LARGE32-O0-LABEL: loadLongLong:
+; LARGE32-O0:       # %bb.0: # %entry
+; LARGE32-O0-NEXT:    mflr r0
+; LARGE32-O0-NEXT:    stwu r1, -32(r1)
+; LARGE32-O0-NEXT:    stw r0, 40(r1)
+; LARGE32-O0-NEXT:    addis r3, L..C2@u(r2)
+; LARGE32-O0-NEXT:    stw r3, 28(r1) # 4-byte Folded Spill
+; LARGE32-O0-NEXT:    addis r3, L..C1@u(r2)
+; LARGE32-O0-NEXT:    lwz r3, L..C1@l(r3)
+; LARGE32-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-O0-NEXT:    lwz r4, 28(r1) # 4-byte Folded Reload
+; LARGE32-O0-NEXT:    lwz r4, L..C2@l(r4)
+; LARGE32-O0-NEXT:    add r3, r3, r4
+; LARGE32-O0-NEXT:    lwz r3, 4(r3)
+; LARGE32-O0-NEXT:    addi r1, r1, 32
+; LARGE32-O0-NEXT:    lwz r0, 8(r1)
+; LARGE32-O0-NEXT:    mtlr r0
+; LARGE32-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadLongLong:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @TLLongLong
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    clrldi r3, r3, 32
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadLongLong:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    clrldi r3, r3, 32
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLLongLong)
+  %1 = load i64, ptr %0, align 8
+  %conv = trunc i64 %1 to i32
+  ret i32 %conv
+}
+
+define i32 @loadDouble() {
+; SMALL64-O0-LABEL: loadDouble:
+; SMALL64-O0:       # %bb.0: # %entry
+; SMALL64-O0-NEXT:    mflr r0
+; SMALL64-O0-NEXT:    stdu r1, -64(r1)
+; SMALL64-O0-NEXT:    std r0, 80(r1)
+; SMALL64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-O0-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @TLDouble
+; SMALL64-O0-NEXT:    lfdx f0, r3, r4
+; SMALL64-O0-NEXT:    xscvdpsxws f0, f0
+; SMALL64-O0-NEXT:    addi r3, r1, 52
+; SMALL64-O0-NEXT:    stfiwx f0, 0, r3
+; SMALL64-O0-NEXT:    lwz r3, 52(r1)
+; SMALL64-O0-NEXT:    clrldi r3, r3, 32
+; SMALL64-O0-NEXT:    addi r1, r1, 64
+; SMALL64-O0-NEXT:    ld r0, 16(r1)
+; SMALL64-O0-NEXT:    mtlr r0
+; SMALL64-O0-NEXT:    blr
+;
+; LARGE64-O0-LABEL: loadDouble:
+; LARGE64-O0:       # %bb.0: # %entry
+; LARGE64-O0-NEXT:    mflr r0
+; LARGE64-O0-NEXT:    stdu r1, -80(r1)
+; LARGE64-O0-NEXT:    std r0, 96(r1)
+; LARGE64-O0-NEXT:    addis r3, L..C3@u(r2)
+; LARGE64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; LARGE64-O0-NEXT:    addis r3, L..C1@u(r2)
+; LARGE64-O0-NEXT:    ld r3, L..C1@l(r3)
+; LARGE64-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; LARGE64-O0-NEXT:    ld r4, L..C3@l(r4)
+; LARGE64-O0-NEXT:    lfdx f0, r3, r4
+; LARGE64-O0-NEXT:    xscvdpsxws f0, f0
+; LARGE64-O0-NEXT:    addi r3, r1, 68
+; LARGE64-O0-NEXT:    stfiwx f0, 0, r3
+; LARGE64-O0-NEXT:    lwz r3, 68(r1)
+; LARGE64-O0-NEXT:    clrldi r3, r3, 32
+; LARGE64-O0-NEXT:    addi r1, r1, 80
+; LARGE64-O0-NEXT:    ld r0, 16(r1)
+; LARGE64-O0-NEXT:    mtlr r0
+; LARGE64-O0-NEXT:    blr
+;
+; SMALL32-O0-LABEL: loadDouble:
+; SMALL32-O0:       # %bb.0: # %entry
+; SMALL32-O0-NEXT:    mflr r0
+; SMALL32-O0-NEXT:    stwu r1, -32(r1)
+; SMALL32-O0-NEXT:    stw r0, 40(r1)
+; SMALL32-O0-NEXT:    lwz r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL32-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-O0-NEXT:    lwz r4, L..C3(r2) # target-flags(ppc-tlsld) @TLDouble
+; SMALL32-O0-NEXT:    lfdx f0, r3, r4
+; SMALL32-O0-NEXT:    xscvdpsxws f0, f0
+; SMALL32-O0-NEXT:    addi r3, r1, 28
+; SMALL32-O0-NEXT:    stfiwx f0, 0, r3
+; SMALL32-O0-NEXT:    lwz r3, 28(r1)
+; SMALL32-O0-NEXT:    addi r1, r1, 32
+; SMALL32-O0-NEXT:    lwz r0, 8(r1)
+; SMALL32-O0-NEXT:    mtlr r0
+; SMALL32-O0-NEXT:    blr
+;
+; LARGE32-O0-LABEL: loadDouble:
+; LARGE32-O0:       # %bb.0: # %entry
+; LARGE32-O0-NEXT:    mflr r0
+; LARGE32-O0-NEXT:    stwu r1, -32(r1)
+; LARGE32-O0-NEXT:    stw r0, 40(r1)
+; LARGE32-O0-NEXT:    addis r3, L..C3@u(r2)
+; LARGE32-O0-NEXT:    stw r3, 24(r1) # 4-byte Folded Spill
+; LARGE32-O0-NEXT:    addis r3, L..C1@u(r2)
+; LARGE32-O0-NEXT:    lwz r3, L..C1@l(r3)
+; LARGE32-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-O0-NEXT:    lwz r4, 24(r1) # 4-byte Folded Reload
+; LARGE32-O0-NEXT:    lwz r4, L..C3@l(r4)
+; LARGE32-O0-NEXT:    lfdx f0, r3, r4
+; LARGE32-O0-NEXT:    xscvdpsxws f0, f0
+; LARGE32-O0-NEXT:    addi r3, r1, 28
+; LARGE32-O0-NEXT:    stfiwx f0, 0, r3
+; LARGE32-O0-NEXT:    lwz r3, 28(r1)
+; LARGE32-O0-NEXT:    addi r1, r1, 32
+; LARGE32-O0-NEXT:    lwz r0, 8(r1)
+; LARGE32-O0-NEXT:    mtlr r0
+; LARGE32-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadDouble:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @TLDouble
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfdx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xscvdpsxws f0, f0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r1, 52
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfiwx f0, 0, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwz r3, 52(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    clrldi r3, r3, 32
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadDouble:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 96(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfdx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xscvdpsxws f0, f0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r1, 68
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfiwx f0, 0, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwz r3, 68(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    clrldi r3, r3, 32
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 80
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLDouble)
+  %1 = load double, ptr %0, align 8
+  %conv = fptosi double %1 to i32
+  ret i32 %conv
+}
+
+define i32 @loadFloat() {
+; SMALL64-O0-LABEL: loadFloat:
+; SMALL64-O0:       # %bb.0: # %entry
+; SMALL64-O0-NEXT:    mflr r0
+; SMALL64-O0-NEXT:    stdu r1, -64(r1)
+; SMALL64-O0-NEXT:    std r0, 80(r1)
+; SMALL64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-O0-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @TLFloat
+; SMALL64-O0-NEXT:    add r3, r3, r4
+; SMALL64-O0-NEXT:    lfs f0, 0(r3)
+; SMALL64-O0-NEXT:    fctiwz f0, f0
+; SMALL64-O0-NEXT:    stfd f0, 56(r1)
+; SMALL64-O0-NEXT:    lwa r3, 60(r1)
+; SMALL64-O0-NEXT:    clrldi r3, r3, 32
+; SMALL64-O0-NEXT:    addi r1, r1, 64
+; SMALL64-O0-NEXT:    ld r0, 16(r1)
+; SMALL64-O0-NEXT:    mtlr r0
+; SMALL64-O0-NEXT:    blr
+;
+; LARGE64-O0-LABEL: loadFloat:
+; LARGE64-O0:       # %bb.0: # %entry
+; LARGE64-O0-NEXT:    mflr r0
+; LARGE64-O0-NEXT:    stdu r1, -64(r1)
+; LARGE64-O0-NEXT:    std r0, 80(r1)
+; LARGE64-O0-NEXT:    addis r3, L..C4@u(r2)
+; LARGE64-O0-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
+; LARGE64-O0-NEXT:    addis r3, L..C1@u(r2)
+; LARGE64-O0-NEXT:    ld r3, L..C1@l(r3)
+; LARGE64-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-O0-NEXT:    ld r4, 48(r1) # 8-byte Folded Reload
+; LARGE64-O0-NEXT:    ld r4, L..C4@l(r4)
+; LARGE64-O0-NEXT:    add r3, r3, r4
+; LARGE64-O0-NEXT:    lfs f0, 0(r3)
+; LARGE64-O0-NEXT:    fctiwz f0, f0
+; LARGE64-O0-NEXT:    stfd f0, 56(r1)
+; LARGE64-O0-NEXT:    lwa r3, 60(r1)
+; LARGE64-O0-NEXT:    clrldi r3, r3, 32
+; LARGE64-O0-NEXT:    addi r1, r1, 64
+; LARGE64-O0-NEXT:    ld r0, 16(r1)
+; LARGE64-O0-NEXT:    mtlr r0
+; LARGE64-O0-NEXT:    blr
+;
+; SMALL32-O0-LABEL: loadFloat:
+; SMALL32-O0:       # %bb.0: # %entry
+; SMALL32-O0-NEXT:    mflr r0
+; SMALL32-O0-NEXT:    stwu r1, -32(r1)
+; SMALL32-O0-NEXT:    stw r0, 40(r1)
+; SMALL32-O0-NEXT:    lwz r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL32-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-O0-NEXT:    lwz r4, L..C4(r2) # target-flags(ppc-tlsld) @TLFloat
+; SMALL32-O0-NEXT:    lfsx f0, r3, r4
+; SMALL32-O0-NEXT:    xscvdpsxws f0, f0
+; SMALL32-O0-NEXT:    addi r3, r1, 28
+; SMALL32-O0-NEXT:    stfiwx f0, 0, r3
+; SMALL32-O0-NEXT:    lwz r3, 28(r1)
+; SMALL32-O0-NEXT:    addi r1, r1, 32
+; SMALL32-O0-NEXT:    lwz r0, 8(r1)
+; SMALL32-O0-NEXT:    mtlr r0
+; SMALL32-O0-NEXT:    blr
+;
+; LARGE32-O0-LABEL: loadFloat:
+; LARGE32-O0:       # %bb.0: # %entry
+; LARGE32-O0-NEXT:    mflr r0
+; LARGE32-O0-NEXT:    stwu r1, -32(r1)
+; LARGE32-O0-NEXT:    stw r0, 40(r1)
+; LARGE32-O0-NEXT:    addis r3, L..C4@u(r2)
+; LARGE32-O0-NEXT:    stw r3, 24(r1) # 4-byte Folded Spill
+; LARGE32-O0-NEXT:    addis r3, L..C1@u(r2)
+; LARGE32-O0-NEXT:    lwz r3, L..C1@l(r3)
+; LARGE32-O0-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-O0-NEXT:    lwz r4, 24(r1) # 4-byte Folded Reload
+; LARGE32-O0-NEXT:    lwz r4, L..C4@l(r4)
+; LARGE32-O0-NEXT:    lfsx f0, r3, r4
+; LARGE32-O0-NEXT:    xscvdpsxws f0, f0
+; LARGE32-O0-NEXT:    addi r3, r1, 28
+; LARGE32-O0-NEXT:    stfiwx f0, 0, r3
+; LARGE32-O0-NEXT:    lwz r3, 28(r1)
+; LARGE32-O0-NEXT:    addi r1, r1, 32
+; LARGE32-O0-NEXT:    lwz r0, 8(r1)
+; LARGE32-O0-NEXT:    mtlr r0
+; LARGE32-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadFloat:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @TLFloat
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfs f0, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    fctiwz f0, f0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfd f0, 56(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwa r3, 60(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    clrldi r3, r3, 32
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadFloat:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C4@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, 48(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfs f0, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    fctiwz f0, f0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfd f0, 56(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwa r3, 60(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    clrldi r3, r3, 32
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLFloat)
+  %1 = load float, ptr %0, align 4
+  %conv = fptosi float %1 to i32
+  ret i32 %conv
+}
+
+; TOC Entry Checks.
+
+; SMALL64-O0-LABEL: .toc
+; SMALL64-O0-LABEL:L..C0:
+; SMALL64-O0-NEXT:	.tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL64-O0-NEXT:	.rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL64-O0-LABEL:L..C1:
+; SMALL64-O0-NEXT:	.tc TLInt[TC],TLInt[UL]@ld
+; SMALL64-O0-LABEL:L..C2:
+; SMALL64-O0-NEXT:	.tc TLLongLong[TC],TLLongLong[UL]@ld
+; SMALL64-O0-LABEL:L..C3:
+; SMALL64-O0-NEXT:	.tc TLDouble[TC],TLDouble[UL]@ld
+; SMALL64-O0-LABEL:L..C4:
+; SMALL64-O0-NEXT:	.tc TLFloat[TC],TLFloat[UL]@ld
+
+; LARGE64-O0-LABEL: .toc
+; LARGE64-O0-LABEL:L..C0:
+; LARGE64-O0-NEXT:	.tc TLInt[TE],TLInt[UL]@ld
+; LARGE64-O0-LABEL:L..C1:
+; LARGE64-O0-NEXT:	.tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE64-O0-NEXT:	.rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE64-O0-LABEL:L..C2:
+; LARGE64-O0-NEXT:	.tc TLLongLong[TE],TLLongLong[UL]@ld
+; LARGE64-O0-LABEL:L..C3:
+; LARGE64-O0-NEXT:	.tc TLDouble[TE],TLDouble[UL]@ld
+; LARGE64-O0-LABEL:L..C4:
+; LARGE64-O0-NEXT:	.tc TLFloat[TE],TLFloat[UL]@ld
+
+; SMALL32-O0-LABEL: .toc
+; SMALL32-O0-LABEL:L..C0:
+; SMALL32-O0-NEXT:	.tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL32-O0-NEXT:	.rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL32-O0-LABEL:L..C1:
+; SMALL32-O0-NEXT:	.tc TLInt[TC],TLInt[UL]@ld
+; SMALL32-O0-LABEL:L..C2:
+; SMALL32-O0-NEXT:	.tc TLLongLong[TC],TLLongLong[UL]@ld
+; SMALL32-O0-LABEL:L..C3:
+; SMALL32-O0-NEXT:	.tc TLDouble[TC],TLDouble[UL]@ld
+; SMALL32-O0-LABEL:L..C4:
+; SMALL32-O0-NEXT:	.tc TLFloat[TC],TLFloat[UL]@ld
+
+; LARGE32-O0-LABEL: .toc
+; LARGE32-O0-LABEL:L..C0:
+; LARGE32-O0-NEXT:	.tc TLInt[TE],TLInt[UL]@ld
+; LARGE32-O0-LABEL:L..C1:
+; LARGE32-O0-NEXT:	.tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE32-O0-NEXT:	.rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE32-O0-LABEL:L..C2:
+; LARGE32-O0-NEXT:	.tc TLLongLong[TE],TLLongLong[UL]@ld
+; LARGE32-O0-LABEL:L..C3:
+; LARGE32-O0-NEXT:	.tc TLDouble[TE],TLDouble[UL]@ld
+; LARGE32-O0-LABEL:L..C4:
+; LARGE32-O0-NEXT:	.tc TLFloat[TE],TLFloat[UL]@ld

>From 33b764423f17c035bd08c6ad8ea92db97ae42af5 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Wed, 13 Mar 2024 04:36:35 -0400
Subject: [PATCH 2/4] [NFC] Update

---
 .../aix-small-local-dynamic-tls-char.ll       | 336 ----------------
 .../aix-small-local-dynamic-tls-double.ll     | 339 ----------------
 .../aix-small-local-dynamic-tls-float.ll      | 339 ----------------
 ...ix-small-local-dynamic-tls-largeaccess2.ll | 219 +++++++++++
 .../aix-small-local-dynamic-tls-longlong.ll   | 368 ------------------
 .../aix-small-local-dynamic-tls-short.ll      | 336 ----------------
 ...l => aix-small-local-dynamic-tls-types.ll} | 222 +++++++----
 7 files changed, 355 insertions(+), 1804 deletions(-)
 delete mode 100644 llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-char.ll
 delete mode 100644 llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-double.ll
 delete mode 100644 llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-float.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess2.ll
 delete mode 100644 llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-longlong.ll
 delete mode 100644 llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-short.ll
 rename llvm/test/CodeGen/PowerPC/{aix-small-local-dynamic-tls-int.ll => aix-small-local-dynamic-tls-types.ll} (67%)

diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-char.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-char.ll
deleted file mode 100644
index 6fb8683330a303..00000000000000
--- a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-char.ll
+++ /dev/null
@@ -1,336 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
-; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
-; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64
-; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
-; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
-; RUN:      < %s | FileCheck %s \
-; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
-
-@ThreadLocalVarInit = thread_local(localdynamic) global i8 1, align 1
-@VarInit = local_unnamed_addr global i8 87, align 1
-@IThreadLocalVarInit = internal thread_local(localdynamic) global i8 1, align 1
-declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
-@c = thread_local(localdynamic) global [87 x i8] zeroinitializer, align 1
-
-define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: AddrTest1:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @c
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r3, 1
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: AddrTest1:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C1@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r3, 1
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @c)
-  %arrayidx = getelementptr inbounds [87 x i8], ptr %0, i64 0, i64 1
-  ret ptr %arrayidx
-}
-
-define void @storeITLInit(i8 noundef zeroext %x) {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeITLInit:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stbx r6, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeITLInit:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C2@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r7)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stbx r6, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit)
-  store i8 %x, ptr %0, align 1
-  ret void
-}
-
-define void @storeTLInit(i8 noundef zeroext %x) {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeTLInit:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stbx r6, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeTLInit:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C3@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r7)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stbx r6, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalVarInit)
-  store i8 %x, ptr %0, align 1
-  ret void
-}
-
-define zeroext i8 @loadITLInit() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lbzx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lbzx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit)
-  %1 = load i8, ptr %0, align 1
-  ret i8 %1
-}
-
-define zeroext i8 @loadITLInit2() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit2:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lbzx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # @VarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lbz r4, 0(r4)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    clrldi r3, r3, 56
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit2:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lbzx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C4@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lbz r4, 0(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    clrldi r3, r3, 56
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit)
-  %1 = load i8, ptr %0, align 1
-  %2 = load i8, ptr @VarInit, align 1
-  %add = add i8 %2, %1
-  ret i8 %add
-}
-
-define zeroext i8 @loadTLInit() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lbzx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lbzx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalVarInit)
-  %1 = load i8, ptr %0, align 1
-  ret i8 %1
-}
-
-define zeroext i8 @loadTLInit2() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit2:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lbzx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # @VarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lbz r4, 0(r4)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    clrldi r3, r3, 56
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit2:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lbzx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C4@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lbz r4, 0(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    clrldi r3, r3, 56
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalVarInit)
-  %1 = load i8, ptr %0, align 1
-  %2 = load i8, ptr @VarInit, align 1
-  %add = add i8 %2, %1
-  ret i8 %add
-}
-
-define void @loadStore1(i8 noundef zeroext %x) {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadStore1:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lbzx r5, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r5, r5, 9
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stbx r5, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadStore1:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lbzx r5, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r5, r5, 9
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stbx r5, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit)
-  %1 = load i8, ptr %0, align 1
-  %add = add i8 %1, 9
-  store i8 %add, ptr %0, align 1
-  ret void
-}
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-double.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-double.ll
deleted file mode 100644
index 3dfc0aa6d4aff1..00000000000000
--- a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-double.ll
+++ /dev/null
@@ -1,339 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
-; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
-; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64
-; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
-; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
-; RUN:      < %s | FileCheck %s \
-; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
-
-@ThreadLocalVarInit = thread_local(localdynamic) global double 1.000000e+00, align 8
-@VarInit = local_unnamed_addr global double 8.700000e+01, align 8
-@IThreadLocalVarInit = internal thread_local(localdynamic) global double 1.000000e+00, align 8
-declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
-@f = thread_local(localdynamic) global [87 x double] zeroinitializer, align 8
-
-define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: AddrTest1:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @f
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r3, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: AddrTest1:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C1@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r3, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @f)
-  %arrayidx = getelementptr inbounds [87 x double], ptr %0, i64 0, i64 6
-  ret ptr %arrayidx
-}
-
-define void @storeITLInit(double noundef %x) {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeITLInit:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfdx f1, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeITLInit:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfdx f1, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
-  store double %x, ptr %0, align 8
-  ret void
-}
-
-define void @storeTLInit(double noundef %x) {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeTLInit:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfdx f1, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeTLInit:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfdx f1, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
-  store double %x, ptr %0, align 8
-  ret void
-}
-
-define double @loadITLInit() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfdx f1, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfdx f1, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
-  %1 = load double, ptr %0, align 8
-  ret double %1
-}
-
-define double @loadITLInit2() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit2:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfdx f0, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C4(r2) # @VarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfd f1, 0(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xsadddp f1, f0, f1
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit2:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfdx f0, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C4@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C4@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfd f1, 0(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xsadddp f1, f0, f1
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
-  %1 = load double, ptr %0, align 8
-  %2 = load double, ptr @VarInit, align 8
-  %add = fadd double %1, %2
-  ret double %add
-}
-
-define double @loadTLInit() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfdx f1, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfdx f1, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
-  %1 = load double, ptr %0, align 8
-  ret double %1
-}
-
-define double @loadTLInit2() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit2:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfdx f0, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C4(r2) # @VarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfd f1, 0(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xsadddp f1, f0, f1
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit2:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfdx f0, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C4@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C4@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfd f1, 0(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xsadddp f1, f0, f1
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
-  %1 = load double, ptr %0, align 8
-  %2 = load double, ptr @VarInit, align 8
-  %add = fadd double %1, %2
-  ret double %add
-}
-
-define void @loadStore1(double noundef %x) {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadStore1:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    vspltisw v2, 1
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    vspltisw v3, 8
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xvcvsxwdp vs0, vs34
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfdx f1, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xsadddp f0, f1, f0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xvcvsxwdp vs1, vs35
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xsadddp f0, f0, f1
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfdx f0, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadStore1:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    vspltisw v2, 1
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    vspltisw v3, 8
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xvcvsxwdp vs0, vs34
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfdx f1, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xsadddp f0, f1, f0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xvcvsxwdp vs1, vs35
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xsadddp f0, f0, f1
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfdx f0, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
-  %1 = load double, ptr %0, align 8
-  %inc = fadd double %1, 1.000000e+00
-  %add = fadd double %inc, 8.000000e+00
-  store double %add, ptr %0, align 8
-  ret void
-}
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-float.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-float.ll
deleted file mode 100644
index 69a4baa1d19bb8..00000000000000
--- a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-float.ll
+++ /dev/null
@@ -1,339 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
-; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
-; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64
-; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
-; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
-; RUN:      < %s | FileCheck %s \
-; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
-
-@ThreadLocalVarInit = thread_local(localdynamic) global float 1.000000e+00, align 4
-@VarInit = local_unnamed_addr global float 8.700000e+01, align 4
-@IThreadLocalVarInit = internal thread_local(localdynamic) global float 1.000000e+00, align 4
-declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
-@e = thread_local(localdynamic) global [87 x float] zeroinitializer, align 4
-
-define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: AddrTest1:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @e
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r3, 16
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: AddrTest1:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C1@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r3, 16
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @e)
-  %arrayidx = getelementptr inbounds [87 x float], ptr %0, i64 0, i64 4
-  ret ptr %arrayidx
-}
-
-define void @storeITLInit(float noundef %x) {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeITLInit:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfsx f1, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeITLInit:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfsx f1, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
-  store float %x, ptr %0, align 4
-  ret void
-}
-
-define void @storeTLInit(float noundef %x) {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeTLInit:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfsx f1, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeTLInit:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfsx f1, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
-  store float %x, ptr %0, align 4
-  ret void
-}
-
-define float @loadITLInit() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfsx f1, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfsx f1, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
-  %1 = load float, ptr %0, align 4
-  ret float %1
-}
-
-define float @loadITLInit2() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit2:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfsx f0, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C4(r2) # @VarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfs f1, 0(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    fadds f1, f0, f1
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit2:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfsx f0, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C4@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C4@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfs f1, 0(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    fadds f1, f0, f1
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
-  %1 = load float, ptr %0, align 4
-  %2 = load float, ptr @VarInit, align 4
-  %add = fadd float %1, %2
-  ret float %add
-}
-
-define float @loadTLInit() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfsx f1, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfsx f1, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
-  %1 = load float, ptr %0, align 4
-  ret float %1
-}
-
-define float @loadTLInit2() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit2:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfsx f0, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C4(r2) # @VarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfs f1, 0(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    fadds f1, f0, f1
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit2:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfsx f0, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C4@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C4@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfs f1, 0(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    fadds f1, f0, f1
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
-  %1 = load float, ptr %0, align 4
-  %2 = load float, ptr @VarInit, align 4
-  %add = fadd float %1, %2
-  ret float %add
-}
-
-define void @loadStore1(float noundef %x) {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadStore1:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    vspltisw v2, 1
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    vspltisw v3, 8
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xvcvsxwdp vs0, vs34
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfsx f1, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    fadds f0, f1, f0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xvcvsxwdp vs1, vs35
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    fadds f0, f0, f1
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfsx f0, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadStore1:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    vspltisw v2, 1
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    vspltisw v3, 8
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xvcvsxwdp vs0, vs34
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfsx f1, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    fadds f0, f1, f0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xvcvsxwdp vs1, vs35
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    fadds f0, f0, f1
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfsx f0, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
-  %1 = load float, ptr %0, align 4
-  %inc = fadd float %1, 1.000000e+00
-  %add = fadd float %inc, 8.000000e+00
-  store float %add, ptr %0, align 4
-  ret void
-}
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess2.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess2.ll
new file mode 100644
index 00000000000000..b88a73dd626aab
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess2.ll
@@ -0,0 +1,219 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
+; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN:      < %s | FileCheck %s \
+; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
+
+; Test disassembly of object.
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff -xcoff-traceback-table=false \
+; RUN:      --code-model=large -filetype=obj -o %t.o < %s
+; RUN: llvm-objdump -D -r --symbol-description %t.o | FileCheck -D#NFA=2 --check-prefix=DIS %s
+
+@mySmallLocalDynamicTLS6 = external thread_local(localdynamic) global [60 x i64], align 8
+@mySmallLocalDynamicTLS2 = thread_local(localdynamic) global [3000 x i64] zeroinitializer, align 8
+@MyTLSGDVar = thread_local global [800 x i64] zeroinitializer, align 8
+@mySmallLocalDynamicTLS3 = thread_local(localdynamic) global [3000 x i64] zeroinitializer, align 8
+@mySmallLocalDynamicTLS4 = thread_local(localdynamic) global [3000 x i64] zeroinitializer, align 8
+@mySmallLocalDynamicTLS5 = thread_local(localdynamic) global [3000 x i64] zeroinitializer, align 8
+@mySmallLocalDynamicTLS = thread_local(localdynamic) local_unnamed_addr global [7800 x i64] zeroinitializer, align 8
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
+
+; All accesses use a "faster" local-dynamic sequence directly off the thread pointer.
+define i64 @StoreLargeAccess1() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: StoreLargeAccess1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS6
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 212
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r4, r6, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r3, 424(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C2(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS2
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 203
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, 1200(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C3(r2) # target-flags(ppc-tlsgdm) @MyTLSGDVar
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsgd) @MyTLSGDVar
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 44
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, 440(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C5(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 6
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, 2000(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C6(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 100
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r4, r6, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r3, 6800(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C7(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS5
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 882
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, 8400(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 1191
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: StoreLargeAccess1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 212
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1@l(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 424(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 203
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C2@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 1200(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C4@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C3@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 44
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 440(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C5@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 6
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C5@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 2000(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C6@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 100
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C6@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 6800(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C7@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 882
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C7@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 8400(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 1191
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalDynamicTLS6)
+  %arrayidx = getelementptr inbounds [60 x i64], ptr %0, i64 0, i64 53
+  store i64 212, ptr %arrayidx, align 8
+  %1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalDynamicTLS2)
+  %arrayidx1 = getelementptr inbounds [3000 x i64], ptr %1, i64 0, i64 150
+  store i64 203, ptr %arrayidx1, align 8
+  %2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @MyTLSGDVar)
+  %arrayidx2 = getelementptr inbounds [800 x i64], ptr %2, i64 0, i64 55
+  store i64 44, ptr %arrayidx2, align 8
+  %3 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalDynamicTLS3)
+  %arrayidx3 = getelementptr inbounds [3000 x i64], ptr %3, i64 0, i64 250
+  store i64 6, ptr %arrayidx3, align 8
+  %4 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalDynamicTLS4)
+  %arrayidx4 = getelementptr inbounds [3000 x i64], ptr %4, i64 0, i64 850
+  store i64 100, ptr %arrayidx4, align 8
+  %5 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalDynamicTLS5)
+  %arrayidx5 = getelementptr inbounds [3000 x i64], ptr %5, i64 0, i64 1050
+  store i64 882, ptr %arrayidx5, align 8
+  %6 = load i64, ptr %arrayidx1, align 8
+  %7 = load i64, ptr %arrayidx3, align 8
+  %8 = load i64, ptr %arrayidx4, align 8
+  %add = add i64 %6, 882
+  %add9 = add i64 %add, %7
+  %add11 = add i64 %add9, %8
+  ret i64 %add11
+}
+
+; DIS:      0000000000000000 (idx: [[#NFA+9]]) .StoreLargeAccess1:
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                mflr 0
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stdu 1, -48(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+15]]) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 0, 64(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 7, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+17]]) mySmallLocalDynamicTLS6[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 0(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+15]]) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0x0
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA              (idx: [[#NFA+1]]) .__tls_get_mod[PR]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 212
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                mr 6, 3
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 8(7)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+17]]) mySmallLocalDynamicTLS6[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 3, 6, 3
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 4, 424(3)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+19]]) mySmallLocalDynamicTLS2[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 203
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 16(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+19]]) mySmallLocalDynamicTLS2[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 3, 6, 3
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 4, 1200(3)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+21]]) .MyTLSGDVar[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 4, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+23]]) MyTLSGDVar[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 24(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+21]]) .MyTLSGDVar[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 4, 32(4)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+23]]) MyTLSGDVar[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0x0
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA              (idx: [[#NFA+3]]) .__tls_get_addr[PR]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 44
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 4, 440(3)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+25]]) mySmallLocalDynamicTLS3[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 6
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 40(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+25]]) mySmallLocalDynamicTLS3[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 3, 6, 3
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 4, 2000(3)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+27]]) mySmallLocalDynamicTLS4[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 100
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 48(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+27]]) mySmallLocalDynamicTLS4[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 3, 6, 3
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 4, 6800(3)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+29]]) mySmallLocalDynamicTLS5[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 882
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 56(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+29]]) mySmallLocalDynamicTLS5[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 3, 6, 3
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 4, 8400(3)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 3, 1191
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addi 1, 1, 48
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 0, 16(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                mtlr 0
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                blr
+
+; DIS:      Disassembly of section .data:
+; DIS:      00000000000000b0 (idx: [[#NFA+11]]) StoreLargeAccess1[DS]:
+; DIS-NEXT:       b0: 00 00 00 00
+; DIS-NEXT: 00000000000000b0:  R_POS    (idx: [[#NFA+9]]) .StoreLargeAccess1
+; DIS-NEXT:       b4: 00 00 00 00
+; DIS-NEXT:       b8: 00 00 00 00
+; DIS-NEXT: 00000000000000b8:  R_POS        (idx: [[#NFA+13]]) TOC[TC0]
+; DIS-NEXT:       bc: 00 00 00 c8
+
+; DIS:      Disassembly of section .tdata:
+; DIS:      0000000000000000 (idx: [[#NFA+31]]) mySmallLocalDynamicTLS2[TL]:
+; DIS:      0000000000005dc0 (idx: [[#NFA+33]]) MyTLSGDVar[TL]:
+; DIS:      00000000000076c0 (idx: [[#NFA+35]]) mySmallLocalDynamicTLS3[TL]:
+; DIS:      000000000000d480 (idx: [[#NFA+37]]) mySmallLocalDynamicTLS4[TL]:
+; DIS:      0000000000013240 (idx: [[#NFA+39]]) mySmallLocalDynamicTLS5[TL]:
+; DIS:      0000000000019000 (idx: [[#NFA+41]]) mySmallLocalDynamicTLS[TL]:
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-longlong.ll
deleted file mode 100644
index f9157124fb3ad9..00000000000000
--- a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-longlong.ll
+++ /dev/null
@@ -1,368 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
-; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
-; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64
-; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
-; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
-; RUN:      < %s | FileCheck %s \
-; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
-
-@ThreadLocalVarInit = thread_local(localdynamic) global i64 1, align 8
-@VarInit = local_unnamed_addr global i64 87, align 8
-@IThreadLocalVarInit = internal thread_local(localdynamic) global i64 1, align 8
-declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
-%struct.anon = type { i64 }
-@ThreadLocalStruct = thread_local(localdynamic) global %struct.anon zeroinitializer, align 1
-@d = thread_local(localdynamic) global [87 x i64] zeroinitializer, align 8
-
-define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: AddrTest1:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @d
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: AddrTest1:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C1@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @d)
-  ret ptr %0
-}
-
-define i64 @testUnaligned() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testUnaligned:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @ThreadLocalStruct
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testUnaligned:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalStruct)
-  %x = getelementptr inbounds %struct.anon, ptr %0, i32 0, i32 0
-  %1 = load i64, ptr %x, align 1
-  ret i64 %1
-}
-
-define void @storeITLInit(i64 noundef %x) {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeITLInit:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdx r6, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeITLInit:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C3@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r7)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdx r6, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
-  store i64 %x, ptr %0, align 8
-  ret void
-}
-
-define void @storeTLInit(i64 noundef %x) {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeTLInit:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdx r6, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeTLInit:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C4@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r7)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdx r6, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
-  store i64 %x, ptr %0, align 8
-  ret void
-}
-
-define i64 @loadITLInit() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
-  %1 = load i64, ptr %0, align 8
-  ret i64 %1
-}
-
-define i64 @loadITLInit2() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit2:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C5(r2) # @VarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, 0(r4)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit2:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C5@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C5@l(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, 0(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
-  %1 = load i64, ptr %0, align 8
-  %2 = load i64, ptr @VarInit, align 8
-  %add = add nsw i64 %2, %1
-  ret i64 %add
-}
-
-define i64 @loadTLInit() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C4@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
-  %1 = load i64, ptr %0, align 8
-  ret i64 %1
-}
-
-define i64 @loadTLInit2() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit2:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C5(r2) # @VarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, 0(r4)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit2:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C4@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C5@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C5@l(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, 0(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
-  %1 = load i64, ptr %0, align 8
-  %2 = load i64, ptr @VarInit, align 8
-  %add = add nsw i64 %2, %1
-  ret i64 %add
-}
-
-define void @loadStore1(i64 noundef %x) {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadStore1:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r5, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r5, r5, 9
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdx r5, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadStore1:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r5, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r5, r5, 9
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdx r5, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
-  %1 = load i64, ptr %0, align 8
-  %add = add nsw i64 %1, 9
-  store i64 %add, ptr %0, align 8
-  ret void
-}
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-short.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-short.ll
deleted file mode 100644
index 7482ca430c9a5b..00000000000000
--- a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-short.ll
+++ /dev/null
@@ -1,336 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
-; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
-; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64
-; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
-; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
-; RUN:      < %s | FileCheck %s \
-; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
-
-@ThreadLocalVarInit = thread_local(localdynamic) global i16 1, align 2
-@VarInit = local_unnamed_addr global i16 87, align 2
-@IThreadLocalVarInit = internal thread_local(localdynamic) global i16 1, align 2
-declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
-@b = thread_local(localdynamic) global [87 x i16] zeroinitializer, align 2
-
-define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: AddrTest1:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @b
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r3, 4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: AddrTest1:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C1@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r3, 4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @b)
-  %arrayidx = getelementptr inbounds [87 x i16], ptr %0, i64 0, i64 2
-  ret ptr %arrayidx
-}
-
-define void @storeITLInit(i16 noundef signext %x) {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeITLInit:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    sthx r6, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeITLInit:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C2@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r7)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    sthx r6, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @IThreadLocalVarInit)
-  store i16 %x, ptr %0, align 2
-  ret void
-}
-
-define void @storeTLInit(i16 noundef signext %x) {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeTLInit:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    sthx r6, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeTLInit:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C3@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r7)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    sthx r6, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @ThreadLocalVarInit)
-  store i16 %x, ptr %0, align 2
-  ret void
-}
-
-define signext i16 @loadITLInit() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lhax r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lhax r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @IThreadLocalVarInit)
-  %1 = load i16, ptr %0, align 2
-  ret i16 %1
-}
-
-define signext i16 @loadITLInit2() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit2:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lhzx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # @VarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lhz r4, 0(r4)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    extsh r3, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit2:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lhzx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C4@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lhz r4, 0(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    extsh r3, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @IThreadLocalVarInit)
-  %1 = load i16, ptr %0, align 2
-  %2 = load i16, ptr @VarInit, align 2
-  %add = add i16 %2, %1
-  ret i16 %add
-}
-
-define signext i16 @loadTLInit() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lhax r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lhax r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @ThreadLocalVarInit)
-  %1 = load i16, ptr %0, align 2
-  ret i16 %1
-}
-
-define signext i16 @loadTLInit2() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit2:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lhzx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # @VarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lhz r4, 0(r4)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    extsh r3, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit2:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lhzx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C4@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lhz r4, 0(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    extsh r3, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @ThreadLocalVarInit)
-  %1 = load i16, ptr %0, align 2
-  %2 = load i16, ptr @VarInit, align 2
-  %add = add i16 %2, %1
-  ret i16 %add
-}
-
-define void @loadStore1(i16 noundef signext %x) {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadStore1:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lhzx r5, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r5, r5, 9
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    sthx r5, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadStore1:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lhzx r5, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r5, r5, 9
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    sthx r5, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @IThreadLocalVarInit)
-  %1 = load i16, ptr %0, align 2
-  %add = add i16 %1, 9
-  store i16 %add, ptr %0, align 2
-  ret void
-}
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-int.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll
similarity index 67%
rename from llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-int.ll
rename to llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll
index ff397169208f46..434fda645beb88 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-int.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll
@@ -7,10 +7,18 @@
 ; RUN:      < %s | FileCheck %s \
 ; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
 
-@ThreadLocalVarInit = thread_local(localdynamic) global i32 1, align 4
-@VarInit = local_unnamed_addr global i32 87, align 4
-@IThreadLocalVarInit = internal thread_local(localdynamic) global i32 1, align 4
 declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
+@TLVIntInit = local_unnamed_addr global i32 87, align 4
+
+@TLVChar = thread_local(localdynamic) global i8 1, align 1
+@TLVShort = thread_local(localdynamic) global i8 1, align 2
+@TLVInt = thread_local(localdynamic) global i32 1, align 4
+@InternalTLVInt = internal thread_local(localdynamic) global i32 1, align 4
+@TLVLong = thread_local(localdynamic) global i64 1, align 8
+@InternalTLVLong = internal thread_local(localdynamic) global i64 1, align 8
+@TLVFloat = thread_local(localdynamic) global float 1.000000e+00, align 4
+@InternalTLVDouble = internal thread_local(localdynamic) global double 1.000000e+00, align 8
+
 %struct.anon = type { i32 }
 @ThreadLocalStruct = thread_local(localdynamic) global %struct.anon zeroinitializer, align 1
 @a = thread_local(localdynamic) global [87 x i32] zeroinitializer, align 4
@@ -90,8 +98,8 @@ entry:
   ret i32 %1
 }
 
-define void @storeITLInit(i32 noundef signext %x) {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeITLInit:
+define void @testChar(i8 noundef signext %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testChar:
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
@@ -99,14 +107,14 @@ define void @storeITLInit(i32 noundef signext %x) {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stwx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @TLVChar
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stbx r6, r3, r4
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
 ;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeITLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testChar:
 ; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
@@ -117,19 +125,19 @@ define void @storeITLInit(i32 noundef signext %x) {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r7)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stwx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stbx r6, r3, r4
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
 entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
-  store i32 %x, ptr %0, align 4
+  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @TLVChar)
+  store i8 %x, ptr %0, align 1
   ret void
 }
 
-define void @storeTLInit(i32 noundef signext %x) {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeTLInit:
+define void @testShort(i16 noundef signext %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testShort:
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
@@ -137,14 +145,14 @@ define void @storeTLInit(i32 noundef signext %x) {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stwx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @TLVShort
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    sthx r6, r3, r4
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
 ;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeTLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testShort:
 ; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
@@ -155,64 +163,64 @@ define void @storeTLInit(i32 noundef signext %x) {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r7)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stwx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    sthx r6, r3, r4
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
 entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
-  store i32 %x, ptr %0, align 4
+  %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @TLVShort)
+  store i16 %x, ptr %0, align 2
   ret void
 }
 
-define signext i32 @loadITLInit() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit:
+define signext i32 @testInt1() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testInt1:
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C5(r2) # target-flags(ppc-tlsld) @TLVInt
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwax r3, r3, r4
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
 ;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testInt1:
 ; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C5@u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C5@l(r6)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwax r3, r3, r4
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
 entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLVInt)
   %1 = load i32, ptr %0, align 4
   ret i32 %1
 }
 
-define signext i32 @loadITLInit2() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadITLInit2:
+define signext i32 @testInt2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testInt2:
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C6(r2) # target-flags(ppc-tlsld) @InternalTLVInt
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwzx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C5(r2) # @VarInit
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C7(r2) # @TLVIntInit
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwz r4, 0(r4)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    extsw r3, r3
@@ -221,19 +229,19 @@ define signext i32 @loadITLInit2() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
 ;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadITLInit2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testInt2:
 ; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C6@u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C6@l(r6)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwzx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C5@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C5@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C7@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C7@l(r4)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwz r4, 0(r4)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    extsw r3, r3
@@ -242,134 +250,176 @@ define signext i32 @loadITLInit2() {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
 entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @InternalTLVInt)
   %1 = load i32, ptr %0, align 4
-  %2 = load i32, ptr @VarInit, align 4
+  %2 = load i32, ptr @TLVIntInit, align 4
   %add = add nsw i32 %2, %1
   ret i32 %add
 }
 
-define signext i32 @loadTLInit() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit:
+define signext i64 @testLong1() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testLong1:
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C8(r2) # target-flags(ppc-tlsld) @TLVLong
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r3, r3, r4
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
 ;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit:
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testLong1:
 ; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C4@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C8@u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C8@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r3, r3, r4
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
 entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
-  %1 = load i32, ptr %0, align 4
-  ret i32 %1
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLVLong)
+  %1 = load i64, ptr %0, align 4
+  ret i64 %1
 }
 
-define signext i32 @loadTLInit2() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadTLInit2:
+define void @testLong2(i64 noundef signext %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testLong2:
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @ThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwzx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C5(r2) # @VarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwz r4, 0(r4)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    extsw r3, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C9(r2) # target-flags(ppc-tlsld) @InternalTLVLong
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r5, r5, 9
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdx r5, r3, r4
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
 ;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadTLInit2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testLong2:
 ; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C4@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C9@u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwzx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C5@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C5@l(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwz r4, 0(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    extsw r3, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C9@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r5, r5, 9
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdx r5, r3, r4
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
 entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
-  %1 = load i32, ptr %0, align 4
-  %2 = load i32, ptr @VarInit, align 4
-  %add = add nsw i32 %2, %1
-  ret i32 %add
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @InternalTLVLong)
+  %1 = load i64, ptr %0, align 8
+  %add = add nsw i64 %1, 9
+  store i64 %add, ptr %0, align 8
+  ret void
 }
 
-define void @loadStore1(i32 noundef signext %x) {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadStore1:
+define void @testFloat(float noundef %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testFloat:
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    vspltisw v2, 1
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    vspltisw v3, 8
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @IThreadLocalVarInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwzx r5, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r5, r5, 9
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stwx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xvcvsxwdp vs0, vs34
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C10(r2) # target-flags(ppc-tlsld) @TLVFloat
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfsx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    fadds f0, f1, f0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xvcvsxwdp vs1, vs35
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    fadds f0, f0, f1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfsx f0, r3, r4
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
 ;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadStore1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testFloat:
 ; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    vspltisw v2, 1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C10@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    vspltisw v3, 8
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xvcvsxwdp vs0, vs34
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C10@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfsx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    fadds f0, f1, f0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xvcvsxwdp vs1, vs35
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    fadds f0, f0, f1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfsx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLVFloat)
+  %1 = load float, ptr %0, align 4
+  %inc = fadd float %1, 1.000000e+00
+  %add = fadd float %inc, 8.000000e+00
+  store float %add, ptr %0, align 4
+  ret void
+}
+
+define void @testDouble(double noundef %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testDouble:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C11(r2) # target-flags(ppc-tlsld) @InternalTLVDouble
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfdx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testDouble:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C11@u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwzx r5, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r5, r5, 9
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stwx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C11@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfdx f1, r3, r4
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
 entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
-  %1 = load i32, ptr %0, align 4
-  %add = add nsw i32 %1, 9
-  store i32 %add, ptr %0, align 4
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @InternalTLVDouble)
+  store double %x, ptr %0, align 8
   ret void
 }

>From b297e872e283bb5dd0cb450b9f8133f181f7be1a Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Wed, 20 Mar 2024 04:38:05 -0400
Subject: [PATCH 3/4] Simplify to two cases: one to show the IR change, and the
 other to show object.

---
 ...aix-small-local-dynamic-tls-largeaccess.ll |  273 ++---
 ...ix-small-local-dynamic-tls-largeaccess2.ll |  219 ----
 .../aix-small-local-dynamic-tls-types.ll      |  351 +-----
 .../CodeGen/PowerPC/aix-tls-ld-ldst-O0.ll     | 1031 -----------------
 4 files changed, 111 insertions(+), 1763 deletions(-)
 delete mode 100644 llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess2.ll
 delete mode 100644 llvm/test/CodeGen/PowerPC/aix-tls-ld-ldst-O0.ll

diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll
index 643ad01a9ae3f2..4c866dbe74819a 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll
@@ -13,102 +13,40 @@
 ; RUN:      --code-model=large -filetype=obj -o %t.o < %s
 ; RUN: llvm-objdump -D -r --symbol-description %t.o | FileCheck -D#NFA=2 --check-prefix=DIS %s
 
-@mySmallLocalDynamicTLSv1 = thread_local(localdynamic) global [8187 x i32] zeroinitializer, align 4
-@mySmallLocalDynamicTLS2 = thread_local(localdynamic) global [4000 x i32] zeroinitializer, align 4
-@mySmallLocalDynamicTLS3 = thread_local(localdynamic) global [4000 x i32] zeroinitializer, align 4
-@mySmallLocalDynamicTLS4 = thread_local(localdynamic) global [4000 x i32] zeroinitializer, align 4
-@mySmallLocalDynamicTLS5 = thread_local(localdynamic) global [4000 x i32] zeroinitializer, align 4
-@mySmallLocalDynamicTLSv2 = thread_local(localdynamic) global [9000 x i32] zeroinitializer, align 4
+@ElementIntTLSv1 = thread_local(localdynamic) global [8187 x i32] zeroinitializer, align 4  ; Within 32K
+@ElementIntTLS2 = thread_local(localdynamic) global [4000 x i32] zeroinitializer, align 4
+@ElementIntTLS3 = thread_local(localdynamic) global [4000 x i32] zeroinitializer, align 4
+@ElementIntTLS4 = thread_local(localdynamic) global [4000 x i32] zeroinitializer, align 4
+@ElementIntTLS5 = thread_local(localdynamic) global [4000 x i32] zeroinitializer, align 4
+@ElementIntTLSv2 = thread_local(localdynamic) global [9000 x i32] zeroinitializer, align 4  ; Beyond 32K
+
+@ElementLongTLS6 = external thread_local(localdynamic) global [60 x i64], align 8
+@ElementLongTLS2 = thread_local(localdynamic) global [3000 x i64] zeroinitializer, align 8  ; Within 32K
+@MyTLSGDVar = thread_local global [800 x i64] zeroinitializer, align 8
+@ElementLongTLS3 = thread_local(localdynamic) global [3000 x i64] zeroinitializer, align 8
+@ElementLongTLS4 = thread_local(localdynamic) global [3000 x i64] zeroinitializer, align 8
+@ElementLongTLS5 = thread_local(localdynamic) global [3000 x i64] zeroinitializer, align 8
+@ElementLongTLS = thread_local(localdynamic) local_unnamed_addr global [7800 x i64] zeroinitializer, align 8  ; Beyond 32K
+
 declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
 
 ; All accesses use a "faster" local-dynamic sequence directly off the thread pointer.
-define signext i32 @StoreArrays1() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: StoreArrays1:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r6, L..C1(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS2
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r7, L..C2(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r8, L..C3(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r9, L..C4(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS5
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r5, L..C5(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLSv1
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 1
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r6, r3, r6
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r7, r3, r7
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r8, r3, r8
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r9, r3, r9
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stwux r4, r3, r5
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 24(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 2
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 320(r6)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 324(r7)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 88
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 328(r8)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 332(r9)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 102
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: StoreArrays1:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C2@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r8, L..C3@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r9, L..C4@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r7, L..C2@l(r7)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r8, L..C3@l(r8)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r9, L..C4@l(r9)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r5, L..C1@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C5@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 1
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r6, L..C5@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r7, r3, r7
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r8, r3, r8
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r9, r3, r9
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r6, r3, r6
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stwux r4, r3, r5
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, 24(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 2
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 320(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 324(r7)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 88
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, 328(r8)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 332(r9)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 102
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+define signext i32 @test1() {
 entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLSv1)
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLSv1)
   store i32 1, ptr %0, align 4
   %arrayidx1 = getelementptr inbounds [8187 x i32], ptr %0, i64 0, i64 6
   store i32 4, ptr %arrayidx1, align 4
-  %1 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLS2)
+  %1 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLS2)
   %arrayidx2 = getelementptr inbounds [4000 x i32], ptr %1, i64 0, i64 80
   store i32 2, ptr %arrayidx2, align 4
-  %2 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLS3)
+  %2 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLS3)
   %arrayidx3 = getelementptr inbounds [4000 x i32], ptr %2, i64 0, i64 81
   store i32 3, ptr %arrayidx3, align 4
-  %3 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLS4)
+  %3 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLS4)
   %arrayidx4 = getelementptr inbounds [4000 x i32], ptr %3, i64 0, i64 82
   store i32 4, ptr %arrayidx4, align 4
-  %4 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLS5)
+  %4 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLS5)
   %arrayidx5 = getelementptr inbounds [4000 x i32], ptr %4, i64 0, i64 83
   store i32 88, ptr %arrayidx5, align 4
   %5 = load i32, ptr %0, align 4
@@ -125,93 +63,22 @@ entry:
 }
 
 ; Example of one access using the regular local-dynamic access from the TOC.
-define signext i32 @StoreArrays2() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: StoreArrays2:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r6, L..C1(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS2
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r7, L..C2(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r8, L..C3(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r9, L..C4(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS5
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r5, L..C6(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLSv2
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 1
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r6, r3, r6
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r7, r3, r7
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r8, r3, r8
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r9, r3, r9
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stwux r4, r3, r5
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 24(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 2
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 320(r6)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 324(r7)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 88
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 328(r8)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 332(r9)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 102
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: StoreArrays2:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C6@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C2@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r8, L..C3@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r9, L..C4@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r7, L..C2@l(r7)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r8, L..C3@l(r8)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r9, L..C4@l(r9)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r5, L..C6@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C5@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 1
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r6, L..C5@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r7, r3, r7
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r8, r3, r8
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r9, r3, r9
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r6, r3, r6
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stwux r4, r3, r5
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, 24(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 2
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 320(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 324(r7)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 88
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, 328(r8)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 332(r9)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 102
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+define signext i32 @test2() {
 entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLSv2)
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLSv2)
   store i32 1, ptr %0, align 4
   %arrayidx1 = getelementptr inbounds [9000 x i32], ptr %0, i64 0, i64 6
   store i32 4, ptr %arrayidx1, align 4
-  %1 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLS2)
+  %1 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLS2)
   %arrayidx2 = getelementptr inbounds [4000 x i32], ptr %1, i64 0, i64 80
   store i32 2, ptr %arrayidx2, align 4
-  %2 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLS3)
+  %2 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLS3)
   %arrayidx3 = getelementptr inbounds [4000 x i32], ptr %2, i64 0, i64 81
   store i32 3, ptr %arrayidx3, align 4
-  %3 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLS4)
+  %3 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLS4)
   %arrayidx4 = getelementptr inbounds [4000 x i32], ptr %3, i64 0, i64 82
   store i32 4, ptr %arrayidx4, align 4
-  %4 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @mySmallLocalDynamicTLS5)
+  %4 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLS5)
   %arrayidx5 = getelementptr inbounds [4000 x i32], ptr %4, i64 0, i64 83
   store i32 88, ptr %arrayidx5, align 4
   %5 = load i32, ptr %0, align 4
@@ -227,6 +94,36 @@ entry:
   ret i32 %add15
 }
 
+; Local-dynamic accesses use a "faster" local-dynamic sequence directly off the thread pointer; MyTLSGDVar is general-dynamic.
+define i64 @test3() {
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ElementLongTLS6)
+  %arrayidx = getelementptr inbounds [60 x i64], ptr %0, i64 0, i64 53
+  store i64 212, ptr %arrayidx, align 8
+  %1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ElementLongTLS2)
+  %arrayidx1 = getelementptr inbounds [3000 x i64], ptr %1, i64 0, i64 150
+  store i64 203, ptr %arrayidx1, align 8
+  %2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @MyTLSGDVar)
+  %arrayidx2 = getelementptr inbounds [800 x i64], ptr %2, i64 0, i64 55
+  store i64 44, ptr %arrayidx2, align 8
+  %3 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ElementLongTLS3)
+  %arrayidx3 = getelementptr inbounds [3000 x i64], ptr %3, i64 0, i64 250
+  store i64 6, ptr %arrayidx3, align 8
+  %4 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ElementLongTLS4)
+  %arrayidx4 = getelementptr inbounds [3000 x i64], ptr %4, i64 0, i64 850
+  store i64 100, ptr %arrayidx4, align 8
+  %5 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ElementLongTLS5)
+  %arrayidx5 = getelementptr inbounds [3000 x i64], ptr %5, i64 0, i64 1050
+  store i64 882, ptr %arrayidx5, align 8
+  %6 = load i64, ptr %arrayidx1, align 8
+  %7 = load i64, ptr %arrayidx3, align 8
+  %8 = load i64, ptr %arrayidx4, align 8
+  %add = add i64 %6, 882
+  %add9 = add i64 %add, %7
+  %add11 = add i64 %add9, %8
+  ret i64 %add11
+}
+
 ; DIS:      file format aix5coff64-rs6000
 ; DIS:      Disassembly of section .text:
 ; DIS:      0000000000000000 (idx: [[#NFA+5]]) .StoreArrays1:
@@ -235,31 +132,31 @@ entry:
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+15]]) _$TLSML[TC]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 6, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+17]]) mySmallLocalDynamicTLSv1[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+17]]) ElementIntTLSv1[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 0, 64(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 7, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+19]]) mySmallLocalDynamicTLS3[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+19]]) ElementIntTLS3[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 0(3)
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+15]]) _$TLSML[TC]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 8, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+21]]) mySmallLocalDynamicTLS4[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+21]]) ElementIntTLS4[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 9, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+23]]) mySmallLocalDynamicTLS5[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+23]]) ElementIntTLS5[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 7, 16(7)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+19]]) mySmallLocalDynamicTLS3[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+19]]) ElementIntTLS3[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 8, 24(8)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+21]]) mySmallLocalDynamicTLS4[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+21]]) ElementIntTLS4[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 9, 32(9)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+23]]) mySmallLocalDynamicTLS5[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+23]]) ElementIntTLS5[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0x0
 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA              (idx: [[#NFA+1]]) .__tls_get_mod[PR]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 5, 8(6)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+17]]) mySmallLocalDynamicTLSv1[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+17]]) ElementIntTLSv1[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 6, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+25]]) mySmallLocalDynamicTLS2[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+25]]) ElementIntTLS2[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 1
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 6, 40(6)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+25]]) mySmallLocalDynamicTLS2[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+25]]) ElementIntTLS2[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 7, 3, 7
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 8, 3, 8
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 9, 3, 9
@@ -286,31 +183,31 @@ entry:
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+15]]) _$TLSML[TC]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 6, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+27]]) mySmallLocalDynamicTLSv2[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+27]]) ElementIntTLSv2[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 0, 64(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 7, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+19]]) mySmallLocalDynamicTLS3[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+19]]) ElementIntTLS3[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 0(3)
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+15]]) _$TLSML[TC]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 8, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+21]]) mySmallLocalDynamicTLS4[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+21]]) ElementIntTLS4[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 9, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+23]]) mySmallLocalDynamicTLS5[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+23]]) ElementIntTLS5[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 7, 16(7)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+19]]) mySmallLocalDynamicTLS3[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+19]]) ElementIntTLS3[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 8, 24(8)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+21]]) mySmallLocalDynamicTLS4[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+21]]) ElementIntTLS4[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 9, 32(9)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+23]]) mySmallLocalDynamicTLS5[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+23]]) ElementIntTLS5[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0x0
 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA              (idx: [[#NFA+1]]) .__tls_get_mod[PR]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 5, 48(6)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+27]]) mySmallLocalDynamicTLSv2[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+27]]) ElementIntTLSv2[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 6, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU	        (idx: [[#NFA+25]]) mySmallLocalDynamicTLS2[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU	        (idx: [[#NFA+25]]) ElementIntTLS2[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 1
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 6, 40(6)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+25]]) mySmallLocalDynamicTLS2[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+25]]) ElementIntTLS2[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 7, 3, 7
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 8, 3, 8
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 9, 3, 9
@@ -349,15 +246,15 @@ entry:
 ; DIS-NEXT: 0000000000000140:  R_POS        (idx: [[#NFA+13]]) TOC[TC0]
 ; DIS-NEXT:      144: 00 00 01 50
 
-; DIS:      0000000000000180 (idx: [[#NFA+27]]) mySmallLocalDynamicTLSv2[TE]:
+; DIS:      0000000000000180 (idx: [[#NFA+27]]) ElementIntTLSv2[TE]:
 ; DIS-NEXT:      180: 00 00 00 00
-; DIS-NEXT: 0000000000000180:  R_TLS_LD     (idx: [[#NFA+39]]) mySmallLocalDynamicTLSv2[TL]
+; DIS-NEXT: 0000000000000180:  R_TLS_LD     (idx: [[#NFA+39]]) ElementIntTLSv2[TL]
 ; DIS-NEXT:      184: 00 01 79 ec
 
 ; DIS:      Disassembly of section .tdata:
-; DIS:      0000000000000000 (idx: [[#NFA+29]]) mySmallLocalDynamicTLSv1[TL]:
-; DIS:      0000000000007fec (idx: [[#NFA+31]]) mySmallLocalDynamicTLS2[TL]:
-; DIS:      000000000000be6c (idx: [[#NFA+33]]) mySmallLocalDynamicTLS3[TL]:
-; DIS:      000000000000fcec (idx: [[#NFA+35]]) mySmallLocalDynamicTLS4[TL]:
-; DIS:      0000000000013b6c (idx: [[#NFA+37]]) mySmallLocalDynamicTLS5[TL]:
-; DIS:      00000000000179ec (idx: [[#NFA+39]]) mySmallLocalDynamicTLSv2[TL]:
+; DIS:      0000000000000000 (idx: [[#NFA+29]]) ElementIntTLSv1[TL]:
+; DIS:      0000000000007fec (idx: [[#NFA+31]]) ElementIntTLS2[TL]:
+; DIS:      000000000000be6c (idx: [[#NFA+33]]) ElementIntTLS3[TL]:
+; DIS:      000000000000fcec (idx: [[#NFA+35]]) ElementIntTLS4[TL]:
+; DIS:      0000000000013b6c (idx: [[#NFA+37]]) ElementIntTLS5[TL]:
+; DIS:      00000000000179ec (idx: [[#NFA+39]]) ElementIntTLSv2[TL]:
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess2.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess2.ll
deleted file mode 100644
index b88a73dd626aab..00000000000000
--- a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess2.ll
+++ /dev/null
@@ -1,219 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
-; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
-; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64
-; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
-; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
-; RUN:      < %s | FileCheck %s \
-; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
-
-; Test disassembly of object.
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 \
-; RUN:      -mtriple powerpc64-ibm-aix-xcoff -xcoff-traceback-table=false \
-; RUN:      --code-model=large -filetype=obj -o %t.o < %s
-; RUN: llvm-objdump -D -r --symbol-description %t.o | FileCheck -D#NFA=2 --check-prefix=DIS %s
-
-@mySmallLocalDynamicTLS6 = external thread_local(localdynamic) global [60 x i64], align 8
-@mySmallLocalDynamicTLS2 = thread_local(localdynamic) global [3000 x i64] zeroinitializer, align 8
-@MyTLSGDVar = thread_local global [800 x i64] zeroinitializer, align 8
-@mySmallLocalDynamicTLS3 = thread_local(localdynamic) global [3000 x i64] zeroinitializer, align 8
-@mySmallLocalDynamicTLS4 = thread_local(localdynamic) global [3000 x i64] zeroinitializer, align 8
-@mySmallLocalDynamicTLS5 = thread_local(localdynamic) global [3000 x i64] zeroinitializer, align 8
-@mySmallLocalDynamicTLS = thread_local(localdynamic) local_unnamed_addr global [7800 x i64] zeroinitializer, align 8
-declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
-
-; All accesses use a "faster" local-dynamic sequence directly off the thread pointer.
-define i64 @StoreLargeAccess1() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: StoreLargeAccess1:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS6
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 212
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r4, r6, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r3, 424(r4)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C2(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS2
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 203
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r6, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, 1200(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C3(r2) # target-flags(ppc-tlsgdm) @MyTLSGDVar
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsgd) @MyTLSGDVar
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 44
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, 440(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C5(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 6
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r6, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, 2000(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C6(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 100
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r4, r6, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r3, 6800(r4)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C7(r2) # target-flags(ppc-tlsld) @mySmallLocalDynamicTLS5
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 882
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r6, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, 8400(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 1191
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: StoreLargeAccess1:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C1@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 212
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1@l(r7)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r6, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 424(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C2@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 203
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C2@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r6, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 1200(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C3@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C4@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C3@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 44
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 440(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C5@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 6
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C5@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r6, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 2000(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C6@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 100
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C6@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r6, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 6800(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C7@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 882
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C7@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r6, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 8400(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 1191
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalDynamicTLS6)
-  %arrayidx = getelementptr inbounds [60 x i64], ptr %0, i64 0, i64 53
-  store i64 212, ptr %arrayidx, align 8
-  %1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalDynamicTLS2)
-  %arrayidx1 = getelementptr inbounds [3000 x i64], ptr %1, i64 0, i64 150
-  store i64 203, ptr %arrayidx1, align 8
-  %2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @MyTLSGDVar)
-  %arrayidx2 = getelementptr inbounds [800 x i64], ptr %2, i64 0, i64 55
-  store i64 44, ptr %arrayidx2, align 8
-  %3 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalDynamicTLS3)
-  %arrayidx3 = getelementptr inbounds [3000 x i64], ptr %3, i64 0, i64 250
-  store i64 6, ptr %arrayidx3, align 8
-  %4 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalDynamicTLS4)
-  %arrayidx4 = getelementptr inbounds [3000 x i64], ptr %4, i64 0, i64 850
-  store i64 100, ptr %arrayidx4, align 8
-  %5 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalDynamicTLS5)
-  %arrayidx5 = getelementptr inbounds [3000 x i64], ptr %5, i64 0, i64 1050
-  store i64 882, ptr %arrayidx5, align 8
-  %6 = load i64, ptr %arrayidx1, align 8
-  %7 = load i64, ptr %arrayidx3, align 8
-  %8 = load i64, ptr %arrayidx4, align 8
-  %add = add i64 %6, 882
-  %add9 = add i64 %add, %7
-  %add11 = add i64 %add9, %8
-  ret i64 %add11
-}
-
-; DIS:      0000000000000000 (idx: [[#NFA+9]]) .StoreLargeAccess1:
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                mflr 0
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stdu 1, -48(1)
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+15]]) _$TLSML[TC]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 0, 64(1)
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 7, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+17]]) mySmallLocalDynamicTLS6[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 0(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+15]]) _$TLSML[TC]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0x0
-; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA              (idx: [[#NFA+1]]) .__tls_get_mod[PR]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 212
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                mr 6, 3
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 8(7)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+17]]) mySmallLocalDynamicTLS6[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 3, 6, 3
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 4, 424(3)
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+19]]) mySmallLocalDynamicTLS2[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 203
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 16(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+19]]) mySmallLocalDynamicTLS2[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 3, 6, 3
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 4, 1200(3)
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+21]]) .MyTLSGDVar[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 4, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+23]]) MyTLSGDVar[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 24(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+21]]) .MyTLSGDVar[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 4, 32(4)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+23]]) MyTLSGDVar[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0x0
-; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA              (idx: [[#NFA+3]]) .__tls_get_addr[PR]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 44
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 4, 440(3)
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+25]]) mySmallLocalDynamicTLS3[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 6
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 40(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+25]]) mySmallLocalDynamicTLS3[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 3, 6, 3
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 4, 2000(3)
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+27]]) mySmallLocalDynamicTLS4[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 100
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 48(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+27]]) mySmallLocalDynamicTLS4[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 3, 6, 3
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 4, 6800(3)
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+29]]) mySmallLocalDynamicTLS5[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 882
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 56(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+29]]) mySmallLocalDynamicTLS5[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 3, 6, 3
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 4, 8400(3)
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 3, 1191
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addi 1, 1, 48
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 0, 16(1)
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                mtlr 0
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                blr
-
-; DIS:      Disassembly of section .data:
-; DIS:      00000000000000b0 (idx: [[#NFA+11]]) StoreLargeAccess1[DS]:
-; DIS-NEXT:       b0: 00 00 00 00
-; DIS-NEXT: 00000000000000b0:  R_POS    (idx: [[#NFA+9]]) .StoreLargeAccess1
-; DIS-NEXT:       b4: 00 00 00 00
-; DIS-NEXT:       b8: 00 00 00 00
-; DIS-NEXT: 00000000000000b8:  R_POS        (idx: [[#NFA+13]]) TOC[TC0]
-; DIS-NEXT:       bc: 00 00 00 c8
-
-; DIS:      Disassembly of section .tdata:
-; DIS:      0000000000000000 (idx: [[#NFA+31]]) mySmallLocalDynamicTLS2[TL]:
-; DIS:      0000000000005dc0 (idx: [[#NFA+33]]) MyTLSGDVar[TL]:
-; DIS:      00000000000076c0 (idx: [[#NFA+35]]) mySmallLocalDynamicTLS3[TL]:
-; DIS:      000000000000d480 (idx: [[#NFA+37]]) mySmallLocalDynamicTLS4[TL]:
-; DIS:      0000000000013240 (idx: [[#NFA+39]]) mySmallLocalDynamicTLS5[TL]:
-; DIS:      0000000000019000 (idx: [[#NFA+41]]) mySmallLocalDynamicTLS[TL]:
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll
index 434fda645beb88..c6defd8637fd0e 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll
@@ -24,37 +24,6 @@ declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
 @a = thread_local(localdynamic) global [87 x i32] zeroinitializer, align 4
 
 define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: AddrTest1:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @a
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r3, 12
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: AddrTest1:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C1@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r3, 12
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @a)
   %arrayidx = getelementptr inbounds [87 x i32], ptr %0, i64 0, i64 3
@@ -62,35 +31,6 @@ entry:
 }
 
 define signext i32 @testUnaligned() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testUnaligned:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @ThreadLocalStruct
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwax r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testUnaligned:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwax r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
 entry:
   %0 = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalStruct)
   %x = getelementptr inbounds %struct.anon, ptr %0, i32 0, i32 0
@@ -99,37 +39,6 @@ entry:
 }
 
 define void @testChar(i8 noundef signext %x) {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testChar:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @TLVChar
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stbx r6, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testChar:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C3@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r7)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stbx r6, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
 entry:
   %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @TLVChar)
   store i8 %x, ptr %0, align 1
@@ -137,37 +46,6 @@ entry:
 }
 
 define void @testShort(i16 noundef signext %x) {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testShort:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @TLVShort
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    sthx r6, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testShort:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C4@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r7)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    sthx r6, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
 entry:
   %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @TLVShort)
   store i16 %x, ptr %0, align 2
@@ -175,35 +53,6 @@ entry:
 }
 
 define signext i32 @testInt1() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testInt1:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C5(r2) # target-flags(ppc-tlsld) @TLVInt
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwax r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testInt1:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C5@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C5@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwax r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLVInt)
   %1 = load i32, ptr %0, align 4
@@ -211,44 +60,6 @@ entry:
 }
 
 define signext i32 @testInt2() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testInt2:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C6(r2) # target-flags(ppc-tlsld) @InternalTLVInt
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwzx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C7(r2) # @TLVIntInit
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwz r4, 0(r4)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    extsw r3, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testInt2:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C6@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C6@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwzx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C7@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C7@l(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwz r4, 0(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    extsw r3, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @InternalTLVInt)
   %1 = load i32, ptr %0, align 4
@@ -258,35 +69,6 @@ entry:
 }
 
 define signext i64 @testLong1() {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testLong1:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C8(r2) # target-flags(ppc-tlsld) @TLVLong
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testLong1:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C8@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C8@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
 entry:
   %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLVLong)
   %1 = load i64, ptr %0, align 4
@@ -294,39 +76,6 @@ entry:
 }
 
 define void @testLong2(i64 noundef signext %x) {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testLong2:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C9(r2) # target-flags(ppc-tlsld) @InternalTLVLong
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r5, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r5, r5, 9
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdx r5, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testLong2:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C9@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C9@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r5, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r5, r5, 9
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdx r5, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
 entry:
   %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @InternalTLVLong)
   %1 = load i64, ptr %0, align 8
@@ -335,50 +84,15 @@ entry:
   ret void
 }
 
-define void @testFloat(float noundef %x) {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testFloat:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    vspltisw v2, 1
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    vspltisw v3, 8
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xvcvsxwdp vs0, vs34
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C10(r2) # target-flags(ppc-tlsld) @TLVFloat
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfsx f1, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    fadds f0, f1, f0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xvcvsxwdp vs1, vs35
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    fadds f0, f0, f1
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfsx f0, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testFloat:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    vspltisw v2, 1
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C10@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    vspltisw v3, 8
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xvcvsxwdp vs0, vs34
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C10@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfsx f1, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    fadds f0, f1, f0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xvcvsxwdp vs1, vs35
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    fadds f0, f0, f1
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfsx f0, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+define i32 @testLong3() {
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLVLong)
+  %1 = load i64, ptr %0, align 8
+  %conv = trunc i64 %1 to i32
+  ret i32 %conv
+}
+
+define void @testFloat1(float noundef %x) {
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLVFloat)
   %1 = load float, ptr %0, align 4
@@ -388,38 +102,25 @@ entry:
   ret void
 }
 
-define void @testDouble(double noundef %x) {
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testDouble:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C11(r2) # target-flags(ppc-tlsld) @InternalTLVDouble
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfdx f1, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testDouble:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C11@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C11@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfdx f1, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+define i32 @testFloat2() {
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLVFloat)
+  %1 = load float, ptr %0, align 4
+  %conv = fptosi float %1 to i32
+  ret i32 %conv
+}
+
+define void @testDouble1(double noundef %x) {
 entry:
   %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @InternalTLVDouble)
   store double %x, ptr %0, align 8
   ret void
 }
+
+define i32 @testDouble2() {
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @InternalTLVDouble)
+  %1 = load double, ptr %0, align 8
+  %conv = fptosi double %1 to i32
+  ret i32 %conv
+}
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-ld-ldst-O0.ll b/llvm/test/CodeGen/PowerPC/aix-tls-ld-ldst-O0.ll
deleted file mode 100644
index 53aa47072de810..00000000000000
--- a/llvm/test/CodeGen/PowerPC/aix-tls-ld-ldst-O0.ll
+++ /dev/null
@@ -1,1031 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc  -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
-; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s \
-; RUN:      --check-prefix=SMALL64-O0
-; RUN: llc  -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
-; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \
-; RUN:      | FileCheck %s --check-prefix=LARGE64-O0
-; RUN: llc  -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
-; RUN:      -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s \
-; RUN:      --check-prefix=SMALL32-O0
-; RUN: llc  -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
-; RUN:      -mtriple powerpc-ibm-aix-xcoff --code-model=large < %s \
-; RUN:      | FileCheck %s --check-prefix=LARGE32-O0
-; RUN: llc  -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
-; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
-; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64
-; RUN: llc  -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
-; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
-; RUN:      < %s | FileCheck %s \
-; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
-
-@TLInt = internal thread_local(localdynamic) global i32 0, align 4
-@TLLongLong = internal thread_local(localdynamic) global i64 0, align 8
-@TLDouble = internal thread_local(localdynamic) global double 0.000000e+00, align 8
-@TLFloat = internal thread_local(localdynamic) global float 0.000000e+00, align 4
-declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
-
-define void @storeInt(i32 noundef %x) {
-; SMALL64-O0-LABEL: storeInt:
-; SMALL64-O0:       # %bb.0: # %entry
-; SMALL64-O0-NEXT:    mflr r0
-; SMALL64-O0-NEXT:    stdu r1, -64(r1)
-; SMALL64-O0-NEXT:    std r0, 80(r1)
-; SMALL64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
-; SMALL64-O0-NEXT:    stw r3, 60(r1) # 4-byte Folded Spill
-; SMALL64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL64-O0-NEXT:    mr r4, r3
-; SMALL64-O0-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
-; SMALL64-O0-NEXT:    ld r5, L..C1(r2) # target-flags(ppc-tlsld) @TLInt
-; SMALL64-O0-NEXT:    add r4, r4, r5
-; SMALL64-O0-NEXT:    stw r3, 0(r4)
-; SMALL64-O0-NEXT:    addi r1, r1, 64
-; SMALL64-O0-NEXT:    ld r0, 16(r1)
-; SMALL64-O0-NEXT:    mtlr r0
-; SMALL64-O0-NEXT:    blr
-;
-; LARGE64-O0-LABEL: storeInt:
-; LARGE64-O0:       # %bb.0: # %entry
-; LARGE64-O0-NEXT:    mflr r0
-; LARGE64-O0-NEXT:    stdu r1, -64(r1)
-; LARGE64-O0-NEXT:    std r0, 80(r1)
-; LARGE64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
-; LARGE64-O0-NEXT:    stw r3, 60(r1) # 4-byte Folded Spill
-; LARGE64-O0-NEXT:    addis r3, L..C0@u(r2)
-; LARGE64-O0-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
-; LARGE64-O0-NEXT:    addis r3, L..C1@u(r2)
-; LARGE64-O0-NEXT:    ld r3, L..C1@l(r3)
-; LARGE64-O0-NEXT:    bla .__tls_get_mod[PR]
-; LARGE64-O0-NEXT:    ld r5, 48(r1) # 8-byte Folded Reload
-; LARGE64-O0-NEXT:    mr r4, r3
-; LARGE64-O0-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
-; LARGE64-O0-NEXT:    ld r5, L..C0@l(r5)
-; LARGE64-O0-NEXT:    add r4, r4, r5
-; LARGE64-O0-NEXT:    stw r3, 0(r4)
-; LARGE64-O0-NEXT:    addi r1, r1, 64
-; LARGE64-O0-NEXT:    ld r0, 16(r1)
-; LARGE64-O0-NEXT:    mtlr r0
-; LARGE64-O0-NEXT:    blr
-;
-; SMALL32-O0-LABEL: storeInt:
-; SMALL32-O0:       # %bb.0: # %entry
-; SMALL32-O0-NEXT:    mflr r0
-; SMALL32-O0-NEXT:    stwu r1, -32(r1)
-; SMALL32-O0-NEXT:    stw r0, 40(r1)
-; SMALL32-O0-NEXT:    stw r3, 28(r1) # 4-byte Folded Spill
-; SMALL32-O0-NEXT:    lwz r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL32-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL32-O0-NEXT:    mr r4, r3
-; SMALL32-O0-NEXT:    lwz r3, 28(r1) # 4-byte Folded Reload
-; SMALL32-O0-NEXT:    lwz r5, L..C1(r2) # target-flags(ppc-tlsld) @TLInt
-; SMALL32-O0-NEXT:    stwx r3, r4, r5
-; SMALL32-O0-NEXT:    addi r1, r1, 32
-; SMALL32-O0-NEXT:    lwz r0, 8(r1)
-; SMALL32-O0-NEXT:    mtlr r0
-; SMALL32-O0-NEXT:    blr
-;
-; LARGE32-O0-LABEL: storeInt:
-; LARGE32-O0:       # %bb.0: # %entry
-; LARGE32-O0-NEXT:    mflr r0
-; LARGE32-O0-NEXT:    stwu r1, -32(r1)
-; LARGE32-O0-NEXT:    stw r0, 40(r1)
-; LARGE32-O0-NEXT:    stw r3, 28(r1) # 4-byte Folded Spill
-; LARGE32-O0-NEXT:    addis r3, L..C0@u(r2)
-; LARGE32-O0-NEXT:    stw r3, 24(r1) # 4-byte Folded Spill
-; LARGE32-O0-NEXT:    addis r3, L..C1@u(r2)
-; LARGE32-O0-NEXT:    lwz r3, L..C1@l(r3)
-; LARGE32-O0-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-O0-NEXT:    lwz r5, 24(r1) # 4-byte Folded Reload
-; LARGE32-O0-NEXT:    mr r4, r3
-; LARGE32-O0-NEXT:    lwz r3, 28(r1) # 4-byte Folded Reload
-; LARGE32-O0-NEXT:    lwz r5, L..C0@l(r5)
-; LARGE32-O0-NEXT:    stwx r3, r4, r5
-; LARGE32-O0-NEXT:    addi r1, r1, 32
-; LARGE32-O0-NEXT:    lwz r0, 8(r1)
-; LARGE32-O0-NEXT:    mtlr r0
-; LARGE32-O0-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeInt:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 80(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    # kill: def $r3 killed $r3 killed $x3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 60(r1) # 4-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r4, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r5, L..C1(r2) # target-flags(ppc-tlsld) @TLInt
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r4, r4, r5
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 0(r4)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 64
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeInt:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    # kill: def $r3 killed $r3 killed $x3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 60(r1) # 4-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C1@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r5, 48(r1) # 8-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r4, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r5, L..C0@l(r5)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r4, r4, r5
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 0(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 64
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLInt)
-  store i32 %x, ptr %0, align 4
-  ret void
-}
-
-define void @storeLongLong(i64 noundef %x) {
-; SMALL64-O0-LABEL: storeLongLong:
-; SMALL64-O0:       # %bb.0: # %entry
-; SMALL64-O0-NEXT:    mflr r0
-; SMALL64-O0-NEXT:    stdu r1, -64(r1)
-; SMALL64-O0-NEXT:    std r0, 80(r1)
-; SMALL64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
-; SMALL64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL64-O0-NEXT:    mr r4, r3
-; SMALL64-O0-NEXT:    ld r3, 56(r1) # 8-byte Folded Reload
-; SMALL64-O0-NEXT:    ld r5, L..C2(r2) # target-flags(ppc-tlsld) @TLLongLong
-; SMALL64-O0-NEXT:    add r4, r4, r5
-; SMALL64-O0-NEXT:    std r3, 0(r4)
-; SMALL64-O0-NEXT:    addi r1, r1, 64
-; SMALL64-O0-NEXT:    ld r0, 16(r1)
-; SMALL64-O0-NEXT:    mtlr r0
-; SMALL64-O0-NEXT:    blr
-;
-; LARGE64-O0-LABEL: storeLongLong:
-; LARGE64-O0:       # %bb.0: # %entry
-; LARGE64-O0-NEXT:    mflr r0
-; LARGE64-O0-NEXT:    stdu r1, -64(r1)
-; LARGE64-O0-NEXT:    std r0, 80(r1)
-; LARGE64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
-; LARGE64-O0-NEXT:    addis r3, L..C2@u(r2)
-; LARGE64-O0-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
-; LARGE64-O0-NEXT:    addis r3, L..C1@u(r2)
-; LARGE64-O0-NEXT:    ld r3, L..C1@l(r3)
-; LARGE64-O0-NEXT:    bla .__tls_get_mod[PR]
-; LARGE64-O0-NEXT:    ld r5, 48(r1) # 8-byte Folded Reload
-; LARGE64-O0-NEXT:    mr r4, r3
-; LARGE64-O0-NEXT:    ld r3, 56(r1) # 8-byte Folded Reload
-; LARGE64-O0-NEXT:    ld r5, L..C2@l(r5)
-; LARGE64-O0-NEXT:    add r4, r4, r5
-; LARGE64-O0-NEXT:    std r3, 0(r4)
-; LARGE64-O0-NEXT:    addi r1, r1, 64
-; LARGE64-O0-NEXT:    ld r0, 16(r1)
-; LARGE64-O0-NEXT:    mtlr r0
-; LARGE64-O0-NEXT:    blr
-;
-; SMALL32-O0-LABEL: storeLongLong:
-; SMALL32-O0:       # %bb.0: # %entry
-; SMALL32-O0-NEXT:    mflr r0
-; SMALL32-O0-NEXT:    stwu r1, -32(r1)
-; SMALL32-O0-NEXT:    stw r0, 40(r1)
-; SMALL32-O0-NEXT:    stw r4, 28(r1) # 4-byte Folded Spill
-; SMALL32-O0-NEXT:    stw r3, 24(r1) # 4-byte Folded Spill
-; SMALL32-O0-NEXT:    lwz r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL32-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL32-O0-NEXT:    lwz r5, 24(r1) # 4-byte Folded Reload
-; SMALL32-O0-NEXT:    lwz r4, 28(r1) # 4-byte Folded Reload
-; SMALL32-O0-NEXT:    lwz r6, L..C2(r2) # target-flags(ppc-tlsld) @TLLongLong
-; SMALL32-O0-NEXT:    stwux r5, r3, r6
-; SMALL32-O0-NEXT:    stw r4, 4(r3)
-; SMALL32-O0-NEXT:    addi r1, r1, 32
-; SMALL32-O0-NEXT:    lwz r0, 8(r1)
-; SMALL32-O0-NEXT:    mtlr r0
-; SMALL32-O0-NEXT:    blr
-;
-; LARGE32-O0-LABEL: storeLongLong:
-; LARGE32-O0:       # %bb.0: # %entry
-; LARGE32-O0-NEXT:    mflr r0
-; LARGE32-O0-NEXT:    stwu r1, -32(r1)
-; LARGE32-O0-NEXT:    stw r0, 40(r1)
-; LARGE32-O0-NEXT:    stw r4, 28(r1) # 4-byte Folded Spill
-; LARGE32-O0-NEXT:    stw r3, 24(r1) # 4-byte Folded Spill
-; LARGE32-O0-NEXT:    addis r6, L..C2@u(r2)
-; LARGE32-O0-NEXT:    addis r3, L..C1@u(r2)
-; LARGE32-O0-NEXT:    lwz r3, L..C1@l(r3)
-; LARGE32-O0-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-O0-NEXT:    lwz r5, 24(r1) # 4-byte Folded Reload
-; LARGE32-O0-NEXT:    lwz r4, 28(r1) # 4-byte Folded Reload
-; LARGE32-O0-NEXT:    lwz r6, L..C2@l(r6)
-; LARGE32-O0-NEXT:    stwux r5, r3, r6
-; LARGE32-O0-NEXT:    stw r4, 4(r3)
-; LARGE32-O0-NEXT:    addi r1, r1, 32
-; LARGE32-O0-NEXT:    lwz r0, 8(r1)
-; LARGE32-O0-NEXT:    mtlr r0
-; LARGE32-O0-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeLongLong:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 80(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r4, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, 56(r1) # 8-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r5, L..C2(r2) # target-flags(ppc-tlsld) @TLLongLong
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r4, r4, r5
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r3, 0(r4)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 64
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeLongLong:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C2@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C1@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r5, 48(r1) # 8-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r4, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, 56(r1) # 8-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r5, L..C2@l(r5)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r4, r4, r5
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 0(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 64
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLLongLong)
-  store i64 %x, ptr %0, align 8
-  ret void
-}
-
-define void @storeDouble(double noundef %x) {
-; SMALL64-O0-LABEL: storeDouble:
-; SMALL64-O0:       # %bb.0: # %entry
-; SMALL64-O0-NEXT:    mflr r0
-; SMALL64-O0-NEXT:    stdu r1, -48(r1)
-; SMALL64-O0-NEXT:    std r0, 64(r1)
-; SMALL64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL64-O0-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @TLDouble
-; SMALL64-O0-NEXT:    add r3, r3, r4
-; SMALL64-O0-NEXT:    stxsdx f1, 0, r3
-; SMALL64-O0-NEXT:    addi r1, r1, 48
-; SMALL64-O0-NEXT:    ld r0, 16(r1)
-; SMALL64-O0-NEXT:    mtlr r0
-; SMALL64-O0-NEXT:    blr
-;
-; LARGE64-O0-LABEL: storeDouble:
-; LARGE64-O0:       # %bb.0: # %entry
-; LARGE64-O0-NEXT:    mflr r0
-; LARGE64-O0-NEXT:    stdu r1, -64(r1)
-; LARGE64-O0-NEXT:    std r0, 80(r1)
-; LARGE64-O0-NEXT:    addis r3, L..C3@u(r2)
-; LARGE64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
-; LARGE64-O0-NEXT:    addis r3, L..C1@u(r2)
-; LARGE64-O0-NEXT:    ld r3, L..C1@l(r3)
-; LARGE64-O0-NEXT:    bla .__tls_get_mod[PR]
-; LARGE64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; LARGE64-O0-NEXT:    ld r4, L..C3@l(r4)
-; LARGE64-O0-NEXT:    add r3, r3, r4
-; LARGE64-O0-NEXT:    stxsdx f1, 0, r3
-; LARGE64-O0-NEXT:    addi r1, r1, 64
-; LARGE64-O0-NEXT:    ld r0, 16(r1)
-; LARGE64-O0-NEXT:    mtlr r0
-; LARGE64-O0-NEXT:    blr
-;
-; SMALL32-O0-LABEL: storeDouble:
-; SMALL32-O0:       # %bb.0: # %entry
-; SMALL32-O0-NEXT:    mflr r0
-; SMALL32-O0-NEXT:    stwu r1, -32(r1)
-; SMALL32-O0-NEXT:    stw r0, 40(r1)
-; SMALL32-O0-NEXT:    lwz r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL32-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL32-O0-NEXT:    lwz r4, L..C3(r2) # target-flags(ppc-tlsld) @TLDouble
-; SMALL32-O0-NEXT:    stfdx f1, r3, r4
-; SMALL32-O0-NEXT:    addi r1, r1, 32
-; SMALL32-O0-NEXT:    lwz r0, 8(r1)
-; SMALL32-O0-NEXT:    mtlr r0
-; SMALL32-O0-NEXT:    blr
-;
-; LARGE32-O0-LABEL: storeDouble:
-; LARGE32-O0:       # %bb.0: # %entry
-; LARGE32-O0-NEXT:    mflr r0
-; LARGE32-O0-NEXT:    stwu r1, -32(r1)
-; LARGE32-O0-NEXT:    stw r0, 40(r1)
-; LARGE32-O0-NEXT:    addis r3, L..C3@u(r2)
-; LARGE32-O0-NEXT:    stw r3, 28(r1) # 4-byte Folded Spill
-; LARGE32-O0-NEXT:    addis r3, L..C1@u(r2)
-; LARGE32-O0-NEXT:    lwz r3, L..C1@l(r3)
-; LARGE32-O0-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-O0-NEXT:    lwz r4, 28(r1) # 4-byte Folded Reload
-; LARGE32-O0-NEXT:    lwz r4, L..C3@l(r4)
-; LARGE32-O0-NEXT:    stfdx f1, r3, r4
-; LARGE32-O0-NEXT:    addi r1, r1, 32
-; LARGE32-O0-NEXT:    lwz r0, 8(r1)
-; LARGE32-O0-NEXT:    mtlr r0
-; LARGE32-O0-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeDouble:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @TLDouble
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stxsdx f1, 0, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeDouble:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C3@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C1@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stxsdx f1, 0, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 64
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLDouble)
-  store double %x, ptr %0, align 8
-  ret void
-}
-
-define void @storeFloat(float noundef %x) {
-; SMALL64-O0-LABEL: storeFloat:
-; SMALL64-O0:       # %bb.0: # %entry
-; SMALL64-O0-NEXT:    mflr r0
-; SMALL64-O0-NEXT:    stdu r1, -48(r1)
-; SMALL64-O0-NEXT:    std r0, 64(r1)
-; SMALL64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL64-O0-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @TLFloat
-; SMALL64-O0-NEXT:    add r3, r3, r4
-; SMALL64-O0-NEXT:    stfs f1, 0(r3)
-; SMALL64-O0-NEXT:    addi r1, r1, 48
-; SMALL64-O0-NEXT:    ld r0, 16(r1)
-; SMALL64-O0-NEXT:    mtlr r0
-; SMALL64-O0-NEXT:    blr
-;
-; LARGE64-O0-LABEL: storeFloat:
-; LARGE64-O0:       # %bb.0: # %entry
-; LARGE64-O0-NEXT:    mflr r0
-; LARGE64-O0-NEXT:    stdu r1, -64(r1)
-; LARGE64-O0-NEXT:    std r0, 80(r1)
-; LARGE64-O0-NEXT:    addis r3, L..C4@u(r2)
-; LARGE64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
-; LARGE64-O0-NEXT:    addis r3, L..C1@u(r2)
-; LARGE64-O0-NEXT:    ld r3, L..C1@l(r3)
-; LARGE64-O0-NEXT:    bla .__tls_get_mod[PR]
-; LARGE64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; LARGE64-O0-NEXT:    ld r4, L..C4@l(r4)
-; LARGE64-O0-NEXT:    add r3, r3, r4
-; LARGE64-O0-NEXT:    stfs f1, 0(r3)
-; LARGE64-O0-NEXT:    addi r1, r1, 64
-; LARGE64-O0-NEXT:    ld r0, 16(r1)
-; LARGE64-O0-NEXT:    mtlr r0
-; LARGE64-O0-NEXT:    blr
-;
-; SMALL32-O0-LABEL: storeFloat:
-; SMALL32-O0:       # %bb.0: # %entry
-; SMALL32-O0-NEXT:    mflr r0
-; SMALL32-O0-NEXT:    stwu r1, -32(r1)
-; SMALL32-O0-NEXT:    stw r0, 40(r1)
-; SMALL32-O0-NEXT:    lwz r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL32-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL32-O0-NEXT:    lwz r4, L..C4(r2) # target-flags(ppc-tlsld) @TLFloat
-; SMALL32-O0-NEXT:    stfsx f1, r3, r4
-; SMALL32-O0-NEXT:    addi r1, r1, 32
-; SMALL32-O0-NEXT:    lwz r0, 8(r1)
-; SMALL32-O0-NEXT:    mtlr r0
-; SMALL32-O0-NEXT:    blr
-;
-; LARGE32-O0-LABEL: storeFloat:
-; LARGE32-O0:       # %bb.0: # %entry
-; LARGE32-O0-NEXT:    mflr r0
-; LARGE32-O0-NEXT:    stwu r1, -32(r1)
-; LARGE32-O0-NEXT:    stw r0, 40(r1)
-; LARGE32-O0-NEXT:    addis r3, L..C4@u(r2)
-; LARGE32-O0-NEXT:    stw r3, 28(r1) # 4-byte Folded Spill
-; LARGE32-O0-NEXT:    addis r3, L..C1@u(r2)
-; LARGE32-O0-NEXT:    lwz r3, L..C1@l(r3)
-; LARGE32-O0-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-O0-NEXT:    lwz r4, 28(r1) # 4-byte Folded Reload
-; LARGE32-O0-NEXT:    lwz r4, L..C4@l(r4)
-; LARGE32-O0-NEXT:    stfsx f1, r3, r4
-; LARGE32-O0-NEXT:    addi r1, r1, 32
-; LARGE32-O0-NEXT:    lwz r0, 8(r1)
-; LARGE32-O0-NEXT:    mtlr r0
-; LARGE32-O0-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: storeFloat:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @TLFloat
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfs f1, 0(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: storeFloat:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C4@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C1@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfs f1, 0(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 64
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLFloat)
-  store float %x, ptr %0, align 4
-  ret void
-}
-
-define i32 @loadInt() {
-; SMALL64-O0-LABEL: loadInt:
-; SMALL64-O0:       # %bb.0: # %entry
-; SMALL64-O0-NEXT:    mflr r0
-; SMALL64-O0-NEXT:    stdu r1, -48(r1)
-; SMALL64-O0-NEXT:    std r0, 64(r1)
-; SMALL64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL64-O0-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @TLInt
-; SMALL64-O0-NEXT:    add r3, r3, r4
-; SMALL64-O0-NEXT:    lwz r3, 0(r3)
-; SMALL64-O0-NEXT:    addi r1, r1, 48
-; SMALL64-O0-NEXT:    ld r0, 16(r1)
-; SMALL64-O0-NEXT:    mtlr r0
-; SMALL64-O0-NEXT:    blr
-;
-; LARGE64-O0-LABEL: loadInt:
-; LARGE64-O0:       # %bb.0: # %entry
-; LARGE64-O0-NEXT:    mflr r0
-; LARGE64-O0-NEXT:    stdu r1, -64(r1)
-; LARGE64-O0-NEXT:    std r0, 80(r1)
-; LARGE64-O0-NEXT:    addis r3, L..C0@u(r2)
-; LARGE64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
-; LARGE64-O0-NEXT:    addis r3, L..C1@u(r2)
-; LARGE64-O0-NEXT:    ld r3, L..C1@l(r3)
-; LARGE64-O0-NEXT:    bla .__tls_get_mod[PR]
-; LARGE64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; LARGE64-O0-NEXT:    ld r4, L..C0@l(r4)
-; LARGE64-O0-NEXT:    add r3, r3, r4
-; LARGE64-O0-NEXT:    lwz r3, 0(r3)
-; LARGE64-O0-NEXT:    addi r1, r1, 64
-; LARGE64-O0-NEXT:    ld r0, 16(r1)
-; LARGE64-O0-NEXT:    mtlr r0
-; LARGE64-O0-NEXT:    blr
-;
-; SMALL32-O0-LABEL: loadInt:
-; SMALL32-O0:       # %bb.0: # %entry
-; SMALL32-O0-NEXT:    mflr r0
-; SMALL32-O0-NEXT:    stwu r1, -32(r1)
-; SMALL32-O0-NEXT:    stw r0, 40(r1)
-; SMALL32-O0-NEXT:    lwz r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL32-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL32-O0-NEXT:    lwz r4, L..C1(r2) # target-flags(ppc-tlsld) @TLInt
-; SMALL32-O0-NEXT:    lwzx r3, r3, r4
-; SMALL32-O0-NEXT:    addi r1, r1, 32
-; SMALL32-O0-NEXT:    lwz r0, 8(r1)
-; SMALL32-O0-NEXT:    mtlr r0
-; SMALL32-O0-NEXT:    blr
-;
-; LARGE32-O0-LABEL: loadInt:
-; LARGE32-O0:       # %bb.0: # %entry
-; LARGE32-O0-NEXT:    mflr r0
-; LARGE32-O0-NEXT:    stwu r1, -32(r1)
-; LARGE32-O0-NEXT:    stw r0, 40(r1)
-; LARGE32-O0-NEXT:    addis r3, L..C0@u(r2)
-; LARGE32-O0-NEXT:    stw r3, 28(r1) # 4-byte Folded Spill
-; LARGE32-O0-NEXT:    addis r3, L..C1@u(r2)
-; LARGE32-O0-NEXT:    lwz r3, L..C1@l(r3)
-; LARGE32-O0-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-O0-NEXT:    lwz r4, 28(r1) # 4-byte Folded Reload
-; LARGE32-O0-NEXT:    lwz r4, L..C0@l(r4)
-; LARGE32-O0-NEXT:    lwzx r3, r3, r4
-; LARGE32-O0-NEXT:    addi r1, r1, 32
-; LARGE32-O0-NEXT:    lwz r0, 8(r1)
-; LARGE32-O0-NEXT:    mtlr r0
-; LARGE32-O0-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadInt:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @TLInt
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwz r3, 0(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadInt:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C1@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C0@l(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwz r3, 0(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 64
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLInt)
-  %1 = load i32, ptr %0, align 4
-  ret i32 %1
-}
-
-define i32 @loadLongLong() {
-; SMALL64-O0-LABEL: loadLongLong:
-; SMALL64-O0:       # %bb.0: # %entry
-; SMALL64-O0-NEXT:    mflr r0
-; SMALL64-O0-NEXT:    stdu r1, -48(r1)
-; SMALL64-O0-NEXT:    std r0, 64(r1)
-; SMALL64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL64-O0-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @TLLongLong
-; SMALL64-O0-NEXT:    add r3, r3, r4
-; SMALL64-O0-NEXT:    ld r3, 0(r3)
-; SMALL64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
-; SMALL64-O0-NEXT:    clrldi r3, r3, 32
-; SMALL64-O0-NEXT:    addi r1, r1, 48
-; SMALL64-O0-NEXT:    ld r0, 16(r1)
-; SMALL64-O0-NEXT:    mtlr r0
-; SMALL64-O0-NEXT:    blr
-;
-; LARGE64-O0-LABEL: loadLongLong:
-; LARGE64-O0:       # %bb.0: # %entry
-; LARGE64-O0-NEXT:    mflr r0
-; LARGE64-O0-NEXT:    stdu r1, -64(r1)
-; LARGE64-O0-NEXT:    std r0, 80(r1)
-; LARGE64-O0-NEXT:    addis r3, L..C2@u(r2)
-; LARGE64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
-; LARGE64-O0-NEXT:    addis r3, L..C1@u(r2)
-; LARGE64-O0-NEXT:    ld r3, L..C1@l(r3)
-; LARGE64-O0-NEXT:    bla .__tls_get_mod[PR]
-; LARGE64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; LARGE64-O0-NEXT:    ld r4, L..C2@l(r4)
-; LARGE64-O0-NEXT:    add r3, r3, r4
-; LARGE64-O0-NEXT:    ld r3, 0(r3)
-; LARGE64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
-; LARGE64-O0-NEXT:    clrldi r3, r3, 32
-; LARGE64-O0-NEXT:    addi r1, r1, 64
-; LARGE64-O0-NEXT:    ld r0, 16(r1)
-; LARGE64-O0-NEXT:    mtlr r0
-; LARGE64-O0-NEXT:    blr
-;
-; SMALL32-O0-LABEL: loadLongLong:
-; SMALL32-O0:       # %bb.0: # %entry
-; SMALL32-O0-NEXT:    mflr r0
-; SMALL32-O0-NEXT:    stwu r1, -32(r1)
-; SMALL32-O0-NEXT:    stw r0, 40(r1)
-; SMALL32-O0-NEXT:    lwz r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL32-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL32-O0-NEXT:    lwz r4, L..C2(r2) # target-flags(ppc-tlsld) @TLLongLong
-; SMALL32-O0-NEXT:    add r3, r3, r4
-; SMALL32-O0-NEXT:    lwz r3, 4(r3)
-; SMALL32-O0-NEXT:    addi r1, r1, 32
-; SMALL32-O0-NEXT:    lwz r0, 8(r1)
-; SMALL32-O0-NEXT:    mtlr r0
-; SMALL32-O0-NEXT:    blr
-;
-; LARGE32-O0-LABEL: loadLongLong:
-; LARGE32-O0:       # %bb.0: # %entry
-; LARGE32-O0-NEXT:    mflr r0
-; LARGE32-O0-NEXT:    stwu r1, -32(r1)
-; LARGE32-O0-NEXT:    stw r0, 40(r1)
-; LARGE32-O0-NEXT:    addis r3, L..C2@u(r2)
-; LARGE32-O0-NEXT:    stw r3, 28(r1) # 4-byte Folded Spill
-; LARGE32-O0-NEXT:    addis r3, L..C1@u(r2)
-; LARGE32-O0-NEXT:    lwz r3, L..C1@l(r3)
-; LARGE32-O0-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-O0-NEXT:    lwz r4, 28(r1) # 4-byte Folded Reload
-; LARGE32-O0-NEXT:    lwz r4, L..C2@l(r4)
-; LARGE32-O0-NEXT:    add r3, r3, r4
-; LARGE32-O0-NEXT:    lwz r3, 4(r3)
-; LARGE32-O0-NEXT:    addi r1, r1, 32
-; LARGE32-O0-NEXT:    lwz r0, 8(r1)
-; LARGE32-O0-NEXT:    mtlr r0
-; LARGE32-O0-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadLongLong:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @TLLongLong
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, 0(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    # kill: def $r3 killed $r3 killed $x3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    clrldi r3, r3, 32
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadLongLong:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C2@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C1@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, 0(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    # kill: def $r3 killed $r3 killed $x3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    clrldi r3, r3, 32
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 64
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLLongLong)
-  %1 = load i64, ptr %0, align 8
-  %conv = trunc i64 %1 to i32
-  ret i32 %conv
-}
-
-define i32 @loadDouble() {
-; SMALL64-O0-LABEL: loadDouble:
-; SMALL64-O0:       # %bb.0: # %entry
-; SMALL64-O0-NEXT:    mflr r0
-; SMALL64-O0-NEXT:    stdu r1, -64(r1)
-; SMALL64-O0-NEXT:    std r0, 80(r1)
-; SMALL64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL64-O0-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @TLDouble
-; SMALL64-O0-NEXT:    lfdx f0, r3, r4
-; SMALL64-O0-NEXT:    xscvdpsxws f0, f0
-; SMALL64-O0-NEXT:    addi r3, r1, 52
-; SMALL64-O0-NEXT:    stfiwx f0, 0, r3
-; SMALL64-O0-NEXT:    lwz r3, 52(r1)
-; SMALL64-O0-NEXT:    clrldi r3, r3, 32
-; SMALL64-O0-NEXT:    addi r1, r1, 64
-; SMALL64-O0-NEXT:    ld r0, 16(r1)
-; SMALL64-O0-NEXT:    mtlr r0
-; SMALL64-O0-NEXT:    blr
-;
-; LARGE64-O0-LABEL: loadDouble:
-; LARGE64-O0:       # %bb.0: # %entry
-; LARGE64-O0-NEXT:    mflr r0
-; LARGE64-O0-NEXT:    stdu r1, -80(r1)
-; LARGE64-O0-NEXT:    std r0, 96(r1)
-; LARGE64-O0-NEXT:    addis r3, L..C3@u(r2)
-; LARGE64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
-; LARGE64-O0-NEXT:    addis r3, L..C1@u(r2)
-; LARGE64-O0-NEXT:    ld r3, L..C1@l(r3)
-; LARGE64-O0-NEXT:    bla .__tls_get_mod[PR]
-; LARGE64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; LARGE64-O0-NEXT:    ld r4, L..C3@l(r4)
-; LARGE64-O0-NEXT:    lfdx f0, r3, r4
-; LARGE64-O0-NEXT:    xscvdpsxws f0, f0
-; LARGE64-O0-NEXT:    addi r3, r1, 68
-; LARGE64-O0-NEXT:    stfiwx f0, 0, r3
-; LARGE64-O0-NEXT:    lwz r3, 68(r1)
-; LARGE64-O0-NEXT:    clrldi r3, r3, 32
-; LARGE64-O0-NEXT:    addi r1, r1, 80
-; LARGE64-O0-NEXT:    ld r0, 16(r1)
-; LARGE64-O0-NEXT:    mtlr r0
-; LARGE64-O0-NEXT:    blr
-;
-; SMALL32-O0-LABEL: loadDouble:
-; SMALL32-O0:       # %bb.0: # %entry
-; SMALL32-O0-NEXT:    mflr r0
-; SMALL32-O0-NEXT:    stwu r1, -32(r1)
-; SMALL32-O0-NEXT:    stw r0, 40(r1)
-; SMALL32-O0-NEXT:    lwz r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL32-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL32-O0-NEXT:    lwz r4, L..C3(r2) # target-flags(ppc-tlsld) @TLDouble
-; SMALL32-O0-NEXT:    lfdx f0, r3, r4
-; SMALL32-O0-NEXT:    xscvdpsxws f0, f0
-; SMALL32-O0-NEXT:    addi r3, r1, 28
-; SMALL32-O0-NEXT:    stfiwx f0, 0, r3
-; SMALL32-O0-NEXT:    lwz r3, 28(r1)
-; SMALL32-O0-NEXT:    addi r1, r1, 32
-; SMALL32-O0-NEXT:    lwz r0, 8(r1)
-; SMALL32-O0-NEXT:    mtlr r0
-; SMALL32-O0-NEXT:    blr
-;
-; LARGE32-O0-LABEL: loadDouble:
-; LARGE32-O0:       # %bb.0: # %entry
-; LARGE32-O0-NEXT:    mflr r0
-; LARGE32-O0-NEXT:    stwu r1, -32(r1)
-; LARGE32-O0-NEXT:    stw r0, 40(r1)
-; LARGE32-O0-NEXT:    addis r3, L..C3@u(r2)
-; LARGE32-O0-NEXT:    stw r3, 24(r1) # 4-byte Folded Spill
-; LARGE32-O0-NEXT:    addis r3, L..C1@u(r2)
-; LARGE32-O0-NEXT:    lwz r3, L..C1@l(r3)
-; LARGE32-O0-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-O0-NEXT:    lwz r4, 24(r1) # 4-byte Folded Reload
-; LARGE32-O0-NEXT:    lwz r4, L..C3@l(r4)
-; LARGE32-O0-NEXT:    lfdx f0, r3, r4
-; LARGE32-O0-NEXT:    xscvdpsxws f0, f0
-; LARGE32-O0-NEXT:    addi r3, r1, 28
-; LARGE32-O0-NEXT:    stfiwx f0, 0, r3
-; LARGE32-O0-NEXT:    lwz r3, 28(r1)
-; LARGE32-O0-NEXT:    addi r1, r1, 32
-; LARGE32-O0-NEXT:    lwz r0, 8(r1)
-; LARGE32-O0-NEXT:    mtlr r0
-; LARGE32-O0-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadDouble:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 80(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @TLDouble
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfdx f0, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xscvdpsxws f0, f0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r1, 52
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfiwx f0, 0, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwz r3, 52(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    clrldi r3, r3, 32
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 64
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadDouble:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -80(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 96(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C3@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C1@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfdx f0, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xscvdpsxws f0, f0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r1, 68
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfiwx f0, 0, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwz r3, 68(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    clrldi r3, r3, 32
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 80
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLDouble)
-  %1 = load double, ptr %0, align 8
-  %conv = fptosi double %1 to i32
-  ret i32 %conv
-}
-
-define i32 @loadFloat() {
-; SMALL64-O0-LABEL: loadFloat:
-; SMALL64-O0:       # %bb.0: # %entry
-; SMALL64-O0-NEXT:    mflr r0
-; SMALL64-O0-NEXT:    stdu r1, -64(r1)
-; SMALL64-O0-NEXT:    std r0, 80(r1)
-; SMALL64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL64-O0-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @TLFloat
-; SMALL64-O0-NEXT:    add r3, r3, r4
-; SMALL64-O0-NEXT:    lfs f0, 0(r3)
-; SMALL64-O0-NEXT:    fctiwz f0, f0
-; SMALL64-O0-NEXT:    stfd f0, 56(r1)
-; SMALL64-O0-NEXT:    lwa r3, 60(r1)
-; SMALL64-O0-NEXT:    clrldi r3, r3, 32
-; SMALL64-O0-NEXT:    addi r1, r1, 64
-; SMALL64-O0-NEXT:    ld r0, 16(r1)
-; SMALL64-O0-NEXT:    mtlr r0
-; SMALL64-O0-NEXT:    blr
-;
-; LARGE64-O0-LABEL: loadFloat:
-; LARGE64-O0:       # %bb.0: # %entry
-; LARGE64-O0-NEXT:    mflr r0
-; LARGE64-O0-NEXT:    stdu r1, -64(r1)
-; LARGE64-O0-NEXT:    std r0, 80(r1)
-; LARGE64-O0-NEXT:    addis r3, L..C4@u(r2)
-; LARGE64-O0-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
-; LARGE64-O0-NEXT:    addis r3, L..C1@u(r2)
-; LARGE64-O0-NEXT:    ld r3, L..C1@l(r3)
-; LARGE64-O0-NEXT:    bla .__tls_get_mod[PR]
-; LARGE64-O0-NEXT:    ld r4, 48(r1) # 8-byte Folded Reload
-; LARGE64-O0-NEXT:    ld r4, L..C4@l(r4)
-; LARGE64-O0-NEXT:    add r3, r3, r4
-; LARGE64-O0-NEXT:    lfs f0, 0(r3)
-; LARGE64-O0-NEXT:    fctiwz f0, f0
-; LARGE64-O0-NEXT:    stfd f0, 56(r1)
-; LARGE64-O0-NEXT:    lwa r3, 60(r1)
-; LARGE64-O0-NEXT:    clrldi r3, r3, 32
-; LARGE64-O0-NEXT:    addi r1, r1, 64
-; LARGE64-O0-NEXT:    ld r0, 16(r1)
-; LARGE64-O0-NEXT:    mtlr r0
-; LARGE64-O0-NEXT:    blr
-;
-; SMALL32-O0-LABEL: loadFloat:
-; SMALL32-O0:       # %bb.0: # %entry
-; SMALL32-O0-NEXT:    mflr r0
-; SMALL32-O0-NEXT:    stwu r1, -32(r1)
-; SMALL32-O0-NEXT:    stw r0, 40(r1)
-; SMALL32-O0-NEXT:    lwz r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL32-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL32-O0-NEXT:    lwz r4, L..C4(r2) # target-flags(ppc-tlsld) @TLFloat
-; SMALL32-O0-NEXT:    lfsx f0, r3, r4
-; SMALL32-O0-NEXT:    xscvdpsxws f0, f0
-; SMALL32-O0-NEXT:    addi r3, r1, 28
-; SMALL32-O0-NEXT:    stfiwx f0, 0, r3
-; SMALL32-O0-NEXT:    lwz r3, 28(r1)
-; SMALL32-O0-NEXT:    addi r1, r1, 32
-; SMALL32-O0-NEXT:    lwz r0, 8(r1)
-; SMALL32-O0-NEXT:    mtlr r0
-; SMALL32-O0-NEXT:    blr
-;
-; LARGE32-O0-LABEL: loadFloat:
-; LARGE32-O0:       # %bb.0: # %entry
-; LARGE32-O0-NEXT:    mflr r0
-; LARGE32-O0-NEXT:    stwu r1, -32(r1)
-; LARGE32-O0-NEXT:    stw r0, 40(r1)
-; LARGE32-O0-NEXT:    addis r3, L..C4@u(r2)
-; LARGE32-O0-NEXT:    stw r3, 24(r1) # 4-byte Folded Spill
-; LARGE32-O0-NEXT:    addis r3, L..C1@u(r2)
-; LARGE32-O0-NEXT:    lwz r3, L..C1@l(r3)
-; LARGE32-O0-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-O0-NEXT:    lwz r4, 24(r1) # 4-byte Folded Reload
-; LARGE32-O0-NEXT:    lwz r4, L..C4@l(r4)
-; LARGE32-O0-NEXT:    lfsx f0, r3, r4
-; LARGE32-O0-NEXT:    xscvdpsxws f0, f0
-; LARGE32-O0-NEXT:    addi r3, r1, 28
-; LARGE32-O0-NEXT:    stfiwx f0, 0, r3
-; LARGE32-O0-NEXT:    lwz r3, 28(r1)
-; LARGE32-O0-NEXT:    addi r1, r1, 32
-; LARGE32-O0-NEXT:    lwz r0, 8(r1)
-; LARGE32-O0-NEXT:    mtlr r0
-; LARGE32-O0-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: loadFloat:
-; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 80(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @TLFloat
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfs f0, 0(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    fctiwz f0, f0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfd f0, 56(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwa r3, 60(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    clrldi r3, r3, 32
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 64
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
-;
-; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: loadFloat:
-; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C4@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C1@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, 48(r1) # 8-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfs f0, 0(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    fctiwz f0, f0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfd f0, 56(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwa r3, 60(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    clrldi r3, r3, 32
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 64
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
-entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLFloat)
-  %1 = load float, ptr %0, align 4
-  %conv = fptosi float %1 to i32
-  ret i32 %conv
-}
-
-; TOC Entry Checks.
-
-; SMALL64-O0-LABEL: .toc
-; SMALL64-O0-LABEL:L..C0:
-; SMALL64-O0-NEXT:	.tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
-; SMALL64-O0-NEXT:	.rename _Renamed..5f24__TLSML[TC],"_$TLSML"
-; SMALL64-O0-LABEL:L..C1:
-; SMALL64-O0-NEXT:	.tc TLInt[TC],TLInt[UL]@ld
-; SMALL64-O0-LABEL:L..C2:
-; SMALL64-O0-NEXT:	.tc TLLongLong[TC],TLLongLong[UL]@ld
-; SMALL64-O0-LABEL:L..C3:
-; SMALL64-O0-NEXT:	.tc TLDouble[TC],TLDouble[UL]@ld
-; SMALL64-O0-LABEL:L..C4:
-; SMALL64-O0-NEXT:	.tc TLFloat[TC],TLFloat[UL]@ld
-
-; LARGE64-O0-LABEL: .toc
-; LARGE64-O0-LABEL:L..C0:
-; LARGE64-O0-NEXT:	.tc TLInt[TE],TLInt[UL]@ld
-; LARGE64-O0-LABEL:L..C1:
-; LARGE64-O0-NEXT:	.tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
-; LARGE64-O0-NEXT:	.rename _Renamed..5f24__TLSML[TC],"_$TLSML"
-; LARGE64-O0-LABEL:L..C2:
-; LARGE64-O0-NEXT:	.tc TLLongLong[TE],TLLongLong[UL]@ld
-; LARGE64-O0-LABEL:L..C3:
-; LARGE64-O0-NEXT:	.tc TLDouble[TE],TLDouble[UL]@ld
-; LARGE64-O0-LABEL:L..C4:
-; LARGE64-O0-NEXT:	.tc TLFloat[TE],TLFloat[UL]@ld
-
-; SMALL32-O0-LABEL: .toc
-; SMALL32-O0-LABEL:L..C0:
-; SMALL32-O0-NEXT:	.tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
-; SMALL32-O0-NEXT:	.rename _Renamed..5f24__TLSML[TC],"_$TLSML"
-; SMALL32-O0-LABEL:L..C1:
-; SMALL32-O0-NEXT:	.tc TLInt[TC],TLInt[UL]@ld
-; SMALL32-O0-LABEL:L..C2:
-; SMALL32-O0-NEXT:	.tc TLLongLong[TC],TLLongLong[UL]@ld
-; SMALL32-O0-LABEL:L..C3:
-; SMALL32-O0-NEXT:	.tc TLDouble[TC],TLDouble[UL]@ld
-; SMALL32-O0-LABEL:L..C4:
-; SMALL32-O0-NEXT:	.tc TLFloat[TC],TLFloat[UL]@ld
-
-; LARGE32-O0-LABEL: .toc
-; LARGE32-O0-LABEL:L..C0:
-; LARGE32-O0-NEXT:	.tc TLInt[TE],TLInt[UL]@ld
-; LARGE32-O0-LABEL:L..C1:
-; LARGE32-O0-NEXT:	.tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
-; LARGE32-O0-NEXT:	.rename _Renamed..5f24__TLSML[TC],"_$TLSML"
-; LARGE32-O0-LABEL:L..C2:
-; LARGE32-O0-NEXT:	.tc TLLongLong[TE],TLLongLong[UL]@ld
-; LARGE32-O0-LABEL:L..C3:
-; LARGE32-O0-NEXT:	.tc TLDouble[TE],TLDouble[UL]@ld
-; LARGE32-O0-LABEL:L..C4:
-; LARGE32-O0-NEXT:	.tc TLFloat[TE],TLFloat[UL]@ld

>From 326c063c156b6d703187798a347c41f6d0df7a11 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Thu, 21 Mar 2024 23:52:44 -0400
Subject: [PATCH 4/4] update checks

---
 ...aix-small-local-dynamic-tls-largeaccess.ll |  630 ++++++++--
 .../aix-small-local-dynamic-tls-types.ll      | 1040 ++++++++++++++++-
 2 files changed, 1491 insertions(+), 179 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll
index 4c866dbe74819a..eb16bae67150e3 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll
@@ -30,133 +30,362 @@
 
 declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
 
-; All accesses use a "faster" local-dynamic sequence directly off the thread pointer.
+; All accesses use a "faster" local-dynamic sequence directly off the module handle.
+; Exercise PPCXCOFFObjectWriter::getRelocTypeAndSignSize/fixup_ppc_half16.
 define signext i32 @test1() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: test1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r6, L..C1(r2) # target-flags(ppc-tlsld) @ElementIntTLS2
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r7, L..C2(r2) # target-flags(ppc-tlsld) @ElementIntTLS3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r8, L..C3(r2) # target-flags(ppc-tlsld) @ElementIntTLS4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r9, L..C4(r2) # target-flags(ppc-tlsld) @ElementIntTLS5
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r5, L..C5(r2) # target-flags(ppc-tlsld) @ElementIntTLSv1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r6, r3, r6
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r7, r3, r7
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r8, r3, r8
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r9, r3, r9
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stwux r4, r3, r5
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 24(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 2
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 320(r6)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 324(r7)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 88
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 328(r8)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 332(r9)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 102
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: test1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r8, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r9, L..C4@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r7, L..C2@l(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r8, L..C3@l(r8)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r9, L..C4@l(r9)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r5, L..C1@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C5@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r6, L..C5@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r7, r3, r7
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r8, r3, r8
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r9, r3, r9
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r6, r3, r6
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stwux r4, r3, r5
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, 24(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 2
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 320(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 324(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 88
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, 328(r8)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 332(r9)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 102
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
 entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLSv1)
-  store i32 1, ptr %0, align 4
-  %arrayidx1 = getelementptr inbounds [8187 x i32], ptr %0, i64 0, i64 6
+  %tls1 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLSv1)
+  store i32 1, ptr %tls1, align 4
+  %arrayidx1 = getelementptr inbounds [8187 x i32], ptr %tls1, i64 0, i64 6
   store i32 4, ptr %arrayidx1, align 4
-  %1 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLS2)
-  %arrayidx2 = getelementptr inbounds [4000 x i32], ptr %1, i64 0, i64 80
+  %tls2 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLS2)
+  %arrayidx2 = getelementptr inbounds [4000 x i32], ptr %tls2, i64 0, i64 80
   store i32 2, ptr %arrayidx2, align 4
-  %2 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLS3)
-  %arrayidx3 = getelementptr inbounds [4000 x i32], ptr %2, i64 0, i64 81
+  %tls3 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLS3)
+  %arrayidx3 = getelementptr inbounds [4000 x i32], ptr %tls3, i64 0, i64 81
   store i32 3, ptr %arrayidx3, align 4
-  %3 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLS4)
-  %arrayidx4 = getelementptr inbounds [4000 x i32], ptr %3, i64 0, i64 82
+  %tls4 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLS4)
+  %arrayidx4 = getelementptr inbounds [4000 x i32], ptr %tls4, i64 0, i64 82
   store i32 4, ptr %arrayidx4, align 4
-  %4 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLS5)
-  %arrayidx5 = getelementptr inbounds [4000 x i32], ptr %4, i64 0, i64 83
+  %tls5 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLS5)
+  %arrayidx5 = getelementptr inbounds [4000 x i32], ptr %tls5, i64 0, i64 83
   store i32 88, ptr %arrayidx5, align 4
-  %5 = load i32, ptr %0, align 4
-  %6 = load i32, ptr %arrayidx1, align 4
-  %7 = load i32, ptr %arrayidx2, align 4
-  %8 = load i32, ptr %arrayidx3, align 4
-  %9 = load i32, ptr %arrayidx4, align 4
-  %add = add i32 %5, 88
-  %add9 = add i32 %add, %6
-  %add11 = add i32 %add9, %7
-  %add13 = add i32 %add11, %8
-  %add15 = add i32 %add13, %9
-  ret i32 %add15
-}
-
-; Example of one access using the regular local-dynamic access from the TOC.
-define signext i32 @test2() {
-entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLSv2)
-  store i32 1, ptr %0, align 4
-  %arrayidx1 = getelementptr inbounds [9000 x i32], ptr %0, i64 0, i64 6
-  store i32 4, ptr %arrayidx1, align 4
-  %1 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLS2)
-  %arrayidx2 = getelementptr inbounds [4000 x i32], ptr %1, i64 0, i64 80
-  store i32 2, ptr %arrayidx2, align 4
-  %2 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLS3)
-  %arrayidx3 = getelementptr inbounds [4000 x i32], ptr %2, i64 0, i64 81
-  store i32 3, ptr %arrayidx3, align 4
-  %3 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLS4)
-  %arrayidx4 = getelementptr inbounds [4000 x i32], ptr %3, i64 0, i64 82
-  store i32 4, ptr %arrayidx4, align 4
-  %4 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLS5)
-  %arrayidx5 = getelementptr inbounds [4000 x i32], ptr %4, i64 0, i64 83
-  store i32 88, ptr %arrayidx5, align 4
-  %5 = load i32, ptr %0, align 4
-  %6 = load i32, ptr %arrayidx1, align 4
-  %7 = load i32, ptr %arrayidx2, align 4
-  %8 = load i32, ptr %arrayidx3, align 4
-  %9 = load i32, ptr %arrayidx4, align 4
-  %add = add i32 %5, 88
-  %add9 = add i32 %add, %6
-  %add11 = add i32 %add9, %7
-  %add13 = add i32 %add11, %8
-  %add15 = add i32 %add13, %9
-  ret i32 %add15
+  %load1 = load i32, ptr %tls1, align 4
+  %load2 = load i32, ptr %arrayidx1, align 4
+  %load3 = load i32, ptr %arrayidx2, align 4
+  %load4 = load i32, ptr %arrayidx3, align 4
+  %load5 = load i32, ptr %arrayidx4, align 4
+  %add = add i32 %load1, 88
+  %add6 = add i32 %add, %load2
+  %add8 = add i32 %add6, %load3
+  %add10 = add i32 %add8, %load4
+  %add12 = add i32 %add10, %load5
+  ret i32 %add12
 }
 
-; All accesses use a "faster" local-dynamic sequence directly off the thread pointer.
-define i64 @test3() {
+; All accesses use a "faster" local-dynamic sequence directly off the module handle.
+; Exercise PPCXCOFFObjectWriter::getRelocTypeAndSignSize/fixup_ppc_half16ds.
+define i64 @test2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: test2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C6(r2) # target-flags(ppc-tlsld) @ElementLongTLS6
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 212
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r4, r6, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r3, 424(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C7(r2) # target-flags(ppc-tlsld) @ElementLongTLS2
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 203
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, 1200(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C8(r2) # target-flags(ppc-tlsgdm) @MyTLSGDVar
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C9(r2) # target-flags(ppc-tlsgd) @MyTLSGDVar
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 44
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, 440(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C10(r2) # target-flags(ppc-tlsld) @ElementLongTLS3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 6
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, 2000(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C11(r2) # target-flags(ppc-tlsld) @ElementLongTLS4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 100
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r4, r6, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r3, 6800(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C12(r2) # target-flags(ppc-tlsld) @ElementLongTLS5
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 882
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, 8400(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 1191
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: test2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C6@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 212
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C6@l(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 424(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C7@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 203
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C7@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 1200(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C8@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C9@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C8@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C9@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 44
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 440(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C10@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 6
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C10@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 2000(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C11@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 100
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C11@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 6800(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C12@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 882
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C12@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 8400(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 1191
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
 entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ElementLongTLS6)
-  %arrayidx = getelementptr inbounds [60 x i64], ptr %0, i64 0, i64 53
+  %tls1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ElementLongTLS6)
+  %arrayidx = getelementptr inbounds [60 x i64], ptr %tls1, i64 0, i64 53
   store i64 212, ptr %arrayidx, align 8
-  %1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ElementLongTLS2)
-  %arrayidx1 = getelementptr inbounds [3000 x i64], ptr %1, i64 0, i64 150
+  %tls2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ElementLongTLS2)
+  %arrayidx1 = getelementptr inbounds [3000 x i64], ptr %tls2, i64 0, i64 150
   store i64 203, ptr %arrayidx1, align 8
-  %2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @MyTLSGDVar)
-  %arrayidx2 = getelementptr inbounds [800 x i64], ptr %2, i64 0, i64 55
+  %tls3 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @MyTLSGDVar)
+  %arrayidx2 = getelementptr inbounds [800 x i64], ptr %tls3, i64 0, i64 55
   store i64 44, ptr %arrayidx2, align 8
-  %3 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ElementLongTLS3)
-  %arrayidx3 = getelementptr inbounds [3000 x i64], ptr %3, i64 0, i64 250
+  %tls4 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ElementLongTLS3)
+  %arrayidx3 = getelementptr inbounds [3000 x i64], ptr %tls4, i64 0, i64 250
   store i64 6, ptr %arrayidx3, align 8
-  %4 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ElementLongTLS4)
-  %arrayidx4 = getelementptr inbounds [3000 x i64], ptr %4, i64 0, i64 850
+  %tls5 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ElementLongTLS4)
+  %arrayidx4 = getelementptr inbounds [3000 x i64], ptr %tls5, i64 0, i64 850
   store i64 100, ptr %arrayidx4, align 8
-  %5 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ElementLongTLS5)
-  %arrayidx5 = getelementptr inbounds [3000 x i64], ptr %5, i64 0, i64 1050
+  %tls6 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ElementLongTLS5)
+  %arrayidx5 = getelementptr inbounds [3000 x i64], ptr %tls6, i64 0, i64 1050
   store i64 882, ptr %arrayidx5, align 8
-  %6 = load i64, ptr %arrayidx1, align 8
-  %7 = load i64, ptr %arrayidx3, align 8
-  %8 = load i64, ptr %arrayidx4, align 8
-  %add = add i64 %6, 882
-  %add9 = add i64 %add, %7
-  %add11 = add i64 %add9, %8
+  %load1 = load i64, ptr %arrayidx1, align 8
+  %load2 = load i64, ptr %arrayidx3, align 8
+  %load3 = load i64, ptr %arrayidx4, align 8
+  %add = add i64 %load1, 882
+  %add9 = add i64 %add, %load2
+  %add11 = add i64 %add9, %load3
   ret i64 %add11
 }
 
+; Example of one access using the regular local-dynamic access from the TOC.
+define signext i32 @test3() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: test3:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r6, L..C1(r2) # target-flags(ppc-tlsld) @ElementIntTLS2
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r7, L..C2(r2) # target-flags(ppc-tlsld) @ElementIntTLS3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r8, L..C3(r2) # target-flags(ppc-tlsld) @ElementIntTLS4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r9, L..C4(r2) # target-flags(ppc-tlsld) @ElementIntTLS5
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r5, L..C13(r2) # target-flags(ppc-tlsld) @ElementIntTLSv2
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r6, r3, r6
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r7, r3, r7
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r8, r3, r8
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r9, r3, r9
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stwux r4, r3, r5
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 24(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 2
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 320(r6)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 324(r7)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 88
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 328(r8)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 332(r9)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 102
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: test3:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C13@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r8, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r9, L..C4@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r7, L..C2@l(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r8, L..C3@l(r8)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r9, L..C4@l(r9)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r5, L..C13@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C5@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r6, L..C5@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r7, r3, r7
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r8, r3, r8
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r9, r3, r9
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r6, r3, r6
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stwux r4, r3, r5
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, 24(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 2
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 320(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 324(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 88
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, 328(r8)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 332(r9)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 102
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+entry:
+  %tls1 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLSv2)
+  store i32 1, ptr %tls1, align 4
+  %arrayidx1 = getelementptr inbounds [9000 x i32], ptr %tls1, i64 0, i64 6
+  store i32 4, ptr %arrayidx1, align 4
+  %tls2 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLS2)
+  %arrayidx2 = getelementptr inbounds [4000 x i32], ptr %tls2, i64 0, i64 80
+  store i32 2, ptr %arrayidx2, align 4
+  %tls3 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLS3)
+  %arrayidx3 = getelementptr inbounds [4000 x i32], ptr %tls3, i64 0, i64 81
+  store i32 3, ptr %arrayidx3, align 4
+  %tls4 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLS4)
+  %arrayidx4 = getelementptr inbounds [4000 x i32], ptr %tls4, i64 0, i64 82
+  store i32 4, ptr %arrayidx4, align 4
+  %tls5 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ElementIntTLS5)
+  %arrayidx5 = getelementptr inbounds [4000 x i32], ptr %tls5, i64 0, i64 83
+  store i32 88, ptr %arrayidx5, align 4
+  %load1 = load i32, ptr %tls1, align 4
+  %load2 = load i32, ptr %arrayidx1, align 4
+  %load3 = load i32, ptr %arrayidx2, align 4
+  %load4 = load i32, ptr %arrayidx3, align 4
+  %load5 = load i32, ptr %arrayidx4, align 4
+  %add = add i32 %load1, 88
+  %add9 = add i32 %add, %load2
+  %add11 = add i32 %add9, %load3
+  %add13 = add i32 %add11, %load4
+  %add15 = add i32 %add13, %load5
+  ret i32 %add15
+}
+
 ; DIS:      file format aix5coff64-rs6000
 ; DIS:      Disassembly of section .text:
-; DIS:      0000000000000000 (idx: [[#NFA+5]]) .StoreArrays1:
+; DIS:      0000000000000000 (idx: [[#NFA+9]]) .test1:
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                mflr 0
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stdu 1, -48(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+15]]) _$TLSML[TC]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU	(idx: [[#NFA+23]]) _$TLSML[TC]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 6, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+17]]) ElementIntTLSv1[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU	(idx: [[#NFA+25]]) ElementIntTLSv1[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 0, 64(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 7, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+19]]) ElementIntTLS3[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU	(idx: [[#NFA+27]]) ElementIntTLS3[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 0(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+15]]) _$TLSML[TC]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	(idx: [[#NFA+23]]) _$TLSML[TC]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 8, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+21]]) ElementIntTLS4[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU	(idx: [[#NFA+29]]) ElementIntTLS4[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 9, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+23]]) ElementIntTLS5[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU	(idx: [[#NFA+31]]) ElementIntTLS5[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 7, 16(7)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+19]]) ElementIntTLS3[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	(idx: [[#NFA+27]]) ElementIntTLS3[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 8, 24(8)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+21]]) ElementIntTLS4[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	(idx: [[#NFA+29]]) ElementIntTLS4[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 9, 32(9)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+23]]) ElementIntTLS5[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	(idx: [[#NFA+31]]) ElementIntTLS5[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0x0
-; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA              (idx: [[#NFA+1]]) .__tls_get_mod[PR]
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA	(idx: [[#NFA+1]]) .__tls_get_mod[PR]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 5, 8(6)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+17]]) ElementIntTLSv1[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	(idx: [[#NFA+25]]) ElementIntTLSv1[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 6, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+25]]) ElementIntTLS2[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU	(idx: [[#NFA+33]]) ElementIntTLS2[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 1
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 6, 40(6)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+25]]) ElementIntTLS2[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	(idx: [[#NFA+33]]) ElementIntTLS2[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 7, 3, 7
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 8, 3, 8
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 9, 3, 9
@@ -177,37 +406,101 @@ entry:
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                mtlr 0
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                blr
 
-; DIS:      0000000000000090 (idx: [[#NFA+7]]) .StoreArrays2:
+; DIS:      0000000000000090 (idx: [[#NFA+11]]) .test2:
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                mflr 0
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stdu 1, -48(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+15]]) _$TLSML[TC]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU	(idx: [[#NFA+23]]) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 0, 64(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 7, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU	(idx: [[#NFA+35]]) ElementLongTLS6[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 0(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	(idx: [[#NFA+23]]) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0x0
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA	(idx: [[#NFA+1]]) .__tls_get_mod[PR]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 212
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                mr	6, 3
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 48(7)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	(idx: [[#NFA+35]]) ElementLongTLS6[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 3, 6, 3
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 4, 424(3)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU	(idx: [[#NFA+37]]) ElementLongTLS2[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 203
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 56(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	(idx: [[#NFA+37]]) ElementLongTLS2[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 3, 6, 3
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 4, 1200(3)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU	(idx: [[#NFA+39]]) .MyTLSGDVar[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 4, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU	(idx: [[#NFA+41]]) MyTLSGDVar[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 64(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	(idx: [[#NFA+39]]) .MyTLSGDVar[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 4, 72(4)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	(idx: [[#NFA+41]]) MyTLSGDVar[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0x0
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA	(idx: [[#NFA+3]]) .__tls_get_addr[PR]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 44
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 4, 440(3)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU	(idx: [[#NFA+43]]) ElementLongTLS3[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 6
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 80(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	(idx: [[#NFA+43]]) ElementLongTLS3[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 3, 6, 3
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 4, 2000(3)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU	(idx: [[#NFA+45]]) ElementLongTLS4[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 100
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 88(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	(idx: [[#NFA+45]]) ElementLongTLS4[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 3, 6, 3
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 4, 6800(3)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU	(idx: [[#NFA+47]]) ElementLongTLS5[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 882
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 96(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	(idx: [[#NFA+47]]) ElementLongTLS5[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 3, 6, 3
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 4, 8400(3)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 3, 1191
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addi 1, 1, 48
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 0, 16(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                mtlr 0
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                blr
+
+; DIS:      0000000000000140 (idx: [[#NFA+13]]) .test3:
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                mflr 0
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                stdu 1, -48(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU	(idx: [[#NFA+23]]) _$TLSML[TC]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 6, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+27]]) ElementIntTLSv2[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU	(idx: [[#NFA+49]]) ElementIntTLSv2[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 0, 64(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 7, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+19]]) ElementIntTLS3[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU	(idx: [[#NFA+27]]) ElementIntTLS3[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 0(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+15]]) _$TLSML[TC]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	(idx: [[#NFA+23]]) _$TLSML[TC]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 8, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+21]]) ElementIntTLS4[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU	(idx: [[#NFA+29]]) ElementIntTLS4[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 9, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU         (idx: [[#NFA+23]]) ElementIntTLS5[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU	(idx: [[#NFA+31]]) ElementIntTLS5[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 7, 16(7)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+19]]) ElementIntTLS3[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	(idx: [[#NFA+27]]) ElementIntTLS3[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 8, 24(8)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+21]]) ElementIntTLS4[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	(idx: [[#NFA+29]]) ElementIntTLS4[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 9, 32(9)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+23]]) ElementIntTLS5[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	(idx: [[#NFA+31]]) ElementIntTLS5[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0x0
-; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA              (idx: [[#NFA+1]]) .__tls_get_mod[PR]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 5, 48(6)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+27]]) ElementIntTLSv2[TE]
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA	(idx: [[#NFA+1]]) .__tls_get_mod[PR]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 5, 104(6)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	(idx: [[#NFA+49]]) ElementIntTLSv2[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 6, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU	        (idx: [[#NFA+25]]) ElementIntTLS2[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU	(idx: [[#NFA+33]]) ElementIntTLS2[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                li 4, 1
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 6, 40(6)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	        (idx: [[#NFA+25]]) ElementIntTLS2[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	(idx: [[#NFA+33]]) ElementIntTLS2[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 7, 3, 7
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 8, 3, 8
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 9, 3, 9
@@ -228,33 +521,112 @@ entry:
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                mtlr 0
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                blr
 
-
 ; DIS:      Disassembly of section .data:
-; DIS:      0000000000000120 (idx: [[#NFA+9]]) StoreArrays1[DS]:
-; DIS-NEXT:      120: 00 00 00 00
-; DIS-NEXT: 0000000000000120:  R_POS	(idx: [[#NFA+5]]) .StoreArrays1
-; DIS-NEXT:      124: 00 00 00 00
-; DIS-NEXT:      128: 00 00 00 00
-; DIS-NEXT: 0000000000000128:  R_POS        (idx: [[#NFA+13]]) TOC[TC0]
-; DIS-NEXT:      12c: 00 00 01 50
 
-; DIS:      0000000000000138 (idx: [[#NFA+11]]) StoreArrays2[DS]:
-; DIS-NEXT:      138: 00 00 00 00
-; DIS-NEXT: 0000000000000138:  R_POS	(idx: [[#NFA+7]]) .StoreArrays2
-; DIS-NEXT:      13c: 00 00 00 90
-; DIS-NEXT:      140: 00 00 00 00
-; DIS-NEXT: 0000000000000140:  R_POS        (idx: [[#NFA+13]]) TOC[TC0]
-; DIS-NEXT:      144: 00 00 01 50
+; DIS:      00000000000001d0 (idx: 17) test1[DS]:
+; DIS-NEXT:      1d0: 00 00 00 00
+; DIS-NEXT: 00000000000001d0:  R_POS	(idx: [[#NFA+9]]) .test1
+; DIS-NEXT:      1d4: 00 00 00 00
+; DIS-NEXT:      1d8: 00 00 00 00
+; DIS-NEXT: 00000000000001d8:  R_POS	(idx: [[#NFA+21]]) TOC[TC0]
+; DIS-NEXT:      1dc: 00 00 02 18
+
+; DIS:      00000000000001e8 (idx: 19) test2[DS]:
+; DIS-NEXT:      1e8: 00 00 00 00
+; DIS-NEXT: 00000000000001e8:  R_POS	(idx: [[#NFA+11]]) .test2
+; DIS-NEXT:      1ec: 00 00 00 90
+; DIS-NEXT:      1f0: 00 00 00 00
+; DIS-NEXT: 00000000000001f0:  R_POS	(idx: [[#NFA+21]]) TOC[TC0]
+; DIS-NEXT:      1f4: 00 00 02 18
+
+; DIS:      0000000000000200 (idx: 21) test3[DS]:
+; DIS-NEXT:      200: 00 00 00 00
+; DIS-NEXT: 0000000000000200:  R_POS	(idx: [[#NFA+13]]) .test3
+; DIS-NEXT:      204: 00 00 01 40
+; DIS-NEXT:      208: 00 00 00 00
+; DIS-NEXT: 0000000000000208:  R_POS	(idx: [[#NFA+21]]) TOC[TC0]
+; DIS-NEXT:      20c: 00 00 02 18
+
+; DIS:      0000000000000218 (idx: 25) _$TLSML[TC]:
+; DIS-NEXT:      218: 00 00 00 00
+; DIS-NEXT: 0000000000000218:  R_TLSML	(idx: [[#NFA+23]]) _$TLSML[TC]
+; DIS-NEXT:      21c: 00 00 00 00
+
+; DIS:      0000000000000220 (idx: 27) ElementIntTLSv1[TE]:
+; DIS-NEXT:      220: 00 00 00 00
+; DIS-NEXT: 0000000000000220:  R_TLS_LD	(idx: [[#NFA+51]]) ElementIntTLSv1[TL]
+; DIS-NEXT:      224: 00 00 00 00
+
+; DIS:      0000000000000228 (idx: 29) ElementIntTLS3[TE]:
+; DIS-NEXT:      228: 00 00 00 00
+; DIS-NEXT: 0000000000000228:  R_TLS_LD	(idx: [[#NFA+55]]) ElementIntTLS3[TL]
+; DIS-NEXT:      22c: 00 00 be 6c
+
+; DIS:      0000000000000230 (idx: 31) ElementIntTLS4[TE]:
+; DIS-NEXT:      230: 00 00 00 00
+; DIS-NEXT: 0000000000000230:  R_TLS_LD	(idx: [[#NFA+57]]) ElementIntTLS4[TL]
+; DIS-NEXT:      234: 00 00 fc ec
+
+; DIS:      0000000000000238 (idx: 33) ElementIntTLS5[TE]:
+; DIS-NEXT:      238: 00 00 00 00
+; DIS-NEXT: 0000000000000238:  R_TLS_LD	(idx: [[#NFA+59]]) ElementIntTLS5[TL]
+; DIS-NEXT:      23c: 00 01 3b 6c
+
+; DIS:      0000000000000240 (idx: 35) ElementIntTLS2[TE]:
+; DIS-NEXT:      240: 00 00 00 00
+; DIS-NEXT: 0000000000000240:  R_TLS_LD	(idx: [[#NFA+53]]) ElementIntTLS2[TL]
+; DIS-NEXT:      244: 00 00 7f ec
+
+; DIS:      0000000000000248 (idx: 37) ElementLongTLS6[TE]:
+; DIS-NEXT:      248: 00 00 00 00
+; DIS-NEXT: 0000000000000248:  R_TLS_LD	(idx: [[#NFA+5]]) ElementLongTLS6[UL]
+; DIS-NEXT:      24c: 00 00 00 00
+
+; DIS:      0000000000000250 (idx: 39) ElementLongTLS2[TE]:
+; DIS-NEXT:      250: 00 00 00 00
+; DIS-NEXT: 0000000000000250:  R_TLS_LD	(idx: [[#NFA+63]]) ElementLongTLS2[TL]
+; DIS-NEXT:      254: 00 02 06 90
+
+; DIS:      0000000000000258 (idx: 41) .MyTLSGDVar[TE]:
+; DIS-NEXT:      258: 00 00 00 00
+; DIS-NEXT: 0000000000000258:  R_TLSM	(idx: [[#NFA+65]]) MyTLSGDVar[TL]
+; DIS-NEXT:      25c: 00 00 00 00
+
+; DIS:      0000000000000260 (idx: 43) MyTLSGDVar[TE]:
+; DIS-NEXT:      260: 00 00 00 00
+; DIS-NEXT: 0000000000000260:  R_TLS	(idx: [[#NFA+65]]) MyTLSGDVar[TL]
+; DIS-NEXT:      264: 00 02 64 50
+
+; DIS:      0000000000000268 (idx: 45) ElementLongTLS3[TE]:
+; DIS-NEXT:      268: 00 00 00 00
+; DIS-NEXT: 0000000000000268:  R_TLS_LD	(idx: [[#NFA+67]]) ElementLongTLS3[TL]
+; DIS-NEXT:      26c: 00 02 7d 50
+
+; DIS:      0000000000000270 (idx: 47) ElementLongTLS4[TE]:
+; DIS-NEXT:      270: 00 00 00 00
+; DIS-NEXT: 0000000000000270:  R_TLS_LD	(idx: [[#NFA+69]]) ElementLongTLS4[TL]
+; DIS-NEXT:      274: 00 02 db 10
+
+; DIS:      0000000000000278 (idx: 49) ElementLongTLS5[TE]:
+; DIS-NEXT:      278: 00 00 00 00
+; DIS-NEXT: 0000000000000278:  R_TLS_LD	(idx: [[#NFA+71]]) ElementLongTLS5[TL]
+; DIS-NEXT:      27c: 00 03 38 d0
 
-; DIS:      0000000000000180 (idx: [[#NFA+27]]) ElementIntTLSv2[TE]:
-; DIS-NEXT:      180: 00 00 00 00
-; DIS-NEXT: 0000000000000180:  R_TLS_LD     (idx: [[#NFA+39]]) ElementIntTLSv2[TL]
-; DIS-NEXT:      184: 00 01 79 ec
+; DIS:      0000000000000280 (idx: 51) ElementIntTLSv2[TE]:
+; DIS-NEXT:      280: 00 00 00 00
+; DIS-NEXT: 0000000000000280:  R_TLS_LD	(idx: [[#NFA+61]]) ElementIntTLSv2[TL]
+; DIS-NEXT:      284: 00 01 79 ec
 
 ; DIS:      Disassembly of section .tdata:
-; DIS:      0000000000000000 (idx: [[#NFA+29]]) ElementIntTLSv1[TL]:
-; DIS:      0000000000007fec (idx: [[#NFA+31]]) ElementIntTLS2[TL]:
-; DIS:      000000000000be6c (idx: [[#NFA+33]]) ElementIntTLS3[TL]:
-; DIS:      000000000000fcec (idx: [[#NFA+35]]) ElementIntTLS4[TL]:
-; DIS:      0000000000013b6c (idx: [[#NFA+37]]) ElementIntTLS5[TL]:
-; DIS:      00000000000179ec (idx: [[#NFA+39]]) ElementIntTLSv2[TL]:
+; DIS:      0000000000000000 (idx: [[#NFA+51]]) ElementIntTLSv1[TL]:
+; DIS:      0000000000007fec (idx: [[#NFA+53]]) ElementIntTLS2[TL]:
+; DIS:      000000000000be6c (idx: [[#NFA+55]]) ElementIntTLS3[TL]:
+; DIS:      000000000000fcec (idx: [[#NFA+57]]) ElementIntTLS4[TL]:
+; DIS:      0000000000013b6c (idx: [[#NFA+59]]) ElementIntTLS5[TL]:
+; DIS:      00000000000179ec (idx: [[#NFA+61]]) ElementIntTLSv2[TL]:
+; DIS:      0000000000020690 (idx: [[#NFA+63]]) ElementLongTLS2[TL]:
+; DIS:      0000000000026450 (idx: [[#NFA+65]]) MyTLSGDVar[TL]:
+; DIS:      0000000000027d50 (idx: [[#NFA+67]]) ElementLongTLS3[TL]:
+; DIS:      000000000002db10 (idx: [[#NFA+69]]) ElementLongTLS4[TL]:
+; DIS:      00000000000338d0 (idx: [[#NFA+71]]) ElementLongTLS5[TL]:
+; DIS:      0000000000039690 (idx: [[#NFA+73]]) ElementLongTLS[TL]:
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll
index c6defd8637fd0e..d996d86a23d868 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll
@@ -6,121 +6,1061 @@
 ; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
 ; RUN:      < %s | FileCheck %s \
 ; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
+; RUN: llc  -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
+; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64-O0
+; RUN: llc  -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN:      < %s | FileCheck %s \
+; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64-O0
 
 declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
-@TLVIntInit = local_unnamed_addr global i32 87, align 4
+@tlv_int_init = local_unnamed_addr global i32 87, align 4
 
-@TLVChar = thread_local(localdynamic) global i8 1, align 1
-@TLVShort = thread_local(localdynamic) global i8 1, align 2
-@TLVInt = thread_local(localdynamic) global i32 1, align 4
-@InternalTLVInt = internal thread_local(localdynamic) global i32 1, align 4
-@TLVLong = thread_local(localdynamic) global i64 1, align 8
-@InternalTLVLong = internal thread_local(localdynamic) global i64 1, align 8
-@TLVFloat = thread_local(localdynamic) global float 1.000000e+00, align 4
-@InternalTLVDouble = internal thread_local(localdynamic) global double 1.000000e+00, align 8
+@tlv_char = thread_local(localdynamic) global i8 1, align 1
+@tlv_short = thread_local(localdynamic) global i8 1, align 2
+@tlv_int = thread_local(localdynamic) global i32 1, align 4
+@internal_tlv_int = internal thread_local(localdynamic) global i32 1, align 4
+@tlv_long = thread_local(localdynamic) global i64 1, align 8
+@internal_tlv_long = internal thread_local(localdynamic) global i64 1, align 8
+@tlv_float = thread_local(localdynamic) global float 1.000000e+00, align 4
+@internal_tlv_double = internal thread_local(localdynamic) global double 1.000000e+00, align 8
 
 %struct.anon = type { i32 }
 @ThreadLocalStruct = thread_local(localdynamic) global %struct.anon zeroinitializer, align 1
 @a = thread_local(localdynamic) global [87 x i32] zeroinitializer, align 4
 
-define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
+define nonnull ptr @AddrTest1() local_unnamed_addr {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @a
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r3, 12
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C1@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r3, 12
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @a
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r3, r3, 12
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: AddrTest1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C0@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r3, r3, 12
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @a)
-  %arrayidx = getelementptr inbounds [87 x i32], ptr %0, i64 0, i64 3
+  %tlv_addr = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @a)
+  %arrayidx = getelementptr inbounds [87 x i32], ptr %tlv_addr, i64 0, i64 3
   ret ptr %arrayidx
 }
 
 define signext i32 @testUnaligned() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testUnaligned:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @ThreadLocalStruct
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testUnaligned:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: testUnaligned:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @ThreadLocalStruct
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwa r3, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testUnaligned:
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C2@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwa r3, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
-  %0 = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalStruct)
-  %x = getelementptr inbounds %struct.anon, ptr %0, i32 0, i32 0
-  %1 = load i32, ptr %x, align 1
-  ret i32 %1
+  %tlv_addr = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalStruct)
+  %x = getelementptr inbounds %struct.anon, ptr %tlv_addr, i32 0, i32 0
+  %value = load i32, ptr %x, align 1
+  ret i32 %value
 }
 
 define void @testChar(i8 noundef signext %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testChar:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @tlv_char
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stbx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testChar:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stbx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: testChar:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stw r3, 60(r1) # 4-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mr r4, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r5, L..C3(r2) # target-flags(ppc-tlsld) @tlv_char
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r4, r4, r5
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stb r3, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testChar:
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stw r3, 60(r1) # 4-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C3@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r5, 48(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mr r4, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r5, L..C3@l(r5)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r4, r4, r5
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stb r3, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
-  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @TLVChar)
-  store i8 %x, ptr %0, align 1
+  %tlv_addr = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @tlv_char)
+  store i8 %x, ptr %tlv_addr, align 1
   ret void
 }
 
 define void @testShort(i16 noundef signext %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testShort:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @tlv_short
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    sthx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testShort:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C4@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    sthx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: testShort:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stw r3, 60(r1) # 4-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mr r4, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r5, L..C4(r2) # target-flags(ppc-tlsld) @tlv_short
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r4, r4, r5
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    sth r3, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testShort:
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stw r3, 60(r1) # 4-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C4@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r5, 48(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mr r4, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r5, L..C4@l(r5)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r4, r4, r5
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    sth r3, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
-  %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @TLVShort)
-  store i16 %x, ptr %0, align 2
+  %tlv_addr = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @tlv_short)
+  store i16 %x, ptr %tlv_addr, align 2
   ret void
 }
 
 define signext i32 @testInt1() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testInt1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C5(r2) # target-flags(ppc-tlsld) @tlv_int
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testInt1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C5@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C5@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: testInt1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C5(r2) # target-flags(ppc-tlsld) @tlv_int
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwa r3, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testInt1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C5@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C5@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwa r3, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLVInt)
-  %1 = load i32, ptr %0, align 4
-  ret i32 %1
+  %tlv_addr = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @tlv_int)
+  %value = load i32, ptr %tlv_addr, align 4
+  ret i32 %value
 }
 
 define signext i32 @testInt2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testInt2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C6(r2) # target-flags(ppc-tlsld) @internal_tlv_int
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C7(r2) # @tlv_int_init
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwz r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    extsw r3, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testInt2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C6@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C6@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwzx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C7@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C7@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwz r4, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    extsw r3, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: testInt2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C6(r2) # target-flags(ppc-tlsld) @internal_tlv_int
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwz r4, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C7(r2) # @tlv_int_init
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwz r3, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    extsw r3, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testInt2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C6@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C6@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwz r4, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C7@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C7@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwz r3, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    extsw r3, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @InternalTLVInt)
-  %1 = load i32, ptr %0, align 4
-  %2 = load i32, ptr @TLVIntInit, align 4
-  %add = add nsw i32 %2, %1
-  ret i32 %add
+  %tlv_addr = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @internal_tlv_int)
+  %tlv_val = load i32, ptr %tlv_addr, align 4
+  %global_val = load i32, ptr @tlv_int_init, align 4
+  %sum = add nsw i32 %global_val, %tlv_val
+  ret i32 %sum
 }
 
 define signext i64 @testLong1() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testLong1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C8(r2) # target-flags(ppc-tlsld) @tlv_long
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testLong1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C8@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C8@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: testLong1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C8(r2) # target-flags(ppc-tlsld) @tlv_long
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testLong1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C8@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C8@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLVLong)
-  %1 = load i64, ptr %0, align 4
-  ret i64 %1
+  %tlv_addr = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @tlv_long)
+  %value = load i64, ptr %tlv_addr, align 4
+  ret i64 %value
 }
 
 define void @testLong2(i64 noundef signext %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testLong2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C9(r2) # target-flags(ppc-tlsld) @internal_tlv_long
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r5, r5, 9
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testLong2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C9@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C9@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r5, r5, 9
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: testLong2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C9(r2) # target-flags(ppc-tlsld) @internal_tlv_long
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r4, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r3, r3, 9
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r3, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testLong2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C9@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C9@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r4, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r3, r3, 9
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @InternalTLVLong)
-  %1 = load i64, ptr %0, align 8
-  %add = add nsw i64 %1, 9
-  store i64 %add, ptr %0, align 8
+  %tlv_addr = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @internal_tlv_long)
+  %value = load i64, ptr %tlv_addr, align 8
+  %add = add nsw i64 %value, 9
+  store i64 %add, ptr %tlv_addr, align 8
   ret void
 }
 
 define i32 @testLong3() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testLong3:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C8(r2) # target-flags(ppc-tlsld) @tlv_long
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testLong3:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C8@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C8@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: testLong3:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C8(r2) # target-flags(ppc-tlsld) @tlv_long
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    clrldi r3, r3, 32
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testLong3:
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C8@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C8@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    clrldi r3, r3, 32
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @TLVLong)
-  %1 = load i64, ptr %0, align 8
-  %conv = trunc i64 %1 to i32
+  %tlv_addr = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @tlv_long)
+  %value = load i64, ptr %tlv_addr, align 8
+  %conv = trunc i64 %value to i32
   ret i32 %conv
 }
 
 define void @testFloat1(float noundef %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testFloat1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    vspltisw v2, 1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    vspltisw v3, 8
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xvcvsxwdp vs0, vs34
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C10(r2) # target-flags(ppc-tlsld) @tlv_float
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfsx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    fadds f0, f1, f0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xvcvsxwdp vs1, vs35
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    fadds f0, f0, f1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfsx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testFloat1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    vspltisw v2, 1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C10@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    vspltisw v3, 8
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xvcvsxwdp vs0, vs34
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C10@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfsx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    fadds f0, f1, f0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xvcvsxwdp vs1, vs35
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    fadds f0, f0, f1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfsx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: testFloat1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C10(r2) # target-flags(ppc-tlsld) @tlv_float
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lfs f0, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C11(r2) # %const.1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lfs f1, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    fadds f0, f0, f1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C12(r2) # %const.0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lfs f1, 0(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    fadds f0, f0, f1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stfs f0, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testFloat1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C10@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C10@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lfs f0, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r4, L..C11@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C11@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lfs f1, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    fadds f0, f0, f1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r4, L..C12@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C12@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lfs f1, 0(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    fadds f0, f0, f1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stfs f0, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLVFloat)
-  %1 = load float, ptr %0, align 4
-  %inc = fadd float %1, 1.000000e+00
+  %tlv_addr = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @tlv_float)
+  %value = load float, ptr %tlv_addr, align 4
+  %inc = fadd float %value, 1.000000e+00
   %add = fadd float %inc, 8.000000e+00
-  store float %add, ptr %0, align 4
+  store float %add, ptr %tlv_addr, align 4
   ret void
 }
 
 define i32 @testFloat2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testFloat2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C10(r2) # target-flags(ppc-tlsld) @tlv_float
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfsx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r1, 60
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xscvdpsxws f0, f0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfiwx f0, 0, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwz r3, 60(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testFloat2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C10@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C10@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfsx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r1, 60
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xscvdpsxws f0, f0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfiwx f0, 0, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwz r3, 60(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: testFloat2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C10(r2) # target-flags(ppc-tlsld) @tlv_float
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lfs f0, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    fctiwz f0, f0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stfd f0, 56(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwa r3, 60(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    clrldi r3, r3, 32
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testFloat2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C10@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 48(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C10@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lfs f0, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    fctiwz f0, f0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stfd f0, 56(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwa r3, 60(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    clrldi r3, r3, 32
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
-  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TLVFloat)
-  %1 = load float, ptr %0, align 4
-  %conv = fptosi float %1 to i32
+  %tlv_addr = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @tlv_float)
+  %value = load float, ptr %tlv_addr, align 4
+  %conv = fptosi float %value to i32
   ret i32 %conv
 }
 
 define void @testDouble1(double noundef %x) {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testDouble1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C11(r2) # target-flags(ppc-tlsld) @internal_tlv_double
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfdx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testDouble1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C11@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C11@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfdx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: testDouble1:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C13(r2) # target-flags(ppc-tlsld) @internal_tlv_double
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stxsdx f1, 0, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testDouble1:
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C13@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C13@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stxsdx f1, 0, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @InternalTLVDouble)
-  store double %x, ptr %0, align 8
+  %tlv_addr = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @internal_tlv_double)
+  store double %x, ptr %tlv_addr, align 8
   ret void
 }
 
 define i32 @testDouble2() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testDouble2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C11(r2) # target-flags(ppc-tlsld) @internal_tlv_double
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfdx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r1, 60
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xscvdpsxws f0, f0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfiwx f0, 0, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwz r3, 60(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testDouble2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C11@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C11@l(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfdx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r1, 60
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xscvdpsxws f0, f0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfiwx f0, 0, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwz r3, 60(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: testDouble2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C13(r2) # target-flags(ppc-tlsld) @internal_tlv_double
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lfdx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    xscvdpsxws f0, f0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r3, r1, 52
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stfiwx f0, 0, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwz r3, 52(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    clrldi r3, r3, 32
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testDouble2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 96(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C13@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C13@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lfdx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    xscvdpsxws f0, f0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r3, r1, 68
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stfiwx f0, 0, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwz r3, 68(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    clrldi r3, r3, 32
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 80
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
-  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @InternalTLVDouble)
-  %1 = load double, ptr %0, align 8
-  %conv = fptosi double %1 to i32
+  %tlv_addr = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @internal_tlv_double)
+  %value = load double, ptr %tlv_addr, align 8
+  %conv = fptosi double %value to i32
   ret i32 %conv
 }



More information about the llvm-commits mailing list