[llvm] [PowerPC][AIX] Enable aix-small-local-dynamic-tls target attribute (PR #86641)

Felix via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 8 19:48:54 PDT 2024


https://github.com/orcguru updated https://github.com/llvm/llvm-project/pull/86641

>From fd78e95cd5d32343bcb87f8a6c22f66b52409900 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Tue, 26 Mar 2024 01:15:56 -0400
Subject: [PATCH 1/5] [PowerPC] Add target attribute for
 -maix-small-local-dynamic-tls option

---
 .../MCTargetDesc/PPCXCOFFObjectWriter.cpp     |    4 +
 llvm/lib/Target/PowerPC/PPC.td                |    9 +
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     |   20 +-
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |   32 +-
 llvm/lib/Target/PowerPC/PPCMCInstLower.cpp    |   13 +-
 llvm/lib/Target/PowerPC/PPCSubtarget.cpp      |   26 +-
 ...aix-small-local-dynamic-tls-largeaccess.ll |  182 +--
 .../aix-small-local-dynamic-tls-types.ll      | 1340 ++++++++++++++---
 ...ix-small-local-exec-tls-opt-IRattribute.ll |   25 +-
 .../check-aix-small-local-exec-tls-opt.ll     |   14 +-
 10 files changed, 1274 insertions(+), 391 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
index f4998e9b9dcba8..714ce64a39391e 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
@@ -71,6 +71,8 @@ std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize(
       return {XCOFF::RelocationType::R_TOCL, SignAndSizeForHalf16};
     case MCSymbolRefExpr::VK_PPC_AIX_TLSLE:
       return {XCOFF::RelocationType::R_TLS_LE, SignAndSizeForHalf16};
+    case MCSymbolRefExpr::VK_PPC_AIX_TLSLD:
+      return {XCOFF::RelocationType::R_TLS_LD, SignAndSizeForHalf16};
     }
   } break;
   case PPC::fixup_ppc_half16ds:
@@ -86,6 +88,8 @@ std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize(
       return {XCOFF::RelocationType::R_TOCL, 15};
     case MCSymbolRefExpr::VK_PPC_AIX_TLSLE:
       return {XCOFF::RelocationType::R_TLS_LE, 15};
+    case MCSymbolRefExpr::VK_PPC_AIX_TLSLD:
+      return {XCOFF::RelocationType::R_TLS_LD, 15};
     }
   } break;
   case PPC::fixup_ppc_br24:
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index 535616d33a8032..12d6b868f28545 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -329,6 +329,15 @@ def FeatureAIXLocalExecTLS :
                    "Produce a TOC-free local-exec TLS sequence for this function "
                    "for 64-bit AIX">;
 
+// Specifies that local-dynamic TLS accesses in any function with this target
+// attribute should use the optimized sequence (where the offset is an immediate
+// off the module handle, for which the linker might add fix-up code if the
+// immediate is too large).
+def FeatureAIXLocalDynamicTLS :
+  SubtargetFeature<"aix-small-local-dynamic-tls", "HasAIXSmallLocalDynamicTLS",
+                   "true", "Produce a faster local-dynamic TLS sequence for this "
+                   "function for 64-bit AIX">;
+
 def FeaturePredictableSelectIsExpensive :
   SubtargetFeature<"predictable-select-expensive",
                    "PredictableSelectIsExpensive",
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 16942c6893a16d..7716aa4dc70f5f 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -803,7 +803,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
   MCInst TmpInst;
   const bool IsPPC64 = Subtarget->isPPC64();
   const bool IsAIX = Subtarget->isAIXABI();
-  const bool HasAIXSmallLocalExecTLS = Subtarget->hasAIXSmallLocalExecTLS();
+  const bool HasAIXSmallLocalTLS = Subtarget->hasAIXSmallLocalExecTLS() ||
+                                   Subtarget->hasAIXSmallLocalDynamicTLS();
   const Module *M = MF->getFunction().getParent();
   PICLevel::Level PL = M->getPICLevel();
 
@@ -1612,11 +1613,11 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
   case PPC::LFD:
   case PPC::STFD:
   case PPC::ADDI8: {
-    // A faster non-TOC-based local-exec sequence is represented by `addi`
-    // or a load/store instruction (that directly loads or stores off of the
-    // thread pointer) with an immediate operand having the MO_TPREL_FLAG.
+    // A faster non-TOC-based local-[exec|dynamic] sequence is an `addi` or a
+    // load/store (directly off the thread pointer or the module handle) with
+    // an immediate operand having the MO_TPREL_FLAG or the MO_TLSLD_FLAG.
     // Such instructions do not otherwise arise.
-    if (!HasAIXSmallLocalExecTLS)
+    if (!HasAIXSmallLocalTLS)
       break;
     bool IsMIADDI8 = MI->getOpcode() == PPC::ADDI8;
     unsigned OpNum = IsMIADDI8 ? 2 : 1;
@@ -1624,7 +1625,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     unsigned Flag = MO.getTargetFlags();
     if (Flag == PPCII::MO_TPREL_FLAG ||
         Flag == PPCII::MO_GOT_TPREL_PCREL_FLAG ||
-        Flag == PPCII::MO_TPREL_PCREL_FLAG) {
+        Flag == PPCII::MO_TPREL_PCREL_FLAG || Flag == PPCII::MO_TLSLD_FLAG) {
       LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
 
       const MCExpr *Expr = getAdjustedLocalExecExpr(MO, MO.getOffset());
@@ -1672,7 +1673,12 @@ const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
 
   assert(MO.isGlobal() && "Only expecting a global MachineOperand here!");
   const GlobalValue *GValue = MO.getGlobal();
-  assert(TM.getTLSModel(GValue) == TLSModel::LocalExec &&
+  // TODO: handle aix-small-local-dynamic-tls non-zero offset case.
+  TLSModel::Model Model = TM.getTLSModel(GValue);
+  if (Model == TLSModel::LocalDynamic) {
+    return nullptr;
+  }
+  assert(Model == TLSModel::LocalExec &&
          "Only local-exec accesses are handled!");
 
   bool IsGlobalADeclaration = GValue->isDeclarationForLinker();
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 43e4a34a9b3483..75ea1aa61b3a2a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -153,10 +153,10 @@ static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
 
 static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
 
-// A faster local-exec TLS access sequence (enabled with the
-// -maix-small-local-exec-tls option) can be produced for TLS variables;
-// consistent with the IBM XL compiler, we apply a max size of slightly under
-// 32KB.
+// A faster local-[exec|dynamic] TLS access sequence (enabled with the
+// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
+// variables; consistent with the IBM XL compiler, we apply a max size of
+// slightly under 32KB.
 constexpr uint64_t AIXSmallTlsPolicySizeLimit = 32751;
 
 // FIXME: Remove this once the bug has been fixed!
@@ -3434,6 +3434,14 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
   }
 
   if (Model == TLSModel::LocalDynamic) {
+    bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
+
+    // We do not implement the 32-bit version of the faster access sequence
+    // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
+    if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
+      report_fatal_error("The small-local-dynamic TLS access sequence is "
+                         "currently only supported on AIX (64-bit mode).");
+
     // For local-dynamic on AIX, we need to generate one TOC entry for each
     // variable offset, and a single module-handle TOC entry for the entire
     // file.
@@ -3454,6 +3462,22 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
     SDValue ModuleHandle =
         DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
 
+    // With the -maix-small-local-dynamic-tls option, produce a faster access
+    // sequence for local-dynamic TLS variables where the offset from the
+    // module-handle is encoded as an immediate operand.
+    //
+    // We only utilize the faster local-dynamic access sequence when the TLS
+    // variable has a size within the policy limit. We treat types that are
+    // not sized or are empty as being over the policy size limit.
+    if (HasAIXSmallLocalDynamicTLS) {
+      Type *GVType = GV->getValueType();
+      if (GVType->isSized() && !GVType->isEmptyTy() &&
+          GV->getParent()->getDataLayout().getTypeAllocSize(GVType) <=
+              AIXSmallTlsPolicySizeLimit)
+        return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
+                           ModuleHandle);
+    }
+
     return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
   }
 
diff --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index 9a3ca5a7829362..c05bb37e58bf60 100644
--- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -96,15 +96,18 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
     RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD_PCREL;
   else if (MO.getTargetFlags() == PPCII::MO_GOT_TPREL_PCREL_FLAG)
     RefKind = MCSymbolRefExpr::VK_PPC_GOT_TPREL_PCREL;
-  else if (MO.getTargetFlags() == PPCII::MO_TPREL_FLAG) {
+  else if (MO.getTargetFlags() == PPCII::MO_TPREL_FLAG ||
+           MO.getTargetFlags() == PPCII::MO_TLSLD_FLAG) {
     assert(MO.isGlobal() && "Only expecting a global MachineOperand here!");
     TLSModel::Model Model = TM.getTLSModel(MO.getGlobal());
-    // For the local-exec TLS model, we may generate the offset from the TLS
-    // base as an immediate operand (instead of using a TOC entry).
-    // Set the relocation type in case the result is used for purposes other
-    // than a TOC reference. In TOC reference cases, this result is discarded.
+    // For the local-[exec|dynamic] TLS model, we may generate the offset from
+    // the TLS base as an immediate operand (instead of using a TOC entry). Set
+    // the relocation type in case the result is used for purposes other than a
+    // TOC reference. In TOC reference cases, this result is discarded.
     if (Model == TLSModel::LocalExec)
       RefKind = MCSymbolRefExpr::VK_PPC_AIX_TLSLE;
+    else if (Model == TLSModel::LocalDynamic)
+      RefKind = MCSymbolRefExpr::VK_PPC_AIX_TLSLD;
   }
 
   const MachineInstr *MI = MO.getParent();
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 653d9bda99192a..d1722555f1fcb3 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -124,22 +124,22 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU,
   // Determine endianness.
   IsLittleEndian = TM.isLittleEndian();
 
-  if (HasAIXSmallLocalExecTLS) {
+  if (HasAIXSmallLocalExecTLS || HasAIXSmallLocalDynamicTLS) {
     if (!TargetTriple.isOSAIX() || !IsPPC64)
-      report_fatal_error(
-          "The aix-small-local-exec-tls attribute is only supported on AIX in "
-          "64-bit mode.\n",
-          false);
-    // The aix-small-local-exec-tls attribute should only be used with
+      report_fatal_error("The aix-small-local-[exec|dynamic]-tls attribute is "
+                         "only supported on AIX in "
+                         "64-bit mode.\n",
+                         false);
+    // The aix-small-local-[exec|dynamic]-tls attribute should only be used with
     // -data-sections, as having data sections turned off with this option
-    // is not ideal for performance. Moreover, the small-local-exec-tls region
-    // is a limited resource, and should not be used for variables that may
-    // be replaced.
+    // is not ideal for performance. Moreover, the
+    // small-local-[exec|dynamic]-tls region is a limited resource, and should
+    // not be used for variables that may be replaced.
     if (!TM.getDataSections())
-      report_fatal_error(
-          "The aix-small-local-exec-tls attribute can only be specified with "
-          "-data-sections.\n",
-          false);
+      report_fatal_error("The aix-small-local-[exec|dynamic]-tls attribute can "
+                         "only be specified with "
+                         "-data-sections.\n",
+                         false);
   }
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll
index eb16bae67150e3..7db1048c258cd0 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
 ; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
-; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
+; RUN:      -mattr=+aix-small-local-dynamic-tls -mtriple powerpc64-ibm-aix-xcoff < %s \
 ; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64
 ; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
-; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN:      -mattr=+aix-small-local-dynamic-tls -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
 ; RUN:      < %s | FileCheck %s \
 ; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
 
@@ -39,27 +39,23 @@ define signext i32 @test1() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r6, L..C1(r2) # target-flags(ppc-tlsld) @ElementIntTLS2
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r7, L..C2(r2) # target-flags(ppc-tlsld) @ElementIntTLS3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r8, L..C3(r2) # target-flags(ppc-tlsld) @ElementIntTLS4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r9, L..C4(r2) # target-flags(ppc-tlsld) @ElementIntTLS5
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r6, 4
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r5, L..C5(r2) # target-flags(ppc-tlsld) @ElementIntTLSv1
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 1
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r6, r3, r6
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r7, r3, r7
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r8, r3, r8
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r9, r3, r9
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stwux r4, r3, r5
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 24(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 2
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 320(r6)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 324(r7)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 88
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 328(r8)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 332(r9)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r5, 1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r4, ElementIntTLSv1[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r5, ElementIntTLSv1[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r5, ElementIntTLS2[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r6, 24(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 2
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 320(r5)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r4, ElementIntTLS3[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r5, 3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r5, 324(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r4, ElementIntTLS4[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r3, ElementIntTLS5[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r6, 328(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 88
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 332(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 102
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
@@ -71,34 +67,25 @@ define signext i32 @test1() {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1 at u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C2 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r6, 4
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r8, L..C3 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r9, L..C4 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r7, L..C2 at l(r7)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r8, L..C3 at l(r8)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r9, L..C4 at l(r9)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r5, L..C1 at l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C5 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 1
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r6, L..C5 at l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r7, r3, r7
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r8, r3, r8
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r9, r3, r9
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r6, r3, r6
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stwux r4, r3, r5
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, 24(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 2
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 320(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 324(r7)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 88
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, 328(r8)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 332(r9)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r5, 1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r4, ElementIntTLSv1[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r5, ElementIntTLSv1[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r5, ElementIntTLS2[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r6, 24(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 2
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, 320(r5)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r4, ElementIntTLS3[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r5, 3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r5, 324(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r4, ElementIntTLS4[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r3, ElementIntTLS5[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r6, 328(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 88
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, 332(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 102
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
@@ -144,31 +131,26 @@ define i64 @test2() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C6(r2) # target-flags(ppc-tlsld) @ElementLongTLS6
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 212
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r4, r6, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r3, 424(r4)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C7(r2) # target-flags(ppc-tlsld) @ElementLongTLS2
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r3, ElementLongTLS6[UL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 212
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, 424(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r3, ElementLongTLS2[TL]@ld(r6)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 203
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r6, r3
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, 1200(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C8(r2) # target-flags(ppc-tlsgdm) @MyTLSGDVar
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C9(r2) # target-flags(ppc-tlsgd) @MyTLSGDVar
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C1(r2) # target-flags(ppc-tlsgdm) @MyTLSGDVar
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsgd) @MyTLSGDVar
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 44
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, 440(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C10(r2) # target-flags(ppc-tlsld) @ElementLongTLS3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r3, ElementLongTLS3[TL]@ld(r6)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 6
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r6, r3
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, 2000(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C11(r2) # target-flags(ppc-tlsld) @ElementLongTLS4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 100
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r4, r6, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r3, 6800(r4)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C12(r2) # target-flags(ppc-tlsld) @ElementLongTLS5
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r3, ElementLongTLS4[TL]@ld(r6)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 100
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, 6800(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r3, ElementLongTLS5[TL]@ld(r6)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 882
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r6, r3
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, 8400(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 1191
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
@@ -182,40 +164,30 @@ define i64 @test2() {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C6 at u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 212
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C6 at l(r7)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r3, ElementLongTLS6[UL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 424(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C7 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r3, ElementLongTLS2[TL]@ld(r6)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 203
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C7 at l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r6, r3
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 1200(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C8 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C9 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C8 at l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C9 at l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C1 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C2 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2 at l(r4)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 44
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 440(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C10 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r3, ElementLongTLS3[TL]@ld(r6)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 6
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C10 at l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r6, r3
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 2000(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C11 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r3, ElementLongTLS4[TL]@ld(r6)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 100
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C11 at l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r6, r3
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 6800(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C12 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r3, ElementLongTLS5[TL]@ld(r6)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 882
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C12 at l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r6, r3
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 8400(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 1191
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
@@ -257,27 +229,23 @@ define signext i32 @test3() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r8, L..C3(r2) # target-flags(ppc-tlsld) @ElementIntTLSv2
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r6, L..C1(r2) # target-flags(ppc-tlsld) @ElementIntTLS2
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r7, L..C2(r2) # target-flags(ppc-tlsld) @ElementIntTLS3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r8, L..C3(r2) # target-flags(ppc-tlsld) @ElementIntTLS4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r9, L..C4(r2) # target-flags(ppc-tlsld) @ElementIntTLS5
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r5, L..C13(r2) # target-flags(ppc-tlsld) @ElementIntTLSv2
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 1
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r6, r3, r6
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r7, r3, r7
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r8, r3, r8
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r9, r3, r9
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stwux r4, r3, r5
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r5, ElementIntTLS2[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r6, ElementIntTLS3[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r7, ElementIntTLS4[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r9, ElementIntTLS5[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stwux r4, r3, r8
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 4
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 24(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 2
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 320(r6)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 320(r5)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 324(r7)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 324(r6)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 88
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 328(r8)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 328(r7)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 332(r9)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 102
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
@@ -290,29 +258,21 @@ define signext i32 @test3() {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C13 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3 at u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C2 at u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r8, L..C3 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r9, L..C4 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r7, L..C2 at l(r7)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r8, L..C3 at l(r8)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r9, L..C4 at l(r9)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r6, L..C3 at l(r6)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r5, L..C13 at l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C5 at u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 1
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r6, L..C5 at l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r7, r3, r7
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r8, r3, r8
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r9, r3, r9
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r6, r3, r6
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stwux r4, r3, r5
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r5, ElementIntTLS2[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r7, ElementIntTLS3[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r8, ElementIntTLS4[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r9, ElementIntTLS5[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stwux r4, r3, r6
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 4
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, 24(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 2
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 320(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 320(r5)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 3
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 324(r7)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 88
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll
index d996d86a23d868..489ef11e7040a2 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll
@@ -1,18 +1,30 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
 ; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
-; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
+; RUN:      -mattr=+aix-small-local-dynamic-tls -mtriple powerpc64-ibm-aix-xcoff < %s \
 ; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64
 ; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mattr=+aix-small-local-dynamic-tls -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN:      < %s | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
+; RUN: llc  -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mattr=+aix-small-local-dynamic-tls -mtriple powerpc64-ibm-aix-xcoff < %s \
+; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64-O0
+; RUN: llc  -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mattr=+aix-small-local-dynamic-tls -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN:      < %s | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64-O0
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
+; RUN:      | FileCheck %s --check-prefix=DEFAULT-LOCAL-DYNAMIC-SMALLCM64
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
 ; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
 ; RUN:      < %s | FileCheck %s \
-; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64
+; RUN:      --check-prefix=DEFAULT-LOCAL-DYNAMIC-LARGECM64
 ; RUN: llc  -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
 ; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
-; RUN:      | FileCheck %s --check-prefix=SMALL-LOCAL-DYNAMIC-SMALLCM64-O0
+; RUN:      | FileCheck %s --check-prefix=DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0
 ; RUN: llc  -O0 -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
 ; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
 ; RUN:      < %s | FileCheck %s \
-; RUN:      --check-prefix=SMALL-LOCAL-DYNAMIC-LARGECM64-O0
+; RUN:      --check-prefix=DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0
 
 declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
 @tlv_int_init = local_unnamed_addr global i32 87, align 4
@@ -38,8 +50,7 @@ define nonnull ptr @AddrTest1() local_unnamed_addr {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @a
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r3, a[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r3, 12
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
@@ -51,12 +62,10 @@ define nonnull ptr @AddrTest1() local_unnamed_addr {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1@u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C1@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r3, a[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r3, 12
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
@@ -70,8 +79,7 @@ define nonnull ptr @AddrTest1() local_unnamed_addr {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @a
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    la r3, a[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r3, r3, 12
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
@@ -81,21 +89,83 @@ define nonnull ptr @AddrTest1() local_unnamed_addr {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: AddrTest1:
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C0@l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C0@l(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    la r3, a[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r3, r3, 12
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-LABEL: AddrTest1:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @a
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r3, 12
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-LABEL: AddrTest1:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C1@l(r6)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r3, 12
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: AddrTest1:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @a
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r3, r3, 12
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: AddrTest1:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C0@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C0@l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r3, r3, 12
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
   %tlv_addr = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @a)
   %arrayidx = getelementptr inbounds [87 x i32], ptr %tlv_addr, i64 0, i64 3
@@ -110,8 +180,8 @@ define signext i32 @testUnaligned() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @ThreadLocalStruct
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r3, ThreadLocalStruct[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwa r3, 0(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
@@ -122,12 +192,11 @@ define signext i32 @testUnaligned() {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r3, ThreadLocalStruct[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwa r3, 0(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
@@ -140,8 +209,7 @@ define signext i32 @testUnaligned() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @ThreadLocalStruct
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    la r3, ThreadLocalStruct[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwa r3, 0(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
@@ -151,21 +219,81 @@ define signext i32 @testUnaligned() {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testUnaligned:
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C2@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C0@l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C2@l(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    la r3, ThreadLocalStruct[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwa r3, 0(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-LABEL: testUnaligned:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @ThreadLocalStruct
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwax r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-LABEL: testUnaligned:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C2@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2@l(r6)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwax r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: testUnaligned:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @ThreadLocalStruct
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwa r3, 0(r3)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testUnaligned:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C2@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C2@l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwa r3, 0(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
   %tlv_addr = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalStruct)
   %x = getelementptr inbounds %struct.anon, ptr %tlv_addr, i32 0, i32 0
@@ -182,8 +310,7 @@ define void @testChar(i8 noundef signext %x) {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @tlv_char
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stbx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stb r6, tlv_char[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
@@ -195,12 +322,10 @@ define void @testChar(i8 noundef signext %x) {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C3@u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r7)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stbx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stb r6, tlv_char[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
@@ -217,8 +342,7 @@ define void @testChar(i8 noundef signext %x) {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mr r4, r3
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r5, L..C3(r2) # target-flags(ppc-tlsld) @tlv_char
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r4, r4, r5
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    la r4, tlv_char[TL]@ld(r4)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stb r3, 0(r4)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 64
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
@@ -232,21 +356,91 @@ define void @testChar(i8 noundef signext %x) {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stw r3, 60(r1) # 4-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C3@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C0@l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r5, 48(r1) # 8-byte Folded Reload
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mr r4, r3
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r5, L..C3@l(r5)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r4, r4, r5
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    la r4, tlv_char[TL]@ld(r4)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stb r3, 0(r4)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-LABEL: testChar:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @tlv_char
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stbx r6, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-LABEL: testChar:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C3@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r7)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stbx r6, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: testChar:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 80(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stw r3, 60(r1) # 4-byte Folded Spill
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mr r4, r3
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r5, L..C3(r2) # target-flags(ppc-tlsld) @tlv_char
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r4, r4, r5
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stb r3, 0(r4)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 64
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testChar:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stw r3, 60(r1) # 4-byte Folded Spill
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C3@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r5, 48(r1) # 8-byte Folded Reload
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mr r4, r3
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r5, L..C3@l(r5)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r4, r4, r5
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stb r3, 0(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
   %tlv_addr = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @tlv_char)
   store i8 %x, ptr %tlv_addr, align 1
@@ -262,8 +456,7 @@ define void @testShort(i16 noundef signext %x) {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @tlv_short
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    sthx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    sth r6, tlv_short[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
@@ -275,12 +468,10 @@ define void @testShort(i16 noundef signext %x) {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C4@u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r7)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    sthx r6, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    sth r6, tlv_short[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
@@ -297,8 +488,7 @@ define void @testShort(i16 noundef signext %x) {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mr r4, r3
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r5, L..C4(r2) # target-flags(ppc-tlsld) @tlv_short
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r4, r4, r5
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    la r4, tlv_short[TL]@ld(r4)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    sth r3, 0(r4)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 64
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
@@ -312,21 +502,91 @@ define void @testShort(i16 noundef signext %x) {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stw r3, 60(r1) # 4-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C4@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C0@l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r5, 48(r1) # 8-byte Folded Reload
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mr r4, r3
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r5, L..C4@l(r5)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r4, r4, r5
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    la r4, tlv_short[TL]@ld(r4)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    sth r3, 0(r4)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-LABEL: testShort:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @tlv_short
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    sthx r6, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-LABEL: testShort:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C4@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r7)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    sthx r6, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: testShort:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 80(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stw r3, 60(r1) # 4-byte Folded Spill
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mr r4, r3
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r5, L..C4(r2) # target-flags(ppc-tlsld) @tlv_short
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r4, r4, r5
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    sth r3, 0(r4)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 64
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testShort:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stw r3, 60(r1) # 4-byte Folded Spill
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C4@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r5, 48(r1) # 8-byte Folded Reload
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mr r4, r3
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r5, L..C4@l(r5)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r4, r4, r5
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    sth r3, 0(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
   %tlv_addr = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @tlv_short)
   store i16 %x, ptr %tlv_addr, align 2
@@ -341,8 +601,7 @@ define signext i32 @testInt1() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C5(r2) # target-flags(ppc-tlsld) @tlv_int
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwa r3, tlv_int[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
@@ -353,12 +612,10 @@ define signext i32 @testInt1() {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C5 at u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C5 at l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwax r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwa r3, tlv_int[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
@@ -371,8 +628,7 @@ define signext i32 @testInt1() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C5(r2) # target-flags(ppc-tlsld) @tlv_int
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    la r3, tlv_int[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwa r3, 0(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
@@ -382,21 +638,81 @@ define signext i32 @testInt1() {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testInt1:
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C5 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C0 at l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C5 at l(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    la r3, tlv_int[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwa r3, 0(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-LABEL: testInt1:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C5(r2) # target-flags(ppc-tlsld) @tlv_int
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwax r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-LABEL: testInt1:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C5 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C5 at l(r6)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwax r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: testInt1:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C5(r2) # target-flags(ppc-tlsld) @tlv_int
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwa r3, 0(r3)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testInt1:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C5 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1 at l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C5 at l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwa r3, 0(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
   %tlv_addr = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @tlv_int)
   %value = load i32, ptr %tlv_addr, align 4
@@ -411,9 +727,8 @@ define signext i32 @testInt2() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C6(r2) # target-flags(ppc-tlsld) @internal_tlv_int
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwzx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C7(r2) # @tlv_int_init
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # @tlv_int_init
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwz r3, internal_tlv_int[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwz r4, 0(r4)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    extsw r3, r3
@@ -427,14 +742,12 @@ define signext i32 @testInt2() {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C6 at u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C6 at l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwzx r3, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C7 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C7 at l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C1 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C1 at l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwz r3, internal_tlv_int[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwz r4, 0(r4)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    extsw r3, r3
@@ -450,10 +763,9 @@ define signext i32 @testInt2() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C6(r2) # target-flags(ppc-tlsld) @internal_tlv_int
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    la r3, internal_tlv_int[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwz r4, 0(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C7(r2) # @tlv_int_init
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C1(r2) # @tlv_int_init
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwz r3, 0(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    extsw r3, r3
@@ -465,26 +777,104 @@ define signext i32 @testInt2() {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testInt2:
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C6 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C0 at l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C6 at l(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    la r3, internal_tlv_int[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwz r4, 0(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C7 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C7 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1 at l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwz r3, 0(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    extsw r3, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-LABEL: testInt2:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C6(r2) # target-flags(ppc-tlsld) @internal_tlv_int
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwzx r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C7(r2) # @tlv_int_init
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwz r4, 0(r4)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    extsw r3, r3
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-LABEL: testInt2:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C6 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C6 at l(r6)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwzx r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C7 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C7 at l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwz r4, 0(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    extsw r3, r3
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: testInt2:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C6(r2) # target-flags(ppc-tlsld) @internal_tlv_int
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwz r4, 0(r3)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C7(r2) # @tlv_int_init
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwz r3, 0(r3)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    extsw r3, r3
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testInt2:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C6 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1 at l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C6 at l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwz r4, 0(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C7 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C7 at l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwz r3, 0(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    extsw r3, r3
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
   %tlv_addr = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @internal_tlv_int)
   %tlv_val = load i32, ptr %tlv_addr, align 4
@@ -501,8 +891,7 @@ define signext i64 @testLong1() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C8(r2) # target-flags(ppc-tlsld) @tlv_long
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, tlv_long[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
@@ -513,12 +902,10 @@ define signext i64 @testLong1() {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C8 at u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C8 at l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, tlv_long[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
@@ -531,8 +918,7 @@ define signext i64 @testLong1() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C8(r2) # target-flags(ppc-tlsld) @tlv_long
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    la r3, tlv_long[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, 0(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
@@ -542,21 +928,81 @@ define signext i64 @testLong1() {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testLong1:
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C8 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C0 at l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C8 at l(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    la r3, tlv_long[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, 0(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-LABEL: testLong1:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C8(r2) # target-flags(ppc-tlsld) @tlv_long
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-LABEL: testLong1:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C8 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C8 at l(r6)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: testLong1:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C8(r2) # target-flags(ppc-tlsld) @tlv_long
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, 0(r3)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testLong1:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C8 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1 at l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C8 at l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, 0(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
   %tlv_addr = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @tlv_long)
   %value = load i64, ptr %tlv_addr, align 4
@@ -571,10 +1017,9 @@ define void @testLong2(i64 noundef signext %x) {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C9(r2) # target-flags(ppc-tlsld) @internal_tlv_long
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r5, r3, r4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r5, r5, 9
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, internal_tlv_long[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r4, r4, 9
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, internal_tlv_long[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
@@ -585,14 +1030,12 @@ define void @testLong2(i64 noundef signext %x) {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C9 at u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C9 at l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r5, r3, r4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r5, r5, 9
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdx r5, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, internal_tlv_long[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r4, r4, 9
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, internal_tlv_long[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
@@ -605,8 +1048,7 @@ define void @testLong2(i64 noundef signext %x) {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C9(r2) # target-flags(ppc-tlsld) @internal_tlv_long
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r4, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    la r4, internal_tlv_long[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, 0(r4)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r3, r3, 9
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r3, 0(r4)
@@ -618,23 +1060,91 @@ define void @testLong2(i64 noundef signext %x) {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testLong2:
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C9 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C0 at l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C9 at l(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r4, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    la r4, internal_tlv_long[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, 0(r4)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r3, r3, 9
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 0(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-LABEL: testLong2:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C9(r2) # target-flags(ppc-tlsld) @internal_tlv_long
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r5, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r5, r5, 9
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdx r5, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-LABEL: testLong2:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C9 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C9 at l(r6)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r5, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r5, r5, 9
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdx r5, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: testLong2:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C9(r2) # target-flags(ppc-tlsld) @internal_tlv_long
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r4, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, 0(r4)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r3, r3, 9
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r3, 0(r4)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testLong2:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C9 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1 at l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C9 at l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r4, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, 0(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r3, r3, 9
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 0(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
   %tlv_addr = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @internal_tlv_long)
   %value = load i64, ptr %tlv_addr, align 8
@@ -651,8 +1161,7 @@ define i32 @testLong3() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C8(r2) # target-flags(ppc-tlsld) @tlv_long
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, tlv_long[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
@@ -663,12 +1172,10 @@ define i32 @testLong3() {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C8 at u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C8 at l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, tlv_long[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
@@ -681,8 +1188,7 @@ define i32 @testLong3() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C8(r2) # target-flags(ppc-tlsld) @tlv_long
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    la r3, tlv_long[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, 0(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    clrldi r3, r3, 32
@@ -694,23 +1200,87 @@ define i32 @testLong3() {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testLong3:
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C8 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C0 at l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C8 at l(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    la r3, tlv_long[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, 0(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    clrldi r3, r3, 32
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-LABEL: testLong3:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C8(r2) # target-flags(ppc-tlsld) @tlv_long
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-LABEL: testLong3:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C8 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C8 at l(r6)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: testLong3:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C8(r2) # target-flags(ppc-tlsld) @tlv_long
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, 0(r3)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    clrldi r3, r3, 32
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testLong3:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C8 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1 at l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C8 at l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, 0(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    clrldi r3, r3, 32
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
   %tlv_addr = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @tlv_long)
   %value = load i64, ptr %tlv_addr, align 8
@@ -723,18 +1293,17 @@ define void @testFloat1(float noundef %x) {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    vspltisw v2, 1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    vspltisw v3, 8
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xvcvsxwdp vs0, vs34
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C10(r2) # target-flags(ppc-tlsld) @tlv_float
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfsx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfs f1, tlv_float[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    fadds f0, f1, f0
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xvcvsxwdp vs1, vs35
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    fadds f0, f0, f1
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfsx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfs f0, tlv_float[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
@@ -744,20 +1313,18 @@ define void @testFloat1(float noundef %x) {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    vspltisw v2, 1
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C10 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    vspltisw v3, 8
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xvcvsxwdp vs0, vs34
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C10 at l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfsx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfs f1, tlv_float[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    fadds f0, f1, f0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xvcvsxwdp vs1, vs35
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    fadds f0, f0, f1
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfsx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfs f0, tlv_float[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
@@ -770,13 +1337,12 @@ define void @testFloat1(float noundef %x) {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C10(r2) # target-flags(ppc-tlsld) @tlv_float
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    la r3, tlv_float[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lfs f0, 0(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C11(r2) # %const.1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C2(r2) # %const.1
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lfs f1, 0(r4)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    fadds f0, f0, f1
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C12(r2) # %const.0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C3(r2) # %const.0
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lfs f1, 0(r4)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    fadds f0, f0, f1
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stfs f0, 0(r3)
@@ -788,30 +1354,120 @@ define void @testFloat1(float noundef %x) {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testFloat1:
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C10 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C0 at l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C10 at l(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    la r3, tlv_float[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lfs f0, 0(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r4, L..C11 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C11 at l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r4, L..C2 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C2 at l(r4)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lfs f1, 0(r4)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    fadds f0, f0, f1
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r4, L..C12 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C12 at l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r4, L..C3 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C3 at l(r4)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lfs f1, 0(r4)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    fadds f0, f0, f1
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stfs f0, 0(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-LABEL: testFloat1:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    vspltisw v2, 1
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    vspltisw v3, 8
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xvcvsxwdp vs0, vs34
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C10(r2) # target-flags(ppc-tlsld) @tlv_float
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfsx f1, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    fadds f0, f1, f0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xvcvsxwdp vs1, vs35
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    fadds f0, f0, f1
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfsx f0, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-LABEL: testFloat1:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    vspltisw v2, 1
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C10 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    vspltisw v3, 8
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    xvcvsxwdp vs0, vs34
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C10 at l(r6)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfsx f1, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    fadds f0, f1, f0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    xvcvsxwdp vs1, vs35
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    fadds f0, f0, f1
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfsx f0, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: testFloat1:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C10(r2) # target-flags(ppc-tlsld) @tlv_float
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lfs f0, 0(r3)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C11(r2) # %const.1
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lfs f1, 0(r4)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    fadds f0, f0, f1
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C12(r2) # %const.0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lfs f1, 0(r4)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    fadds f0, f0, f1
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stfs f0, 0(r3)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testFloat1:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C10 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1 at l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C10 at l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lfs f0, 0(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r4, L..C11 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C11 at l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lfs f1, 0(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    fadds f0, f0, f1
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r4, L..C12 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C12 at l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lfs f1, 0(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    fadds f0, f0, f1
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stfs f0, 0(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
   %tlv_addr = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @tlv_float)
   %value = load float, ptr %tlv_addr, align 4
@@ -829,8 +1485,7 @@ define i32 @testFloat2() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 80(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C10(r2) # target-flags(ppc-tlsld) @tlv_float
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfsx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfs f0, tlv_float[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r1, 60
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xscvdpsxws f0, f0
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfiwx f0, 0, r3
@@ -845,12 +1500,10 @@ define i32 @testFloat2() {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C10 at u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C10 at l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfsx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfs f0, tlv_float[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r1, 60
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xscvdpsxws f0, f0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfiwx f0, 0, r3
@@ -867,8 +1520,7 @@ define i32 @testFloat2() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 80(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C10(r2) # target-flags(ppc-tlsld) @tlv_float
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    la r3, tlv_float[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lfs f0, 0(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    fctiwz f0, f0
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stfd f0, 56(r1)
@@ -884,14 +1536,10 @@ define i32 @testFloat2() {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C10 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1 at l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C0 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C0 at l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 48(r1) # 8-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C10 at l(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    la r3, tlv_float[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lfs f0, 0(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    fctiwz f0, f0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stfd f0, 56(r1)
@@ -901,6 +1549,86 @@ define i32 @testFloat2() {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-LABEL: testFloat2:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 80(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C10(r2) # target-flags(ppc-tlsld) @tlv_float
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfsx f0, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r1, 60
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xscvdpsxws f0, f0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfiwx f0, 0, r3
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwz r3, 60(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 64
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-LABEL: testFloat2:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C10 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C10 at l(r6)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfsx f0, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r1, 60
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    xscvdpsxws f0, f0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfiwx f0, 0, r3
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwz r3, 60(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 64
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: testFloat2:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 80(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C10(r2) # target-flags(ppc-tlsld) @tlv_float
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lfs f0, 0(r3)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    fctiwz f0, f0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stfd f0, 56(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwa r3, 60(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    clrldi r3, r3, 32
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 64
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testFloat2:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C10 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1 at l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 48(r1) # 8-byte Folded Reload
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C10 at l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lfs f0, 0(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    fctiwz f0, f0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stfd f0, 56(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwa r3, 60(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    clrldi r3, r3, 32
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
   %tlv_addr = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @tlv_float)
   %value = load float, ptr %tlv_addr, align 4
@@ -916,8 +1644,7 @@ define void @testDouble1(double noundef %x) {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C11(r2) # target-flags(ppc-tlsld) @internal_tlv_double
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfdx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfd f1, internal_tlv_double[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
@@ -928,12 +1655,10 @@ define void @testDouble1(double noundef %x) {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C11 at u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C11 at l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfdx f1, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfd f1, internal_tlv_double[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
@@ -946,8 +1671,7 @@ define void @testDouble1(double noundef %x) {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C13(r2) # target-flags(ppc-tlsld) @internal_tlv_double
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    la r3, internal_tlv_double[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stxsdx f1, 0, r3
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
@@ -957,21 +1681,81 @@ define void @testDouble1(double noundef %x) {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testDouble1:
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C13@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C0@l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C13@l(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    la r3, internal_tlv_double[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stxsdx f1, 0, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-LABEL: testDouble1:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C11(r2) # target-flags(ppc-tlsld) @internal_tlv_double
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfdx f1, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-LABEL: testDouble1:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C11@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C11@l(r6)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfdx f1, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: testDouble1:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C13(r2) # target-flags(ppc-tlsld) @internal_tlv_double
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stxsdx f1, 0, r3
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testDouble1:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C13@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C13@l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stxsdx f1, 0, r3
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
   %tlv_addr = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @internal_tlv_double)
   store double %x, ptr %tlv_addr, align 8
@@ -986,8 +1770,7 @@ define i32 @testDouble2() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 80(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C11(r2) # target-flags(ppc-tlsld) @internal_tlv_double
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfdx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfd f0, internal_tlv_double[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r1, 60
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xscvdpsxws f0, f0
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfiwx f0, 0, r3
@@ -1002,12 +1785,10 @@ define i32 @testDouble2() {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C11@u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C11@l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfdx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfd f0, internal_tlv_double[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r1, 60
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    xscvdpsxws f0, f0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfiwx f0, 0, r3
@@ -1024,8 +1805,9 @@ define i32 @testDouble2() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 80(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C13(r2) # target-flags(ppc-tlsld) @internal_tlv_double
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lfdx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    la r3, internal_tlv_double[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lfd f0, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    # kill: def $f1 killed $f0
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    xscvdpsxws f0, f0
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r3, r1, 52
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stfiwx f0, 0, r3
@@ -1039,25 +1821,103 @@ define i32 @testDouble2() {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testDouble2:
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -80(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 96(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C13@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C0@l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C13@l(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lfdx f0, r3, r4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    la r3, internal_tlv_double[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lfd f0, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    # kill: def $f1 killed $f0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    xscvdpsxws f0, f0
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r3, r1, 68
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r3, r1, 52
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stfiwx f0, 0, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwz r3, 68(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwz r3, 52(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    clrldi r3, r3, 32
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 80
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-LABEL: testDouble2:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 80(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C11(r2) # target-flags(ppc-tlsld) @internal_tlv_double
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfdx f0, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r1, 60
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xscvdpsxws f0, f0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfiwx f0, 0, r3
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwz r3, 60(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 64
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-LABEL: testDouble2:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C11@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C11@l(r6)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfdx f0, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r1, 60
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    xscvdpsxws f0, f0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfiwx f0, 0, r3
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwz r3, 60(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 64
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: testDouble2:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 80(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C13(r2) # target-flags(ppc-tlsld) @internal_tlv_double
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lfdx f0, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    xscvdpsxws f0, f0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r3, r1, 52
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stfiwx f0, 0, r3
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwz r3, 52(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    clrldi r3, r3, 32
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 64
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testDouble2:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -80(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 96(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C13@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C13@l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lfdx f0, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    xscvdpsxws f0, f0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r3, r1, 68
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stfiwx f0, 0, r3
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwz r3, 68(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    clrldi r3, r3, 32
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 80
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
   %tlv_addr = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @internal_tlv_double)
   %value = load double, ptr %tlv_addr, align 8
diff --git a/llvm/test/CodeGen/PowerPC/check-aix-small-local-exec-tls-opt-IRattribute.ll b/llvm/test/CodeGen/PowerPC/check-aix-small-local-exec-tls-opt-IRattribute.ll
index dc78ae8436df6b..fc49e9416803bb 100644
--- a/llvm/test/CodeGen/PowerPC/check-aix-small-local-exec-tls-opt-IRattribute.ll
+++ b/llvm/test/CodeGen/PowerPC/check-aix-small-local-exec-tls-opt-IRattribute.ll
@@ -8,23 +8,32 @@
 ; RUN:   -data-sections=false < %s 2>&1 | \
 ; RUN: FileCheck %s --check-prefix=CHECK-UNSUPPORTED-NO-DATASEC
 
-define dso_local signext i32 @testWithIRAttr() #0 {
+define dso_local signext i32 @testWithIRAttrLocalExec() #0 {
 entry:
   ret i32 0
 }
-; Check that the aix-small-local-exec-tls attribute is not supported on Linux and AIX (32-bit).
-; CHECK-NOT-SUPPORTED: The aix-small-local-exec-tls attribute is only supported on AIX in 64-bit mode.
 
-; Check that the aix-small-local-exec-tls attribute is only supported when
+define dso_local signext i32 @testWithIRAttrLocalDynamic() #1 {
+entry:
+  ret i32 0
+}
+
+; Check that the aix-small-local-[exec|dynamic]-tls attribute is not supported on Linux and AIX (32-bit).
+; CHECK-NOT-SUPPORTED: The aix-small-local-[exec|dynamic]-tls attribute is only supported on AIX in 64-bit mode.
+
+; Check that the aix-small-local-[exec|dynamic]-tls attribute is only supported when
 ; data sections are enabled.
-; CHECK-UNSUPPORTED-NO-DATASEC: The aix-small-local-exec-tls attribute can only be specified with -data-sections.
+; CHECK-UNSUPPORTED-NO-DATASEC: The aix-small-local-[exec|dynamic]-tls attribute can only be specified with -data-sections.
 
 ; Make sure that the test was actually compiled successfully after using the
-; aix-small-local-exec-tls attribute.
-; CHECK-LABEL: testWithIRAttr:
+; aix-small-local-[exec|dynamic]-tls attribute.
+; CHECK-LABEL: testWithIRAttrLocalExec:
+; CHECK:        li r3, 0
+; CHECK-NEXT:   blr
+; CHECK-LABEL: testWithIRAttrLocalDynamic:
 ; CHECK:        li r3, 0
 ; CHECK-NEXT:   blr
-
 
 attributes #0 = { "target-features"="+aix-small-local-exec-tls" }
+attributes #1 = { "target-features"="+aix-small-local-dynamic-tls" }
 
diff --git a/llvm/test/CodeGen/PowerPC/check-aix-small-local-exec-tls-opt.ll b/llvm/test/CodeGen/PowerPC/check-aix-small-local-exec-tls-opt.ll
index 5104b7c6c83989..25bab5b5efcd2b 100644
--- a/llvm/test/CodeGen/PowerPC/check-aix-small-local-exec-tls-opt.ll
+++ b/llvm/test/CodeGen/PowerPC/check-aix-small-local-exec-tls-opt.ll
@@ -6,17 +6,25 @@
 ; RUN: not llc -mtriple powerpc64le-unknown-linux-gnu -mattr=+aix-small-local-exec-tls \
 ; RUN:   -ppc-asm-full-reg-names < %s 2>&1 | \
 ; RUN:   FileCheck %s --check-prefix=CHECK-NOT-SUPPORTED
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -mattr=+aix-small-local-dynamic-tls \
+; RUN:   -ppc-asm-full-reg-names < %s | FileCheck %s
+; RUN: not llc -mtriple powerpc-ibm-aix-xcoff -mattr=+aix-small-local-dynamic-tls \
+; RUN:   -ppc-asm-full-reg-names < %s 2>&1 | \
+; RUN:   FileCheck %s --check-prefix=CHECK-NOT-SUPPORTED
+; RUN: not llc -mtriple powerpc64le-unknown-linux-gnu -mattr=+aix-small-local-dynamic-tls \
+; RUN:   -ppc-asm-full-reg-names < %s 2>&1 | \
+; RUN:   FileCheck %s --check-prefix=CHECK-NOT-SUPPORTED
 
 define dso_local signext i32 @testNoIRAttr() {
 entry:
   ret i32 0
 }
 
-; Check that the aix-small-local-exec-tls attribute is not supported on Linux and AIX (32-bit).
-; CHECK-NOT-SUPPORTED: The aix-small-local-exec-tls attribute is only supported on AIX in 64-bit mode.
+; Check that the aix-small-local-[exec|dynamic]-tls attribute is not supported on Linux and AIX (32-bit).
+; CHECK-NOT-SUPPORTED: The aix-small-local-[exec|dynamic]-tls attribute is only supported on AIX in 64-bit mode.
 
 ; Make sure that the test was actually compiled successfully after using the
-; aix-small-local-exec-tls attribute.
+; aix-small-local-[exec|dynamic]-tls attribute.
 ; CHECK-LABEL: testNoIRAttr:
 ; CHECK:        li r3, 0
 ; CHECK-NEXT:   blr

>From 74c16912635baa2a89470860beaba21ce0f27ba5 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Tue, 26 Mar 2024 03:13:52 -0400
Subject: [PATCH 2/5] Update test case

---
 .../aix-small-local-dynamic-tls-types.ll      | 429 ++++++++++++++----
 1 file changed, 340 insertions(+), 89 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll
index 489ef11e7040a2..161a58a90296ad 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll
@@ -39,7 +39,8 @@ declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
 @internal_tlv_double = internal thread_local(localdynamic) global double 1.000000e+00, align 8
 
 %struct.anon = type { i32 }
-@ThreadLocalStruct = thread_local(localdynamic) global %struct.anon zeroinitializer, align 1
+@ThreadLocalStructAlign1 = thread_local(localdynamic) global %struct.anon zeroinitializer, align 1
+@ThreadLocalStructAlign4 = thread_local(localdynamic) global %struct.anon zeroinitializer, align 4
 @a = thread_local(localdynamic) global [87 x i32] zeroinitializer, align 4
 
 define nonnull ptr @AddrTest1() local_unnamed_addr {
@@ -172,6 +173,129 @@ entry:
   ret ptr %arrayidx
 }
 
+; Demonstrates when offset is zero, addi for the offset is optimized out.
+define nonnull ptr @AddrTest2() local_unnamed_addr {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: AddrTest2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r3, a[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: AddrTest2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r3, a[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: AddrTest2:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    la r3, a[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: AddrTest2:
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    la r3, a[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-LABEL: AddrTest2:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @a
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-LABEL: AddrTest2:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C1@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C1@l(r6)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: AddrTest2:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsld) @a
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: AddrTest2:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C0@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C0@l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
+entry:
+  %tlv_addr = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @a)
+  %arrayidx = getelementptr inbounds [87 x i32], ptr %tlv_addr, i64 0, i64 0
+  ret ptr %arrayidx
+}
+
 define signext i32 @testUnaligned() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testUnaligned:
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
@@ -180,7 +304,7 @@ define signext i32 @testUnaligned() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r3, ThreadLocalStruct[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r3, ThreadLocalStructAlign1[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwa r3, 0(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
@@ -195,7 +319,7 @@ define signext i32 @testUnaligned() {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r3, ThreadLocalStruct[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r3, ThreadLocalStructAlign1[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwa r3, 0(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
@@ -209,7 +333,7 @@ define signext i32 @testUnaligned() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    la r3, ThreadLocalStruct[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    la r3, ThreadLocalStructAlign1[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwa r3, 0(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
@@ -224,7 +348,7 @@ define signext i32 @testUnaligned() {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C0@u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C0@l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    la r3, ThreadLocalStruct[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    la r3, ThreadLocalStructAlign1[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwa r3, 0(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
@@ -238,7 +362,7 @@ define signext i32 @testUnaligned() {
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @ThreadLocalStruct
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @ThreadLocalStructAlign1
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwax r3, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
@@ -268,7 +392,7 @@ define signext i32 @testUnaligned() {
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @ThreadLocalStruct
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsld) @ThreadLocalStructAlign1
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwa r3, 0(r3)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
@@ -295,10 +419,137 @@ define signext i32 @testUnaligned() {
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
 entry:
-  %tlv_addr = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalStruct)
-  %x = getelementptr inbounds %struct.anon, ptr %tlv_addr, i32 0, i32 0
-  %value = load i32, ptr %x, align 1
-  ret i32 %value
+  %tlv_addr1 = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalStructAlign1)
+  %x = getelementptr inbounds %struct.anon, ptr %tlv_addr1, i32 0, i32 0
+  %value1 = load i32, ptr %x, align 1
+  ret i32 %value1
+}
+
+define signext i32 @testAligned() {
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-LABEL: testAligned:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwa r3, ThreadLocalStructAlign4[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-LABEL: testAligned:
+; SMALL-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwa r3, ThreadLocalStructAlign4[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: testAligned:
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    la r3, ThreadLocalStructAlign4[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwa r3, 0(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testAligned:
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -48(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 64(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C0@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C0@l(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    la r3, ThreadLocalStructAlign4[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwa r3, 0(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 48
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; SMALL-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-LABEL: testAligned:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalStructAlign4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwax r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-LABEL: testAligned:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r6)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwax r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-LABEL: testAligned:
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stdu r1, -48(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @ThreadLocalStructAlign4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwa r3, 0(r3)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    blr
+;
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-LABEL: testAligned:
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0:       # %bb.0: # %entry
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C3@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C3@l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwa r3, 0(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mtlr r0
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    blr
+entry:
+  %tlv_addr1 = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalStructAlign4)
+  %x = getelementptr inbounds %struct.anon, ptr %tlv_addr1, i32 0, i32 0
+  %value1 = load i32, ptr %x, align 1
+  ret i32 %value1
 }
 
 define void @testChar(i8 noundef signext %x) {
@@ -376,7 +627,7 @@ define void @testChar(i8 noundef signext %x) {
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tlsld) @tlv_char
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @tlv_char
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stbx r6, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
@@ -389,11 +640,11 @@ define void @testChar(i8 noundef signext %x) {
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C3@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C4@u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C3@l(r7)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r7)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stbx r6, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
@@ -411,7 +662,7 @@ define void @testChar(i8 noundef signext %x) {
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mr r4, r3
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r5, L..C3(r2) # target-flags(ppc-tlsld) @tlv_char
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r5, L..C4(r2) # target-flags(ppc-tlsld) @tlv_char
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r4, r4, r5
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stb r3, 0(r4)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 64
@@ -426,7 +677,7 @@ define void @testChar(i8 noundef signext %x) {
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stw r3, 60(r1) # 4-byte Folded Spill
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C3@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C4@u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
@@ -434,7 +685,7 @@ define void @testChar(i8 noundef signext %x) {
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r5, 48(r1) # 8-byte Folded Reload
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mr r4, r3
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r5, L..C3@l(r5)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r5, L..C4@l(r5)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r4, r4, r5
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stb r3, 0(r4)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
@@ -522,7 +773,7 @@ define void @testShort(i16 noundef signext %x) {
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C4(r2) # target-flags(ppc-tlsld) @tlv_short
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C5(r2) # target-flags(ppc-tlsld) @tlv_short
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    sthx r6, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
@@ -535,11 +786,11 @@ define void @testShort(i16 noundef signext %x) {
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C4@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r7, L..C5@u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C4@l(r7)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C5@l(r7)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    sthx r6, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
@@ -557,7 +808,7 @@ define void @testShort(i16 noundef signext %x) {
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    mr r4, r3
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r5, L..C4(r2) # target-flags(ppc-tlsld) @tlv_short
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r5, L..C5(r2) # target-flags(ppc-tlsld) @tlv_short
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r4, r4, r5
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    sth r3, 0(r4)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 64
@@ -572,7 +823,7 @@ define void @testShort(i16 noundef signext %x) {
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stw r3, 60(r1) # 4-byte Folded Spill
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C4@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C5@u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
@@ -580,7 +831,7 @@ define void @testShort(i16 noundef signext %x) {
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r5, 48(r1) # 8-byte Folded Reload
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mr r4, r3
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwz r3, 60(r1) # 4-byte Folded Reload
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r5, L..C4@l(r5)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r5, L..C5@l(r5)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r4, r4, r5
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    sth r3, 0(r4)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
@@ -657,7 +908,7 @@ define signext i32 @testInt1() {
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C5(r2) # target-flags(ppc-tlsld) @tlv_int
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C6(r2) # target-flags(ppc-tlsld) @tlv_int
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwax r3, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
@@ -669,11 +920,11 @@ define signext i32 @testInt1() {
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C5@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C6@u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C5@l(r6)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C6@l(r6)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwax r3, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
@@ -687,7 +938,7 @@ define signext i32 @testInt1() {
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C5(r2) # target-flags(ppc-tlsld) @tlv_int
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C6(r2) # target-flags(ppc-tlsld) @tlv_int
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwa r3, 0(r3)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
@@ -700,13 +951,13 @@ define signext i32 @testInt1() {
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C5@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C6@u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C5@l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C6@l(r4)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwa r3, 0(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
@@ -801,9 +1052,9 @@ define signext i32 @testInt2() {
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C6(r2) # target-flags(ppc-tlsld) @internal_tlv_int
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C7(r2) # target-flags(ppc-tlsld) @internal_tlv_int
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwzx r3, r3, r4
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C7(r2) # @tlv_int_init
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C8(r2) # @tlv_int_init
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lwz r4, 0(r4)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    add r3, r4, r3
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    extsw r3, r3
@@ -817,14 +1068,14 @@ define signext i32 @testInt2() {
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C6@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C7@u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C6@l(r6)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C7@l(r6)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwzx r3, r3, r4
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C7@u(r2)
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C7@l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C8@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C8@l(r4)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    lwz r4, 0(r4)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    add r3, r4, r3
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    extsw r3, r3
@@ -840,10 +1091,10 @@ define signext i32 @testInt2() {
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C6(r2) # target-flags(ppc-tlsld) @internal_tlv_int
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C7(r2) # target-flags(ppc-tlsld) @internal_tlv_int
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwz r4, 0(r3)
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C7(r2) # @tlv_int_init
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C8(r2) # @tlv_int_init
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lwz r3, 0(r3)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    extsw r3, r3
@@ -857,17 +1108,17 @@ define signext i32 @testInt2() {
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C6@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C7@u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C6@l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C7@l(r4)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwz r4, 0(r3)
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C7@u(r2)
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C7@l(r3)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C8@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C8@l(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lwz r3, 0(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    extsw r3, r3
@@ -947,7 +1198,7 @@ define signext i64 @testLong1() {
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C8(r2) # target-flags(ppc-tlsld) @tlv_long
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C9(r2) # target-flags(ppc-tlsld) @tlv_long
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r3, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
@@ -959,11 +1210,11 @@ define signext i64 @testLong1() {
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C8@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C9@u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C8@l(r6)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C9@l(r6)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r3, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
@@ -977,7 +1228,7 @@ define signext i64 @testLong1() {
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C8(r2) # target-flags(ppc-tlsld) @tlv_long
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C9(r2) # target-flags(ppc-tlsld) @tlv_long
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, 0(r3)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
@@ -990,13 +1241,13 @@ define signext i64 @testLong1() {
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C8@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C9@u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C8@l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C9@l(r4)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, 0(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
@@ -1081,7 +1332,7 @@ define void @testLong2(i64 noundef signext %x) {
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C9(r2) # target-flags(ppc-tlsld) @internal_tlv_long
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C10(r2) # target-flags(ppc-tlsld) @internal_tlv_long
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r5, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r5, r5, 9
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdx r5, r3, r4
@@ -1095,11 +1346,11 @@ define void @testLong2(i64 noundef signext %x) {
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C9@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C10@u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C9@l(r6)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C10@l(r6)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r5, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r5, r5, 9
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdx r5, r3, r4
@@ -1115,7 +1366,7 @@ define void @testLong2(i64 noundef signext %x) {
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C9(r2) # target-flags(ppc-tlsld) @internal_tlv_long
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C10(r2) # target-flags(ppc-tlsld) @internal_tlv_long
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r4, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, 0(r4)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r3, r3, 9
@@ -1130,13 +1381,13 @@ define void @testLong2(i64 noundef signext %x) {
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C9@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C10@u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1@u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1@l(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C9@l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C10@l(r4)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r4, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, 0(r4)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r3, r3, 9
@@ -1221,7 +1472,7 @@ define i32 @testLong3() {
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C8(r2) # target-flags(ppc-tlsld) @tlv_long
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C9(r2) # target-flags(ppc-tlsld) @tlv_long
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ldx r3, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
@@ -1233,11 +1484,11 @@ define i32 @testLong3() {
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0@u(r2)
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C8@u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C9@u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0@l(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C8@l(r6)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C9@l(r6)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ldx r3, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
@@ -1251,7 +1502,7 @@ define i32 @testLong3() {
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C8(r2) # target-flags(ppc-tlsld) @tlv_long
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C9(r2) # target-flags(ppc-tlsld) @tlv_long
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, 0(r3)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
@@ -1266,13 +1517,13 @@ define i32 @testLong3() {
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C8 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C9 at u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1 at u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1 at l(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C8 at l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C9 at l(r4)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, 0(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
@@ -1385,7 +1636,7 @@ define void @testFloat1(float noundef %x) {
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    vspltisw v3, 8
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xvcvsxwdp vs0, vs34
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C10(r2) # target-flags(ppc-tlsld) @tlv_float
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C11(r2) # target-flags(ppc-tlsld) @tlv_float
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfsx f1, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    fadds f0, f1, f0
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xvcvsxwdp vs1, vs35
@@ -1402,13 +1653,13 @@ define void @testFloat1(float noundef %x) {
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    vspltisw v2, 1
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C10 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C11 at u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    vspltisw v3, 8
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    xvcvsxwdp vs0, vs34
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C10 at l(r6)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C11 at l(r6)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfsx f1, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    fadds f0, f1, f0
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    xvcvsxwdp vs1, vs35
@@ -1426,13 +1677,13 @@ define void @testFloat1(float noundef %x) {
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C10(r2) # target-flags(ppc-tlsld) @tlv_float
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C11(r2) # target-flags(ppc-tlsld) @tlv_float
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lfs f0, 0(r3)
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C11(r2) # %const.1
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C12(r2) # %const.1
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lfs f1, 0(r4)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    fadds f0, f0, f1
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C12(r2) # %const.0
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C13(r2) # %const.0
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lfs f1, 0(r4)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    fadds f0, f0, f1
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stfs f0, 0(r3)
@@ -1446,23 +1697,23 @@ define void @testFloat1(float noundef %x) {
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C10 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C11 at u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1 at u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1 at l(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C10 at l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C11 at l(r4)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lfs f0, 0(r3)
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r4, L..C11 at u(r2)
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C11 at l(r4)
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lfs f1, 0(r4)
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    fadds f0, f0, f1
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r4, L..C12 at u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C12 at l(r4)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lfs f1, 0(r4)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    fadds f0, f0, f1
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r4, L..C13 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C13 at l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lfs f1, 0(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    fadds f0, f0, f1
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stfs f0, 0(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r0, 16(r1)
@@ -1557,7 +1808,7 @@ define i32 @testFloat2() {
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 80(r1)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C10(r2) # target-flags(ppc-tlsld) @tlv_float
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C11(r2) # target-flags(ppc-tlsld) @tlv_float
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfsx f0, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r1, 60
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xscvdpsxws f0, f0
@@ -1573,11 +1824,11 @@ define i32 @testFloat2() {
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C10 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C11 at u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C10 at l(r6)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C11 at l(r6)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfsx f0, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r1, 60
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    xscvdpsxws f0, f0
@@ -1595,7 +1846,7 @@ define i32 @testFloat2() {
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 80(r1)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C10(r2) # target-flags(ppc-tlsld) @tlv_float
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C11(r2) # target-flags(ppc-tlsld) @tlv_float
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lfs f0, 0(r3)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    fctiwz f0, f0
@@ -1612,13 +1863,13 @@ define i32 @testFloat2() {
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C10 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C11 at u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 48(r1) # 8-byte Folded Spill
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1 at u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1 at l(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 48(r1) # 8-byte Folded Reload
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C10 at l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C11 at l(r4)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lfs f0, 0(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    fctiwz f0, f0
@@ -1700,7 +1951,7 @@ define void @testDouble1(double noundef %x) {
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C11(r2) # target-flags(ppc-tlsld) @internal_tlv_double
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C12(r2) # target-flags(ppc-tlsld) @internal_tlv_double
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stfdx f1, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
@@ -1712,11 +1963,11 @@ define void @testDouble1(double noundef %x) {
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C11 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C12 at u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C11 at l(r6)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C12 at l(r6)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stfdx f1, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
@@ -1730,7 +1981,7 @@ define void @testDouble1(double noundef %x) {
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C13(r2) # target-flags(ppc-tlsld) @internal_tlv_double
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C14(r2) # target-flags(ppc-tlsld) @internal_tlv_double
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    add r3, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    stxsdx f1, 0, r3
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r1, r1, 48
@@ -1743,13 +1994,13 @@ define void @testDouble1(double noundef %x) {
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 80(r1)
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C13 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C14 at u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1 at u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1 at l(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C13 at l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C14 at l(r4)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    add r3, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stxsdx f1, 0, r3
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r1, r1, 64
@@ -1846,7 +2097,7 @@ define i32 @testDouble2() {
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 80(r1)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C11(r2) # target-flags(ppc-tlsld) @internal_tlv_double
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C12(r2) # target-flags(ppc-tlsld) @internal_tlv_double
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    lfdx f0, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r1, 60
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-NEXT:    xscvdpsxws f0, f0
@@ -1862,11 +2113,11 @@ define i32 @testDouble2() {
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    mflr r0
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -64(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C11 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C12 at u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 80(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C11 at l(r6)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C12 at l(r6)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    lfdx f0, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r1, 60
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-NEXT:    xscvdpsxws f0, f0
@@ -1884,7 +2135,7 @@ define i32 @testDouble2() {
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    std r0, 80(r1)
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    bla .__tls_get_mod[PR]
-; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C13(r2) # target-flags(ppc-tlsld) @internal_tlv_double
+; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    ld r4, L..C14(r2) # target-flags(ppc-tlsld) @internal_tlv_double
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    lfdx f0, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    xscvdpsxws f0, f0
 ; DEFAULT-LOCAL-DYNAMIC-SMALLCM64-O0-NEXT:    addi r3, r1, 52
@@ -1901,13 +2152,13 @@ define i32 @testDouble2() {
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    mflr r0
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    stdu r1, -80(r1)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r0, 96(r1)
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C13 at u(r2)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C14 at u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    std r3, 56(r1) # 8-byte Folded Spill
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addis r3, L..C1 at u(r2)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r3, L..C1 at l(r3)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    bla .__tls_get_mod[PR]
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, 56(r1) # 8-byte Folded Reload
-; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C13 at l(r4)
+; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    ld r4, L..C14 at l(r4)
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    lfdx f0, r3, r4
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    xscvdpsxws f0, f0
 ; DEFAULT-LOCAL-DYNAMIC-LARGECM64-O0-NEXT:    addi r3, r1, 68

>From a4662870386607ce6cf197ad46ab5b0610999fe9 Mon Sep 17 00:00:00 2001
From: "Felix (Ting Wang)" <wangting at gmail.com>
Date: Tue, 9 Apr 2024 09:42:44 +0800
Subject: [PATCH 3/5] Update llvm/lib/Target/PowerPC/PPC.td

Co-authored-by: Amy Kwan <amy.kwan1 at ibm.com>
---
 llvm/lib/Target/PowerPC/PPC.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index 12d6b868f28545..8ebd363828b1c3 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -331,7 +331,7 @@ def FeatureAIXLocalExecTLS :
 
 // Specifies that local-dynamic TLS accesses in any function with this target
 // attribute should use the optimized sequence (where the offset is an immediate
-// off module-hlandle for which the linker might add fix-up code if the
+// off the module-handle for which the linker might add fix-up code if the
 // immediate is too large).
 def FeatureAIXLocalDynamicTLS :
   SubtargetFeature<"aix-small-local-dynamic-tls", "HasAIXSmallLocalDynamicTLS",

>From a1fae3c4ba826ad6b611fcc7064430b6d6d5bbc8 Mon Sep 17 00:00:00 2001
From: "Felix (Ting Wang)" <wangting at gmail.com>
Date: Tue, 9 Apr 2024 09:42:59 +0800
Subject: [PATCH 4/5] Update llvm/lib/Target/PowerPC/PPC.td

Co-authored-by: Amy Kwan <amy.kwan1 at ibm.com>
---
 llvm/lib/Target/PowerPC/PPC.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index 8ebd363828b1c3..b962ed28d72000 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -336,7 +336,7 @@ def FeatureAIXLocalExecTLS :
 def FeatureAIXLocalDynamicTLS :
   SubtargetFeature<"aix-small-local-dynamic-tls", "HasAIXSmallLocalDynamicTLS",
                    "true", "Produce a faster local-dynamic TLS sequence for this "
-                   " function for 64-bit AIX">;
+                   "function for 64-bit AIX">;
 
 def FeaturePredictableSelectIsExpensive :
   SubtargetFeature<"predictable-select-expensive",

>From b53f76bb8c1d3de09a1c1e4b46d4793f67704b34 Mon Sep 17 00:00:00 2001
From: "Felix (Ting Wang)" <wangting at gmail.com>
Date: Tue, 9 Apr 2024 09:43:11 +0800
Subject: [PATCH 5/5] Update llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp

Co-authored-by: Amy Kwan <amy.kwan1 at ibm.com>
---
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 7716aa4dc70f5f..1c57b92057fff5 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1673,7 +1673,7 @@ const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
 
   assert(MO.isGlobal() && "Only expecting a global MachineOperand here!");
   const GlobalValue *GValue = MO.getGlobal();
-  // TODO: handle aix-small-local-dynamic-tls none-zero offset case.
+  // TODO: Handle the aix-small-local-dynamic-tls non-zero offset case.
   TLSModel::Model Model = TM.getTLSModel(GValue);
   if (Model == TLSModel::LocalDynamic) {
     return nullptr;



More information about the llvm-commits mailing list