[llvm] [PowerPC] Support local-dynamic TLS relocation on AIX (PR #66316)

Ting Wang via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 20 00:44:44 PDT 2023


https://github.com/orcguru updated https://github.com/llvm/llvm-project/pull/66316

>From a73427dff8675679b29277cc0d4be6568cdb3846 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Wed, 13 Sep 2023 22:21:11 -0400
Subject: [PATCH 1/4] [PowerPC] Support local-dynamic TLS relocation on AIX

---
 llvm/include/llvm/MC/MCExpr.h                 |   2 +
 llvm/lib/MC/MCExpr.cpp                        |   4 +
 .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp  |   9 +-
 .../MCTargetDesc/PPCXCOFFObjectWriter.cpp     |   4 +
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     |  90 ++++-
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  30 +-
 llvm/lib/Target/PowerPC/PPCISelLowering.h     |  14 +-
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td      |  13 +-
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       |  15 +-
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp |  73 +++-
 .../test/CodeGen/PowerPC/aix-tls-gd-double.ll |  90 +++--
 llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll   |  96 +++--
 .../CodeGen/PowerPC/aix-tls-gd-longlong.ll    | 298 +++++++-------
 .../CodeGen/PowerPC/aix-tls-local-dynamic.ll  | 364 ++++++++++++++++++
 .../PowerPC/aix-tls-xcoff-reloc-large.ll      | 286 +++++++-------
 .../CodeGen/PowerPC/aix-tls-xcoff-reloc.ll    | 288 +++++++-------
 16 files changed, 1149 insertions(+), 527 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll

diff --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h
index 67836292874f5f7..f3bc0491fd2f193 100644
--- a/llvm/include/llvm/MC/MCExpr.h
+++ b/llvm/include/llvm/MC/MCExpr.h
@@ -301,6 +301,8 @@ class MCSymbolRefExpr : public MCExpr {
     VK_PPC_AIX_TLSGDM,      // symbol at m
     VK_PPC_AIX_TLSIE,       // symbol at ie
     VK_PPC_AIX_TLSLE,       // symbol at le
+    VK_PPC_AIX_TLSLD,       // symbol at ld
+    VK_PPC_AIX_TLSML,       // symbol at ml
     VK_PPC_GOT_TLSLD,       // symbol at got@tlsld
     VK_PPC_GOT_TLSLD_LO,    // symbol at got@tlsld at l
     VK_PPC_GOT_TLSLD_HI,    // symbol at got@tlsld at h
diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index 73e6569f96e4630..bc1bb9b80630546 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -331,6 +331,10 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
     return "ie";
   case VK_PPC_AIX_TLSLE:
     return "le";
+  case VK_PPC_AIX_TLSLD:
+    return "ld";
+  case VK_PPC_AIX_TLSML:
+    return "ml";
   case VK_PPC_GOT_TLSLD: return "got at tlsld";
   case VK_PPC_GOT_TLSLD_LO: return "got at tlsld@l";
   case VK_PPC_GOT_TLSLD_HI: return "got at tlsld@h";
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index a804dd823daa4da..22cd2fc03ef7c39 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -231,12 +231,15 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer {
       MCSymbolXCOFF *TCSym =
           cast<MCSectionXCOFF>(Streamer.getCurrentSectionOnly())
               ->getQualNameSymbol();
-      // On AIX, we have a region handle (symbol at m) and the variable offset
-      // (symbol@{gd|ie|le}) for TLS variables, depending on the TLS model.
+      // On AIX, we have a region handle (symbol at m), module handle
+      // (__TLSML[TC]@ml) and the variable offset (symbol@{gd|ie|le|ld}) for TLS
+      // variables, depending on the TLS model.
       if (Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSIE ||
-          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLE)
+          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLE ||
+          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD ||
+          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML)
         OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << "@"
            << MCSymbolRefExpr::getVariantKindName(Kind) << '\n';
       else
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
index 065daf42fe6eb0c..f4998e9b9dcba86 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
@@ -116,6 +116,10 @@ std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize(
       return {XCOFF::RelocationType::R_TLS_IE, SignAndSizeForFKData};
     case MCSymbolRefExpr::VK_PPC_AIX_TLSLE:
       return {XCOFF::RelocationType::R_TLS_LE, SignAndSizeForFKData};
+    case MCSymbolRefExpr::VK_PPC_AIX_TLSLD:
+      return {XCOFF::RelocationType::R_TLS_LD, SignAndSizeForFKData};
+    case MCSymbolRefExpr::VK_PPC_AIX_TLSML:
+      return {XCOFF::RelocationType::R_TLSML, SignAndSizeForFKData};
     case MCSymbolRefExpr::VK_None:
       return {XCOFF::RelocationType::R_POS, SignAndSizeForFKData};
     }
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 4b97e3e1a09152f..1c34dc3fefceaba 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -613,12 +613,23 @@ void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) {
     EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP));
 }
 
-/// This helper function creates the TlsGetAddr MCSymbol for AIX. We will
-/// create the csect and use the qual-name symbol instead of creating just the
-/// external symbol.
+/// This helper function creates the TlsGetAddr/TlsGetMod MCSymbol for AIX. We
+/// will create the csect and use the qual-name symbol instead of creating just
+/// the external symbol.
 static MCSymbol *createMCSymbolForTlsGetAddr(MCContext &Ctx, unsigned MIOpc) {
-  StringRef SymName =
-      MIOpc == PPC::GETtlsTpointer32AIX ? ".__get_tpointer" : ".__tls_get_addr";
+  StringRef SymName;
+  switch (MIOpc) {
+  default:
+    SymName = ".__tls_get_addr";
+    break;
+  case PPC::GETtlsTpointer32AIX:
+    SymName = ".__get_tpointer";
+    break;
+  case PPC::GETtlsMOD32AIX:
+  case PPC::GETtlsMOD64AIX:
+    SymName = ".__tls_get_mod";
+    break;
+  }
   return Ctx
       .getXCOFFSection(SymName, SectionKind::getText(),
                        XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER))
@@ -660,14 +671,15 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
          "GETtls[ld]ADDR[32] must read GPR3");
 
   if (Subtarget->isAIXABI()) {
-    // On AIX, the variable offset should already be in R4 and the region handle
-    // should already be in R3.
-    // For TLSGD, which currently is the only supported access model, we only
-    // need to generate an absolute branch to .__tls_get_addr.
+    // On AIX, for TLSGD the variable offset should already be in R4 and the
+    // region handle should already be in R3, need to generate an absolute
+    // branch to .__tls_get_addr. For TLSLD the module handle should already be
+    // in R3, need to generate branch to .__tls_get_mod.
     Register VarOffsetReg = Subtarget->isPPC64() ? PPC::X4 : PPC::R4;
     (void)VarOffsetReg;
-    assert(MI->getOperand(2).isReg() &&
-           MI->getOperand(2).getReg() == VarOffsetReg &&
+    assert((MI->getNumExplicitOperands() < 3 ||
+            (MI->getOperand(2).isReg() &&
+             MI->getOperand(2).getReg() == VarOffsetReg)) &&
            "GETtls[ld]ADDR[32] must read GPR4");
     EmitAIXTlsCallHelper(MI);
     return;
@@ -710,6 +722,8 @@ static MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO,
     return AP.GetJTISymbol(MO.getIndex());
   case MachineOperand::MO_BlockAddress:
     return AP.GetBlockAddressSymbol(MO.getBlockAddress());
+  case MachineOperand::MO_ExternalSymbol:
+    return AP.OutContext.getOrCreateSymbol(MO.getSymbolName());
   default:
     llvm_unreachable("Unexpected operand type to get symbol.");
   }
@@ -757,6 +771,16 @@ getTOCEntryTypeForMO(const MachineOperand &MO) {
     llvm_unreachable("Unexpected operand type to get TOC type.");
   }
 }
+
+// On AIX, TLS-local-dynamic requires that symbol for the module handle must
+// have the name "_$TLSML". This symbol is used as one TOC symbol reference
+// itself with ML relocation type, thus it has "[TC]" attached to its name.
+static inline bool isSpecialAIXSymbolTLSML(const MachineOperand &MO,
+                                           const bool IsAIX) {
+  return IsAIX && MO.isSymbol() &&
+         (std::strcmp(MO.getSymbolName(), "_$TLSML[TC]") == 0);
+}
+
 /// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
 /// the current output stream.
 ///
@@ -846,6 +870,15 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM;
     if (MO.getTargetFlags() & PPCII::MO_TLSGD_FLAG)
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD;
+    if (MO.getTargetFlags() & PPCII::MO_TLSLD_FLAG) {
+      if (isSpecialAIXSymbolTLSML(MO, IsAIX))
+        // FIXME: On AIX the ML relocation type is only valid for a reference to
+        // a TOC symbol from the symbol itself, and right now its only user is
+        // symbol "_$TLSML". Use symbol name to decide that R_TLSML is expected.
+        return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML;
+      if (IsAIX)
+        return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD;
+    }
     return MCSymbolRefExpr::VariantKind::VK_None;
   };
 
@@ -964,7 +997,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LWZ);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
+            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
            "Invalid operand for LWZtoc.");
 
     // Map the operand to its corresponding MCSymbol.
@@ -1053,7 +1087,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LD);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
+            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
            "Invalid operand!");
 
     // Map the operand to its corresponding MCSymbol.
@@ -1091,7 +1126,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::ADDIS);
 
     const MachineOperand &MO = MI->getOperand(2);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
+            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
            "Invalid operand for ADDIStocHA.");
 
     // Map the machine operand to its corresponding MCSymbol.
@@ -1124,7 +1160,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LWZ);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
+            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
            "Invalid operand for LWZtocL.");
 
     // Map the machine operand to its corresponding MCSymbol.
@@ -1156,7 +1193,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::ADDIS8);
 
     const MachineOperand &MO = MI->getOperand(2);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
+            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
            "Invalid operand for ADDIStocHA8!");
 
     const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
@@ -1166,7 +1204,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     const bool GlobalToc =
         MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal());
     if (GlobalToc || MO.isJTI() || MO.isBlockAddress() ||
-        (MO.isCPI() && TM.getCodeModel() == CodeModel::Large))
+        (MO.isCPI() && TM.getCodeModel() == CodeModel::Large) ||
+        isSpecialAIXSymbolTLSML(MO, IsAIX))
       MOSymbol = lookUpOrCreateTOCEntry(MOSymbol, getTOCEntryTypeForMO(MO), VK);
 
     VK = IsAIX ? MCSymbolRefExpr::VK_PPC_U : MCSymbolRefExpr::VK_PPC_TOC_HA;
@@ -1195,8 +1234,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LD);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() ||
-            MO.isBlockAddress()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
+            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
            "Invalid operand for LDtocL!");
 
     LLVM_DEBUG(assert(
@@ -1362,6 +1401,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
   case PPC::GETtlsADDRPCREL:
   case PPC::GETtlsADDR32AIX:
   case PPC::GETtlsADDR64AIX:
+  case PPC::GETtlsMOD32AIX:
+  case PPC::GETtlsMOD64AIX:
     // Transform: %r3 = GETtlsADDRNNAIX %r3, %r4 (for NN == 32/64).
     // Into: BLA .__tls_get_addr()
     // Unlike on Linux, there is no symbol or relocation needed for this call.
@@ -2710,6 +2751,15 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
       MCSymbol *S = OutContext.getOrCreateSymbol(Name);
       TCEntry = cast<MCSectionXCOFF>(
           getObjFileLowering().getSectionForTOCEntry(S, TM));
+    } else if (I.first.second ==
+               MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML) {
+      // AIX assembler expects TC storage-mapping class for the "_$TLSML"
+      // symbol.
+      MCSection *MCSect = getObjFileLowering().getContext().getXCOFFSection(
+          cast<MCSymbolXCOFF>(I.first.first)->getSymbolTableName(),
+          SectionKind::getData(),
+          XCOFF::CsectProperties(XCOFF::XMC_TC, XCOFF::XTY_SD));
+      TCEntry = cast<MCSectionXCOFF>(MCSect);
     } else {
       TCEntry = cast<MCSectionXCOFF>(
           getObjFileLowering().getSectionForTOCEntry(I.first.first, TM));
@@ -2826,6 +2876,8 @@ void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) {
 		 MMI->hasDebugInfo());
     break;
   }
+  case PPC::GETtlsMOD32AIX:
+  case PPC::GETtlsMOD64AIX:
   case PPC::GETtlsTpointer32AIX:
   case PPC::GETtlsADDR64AIX:
   case PPC::GETtlsADDR32AIX: {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 95f2243178c8a10..9297e84cedd7375 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1771,9 +1771,13 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
+  case PPCISD::GET_TLS_MOD:
+    return "PPCISD::GET_TLS_MOD";
   case PPCISD::GET_TPOINTER:    return "PPCISD::GET_TPOINTER";
   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
   case PPCISD::TLSGD_AIX:       return "PPCISD::TLSGD_AIX";
+  case PPCISD::TLSLD_AIX:
+    return "PPCISD::TLSLD_AIX";
   case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
   case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
   case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
@@ -3412,13 +3416,25 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
   }
 
-  // Only Local-Exec, Initial-Exec and General-Dynamic TLS models are currently
-  // supported models. If Local- or Initial-exec are not possible or specified,
-  // all GlobalTLSAddress nodes are lowered using the general-dynamic model.
-  // We need to generate two TOC entries, one for the variable offset, one for
-  // the region handle. The global address for the TOC entry of the region
-  // handle is created with the MO_TLSGDM_FLAG flag and the global address
-  // for the TOC entry of the variable offset is created with MO_TLSGD_FLAG.
+  if (Model == TLSModel::LocalDynamic) {
+    // For local-dynamic on AIX, we need to generate two TOC entries, one for
+    // the variable offset, the other for the module handle. The module handle
+    // is encapsulated inside the TLSLD_AIX pseudo node, and will be expanded by
+    // PPCTLSDynamicCall.
+    SDValue VariableOffsetTGA =
+        DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
+    SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
+    return DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, VariableOffset);
+  }
+
+  // The Local-Exec, Initial-Exec, Local-Dynamic, and General-Dynamic TLS models
+  // are currently supported access models. If Local- or Initial-exec or
+  // local-dynamic is not possible or specified, all GlobalTLSAddress nodes are
+  // lowered using the general-dynamic model. We need to generate two TOC
+  // entries, one for the variable offset, one for the region handle. The global
+  // address for the TOC entry of the region handle is created with the
+  // MO_TLSGDM_FLAG flag and the global address for the TOC entry of the
+  // variable offset is created with MO_TLSGD_FLAG.
   SDValue VariableOffsetTGA =
       DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
   SDValue RegionHandleTGA =
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 7c62e370f1536a4..cffe5c73fcd6d01 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -370,11 +370,23 @@ namespace llvm {
     /// G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY
     /// Op that combines two register copies of TOC entries
     /// (region handle into R3 and variable offset into R4) followed by a
-    /// GET_TLS_ADDR node which will be expanded to a call to __get_tls_addr.
+    /// GET_TLS_ADDR node which will be expanded to a call to __tls_get_addr.
     /// This node is used in 64-bit mode as well (in which case the result is
     /// G8RC and inputs are X3/X4).
     TLSGD_AIX,
 
+    /// %x3 = GET_TLS_MOD _$TLSML - For the AIX local-dynamic TLS model,
+    /// produces a call to __tls_get_mod(_$TLSML\@ml).
+    GET_TLS_MOD,
+
+    /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(variable offset)
+    /// Op that internally creates TOC entry for the "_$TLSML" symbol, generates
+    /// GET_TLS_MOD node which will be expanded into a call to __tls_get_mod,
+    /// and then add the variable offset with the result from the call.
+    /// This node is used in both 32-bit and 64-bit modes. The only difference
+    /// is register class.
+    TLSLD_AIX,
+
     /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
     /// model, produces an ADDIS8 instruction that adds the GOT base
     /// register to sym\@got\@tlsld\@ha.
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index fd436d422298355..51e1cd544452adf 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1584,12 +1584,19 @@ def GETtlsldADDRPCREL : GETtlsldADDRPseudo <"#GETtlsldADDRPCREL">;
 // so we don't need to mark it with a size of 8 bytes. Finally, the assembly
 // manual mentions this exact set of registers as the clobbered set, others
 // are guaranteed not to be clobbered.
-let Defs = [X0,X4,X5,X11,LR8,CR0] in
+let Defs = [X0,X4,X5,X11,LR8,CR0] in {
 def GETtlsADDR64AIX :
   PPCEmitTimePseudo<(outs g8rc:$rD),(ins g8rc:$offset, g8rc:$handle),
                     "GETtlsADDR64AIX",
                     [(set i64:$rD,
                       (PPCgetTlsAddr i64:$offset, i64:$handle))]>, isPPC64;
+// On AIX, the call to __tls_get_mod need one input in X3 for the module handle.
+def GETtlsMOD64AIX :
+  PPCEmitTimePseudo<(outs g8rc:$rD),(ins g8rc:$handle),
+                    "GETtlsMOD64AIX",
+                    [(set i64:$rD,
+                      (PPCgetTlsMod i64:$handle))]>, isPPC64;
+}
 }
 
 // Combined op for ADDItlsgdL and GETtlsADDR, late expanded.  X3 and LR8
@@ -1622,6 +1629,10 @@ def TLSGDAIX8 :
                      "#TLSGDAIX8",
                      [(set i64:$rD,
                        (PPCTlsgdAIX i64:$offset, i64:$handle))]>;
+// This pseudo is expanded to one copy to put the module handle in R3, then call
+// GETtlsMOD64AIX, and then add variable offset to the output from the call.
+def TLSLDAIX8 : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$handle),
+                     "#TLSLDAIX8", [(set i64:$rD, (PPCTlsldAIX i64:$handle))]>;
 // Combined op for ADDItlsldL and GETtlsADDR, late expanded.  X3 and LR8
 // are true defines, while the rest of the Defs are clobbers.
 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index e1c513dc52dcee8..a0b37a484de976b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -213,12 +213,14 @@ def PPCaddTls     : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
 def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
 def PPCaddiTlsgdL   : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
 def PPCgetTlsAddr   : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
+def PPCgetTlsMod   : SDNode<"PPCISD::GET_TLS_MOD", SDTIntUnaryOp>;
 def PPCgetTpointer : SDNode<"PPCISD::GET_TPOINTER", SDTIntLeaf, []>;
 def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR",
                                SDTypeProfile<1, 3, [
                                  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                                  SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>;
 def PPCTlsgdAIX     : SDNode<"PPCISD::TLSGD_AIX", SDTIntBinOp>;
+def PPCTlsldAIX     : SDNode<"PPCISD::TLSLD_AIX", SDTIntUnaryOp>;
 def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
 def PPCaddiTlsldL   : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
 def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
@@ -3237,11 +3239,16 @@ def GETtlsADDR32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$s
 // The rest of the Defs are the exact set of registers that will be clobbered by
 // the call.
 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
-    Defs = [R0,R4,R5,R11,LR,CR0] in
+    Defs = [R0,R4,R5,R11,LR,CR0] in {
 def GETtlsADDR32AIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$offset, gprc:$handle),
                           "GETtlsADDR32AIX",
                           [(set i32:$rD,
                             (PPCgetTlsAddr i32:$offset, i32:$handle))]>;
+def GETtlsMOD32AIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$handle),
+                          "GETtlsMOD32AIX",
+                          [(set i32:$rD,
+                            (PPCgetTlsMod i32:$handle))]>;
+}
 
 // For local-exec accesses on 32-bit AIX, a call to .__get_tpointer is
 // generated to retrieve the thread pointer. GETtlsTpointer32AIX clobbers both
@@ -3281,6 +3288,12 @@ def TLSGDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$offset, gprc:$handl
                           "#TLSGDAIX",
                           [(set i32:$rD,
                             (PPCTlsgdAIX i32:$offset, i32:$handle))]>;
+// This pseudo is expanded to one copy to put the module handle in R3, then call
+// GETtlsMOD32AIX, and then add variable offset to the output from the call.
+def TLSLDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$handle),
+                          "#TLSLDAIX",
+                          [(set i32:$rD,
+                            (PPCTlsldAIX i32:$handle))]>;
 // LR is a true define, while the rest of the Defs are clobbers.  R3 is
 // explicitly defined when this op is created, so not mentioned here.
 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 9518d5347065cce..32a75a32b9d87f6 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -51,6 +51,11 @@ namespace {
       bool Is64Bit = MBB.getParent()->getSubtarget<PPCSubtarget>().isPPC64();
       bool IsAIX = MBB.getParent()->getSubtarget<PPCSubtarget>().isAIXABI();
       bool IsPCREL = false;
+      MachineFunction *MF = MBB.getParent();
+      MachineRegisterInfo &RegInfo = MF->getRegInfo();
+      const TargetRegisterClass *GPRNoZero =
+          Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
+                  : &PPC::GPRC_and_GPRC_NOR0RegClass;
 
       for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
            I != IE;) {
@@ -64,6 +69,8 @@ namespace {
             MI.getOpcode() != PPC::ADDItlsldLADDR &&
             MI.getOpcode() != PPC::ADDItlsgdLADDR32 &&
             MI.getOpcode() != PPC::ADDItlsldLADDR32 &&
+            MI.getOpcode() != PPC::TLSLDAIX &&
+            MI.getOpcode() != PPC::TLSLDAIX8 &&
             MI.getOpcode() != PPC::TLSGDAIX &&
             MI.getOpcode() != PPC::TLSGDAIX8 && !IsTLSTPRelMI && !IsPCREL) {
           // Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP
@@ -109,6 +116,12 @@ namespace {
           Opc1 = PPC::ADDItlsldL32;
           Opc2 = PPC::GETtlsldADDR32;
           break;
+        case PPC::TLSLDAIX:
+          Opc2 = PPC::GETtlsMOD32AIX;
+          break;
+        case PPC::TLSLDAIX8:
+          Opc2 = PPC::GETtlsMOD64AIX;
+          break;
         case PPC::TLSGDAIX8:
           // TLSGDAIX8 is expanded to two copies and GET_TLS_ADDR, so we only
           // set Opc2 here.
@@ -145,19 +158,55 @@ namespace {
                                                               .addImm(0);
 
         if (IsAIX) {
-          // The variable offset and region handle are copied in r4 and r3. The
-          // copies are followed by GETtlsADDR32AIX/GETtlsADDR64AIX.
-          if (!IsTLSTPRelMI) {
-            BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
-                .addReg(MI.getOperand(1).getReg());
+          if (MI.getOpcode() == PPC::TLSLDAIX8 ||
+              MI.getOpcode() == PPC::TLSLDAIX) {
+            // For Local-Dynamic
+            auto &Subtarget = MBB.getParent()->getSubtarget<PPCSubtarget>();
+            bool IsLargeModel =
+                Subtarget.getTargetMachine().getCodeModel() == CodeModel::Large;
+            Register ModuleHandleHReg;
+            unsigned LDTocOp =
+                Is64Bit ? (IsLargeModel ? PPC::LDtocL : PPC::LDtoc)
+                        : (IsLargeModel ? PPC::LWZtocL : PPC::LWZtoc);
+            if (IsLargeModel) {
+              ModuleHandleHReg = RegInfo.createVirtualRegister(GPRNoZero);
+              BuildMI(MBB, I, DL,
+                      TII->get(Is64Bit ? PPC::ADDIStocHA8 : PPC::ADDIStocHA),
+                      ModuleHandleHReg)
+                  .addReg(Subtarget.getTOCPointerRegister())
+                  .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG);
+            }
+            Register MHReg = RegInfo.createVirtualRegister(GPRNoZero);
+            BuildMI(MBB, I, DL, TII->get(LDTocOp), MHReg)
+                .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG)
+                .addReg(IsLargeModel
+                            ? ModuleHandleHReg
+                            : Register(Subtarget.getTOCPointerRegister()));
             BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
-                .addReg(MI.getOperand(2).getReg());
-            BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3).addReg(GPR4);
-          } else
-            // The opcode of GETtlsTpointer32AIX does not change, because later
-            // this instruction will be expanded into a call to .__get_tpointer,
-            // which will return the thread pointer into r3.
-            BuildMI(MBB, I, DL, TII->get(Opc2), GPR3);
+                .addReg(MHReg);
+            // The call to __tls_get_mod.
+            BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3);
+            BuildMI(MBB, I, DL, TII->get(Is64Bit ? PPC::ADD8 : PPC::ADD4), GPR3)
+                .addReg(GPR3)
+                .addReg(MI.getOperand(1).getReg());
+          } else {
+            // For Global-Dynamic
+            // The variable offset and region handle are copied in r4 and r3.
+            // The copies are followed by GETtlsADDR32AIX/GETtlsADDR64AIX.
+            if (!IsTLSTPRelMI) {
+              BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
+                  .addReg(MI.getOperand(1).getReg());
+              BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
+                  .addReg(MI.getOperand(2).getReg());
+              BuildMI(MBB, I, DL, TII->get(Opc2), GPR3)
+                  .addReg(GPR3)
+                  .addReg(GPR4);
+            } else
+              // The opcode of GETtlsTpointer32AIX does not change, because
+              // later this instruction will be expanded into a call to
+              // .__get_tpointer, which will return the thread pointer into r3.
+              BuildMI(MBB, I, DL, TII->get(Opc2), GPR3);
+          }
         } else {
           MachineInstr *Addi;
           if (IsPCREL) {
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
index 46ce3bfd450bd0d..aa24930a0caebc6 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
@@ -156,10 +156,11 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-lo) @TIInit
-; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsgd) @TIInit
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
+; SMALL32-NEXT:    lwz 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL32-NEXT:    lwz 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    add 3, 3, 6
 ; SMALL32-NEXT:    stfd 1, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
@@ -172,10 +173,11 @@ define void @storesTIInit(double %Val) #0 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C5 at l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 6, L..C4 at l(3)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 6
 ; LARGE32-NEXT:    stfd 1, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
@@ -186,10 +188,11 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-lo) @TIInit
-; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsgd) @TIInit
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL64-NEXT:    ld 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL64-NEXT:    ld 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    add 3, 3, 6
 ; SMALL64-NEXT:    stfd 1, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -201,11 +204,12 @@ define void @storesTIInit(double %Val) #0 {
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
-; LARGE64-NEXT:    addis 4, L..C5 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C4 at l(3)
-; LARGE64-NEXT:    ld 4, L..C5 at l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
+; LARGE64-NEXT:    ld 6, L..C4 at l(3)
+; LARGE64-NEXT:    addis 3, L..C5 at u(2)
+; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    add 3, 3, 6
 ; LARGE64-NEXT:    stfd 1, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -452,11 +456,12 @@ define double @loadsTIInit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-lo) @TIInit
-; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsgd) @TIInit
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
+; SMALL32-NEXT:    lwz 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL32-NEXT:    lwz 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C8(2) # @GInit
+; SMALL32-NEXT:    add 3, 3, 6
 ; SMALL32-NEXT:    lfd 0, 0(3)
 ; SMALL32-NEXT:    lfd 1, 0(4)
 ; SMALL32-NEXT:    fadd 1, 0, 1
@@ -471,10 +476,11 @@ define double @loadsTIInit() #1 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C5 at l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 6, L..C4 at l(3)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 6
 ; LARGE32-NEXT:    lfd 0, 0(3)
 ; LARGE32-NEXT:    addis 3, L..C8 at u(2)
 ; LARGE32-NEXT:    lwz 3, L..C8 at l(3)
@@ -489,11 +495,12 @@ define double @loadsTIInit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-lo) @TIInit
-; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsgd) @TIInit
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL64-NEXT:    ld 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL64-NEXT:    ld 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C8(2) # @GInit
+; SMALL64-NEXT:    add 3, 3, 6
 ; SMALL64-NEXT:    lfd 0, 0(3)
 ; SMALL64-NEXT:    lfd 1, 0(4)
 ; SMALL64-NEXT:    fadd 1, 0, 1
@@ -507,11 +514,12 @@ define double @loadsTIInit() #1 {
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
-; LARGE64-NEXT:    addis 4, L..C5 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C4 at l(3)
-; LARGE64-NEXT:    ld 4, L..C5 at l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
+; LARGE64-NEXT:    ld 6, L..C4 at l(3)
+; LARGE64-NEXT:    addis 3, L..C5 at u(2)
+; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    add 3, 3, 6
 ; LARGE64-NEXT:    addis 4, L..C8 at u(2)
 ; LARGE64-NEXT:    lfd 0, 0(3)
 ; LARGE64-NEXT:    ld 3, L..C8 at l(4)
@@ -610,12 +618,16 @@ entry:
   ret double %add
 }
 
-; External symbol reference checks for .__tls_get_addr
+; External symbol reference checks for .__tls_get_addr/.__tls_get_mod
 
 ; SMALL32: .extern .__tls_get_addr[PR]
+; SMALL32: .extern .__tls_get_mod[PR]
 ; SMALL64: .extern .__tls_get_addr[PR]
+; SMALL64: .extern .__tls_get_mod[PR]
 ; LARGE32: .extern .__tls_get_addr[PR]
+; LARGE32: .extern .__tls_get_mod[PR]
 ; LARGE64: .extern .__tls_get_addr[PR]
+; LARGE64: .extern .__tls_get_mod[PR]
 
 ; TOC entry checks
 
@@ -629,9 +641,10 @@ entry:
 ; SMALL32-LABEL:  L..C3:
 ; SMALL32-NEXT:   .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL32-LABEL:  L..C4:
-; SMALL32-NEXT:   .tc .TIInit[TC],TIInit[TL]@m
+; SMALL32-NEXT:   .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL32-LABEL:  L..C5:
-; SMALL32-NEXT:   .tc TIInit[TC],TIInit[TL]@gd
+; SMALL32-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL32-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; SMALL32-LABEL:  L..C6:
 ; SMALL32-NEXT:   .tc .TWInit[TC],TWInit[TL]@m
 ; SMALL32-LABEL:  L..C7:
@@ -649,9 +662,10 @@ entry:
 ; LARGE32-LABEL:  L..C3:
 ; LARGE32-NEXT:   .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL:  L..C4:
-; LARGE32-NEXT:   .tc .TIInit[TE],TIInit[TL]@m
+; LARGE32-NEXT:   .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE32-LABEL:  L..C5:
-; LARGE32-NEXT:   .tc TIInit[TE],TIInit[TL]@gd
+; LARGE32-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE32-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; LARGE32-LABEL:  L..C6:
 ; LARGE32-NEXT:   .tc .TWInit[TE],TWInit[TL]@m
 ; LARGE32-LABEL:  L..C7:
@@ -669,9 +683,10 @@ entry:
 ; SMALL64-LABEL:  L..C3:
 ; SMALL64-NEXT:  .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL64-LABEL:  L..C4:
-; SMALL64-NEXT:  .tc .TIInit[TC],TIInit[TL]@m
+; SMALL64-NEXT:  .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL64-LABEL:  L..C5:
-; SMALL64-NEXT:  .tc TIInit[TC],TIInit[TL]@gd
+; SMALL64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; SMALL64-LABEL:  L..C6:
 ; SMALL64-NEXT:  .tc .TWInit[TC],TWInit[TL]@m
 ; SMALL64-LABEL:  L..C7:
@@ -689,9 +704,10 @@ entry:
 ; LARGE64-LABEL:  L..C3:
 ; LARGE64-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE64-LABEL:  L..C4:
-; LARGE64-NEXT:  .tc .TIInit[TE],TIInit[TL]@m
+; LARGE64-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE64-LABEL:  L..C5:
-; LARGE64-NEXT:  .tc TIInit[TE],TIInit[TL]@gd
+; LARGE64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; LARGE64-LABEL:  L..C6:
 ; LARGE64-NEXT:  .tc .TWInit[TE],TWInit[TL]@m
 ; LARGE64-LABEL:  L..C7:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
index 887c4521a4c90ad..dc75db43d38c706 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
@@ -163,11 +163,12 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    mr 6, 3
-; SMALL32-NEXT:    lwz 3, L..C4(2)
-; SMALL32-NEXT:    lwz 4, L..C5(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
+; SMALL32-NEXT:    mr 6, 3
+; SMALL32-NEXT:    lwz 7, L..C4(2)
+; SMALL32-NEXT:    lwz 3, L..C5(2)
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    add 3, 3, 7
 ; SMALL32-NEXT:    stw 6, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
@@ -181,10 +182,11 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 6, 3
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C5 at l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 7, L..C4 at l(3)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 7
 ; LARGE32-NEXT:    stw 6, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
@@ -195,11 +197,12 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    mr 6, 3
-; SMALL64-NEXT:    ld 3, L..C4(2)
-; SMALL64-NEXT:    ld 4, L..C5(2)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL64-NEXT:    mr 6, 3
+; SMALL64-NEXT:    ld 7, L..C4(2)
+; SMALL64-NEXT:    ld 3, L..C5(2)
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    add 3, 3, 7
 ; SMALL64-NEXT:    stw 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -212,11 +215,12 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
-; LARGE64-NEXT:    addis 4, L..C5 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C4 at l(3)
-; LARGE64-NEXT:    ld 4, L..C5 at l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
+; LARGE64-NEXT:    ld 7, L..C4 at l(3)
+; LARGE64-NEXT:    addis 3, L..C5 at u(2)
+; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    add 3, 3, 7
 ; LARGE64-NEXT:    stw 6, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -467,11 +471,12 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2)
-; SMALL32-NEXT:    lwz 4, L..C5(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
+; SMALL32-NEXT:    lwz 6, L..C4(2)
+; SMALL32-NEXT:    lwz 3, L..C5(2)
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    add 3, 3, 6
 ; SMALL32-NEXT:    lwz 3, 0(3)
 ; SMALL32-NEXT:    lwz 4, 0(4)
 ; SMALL32-NEXT:    add 3, 4, 3
@@ -486,10 +491,11 @@ define i32 @loadsTIUninit() #1 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C5 at l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 6, L..C4 at l(3)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 6
 ; LARGE32-NEXT:    lwz 3, 0(3)
 ; LARGE32-NEXT:    addis 4, L..C8 at u(2)
 ; LARGE32-NEXT:    lwz 4, L..C8 at l(4)
@@ -504,11 +510,12 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C4(2)
-; SMALL64-NEXT:    ld 4, L..C5(2)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL64-NEXT:    ld 6, L..C4(2)
+; SMALL64-NEXT:    ld 3, L..C5(2)
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    add 3, 3, 6
 ; SMALL64-NEXT:    lwz 3, 0(3)
 ; SMALL64-NEXT:    lwz 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -522,14 +529,15 @@ define i32 @loadsTIUninit() #1 {
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
-; LARGE64-NEXT:    addis 4, L..C5 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C4 at l(3)
-; LARGE64-NEXT:    ld 4, L..C5 at l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
+; LARGE64-NEXT:    ld 6, L..C4 at l(3)
+; LARGE64-NEXT:    addis 3, L..C5 at u(2)
+; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    addis 4, L..C8 at u(2)
-; LARGE64-NEXT:    lwz 3, 0(3)
+; LARGE64-NEXT:    add 3, 3, 6
 ; LARGE64-NEXT:    ld 4, L..C8 at l(4)
+; LARGE64-NEXT:    lwz 3, 0(3)
 ; LARGE64-NEXT:    lwz 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -625,12 +633,16 @@ entry:
   ret i32 %add
 }
 
-; External symbol reference checks for .__tls_get_addr
+; External symbol reference checks for .__tls_get_addr/.__tls_get_mod
 
 ; SMALL32: .extern .__tls_get_addr[PR]
+; SMALL32: .extern .__tls_get_mod[PR]
 ; SMALL64: .extern .__tls_get_addr[PR]
+; SMALL64: .extern .__tls_get_mod[PR]
 ; LARGE32: .extern .__tls_get_addr[PR]
+; LARGE32: .extern .__tls_get_mod[PR]
 ; LARGE64: .extern .__tls_get_addr[PR]
+; LARGE64: .extern .__tls_get_mod[PR]
 
 ; TOC entry checks
 
@@ -644,9 +656,10 @@ entry:
 ; SMALL32-LABEL: L..C3:
 ; SMALL32-NEXT:	 .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL32-LABEL: L..C4:
-; SMALL32-NEXT:	 .tc .TIUninit[TC],TIUninit[UL]@m
+; SMALL32-NEXT:	 .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL32-LABEL: L..C5:
-; SMALL32-NEXT:	 .tc TIUninit[TC],TIUninit[UL]@gd
+; SMALL32-NEXT:	 .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL32-NEXT:	 .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; SMALL32-LABEL: L..C6:
 ; SMALL32-NEXT:	 .tc .TWUninit[TC],TWUninit[TL]@m
 ; SMALL32-LABEL: L..C7:
@@ -664,9 +677,10 @@ entry:
 ; LARGE32-LABEL: L..C3:
 ; LARGE32-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL: L..C4:
-; LARGE32-NEXT:  .tc .TIUninit[TE],TIUninit[UL]@m
+; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE32-LABEL: L..C5:
-; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@gd
+; LARGE32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; LARGE32-LABEL: L..C6:
 ; LARGE32-NEXT:  .tc .TWUninit[TE],TWUninit[TL]@m
 ; LARGE32-LABEL: L..C7:
@@ -684,9 +698,10 @@ entry:
 ; SMALL64-LABEL:  L..C3:
 ; SMALL64-NEXT:   .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL64-LABEL:  L..C4:
-; SMALL64-NEXT:   .tc .TIUninit[TC],TIUninit[UL]@m
+; SMALL64-NEXT:   .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL64-LABEL:  L..C5:
-; SMALL64-NEXT:   .tc TIUninit[TC],TIUninit[UL]@gd
+; SMALL64-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL64-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; SMALL64-LABEL:  L..C6:
 ; SMALL64-NEXT:   .tc .TWUninit[TC],TWUninit[TL]@m
 ; SMALL64-LABEL:  L..C7:
@@ -704,9 +719,10 @@ entry:
 ; LARGE64-LABEL:  L..C3:
 ; LARGE64-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE64-LABEL:  L..C4:
-; LARGE64-NEXT:  .tc .TIUninit[TE],TIUninit[UL]@m
+; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE64-LABEL:  L..C5:
-; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@gd
+; LARGE64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; LARGE64-LABEL:  L..C6:
 ; LARGE64-NEXT:  .tc .TWUninit[TE],TWUninit[TL]@m
 ; LARGE64-LABEL:  L..C7:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
index 47813b59ba804c2..d19b5ad9b5e50a1 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
@@ -97,12 +97,13 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    mr 6, 4
-; SMALL32-NEXT:    mr 7, 3
-; SMALL32-NEXT:    lwz 3, L..C2(2)
-; SMALL32-NEXT:    lwz 4, L..C3(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
+; SMALL32-NEXT:    mr 7, 3
+; SMALL32-NEXT:    lwz 8, L..C2(2)
+; SMALL32-NEXT:    mr 6, 4
+; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    add 3, 3, 8
 ; SMALL32-NEXT:    stw 6, 4(3)
 ; SMALL32-NEXT:    stw 7, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
@@ -115,13 +116,14 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    mr 7, 3
+; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    addis 3, L..C2 at u(2)
-; LARGE32-NEXT:    addis 4, L..C3 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C2 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C3 at l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 8, L..C2 at l(3)
+; LARGE32-NEXT:    addis 3, L..C3 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 8
 ; LARGE32-NEXT:    stw 6, 4(3)
 ; LARGE32-NEXT:    stw 7, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
@@ -133,11 +135,12 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    mr 6, 3
-; SMALL64-NEXT:    ld 3, L..C2(2)
-; SMALL64-NEXT:    ld 4, L..C3(2)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL64-NEXT:    mr 6, 3
+; SMALL64-NEXT:    ld 7, L..C2(2)
+; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    add 3, 3, 7
 ; SMALL64-NEXT:    std 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -150,11 +153,12 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C2 at u(2)
-; LARGE64-NEXT:    addis 4, L..C3 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C2 at l(3)
-; LARGE64-NEXT:    ld 4, L..C3 at l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
+; LARGE64-NEXT:    ld 7, L..C2 at l(3)
+; LARGE64-NEXT:    addis 3, L..C3 at u(2)
+; LARGE64-NEXT:    ld 3, L..C3 at l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    add 3, 3, 7
 ; LARGE64-NEXT:    std 6, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -171,12 +175,13 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    mr 6, 4
-; SMALL32-NEXT:    mr 7, 3
-; SMALL32-NEXT:    lwz 3, L..C4(2)
-; SMALL32-NEXT:    lwz 4, L..C5(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
+; SMALL32-NEXT:    mr 7, 3
+; SMALL32-NEXT:    lwz 8, L..C4(2)
+; SMALL32-NEXT:    mr 6, 4
+; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    add 3, 3, 8
 ; SMALL32-NEXT:    stw 6, 4(3)
 ; SMALL32-NEXT:    stw 7, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
@@ -189,13 +194,14 @@ define void @storesTIInit(i64 %Val) #0 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    mr 7, 3
+; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C5 at l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 8, L..C4 at l(3)
+; LARGE32-NEXT:    addis 3, L..C3 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 8
 ; LARGE32-NEXT:    stw 6, 4(3)
 ; LARGE32-NEXT:    stw 7, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
@@ -207,11 +213,12 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    mr 6, 3
-; SMALL64-NEXT:    ld 3, L..C4(2)
-; SMALL64-NEXT:    ld 4, L..C5(2)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL64-NEXT:    mr 6, 3
+; SMALL64-NEXT:    ld 7, L..C4(2)
+; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    add 3, 3, 7
 ; SMALL64-NEXT:    std 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -224,11 +231,12 @@ define void @storesTIInit(i64 %Val) #0 {
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
-; LARGE64-NEXT:    addis 4, L..C5 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C4 at l(3)
-; LARGE64-NEXT:    ld 4, L..C5 at l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
+; LARGE64-NEXT:    ld 7, L..C4 at l(3)
+; LARGE64-NEXT:    addis 3, L..C3 at u(2)
+; LARGE64-NEXT:    ld 3, L..C3 at l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    add 3, 3, 7
 ; LARGE64-NEXT:    std 6, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -247,8 +255,8 @@ define void @storesTWInit(i64 %Val) #0 {
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    mr 6, 4
 ; SMALL32-NEXT:    mr 7, 3
-; SMALL32-NEXT:    lwz 3, L..C6(2)
-; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    lwz 3, L..C5(2)
+; SMALL32-NEXT:    lwz 4, L..C6(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL32-NEXT:    stw 6, 4(3)
@@ -265,10 +273,10 @@ define void @storesTWInit(i64 %Val) #0 {
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    mr 7, 3
-; LARGE32-NEXT:    addis 3, L..C6 at u(2)
-; LARGE32-NEXT:    addis 4, L..C7 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C6 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C7 at l(4)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    addis 4, L..C6 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    lwz 4, L..C6 at l(4)
 ; LARGE32-NEXT:    bla .__tls_get_addr[PR]
 ; LARGE32-NEXT:    stw 6, 4(3)
 ; LARGE32-NEXT:    stw 7, 0(3)
@@ -282,8 +290,8 @@ define void @storesTWInit(i64 %Val) #0 {
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    mr 6, 3
-; SMALL64-NEXT:    ld 3, L..C6(2)
-; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    ld 3, L..C5(2)
+; SMALL64-NEXT:    ld 4, L..C6(2)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL64-NEXT:    std 6, 0(3)
@@ -297,11 +305,11 @@ define void @storesTWInit(i64 %Val) #0 {
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    mr 6, 3
-; LARGE64-NEXT:    addis 3, L..C6 at u(2)
-; LARGE64-NEXT:    addis 4, L..C7 at u(2)
+; LARGE64-NEXT:    addis 3, L..C5 at u(2)
+; LARGE64-NEXT:    addis 4, L..C6 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C6 at l(3)
-; LARGE64-NEXT:    ld 4, L..C7 at l(4)
+; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    ld 4, L..C6 at l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
 ; LARGE64-NEXT:    std 6, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -323,7 +331,7 @@ define i64 @loadsTGInit() #1 {
 ; SMALL32-NEXT:    lwz 4, L..C1(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    lwz 4, L..C7(2)
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -347,8 +355,8 @@ define i64 @loadsTGInit() #1 {
 ; LARGE32-NEXT:    bla .__tls_get_addr[PR]
 ; LARGE32-NEXT:    lwz 4, 4(3)
 ; LARGE32-NEXT:    lwz 3, 0(3)
-; LARGE32-NEXT:    addis 5, L..C8 at u(2)
-; LARGE32-NEXT:    lwz 5, L..C8 at l(5)
+; LARGE32-NEXT:    addis 5, L..C7 at u(2)
+; LARGE32-NEXT:    lwz 5, L..C7 at l(5)
 ; LARGE32-NEXT:    lwz 6, 4(5)
 ; LARGE32-NEXT:    lwz 5, 0(5)
 ; LARGE32-NEXT:    addc 4, 6, 4
@@ -366,7 +374,7 @@ define i64 @loadsTGInit() #1 {
 ; SMALL64-NEXT:    ld 4, L..C1(2)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    ld 4, L..C7(2)
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -385,9 +393,9 @@ define i64 @loadsTGInit() #1 {
 ; LARGE64-NEXT:    ld 3, L..C0 at l(3)
 ; LARGE64-NEXT:    ld 4, L..C1 at l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
-; LARGE64-NEXT:    addis 4, L..C8 at u(2)
+; LARGE64-NEXT:    addis 4, L..C7 at u(2)
 ; LARGE64-NEXT:    ld 3, 0(3)
-; LARGE64-NEXT:    ld 4, L..C8 at l(4)
+; LARGE64-NEXT:    ld 4, L..C7 at l(4)
 ; LARGE64-NEXT:    ld 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -407,11 +415,12 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C2(2)
-; SMALL32-NEXT:    lwz 4, L..C3(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    lwz 6, L..C2(2)
+; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    add 3, 3, 6
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -429,14 +438,15 @@ define i64 @loadsTIUninit() #1 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C2 at u(2)
-; LARGE32-NEXT:    addis 4, L..C3 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C2 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C3 at l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 6, L..C2 at l(3)
+; LARGE32-NEXT:    addis 3, L..C3 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 6
 ; LARGE32-NEXT:    lwz 4, 4(3)
 ; LARGE32-NEXT:    lwz 3, 0(3)
-; LARGE32-NEXT:    addis 5, L..C8 at u(2)
-; LARGE32-NEXT:    lwz 5, L..C8 at l(5)
+; LARGE32-NEXT:    addis 5, L..C7 at u(2)
+; LARGE32-NEXT:    lwz 5, L..C7 at l(5)
 ; LARGE32-NEXT:    lwz 6, 4(5)
 ; LARGE32-NEXT:    lwz 5, 0(5)
 ; LARGE32-NEXT:    addc 4, 6, 4
@@ -450,11 +460,12 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C2(2)
-; SMALL64-NEXT:    ld 4, L..C3(2)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    ld 6, L..C2(2)
+; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    add 3, 3, 6
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -468,14 +479,15 @@ define i64 @loadsTIUninit() #1 {
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C2 at u(2)
-; LARGE64-NEXT:    addis 4, L..C3 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C2 at l(3)
-; LARGE64-NEXT:    ld 4, L..C3 at l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
-; LARGE64-NEXT:    addis 4, L..C8 at u(2)
+; LARGE64-NEXT:    ld 6, L..C2 at l(3)
+; LARGE64-NEXT:    addis 3, L..C3 at u(2)
+; LARGE64-NEXT:    ld 3, L..C3 at l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    addis 4, L..C7 at u(2)
+; LARGE64-NEXT:    add 3, 3, 6
+; LARGE64-NEXT:    ld 4, L..C7 at l(4)
 ; LARGE64-NEXT:    ld 3, 0(3)
-; LARGE64-NEXT:    ld 4, L..C8 at l(4)
 ; LARGE64-NEXT:    ld 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -495,11 +507,12 @@ define i64 @loadsTIInit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2)
-; SMALL32-NEXT:    lwz 4, L..C5(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    lwz 6, L..C4(2)
+; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    add 3, 3, 6
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -517,14 +530,15 @@ define i64 @loadsTIInit() #1 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C5 at l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 6, L..C4 at l(3)
+; LARGE32-NEXT:    addis 3, L..C3 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 6
 ; LARGE32-NEXT:    lwz 4, 4(3)
 ; LARGE32-NEXT:    lwz 3, 0(3)
-; LARGE32-NEXT:    addis 5, L..C8 at u(2)
-; LARGE32-NEXT:    lwz 5, L..C8 at l(5)
+; LARGE32-NEXT:    addis 5, L..C7 at u(2)
+; LARGE32-NEXT:    lwz 5, L..C7 at l(5)
 ; LARGE32-NEXT:    lwz 6, 4(5)
 ; LARGE32-NEXT:    lwz 5, 0(5)
 ; LARGE32-NEXT:    addc 4, 6, 4
@@ -538,11 +552,12 @@ define i64 @loadsTIInit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C4(2)
-; SMALL64-NEXT:    ld 4, L..C5(2)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    ld 6, L..C4(2)
+; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    add 3, 3, 6
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -556,14 +571,15 @@ define i64 @loadsTIInit() #1 {
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
-; LARGE64-NEXT:    addis 4, L..C5 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C4 at l(3)
-; LARGE64-NEXT:    ld 4, L..C5 at l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
-; LARGE64-NEXT:    addis 4, L..C8 at u(2)
+; LARGE64-NEXT:    ld 6, L..C4 at l(3)
+; LARGE64-NEXT:    addis 3, L..C3 at u(2)
+; LARGE64-NEXT:    ld 3, L..C3 at l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    addis 4, L..C7 at u(2)
+; LARGE64-NEXT:    add 3, 3, 6
+; LARGE64-NEXT:    ld 4, L..C7 at l(4)
 ; LARGE64-NEXT:    ld 3, 0(3)
-; LARGE64-NEXT:    ld 4, L..C8 at l(4)
 ; LARGE64-NEXT:    ld 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -583,11 +599,11 @@ define i64 @loadsTWInit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C6(2)
-; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    lwz 3, L..C5(2)
+; SMALL32-NEXT:    lwz 4, L..C6(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    lwz 4, L..C7(2)
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -604,15 +620,15 @@ define i64 @loadsTWInit() #1 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    addis 3, L..C6 at u(2)
-; LARGE32-NEXT:    addis 4, L..C7 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C6 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C7 at l(4)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    addis 4, L..C6 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    lwz 4, L..C6 at l(4)
 ; LARGE32-NEXT:    bla .__tls_get_addr[PR]
 ; LARGE32-NEXT:    lwz 4, 4(3)
 ; LARGE32-NEXT:    lwz 3, 0(3)
-; LARGE32-NEXT:    addis 5, L..C8 at u(2)
-; LARGE32-NEXT:    lwz 5, L..C8 at l(5)
+; LARGE32-NEXT:    addis 5, L..C7 at u(2)
+; LARGE32-NEXT:    lwz 5, L..C7 at l(5)
 ; LARGE32-NEXT:    lwz 6, 4(5)
 ; LARGE32-NEXT:    lwz 5, 0(5)
 ; LARGE32-NEXT:    addc 4, 6, 4
@@ -626,11 +642,11 @@ define i64 @loadsTWInit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C6(2)
-; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    ld 3, L..C5(2)
+; SMALL64-NEXT:    ld 4, L..C6(2)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    ld 4, L..C7(2)
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -643,15 +659,15 @@ define i64 @loadsTWInit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    addis 3, L..C6 at u(2)
-; LARGE64-NEXT:    addis 4, L..C7 at u(2)
+; LARGE64-NEXT:    addis 3, L..C5 at u(2)
+; LARGE64-NEXT:    addis 4, L..C6 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C6 at l(3)
-; LARGE64-NEXT:    ld 4, L..C7 at l(4)
+; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    ld 4, L..C6 at l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
-; LARGE64-NEXT:    addis 4, L..C8 at u(2)
+; LARGE64-NEXT:    addis 4, L..C7 at u(2)
 ; LARGE64-NEXT:    ld 3, 0(3)
-; LARGE64-NEXT:    ld 4, L..C8 at l(4)
+; LARGE64-NEXT:    ld 4, L..C7 at l(4)
 ; LARGE64-NEXT:    ld 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -665,12 +681,16 @@ entry:
   ret i64 %add
 }
 
-; External symbol reference checks for .__tls_get_addr
+; External symbol reference checks for .__tls_get_addr/.__tls_get_mod
 
 ; SMALL32: .extern .__tls_get_addr[PR]
+; SMALL32: .extern .__tls_get_mod[PR]
 ; SMALL64: .extern .__tls_get_addr[PR]
+; SMALL64: .extern .__tls_get_mod[PR]
 ; LARGE32: .extern .__tls_get_addr[PR]
+; LARGE32: .extern .__tls_get_mod[PR]
 ; LARGE64: .extern .__tls_get_addr[PR]
+; LARGE64: .extern .__tls_get_mod[PR]
 
 ; TOC entry checks
 
@@ -680,18 +700,17 @@ entry:
 ; SMALL32-LABEL:  L..C1:
 ; SMALL32-NEXT:  .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL32-LABEL:  L..C2:
-; SMALL32-NEXT:  .tc .TIUninit[TC],TIUninit[UL]@m
+; SMALL32-NEXT:  .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL32-LABEL:  L..C3:
-; SMALL32-NEXT:  .tc TIUninit[TC],TIUninit[UL]@gd
+; SMALL32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; SMALL32-LABEL:  L..C4:
-; SMALL32-NEXT:  .tc .TIInit[TC],TIInit[TL]@m
+; SMALL32-NEXT:  .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL32-LABEL:  L..C5:
-; SMALL32-NEXT:  .tc TIInit[TC],TIInit[TL]@gd
-; SMALL32-LABEL:  L..C6:
 ; SMALL32-NEXT:  .tc .TWInit[TC],TWInit[TL]@m
-; SMALL32-LABEL:  L..C7:
+; SMALL32-LABEL:  L..C6:
 ; SMALL32-NEXT:  .tc TWInit[TC],TWInit[TL]@gd
-; SMALL32-LABEL:  L..C8:
+; SMALL32-LABEL:  L..C7:
 ; SMALL32-NEXT:  .tc GInit[TC],GInit[RW]
 
 ; LARGE32-LABEL:  .toc
@@ -700,18 +719,17 @@ entry:
 ; LARGE32-LABEL:  L..C1:
 ; LARGE32-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL:  L..C2:
-; LARGE32-NEXT:  .tc .TIUninit[TE],TIUninit[UL]@m
+; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE32-LABEL:  L..C3:
-; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@gd
+; LARGE32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; LARGE32-LABEL:  L..C4:
-; LARGE32-NEXT:  .tc .TIInit[TE],TIInit[TL]@m
+; LARGE32-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE32-LABEL:  L..C5:
-; LARGE32-NEXT:  .tc TIInit[TE],TIInit[TL]@gd
-; LARGE32-LABEL:  L..C6:
 ; LARGE32-NEXT:  .tc .TWInit[TE],TWInit[TL]@m
-; LARGE32-LABEL:  L..C7:
+; LARGE32-LABEL:  L..C6:
 ; LARGE32-NEXT:  .tc TWInit[TE],TWInit[TL]@gd
-; LARGE32-LABEL:  L..C8:
+; LARGE32-LABEL:  L..C7:
 ; LARGE32-NEXT:  .tc GInit[TE],GInit[RW]
 
 ; SMALL64-LABEL:  .toc
@@ -720,18 +738,17 @@ entry:
 ; SMALL64-LABEL:  L..C1:
 ; SMALL64-NEXT:  .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL64-LABEL:  L..C2:
-; SMALL64-NEXT:  .tc .TIUninit[TC],TIUninit[UL]@m
+; SMALL64-NEXT:  .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL64-LABEL:  L..C3:
-; SMALL64-NEXT:  .tc TIUninit[TC],TIUninit[UL]@gd
+; SMALL64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; SMALL64-LABEL:  L..C4:
-; SMALL64-NEXT:  .tc .TIInit[TC],TIInit[TL]@m
+; SMALL64-NEXT:  .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL64-LABEL:  L..C5:
-; SMALL64-NEXT:  .tc TIInit[TC],TIInit[TL]@gd
-; SMALL64-LABEL:  L..C6:
 ; SMALL64-NEXT:  .tc .TWInit[TC],TWInit[TL]@m
-; SMALL64-LABEL:  L..C7:
+; SMALL64-LABEL:  L..C6:
 ; SMALL64-NEXT:  .tc TWInit[TC],TWInit[TL]@gd
-; SMALL64-LABEL:  L..C8:
+; SMALL64-LABEL:  L..C7:
 ; SMALL64-NEXT:  .tc GInit[TC],GInit[RW]
 
 ; LARGE64-LABEL:  .toc
@@ -740,18 +757,17 @@ entry:
 ; LARGE64-LABEL:  L..C1:
 ; LARGE64-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE64-LABEL:  L..C2:
-; LARGE64-NEXT:  .tc .TIUninit[TE],TIUninit[UL]@m
+; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE64-LABEL:  L..C3:
-; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@gd
+; LARGE64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; LARGE64-LABEL:  L..C4:
-; LARGE64-NEXT:  .tc .TIInit[TE],TIInit[TL]@m
+; LARGE64-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE64-LABEL:  L..C5:
-; LARGE64-NEXT:  .tc TIInit[TE],TIInit[TL]@gd
-; LARGE64-LABEL:  L..C6:
 ; LARGE64-NEXT:  .tc .TWInit[TE],TWInit[TL]@m
-; LARGE64-LABEL:  L..C7:
+; LARGE64-LABEL:  L..C6:
 ; LARGE64-NEXT:  .tc TWInit[TE],TWInit[TL]@gd
-; LARGE64-LABEL:  L..C8:
+; LARGE64-LABEL:  L..C7:
 ; LARGE64-NEXT:  .tc GInit[TE],GInit[RW]
 
 attributes #0 = { nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" }
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
new file mode 100644
index 000000000000000..9faa99b6eb2ba74
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
@@ -0,0 +1,364 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
+; RUN:     --code-model=small < %s | FileCheck %s --check-prefixes=SMALL64,SMALL
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
+; RUN:     --code-model=large < %s | FileCheck %s --check-prefixes=LARGE64,LARGE
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \
+; RUN:     --code-model=small < %s | FileCheck %s --check-prefixes=SMALL32,SMALL
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \
+; RUN:     --code-model=large < %s | FileCheck %s --check-prefixes=LARGE32,LARGE
+
+ at TGInit = thread_local(localdynamic) global i32 42, align 4
+ at TGUninit = thread_local(localdynamic) global i32 0, align 4
+ at TIInit = internal thread_local(localdynamic) global i32 42, align 4
+ at TIUninit = internal thread_local(localdynamic) global i32 0, align 4
+ at TWInit = weak thread_local(localdynamic) global i32 42, align 4
+ at TWUninit = weak thread_local(localdynamic) global i32 0, align 4
+
+define i32 @loadTGInit() {
+; SMALL-LABEL:  loadTGInit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        lwz [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+;
+; LARGE-LABEL:  loadTGInit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        lwz [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGInit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define void @storeTGInit(i32 noundef signext %i) {
+; SMALL-LABEL:  storeTGInit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        stw [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+;
+; LARGE-LABEL:  storeTGInit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        stw [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGInit)
+  store i32 %i, ptr %0, align 4
+  ret void
+}
+
+define i32 @loadTGUninit() {
+; SMALL-LABEL:  loadTGUninit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        lwz [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+;
+; LARGE-LABEL:  loadTGUninit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        lwz [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGUninit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define void @storeTGUninit(i32 noundef signext %i) {
+; SMALL-LABEL:  storeTGUninit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        stw [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+;
+; LARGE-LABEL:  storeTGUninit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        stw [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGUninit)
+  store i32 %i, ptr %0, align 4
+  ret void
+}
+
+define i32 @loadTIInit() {
+; SMALL-LABEL:  loadTIInit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        lwz [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+;
+; LARGE-LABEL:  loadTIInit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        lwz [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIInit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define void @storeTIInit(i32 noundef signext %i) {
+; SMALL-LABEL:  storeTIInit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        stw [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+;
+; LARGE-LABEL:  storeTIInit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        stw [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIInit)
+  store i32 %i, ptr %0, align 4
+  ret void
+}
+
+define i32 @loadTIUninit() {
+; SMALL-LABEL:  loadTIUninit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        lwz [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+;
+; LARGE-LABEL:  loadTIUninit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        lwz [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIUninit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define void @storeTIUninit(i32 noundef signext %i) {
+; SMALL-LABEL:  storeTIUninit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        stw [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+;
+; LARGE-LABEL:  storeTIUninit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        stw [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIUninit)
+  store i32 %i, ptr %0, align 4
+  ret void
+}
+
+define i32 @loadTWInit() {
+; SMALL-LABEL:  loadTWInit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        lwz [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+;
+; LARGE-LABEL:  loadTWInit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        lwz [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWInit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define void @storeTWInit(i32 noundef signext %i) {
+; SMALL-LABEL:  storeTWInit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        stw [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+;
+; LARGE-LABEL:  storeTWInit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        stw [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWInit)
+  store i32 %i, ptr %0, align 4
+  ret void
+}
+
+define i32 @loadTWUninit() {
+; SMALL-LABEL:  loadTWUninit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        lwz [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+;
+; LARGE-LABEL:  loadTWUninit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        lwz [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWUninit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define void @storeTWUninit(i32 noundef signext %i) {
+; SMALL-LABEL:  storeTWUninit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        stw [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+;
+; LARGE-LABEL:  storeTWUninit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        stw [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWUninit)
+  store i32 %i, ptr %0, align 4
+  ret void
+}
+
+; SMALL:          .extern .__tls_get_mod[PR]
+; LARGE:          .extern .__tls_get_mod[PR]
+
+; SMALL:        [[TGInitL]]:
+; SMALL-NEXT:   .tc TGInit[TC],TGInit[TL]@ld
+; SMALL:        [[ModuleHandleL]]:
+; SMALL-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL:        [[TGUninitL]]:
+; SMALL-NEXT:   .tc TGUninit[TC],TGUninit[TL]@ld
+; SMALL:        [[TIInitL]]:
+; SMALL-NEXT:   .tc TIInit[TC],TIInit[TL]@ld
+; SMALL:        [[TIUninitL]]:
+; SMALL-NEXT:   .tc TIUninit[TC],TIUninit[UL]@ld
+; SMALL:        [[TWInitL]]:
+; SMALL-NEXT:   .tc TWInit[TC],TWInit[TL]@ld
+; SMALL:        [[TWUninitL]]:
+; SMALL-NEXT:   .tc TWUninit[TC],TWUninit[TL]@ld
+
+; LARGE:        [[TGInitL]]:
+; LARGE-NEXT:   .tc TGInit[TE],TGInit[TL]@ld
+; LARGE:        [[ModuleHandleL]]:
+; LARGE-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE:        [[TGUninitL]]:
+; LARGE-NEXT:   .tc TGUninit[TE],TGUninit[TL]@ld
+; LARGE:        [[TIInitL]]:
+; LARGE-NEXT:   .tc TIInit[TE],TIInit[TL]@ld
+; LARGE:        [[TIUninitL]]:
+; LARGE-NEXT:   .tc TIUninit[TE],TIUninit[UL]@ld
+; LARGE:        [[TWInitL]]:
+; LARGE-NEXT:   .tc TWInit[TE],TWInit[TL]@ld
+; LARGE:        [[TWUninitL]]:
+; LARGE-NEXT:   .tc TWUninit[TE],TWUninit[TL]@ld
+
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
index 2dae8ee96e20d19..3e9f28965bbe3d6 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
@@ -5,6 +5,7 @@
 ; RUN: llvm-objdump -D -r --symbol-description %t.o | FileCheck --check-prefix=DIS %s
 
 @GInit = global double 1.000000e+00, align 8
+; @TIInit is local-dynamic indeed
 @TIInit = internal thread_local global i64 1, align 8
 @TWInit = weak thread_local global double 1.000000e+00, align 8
 
@@ -32,7 +33,7 @@ entry:
 ; RELOC-NEXT:   Section (index: 1) .text {
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x16
-; RELOC-NEXT:     Symbol: .TIInit (17)
+; RELOC-NEXT:     Symbol: TIInit (19)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -44,19 +45,19 @@ entry:
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
-; RELOC-NEXT:     Type: R_TOCU (0x30)
+; RELOC-NEXT:     Type: R_TOCL (0x31)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x1E
-; RELOC-NEXT:     Symbol: .TIInit (17)
+; RELOC-NEXT:     Symbol: _$TLSML (21)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
-; RELOC-NEXT:     Type: R_TOCL (0x31)
+; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x22
-; RELOC-NEXT:     Symbol: TIInit (19)
+; RELOC-NEXT:     Symbol: _$TLSML (21)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -64,63 +65,63 @@ entry:
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x24
-; RELOC-NEXT:     Symbol: .__tls_get_addr (1)
+; RELOC-NEXT:     Symbol: .__tls_get_mod (1)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 26
 ; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x4E
-; RELOC-NEXT:     Symbol: .TWInit (21)
+; RELOC-NEXT:     Virtual Address: 0x5E
+; RELOC-NEXT:     Symbol: .TWInit (23)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x52
-; RELOC-NEXT:     Symbol: TWInit (23)
+; RELOC-NEXT:     Virtual Address: 0x62
+; RELOC-NEXT:     Symbol: TWInit (25)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x56
-; RELOC-NEXT:     Symbol: .TWInit (21)
+; RELOC-NEXT:     Virtual Address: 0x66
+; RELOC-NEXT:     Symbol: .TWInit (23)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOCL (0x31)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x5A
-; RELOC-NEXT:     Symbol: TWInit (23)
+; RELOC-NEXT:     Virtual Address: 0x6A
+; RELOC-NEXT:     Symbol: TWInit (25)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOCL (0x31)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x5C
-; RELOC-NEXT:     Symbol: .__tls_get_addr (1)
+; RELOC-NEXT:     Virtual Address: 0x6C
+; RELOC-NEXT:     Symbol: .__tls_get_addr (3)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 26
 ; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x66
-; RELOC-NEXT:     Symbol: GInit (25)
+; RELOC-NEXT:     Virtual Address: 0x76
+; RELOC-NEXT:     Symbol: GInit (27)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x6A
-; RELOC-NEXT:     Symbol: GInit (25)
+; RELOC-NEXT:     Virtual Address: 0x7A
+; RELOC-NEXT:     Symbol: GInit (27)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -129,72 +130,72 @@ entry:
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Section (index: 2) .data {
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x90
-; RELOC-NEXT:   Symbol: .storesTIInit (5)
+; RELOC-NEXT:   Virtual Address: 0xA0
+; RELOC-NEXT:   Symbol: .storesTIInit (7)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x94
-; RELOC-NEXT:   Symbol: TOC (15)
+; RELOC-NEXT:   Virtual Address: 0xA4
+; RELOC-NEXT:   Symbol: TOC (17)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x9C
-; RELOC-NEXT:   Symbol: .loadsTWInit (7)
+; RELOC-NEXT:   Virtual Address: 0xAC
+; RELOC-NEXT:   Symbol: .loadsTWInit (9)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xA0
-; RELOC-NEXT:   Symbol: TOC (15)
+; RELOC-NEXT:   Virtual Address: 0xB0
+; RELOC-NEXT:   Symbol: TOC (17)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xA8
-; RELOC-NEXT:   Symbol: TIInit (27)
+; RELOC-NEXT:   Virtual Address: 0xB8
+; RELOC-NEXT:   Symbol: TIInit (29)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLSM (0x24)
+; RELOC-NEXT:   Type: R_TLS_LD (0x22)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xAC
-; RELOC-NEXT:   Symbol: TIInit (27)
+; RELOC-NEXT:   Virtual Address: 0xBC
+; RELOC-NEXT:   Symbol: _$TLSML (21)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLS (0x20)
+; RELOC-NEXT:   Type: R_TLSML (0x25)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xB0
-; RELOC-NEXT:   Symbol: TWInit (29)
+; RELOC-NEXT:   Virtual Address: 0xC0
+; RELOC-NEXT:   Symbol: TWInit (31)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_TLSM (0x24)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xB4
-; RELOC-NEXT:   Symbol: TWInit (29)
+; RELOC-NEXT:   Virtual Address: 0xC4
+; RELOC-NEXT:   Symbol: TWInit (31)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_TLS (0x20)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xB8
-; RELOC-NEXT:   Symbol: GInit (9)
+; RELOC-NEXT:   Virtual Address: 0xC8
+; RELOC-NEXT:   Symbol: GInit (11)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
@@ -220,7 +221,7 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 1
-; SYM-NEXT:     Name: .__tls_get_addr
+; SYM-NEXT:     Name: .__tls_get_mod
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: N_UNDEF
 ; SYM-NEXT:     Type: 0x0
@@ -240,15 +241,35 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 3
-; SYM-NEXT:     Name:
+; SYM-NEXT:     Name: .__tls_get_addr
+; SYM-NEXT:     Value (RelocatableAddress): 0x0
+; SYM-NEXT:     Section: N_UNDEF
+; SYM-NEXT:     Type: 0x0
+; SYM-NEXT:     StorageClass: C_EXT (0x2)
+; SYM-NEXT:     NumberOfAuxEntries: 1
+; SYM-NEXT:     CSECT Auxiliary Entry {
+; SYM-NEXT:       Index: 4
+; SYM-NEXT:       SectionLen: 0
+; SYM-NEXT:       ParameterHashIndex: 0x0
+; SYM-NEXT:       TypeChkSectNum: 0x0
+; SYM-NEXT:       SymbolAlignmentLog2: 0
+; SYM-NEXT:       SymbolType: XTY_ER (0x0)
+; SYM-NEXT:       StorageMappingClass: XMC_PR (0x0)
+; SYM-NEXT:       StabInfoIndex: 0x0
+; SYM-NEXT:       StabSectNum: 0x0
+; SYM-NEXT:     }
+; SYM-NEXT:   }
+; SYM-NEXT:   Symbol {
+; SYM-NEXT:     Index: 5
+; SYM-NEXT:     Name: 
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 4
-; SYM-NEXT:       SectionLen: 132
+; SYM-NEXT:       Index: 6
+; SYM-NEXT:       SectionLen: 148
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 5
@@ -259,7 +280,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 5
+; SYM-NEXT:     Index: 7
 ; SYM-NEXT:     Name: .storesTIInit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .text
@@ -267,8 +288,8 @@ entry:
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 6
-; SYM-NEXT:       ContainingCsectSymbolIndex: 3
+; SYM-NEXT:       Index: 8
+; SYM-NEXT:       ContainingCsectSymbolIndex: 5
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -279,16 +300,16 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 7
+; SYM-NEXT:     Index: 9
 ; SYM-NEXT:     Name: .loadsTWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x40
+; SYM-NEXT:     Value (RelocatableAddress): 0x50
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 8
-; SYM-NEXT:       ContainingCsectSymbolIndex: 3
+; SYM-NEXT:       Index: 10
+; SYM-NEXT:       ContainingCsectSymbolIndex: 5
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -299,15 +320,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 9
+; SYM-NEXT:     Index: 11
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x88
+; SYM-NEXT:     Value (RelocatableAddress): 0x98
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 10
+; SYM-NEXT:       Index: 12
 ; SYM-NEXT:       SectionLen: 8
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -319,15 +340,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 11
+; SYM-NEXT:     Index: 13
 ; SYM-NEXT:     Name: storesTIInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x90
+; SYM-NEXT:     Value (RelocatableAddress): 0xA0
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 12
+; SYM-NEXT:       Index: 14
 ; SYM-NEXT:       SectionLen: 12
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -339,15 +360,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 13
+; SYM-NEXT:     Index: 15
 ; SYM-NEXT:     Name: loadsTWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x9C
+; SYM-NEXT:     Value (RelocatableAddress): 0xAC
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 14
+; SYM-NEXT:       Index: 16
 ; SYM-NEXT:       SectionLen: 12
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -359,15 +380,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 15
+; SYM-NEXT:     Index: 17
 ; SYM-NEXT:     Name: TOC
-; SYM-NEXT:     Value (RelocatableAddress): 0xA8
+; SYM-NEXT:     Value (RelocatableAddress): 0xB8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 16
+; SYM-NEXT:       Index: 18
 ; SYM-NEXT:       SectionLen: 0
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -379,15 +400,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 17
-; SYM-NEXT:     Name: .TIInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xA8
+; SYM-NEXT:     Index: 19
+; SYM-NEXT:     Name: TIInit
+; SYM-NEXT:     Value (RelocatableAddress): 0xB8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 18
+; SYM-NEXT:       Index: 20
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -399,35 +420,35 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 19
-; SYM-NEXT:     Name: TIInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xAC
+; SYM-NEXT:     Index: 21
+; SYM-NEXT:     Name: _$TLSML
+; SYM-NEXT:     Value (RelocatableAddress): 0xBC
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 20
+; SYM-NEXT:       Index: 22
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 2
 ; SYM-NEXT:       SymbolType: XTY_SD (0x1)
-; SYM-NEXT:       StorageMappingClass: XMC_TE (0x16)
+; SYM-NEXT:       StorageMappingClass: XMC_TC (0x3)
 ; SYM-NEXT:       StabInfoIndex: 0x0
 ; SYM-NEXT:       StabSectNum: 0x0
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 21
+; SYM-NEXT:     Index: 23
 ; SYM-NEXT:     Name: .TWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xB0
+; SYM-NEXT:     Value (RelocatableAddress): 0xC0
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 22
+; SYM-NEXT:       Index: 24
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -439,15 +460,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 23
+; SYM-NEXT:     Index: 25
 ; SYM-NEXT:     Name: TWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xB4
+; SYM-NEXT:     Value (RelocatableAddress): 0xC4
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 24
+; SYM-NEXT:       Index: 26
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -459,15 +480,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 25
+; SYM-NEXT:     Index: 27
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xB8
+; SYM-NEXT:     Value (RelocatableAddress): 0xC8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 26
+; SYM-NEXT:       Index: 28
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -479,7 +500,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 27
+; SYM-NEXT:     Index: 29
 ; SYM-NEXT:     Name: TIInit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .tdata
@@ -487,7 +508,7 @@ entry:
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 28
+; SYM-NEXT:       Index: 30
 ; SYM-NEXT:       SectionLen: 8
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -499,7 +520,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 29
+; SYM-NEXT:     Index: 31
 ; SYM-NEXT:     Name: TWInit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x8
 ; SYM-NEXT:     Section: .tdata
@@ -507,7 +528,7 @@ entry:
 ; SYM-NEXT:     StorageClass: C_WEAKEXT (0x6F)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 30
+; SYM-NEXT:       Index: 32
 ; SYM-NEXT:       SectionLen: 8
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -522,47 +543,48 @@ entry:
 
 ; DIS:      {{.*}}aix-tls-xcoff-reloc-large.ll.tmp.o:	file format aixcoff-rs6000
 ; DIS:      Disassembly of section .text:
-; DIS:      00000000 (idx: 5) .storesTIInit:
+; DIS:      00000000 (idx: 7) .storesTIInit:
 ; DIS-NEXT:                                       mflr 0
 ; DIS-NEXT:                                       stwu 1, -32(1)
 ; DIS-NEXT:                                       stw 0, 40(1)
-; DIS-NEXT:                                       mr 6, 4
 ; DIS-NEXT:                                       mr 7, 3
+; DIS-NEXT:                                       mr 6, 4
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 17) .TIInit[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 4, 2, 0
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 19) TIInit[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 0(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 17) .TIInit[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 4, 4(4)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 8, 0(3)
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 19) TIInit[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 21) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 4(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 21) _$TLSML[TC]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0
-; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA  (idx: 1)      .__tls_get_addr[PR]
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA  (idx: 1)      .__tls_get_mod[PR]
+; DIS-NEXT:                                       add 3, 3, 8
 ; DIS-NEXT:                                       stw 6, 4(3)
 ; DIS-NEXT:                                       stw 7, 0(3)
 ; DIS-NEXT:                                       addi 1, 1, 32
 ; DIS-NEXT:                                       lwz 0, 8(1)
 ; DIS-NEXT:                                       mtlr 0
 ; DIS-NEXT:                                       blr
-; DIS:      00000040 (idx: 7) .loadsTWInit:
+; DIS:      00000050 (idx: 9) .loadsTWInit:
 ; DIS-NEXT:                                       mflr 0
 ; DIS-NEXT:                                       stwu 1, -32(1)
 ; DIS-NEXT:                                       stw 0, 40(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 21) .TWInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 23) .TWInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 4, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 23) TWInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 25) TWInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 8(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 21) .TWInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 23) .TWInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 4, 12(4)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 23) TWInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 25) TWInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0
-; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA  (idx: 1)      .__tls_get_addr[PR]
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA  (idx: 3)      .__tls_get_addr[PR]
 ; DIS-NEXT:                                       lfd 0, 0(3)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 25) GInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 27) GInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 16(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 25) GInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 27) GInit[TE]
 ; DIS-NEXT:                                       lfd 1, 0(3)
 ; DIS-NEXT:                                       fadd 1, 0, 1
 ; DIS-NEXT:                                       addi 1, 1, 32
@@ -571,42 +593,42 @@ entry:
 ; DIS-NEXT:                                       blr
 
 ; DIS:      Disassembly of section .data:
-; DIS:      00000088  (idx: 9) GInit[RW]:
-; DIS-NEXT:       88: 3f f0 00 00
-; DIS-NEXT:       8c: 00 00 00 00
-; DIS:      00000090  (idx: 11) storesTIInit[DS]:
-; DIS-NEXT:       90: 00 00 00 00
-; DIS-NEXT: 00000090: R_POS (idx: 5) .storesTIInit
-; DIS-NEXT:       94: 00 00 00 a8
-; DIS-NEXT: 00000094: R_POS (idx: 15) TOC[TC0]
-; DIS-NEXT:       98: 00 00 00 00
-; DIS:      0000009c  (idx: 13) loadsTWInit[DS]:
-; DIS-NEXT:       9c: 00 00 00 40
-; DIS-NEXT: 0000009c: R_POS (idx: 7) .loadsTWInit
-; DIS-NEXT:       a0: 00 00 00 a8
-; DIS-NEXT: 000000a0: R_POS (idx: 15) TOC[TC0]
-; DIS-NEXT:       a4: 00 00 00 00
-; DIS:      000000a8  (idx: 17) .TIInit[TE]:
+; DIS:      00000098  (idx: 11) GInit[RW]:
+; DIS-NEXT:       98: 3f f0 00 00
+; DIS-NEXT:       9c: 00 00 00 00
+; DIS:      000000a0  (idx: 13) storesTIInit[DS]:
+; DIS-NEXT:       a0: 00 00 00 00
+; DIS-NEXT: 000000a0: R_POS (idx: 7) .storesTIInit
+; DIS-NEXT:       a4: 00 00 00 b8
+; DIS-NEXT: 000000a4: R_POS (idx: 17) TOC[TC0]
 ; DIS-NEXT:       a8: 00 00 00 00
-; DIS-NEXT: 000000a8: R_TLSM (idx: 27) TIInit[TL]
-; DIS:      000000ac  (idx: 19) TIInit[TE]:
-; DIS-NEXT:       ac: 00 00 00 00
-; DIS-NEXT: 000000ac: R_TLS (idx: 27) TIInit[TL]
-; DIS:      000000b0  (idx: 21) .TWInit[TE]:
-; DIS-NEXT:       b0: 00 00 00 00
-; DIS-NEXT: 000000b0: R_TLSM (idx: 29) TWInit[TL]
-; DIS:      000000b4  (idx: 23) TWInit[TE]:
-; DIS-NEXT:       b4: 00 00 00 08
-; DIS-NEXT: 000000b4: R_TLS (idx: 29) TWInit[TL]
-; DIS:      000000b8  (idx: 25) GInit[TE]:
-; DIS-NEXT:       b8: 00 00 00 88
-; DIS-NEXT: 000000b8: R_POS (idx: 9) GInit[RW]
+; DIS:      000000ac  (idx: 15) loadsTWInit[DS]:
+; DIS-NEXT:       ac: 00 00 00 50
+; DIS-NEXT: 000000ac: R_POS (idx: 9) .loadsTWInit
+; DIS-NEXT:       b0: 00 00 00 b8
+; DIS-NEXT: 000000b0: R_POS (idx: 17) TOC[TC0]
+; DIS-NEXT:       b4: 00 00 00 00
+; DIS:      000000b8  (idx: 19) TIInit[TE]:
+; DIS-NEXT:       b8: 00 00 00 00
+; DIS-NEXT: 000000b8: R_TLS_LD (idx: 29) TIInit[TL]
+; DIS:      000000bc  (idx: 21) _$TLSML[TC]:
+; DIS-NEXT:       bc: 00 00 00 00
+; DIS-NEXT: 000000bc: R_TLSML (idx: 21) _$TLSML[TC]
+; DIS:      000000c0  (idx: 23) .TWInit[TE]:
+; DIS-NEXT:       c0: 00 00 00 00
+; DIS-NEXT: 000000c0: R_TLSM (idx: 31) TWInit[TL]
+; DIS:      000000c4  (idx: 25) TWInit[TE]:
+; DIS-NEXT:       c4: 00 00 00 08
+; DIS-NEXT: 000000c4: R_TLS (idx: 31) TWInit[TL]
+; DIS:      000000c8  (idx: 27) GInit[TE]:
+; DIS-NEXT:       c8: 00 00 00 98
+; DIS-NEXT: 000000c8: R_POS (idx: 11) GInit[RW]
 
 ; DIS:      Disassembly of section .tdata:
-; DIS:      00000000  (idx: 27) TIInit[TL]:
+; DIS:      00000000  (idx: 29) TIInit[TL]:
 ; DIS-NEXT:        0: 00 00 00 00
 ; DIS-NEXT:        4: 00 00 00 01
-; DIS:      00000008  (idx: 29) TWInit[TL]:
+; DIS:      00000008  (idx: 31) TWInit[TL]:
 ; DIS-NEXT:        8: 3f f0 00 00
 ; DIS-NEXT:        c: 00 00 00 00
 
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
index 0779686b54f3ac0..ae1dae7955914b9 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
@@ -7,6 +7,7 @@
 @const_ivar = constant i32 6, align 4
 @GInit = global i32 1, align 4
 @TGInit = thread_local global i32 1, align 4
+; @TIUninit is local-dynamic indeed
 @TIUninit = internal thread_local global i32 0, align 4
 
 ; Function Attrs: nofree norecurse nounwind willreturn writeonly
@@ -32,16 +33,16 @@ entry:
 ; RELOC-NEXT: Relocations [
 ; RELOC-NEXT:   Section (index: 1) .text {
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0xE
-; RELOC-NEXT:     Symbol: .TIUninit (23)
+; RELOC-NEXT:     Virtual Address: 0x12
+; RELOC-NEXT:     Symbol: TIUninit (25)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x12
-; RELOC-NEXT:     Symbol: TIUninit (25)
+; RELOC-NEXT:     Virtual Address: 0x16
+; RELOC-NEXT:     Symbol: _$TLSML (27)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -49,39 +50,39 @@ entry:
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x18
-; RELOC-NEXT:     Symbol: .__tls_get_addr (1)
+; RELOC-NEXT:     Symbol: .__tls_get_mod (1)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 26
 ; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x3A
-; RELOC-NEXT:     Symbol: .TGInit (27)
+; RELOC-NEXT:     Virtual Address: 0x4A
+; RELOC-NEXT:     Symbol: .TGInit (29)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x3E
-; RELOC-NEXT:     Symbol: TGInit (29)
+; RELOC-NEXT:     Virtual Address: 0x4E
+; RELOC-NEXT:     Symbol: TGInit (31)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x44
-; RELOC-NEXT:     Symbol: .__tls_get_addr (1)
+; RELOC-NEXT:     Virtual Address: 0x54
+; RELOC-NEXT:     Symbol: .__tls_get_addr (3)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 26
 ; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x4A
-; RELOC-NEXT:     Symbol: GInit (31)
+; RELOC-NEXT:     Virtual Address: 0x5A
+; RELOC-NEXT:     Symbol: GInit (33)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -90,72 +91,72 @@ entry:
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Section (index: 2) .data {
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x70
-; RELOC-NEXT:   Symbol: .storesTIUninit (5)
+; RELOC-NEXT:   Virtual Address: 0x80
+; RELOC-NEXT:   Symbol: .storesTIUninit (7)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x74
-; RELOC-NEXT:   Symbol: TOC (21)
+; RELOC-NEXT:   Virtual Address: 0x84
+; RELOC-NEXT:   Symbol: TOC (23)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x7C
-; RELOC-NEXT:   Symbol: .loadsTGInit (7)
+; RELOC-NEXT:   Virtual Address: 0x8C
+; RELOC-NEXT:   Symbol: .loadsTGInit (9)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x80
-; RELOC-NEXT:   Symbol: TOC (21)
+; RELOC-NEXT:   Virtual Address: 0x90
+; RELOC-NEXT:   Symbol: TOC (23)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x88
-; RELOC-NEXT:   Symbol: TIUninit (37)
+; RELOC-NEXT:   Virtual Address: 0x98
+; RELOC-NEXT:   Symbol: TIUninit (39)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLSM (0x24)
+; RELOC-NEXT:   Type: R_TLS_LD (0x22)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x8C
-; RELOC-NEXT:   Symbol: TIUninit (37)
+; RELOC-NEXT:   Virtual Address: 0x9C
+; RELOC-NEXT:   Symbol: _$TLSML (27)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLS (0x20)
+; RELOC-NEXT:   Type: R_TLSML (0x25)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x90
-; RELOC-NEXT:   Symbol: TGInit (35)
+; RELOC-NEXT:   Virtual Address: 0xA0
+; RELOC-NEXT:   Symbol: TGInit (37)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_TLSM (0x24)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x94
-; RELOC-NEXT:   Symbol: TGInit (35)
+; RELOC-NEXT:   Virtual Address: 0xA4
+; RELOC-NEXT:   Symbol: TGInit (37)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_TLS (0x20)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x98
-; RELOC-NEXT:   Symbol: GInit (15)
+; RELOC-NEXT:   Virtual Address: 0xA8
+; RELOC-NEXT:   Symbol: GInit (17)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
@@ -181,7 +182,7 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 1
-; SYM-NEXT:     Name: .__tls_get_addr
+; SYM-NEXT:     Name: .__tls_get_mod
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: N_UNDEF
 ; SYM-NEXT:     Type: 0x0
@@ -201,15 +202,35 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 3
-; SYM-NEXT:     Name:
+; SYM-NEXT:     Name: .__tls_get_addr
+; SYM-NEXT:     Value (RelocatableAddress): 0x0
+; SYM-NEXT:     Section: N_UNDEF
+; SYM-NEXT:     Type: 0x0
+; SYM-NEXT:     StorageClass: C_EXT (0x2)
+; SYM-NEXT:     NumberOfAuxEntries: 1
+; SYM-NEXT:     CSECT Auxiliary Entry {
+; SYM-NEXT:       Index: 4
+; SYM-NEXT:       SectionLen: 0
+; SYM-NEXT:       ParameterHashIndex: 0x0
+; SYM-NEXT:       TypeChkSectNum: 0x0
+; SYM-NEXT:       SymbolAlignmentLog2: 0
+; SYM-NEXT:       SymbolType: XTY_ER (0x0)
+; SYM-NEXT:       StorageMappingClass: XMC_PR (0x0)
+; SYM-NEXT:       StabInfoIndex: 0x0
+; SYM-NEXT:       StabSectNum: 0x0
+; SYM-NEXT:     }
+; SYM-NEXT:   }
+; SYM-NEXT:   Symbol {
+; SYM-NEXT:     Index: 5
+; SYM-NEXT:     Name: 
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 4
-; SYM-NEXT:       SectionLen: 104
+; SYM-NEXT:       Index: 6
+; SYM-NEXT:       SectionLen: 120
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 5
@@ -220,7 +241,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 5
+; SYM-NEXT:     Index: 7
 ; SYM-NEXT:     Name: .storesTIUninit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .text
@@ -228,8 +249,8 @@ entry:
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 6
-; SYM-NEXT:       ContainingCsectSymbolIndex: 3
+; SYM-NEXT:       Index: 8
+; SYM-NEXT:       ContainingCsectSymbolIndex: 5
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -240,16 +261,16 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 7
+; SYM-NEXT:     Index: 9
 ; SYM-NEXT:     Name: .loadsTGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x30
+; SYM-NEXT:     Value (RelocatableAddress): 0x40
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 8
-; SYM-NEXT:       ContainingCsectSymbolIndex: 3
+; SYM-NEXT:       Index: 10
+; SYM-NEXT:       ContainingCsectSymbolIndex: 5
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -260,15 +281,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 9
+; SYM-NEXT:     Index: 11
 ; SYM-NEXT:     Name: .rodata
-; SYM-NEXT:     Value (RelocatableAddress): 0x68
+; SYM-NEXT:     Value (RelocatableAddress): 0x78
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 10
+; SYM-NEXT:       Index: 12
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -280,16 +301,16 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 11
+; SYM-NEXT:     Index: 13
 ; SYM-NEXT:     Name: const_ivar
-; SYM-NEXT:     Value (RelocatableAddress): 0x68
+; SYM-NEXT:     Value (RelocatableAddress): 0x78
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 12
-; SYM-NEXT:       ContainingCsectSymbolIndex: 9
+; SYM-NEXT:       Index: 14
+; SYM-NEXT:       ContainingCsectSymbolIndex: 11
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -300,15 +321,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 13
+; SYM-NEXT:     Index: 15
 ; SYM-NEXT:     Name: .data
-; SYM-NEXT:     Value (RelocatableAddress): 0x6C
+; SYM-NEXT:     Value (RelocatableAddress): 0x7C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 14
+; SYM-NEXT:       Index: 16
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -320,16 +341,16 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 15
+; SYM-NEXT:     Index: 17
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x6C
+; SYM-NEXT:     Value (RelocatableAddress): 0x7C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 16
-; SYM-NEXT:       ContainingCsectSymbolIndex: 13
+; SYM-NEXT:       Index: 18
+; SYM-NEXT:       ContainingCsectSymbolIndex: 15
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -340,15 +361,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 17
+; SYM-NEXT:     Index: 19
 ; SYM-NEXT:     Name: storesTIUninit
-; SYM-NEXT:     Value (RelocatableAddress): 0x70
+; SYM-NEXT:     Value (RelocatableAddress): 0x80
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 18
+; SYM-NEXT:       Index: 20
 ; SYM-NEXT:       SectionLen: 12
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -360,15 +381,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 19
+; SYM-NEXT:     Index: 21
 ; SYM-NEXT:     Name: loadsTGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x7C
+; SYM-NEXT:     Value (RelocatableAddress): 0x8C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 20
+; SYM-NEXT:       Index: 22
 ; SYM-NEXT:       SectionLen: 12
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -380,15 +401,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 21
+; SYM-NEXT:     Index: 23
 ; SYM-NEXT:     Name: TOC
-; SYM-NEXT:     Value (RelocatableAddress): 0x88
+; SYM-NEXT:     Value (RelocatableAddress): 0x98
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 22
+; SYM-NEXT:       Index: 24
 ; SYM-NEXT:       SectionLen: 0
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -400,15 +421,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 23
-; SYM-NEXT:     Name: .TIUninit
-; SYM-NEXT:     Value (RelocatableAddress): 0x88
+; SYM-NEXT:     Index: 25
+; SYM-NEXT:     Name: TIUninit
+; SYM-NEXT:     Value (RelocatableAddress): 0x98
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 24
+; SYM-NEXT:       Index: 26
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -420,15 +441,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 25
-; SYM-NEXT:     Name: TIUninit
-; SYM-NEXT:     Value (RelocatableAddress): 0x8C
+; SYM-NEXT:     Index: 27
+; SYM-NEXT:     Name: _$TLSML
+; SYM-NEXT:     Value (RelocatableAddress): 0x9C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 26
+; SYM-NEXT:       Index: 28
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -440,15 +461,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 27
+; SYM-NEXT:     Index: 29
 ; SYM-NEXT:     Name: .TGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x90
+; SYM-NEXT:     Value (RelocatableAddress): 0xA0
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 28
+; SYM-NEXT:       Index: 30
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -460,15 +481,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 29
+; SYM-NEXT:     Index: 31
 ; SYM-NEXT:     Name: TGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x94
+; SYM-NEXT:     Value (RelocatableAddress): 0xA4
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 30
+; SYM-NEXT:       Index: 32
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -480,15 +501,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 31
+; SYM-NEXT:     Index: 33
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x98
+; SYM-NEXT:     Value (RelocatableAddress): 0xA8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 32
+; SYM-NEXT:       Index: 34
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -500,7 +521,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 33
+; SYM-NEXT:     Index: 35
 ; SYM-NEXT:     Name: .tdata
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .tdata
@@ -508,7 +529,7 @@ entry:
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 34
+; SYM-NEXT:       Index: 36
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -520,7 +541,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 35
+; SYM-NEXT:     Index: 37
 ; SYM-NEXT:     Name: TGInit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .tdata
@@ -528,8 +549,8 @@ entry:
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 36
-; SYM-NEXT:       ContainingCsectSymbolIndex: 33
+; SYM-NEXT:       Index: 38
+; SYM-NEXT:       ContainingCsectSymbolIndex: 35
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -540,7 +561,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 37
+; SYM-NEXT:     Index: 39
 ; SYM-NEXT:     Name: TIUninit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x4
 ; SYM-NEXT:     Section: .tbss
@@ -548,7 +569,7 @@ entry:
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 38
+; SYM-NEXT:       Index: 40
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -563,34 +584,35 @@ entry:
 
 ; DIS:      {{.*}}aix-tls-xcoff-reloc.ll.tmp.o:	file format aixcoff-rs6000
 ; DIS:      Disassembly of section .text:
-; DIS:      00000000 (idx: 5) .storesTIUninit:
+; DIS:      00000000 (idx: 7) .storesTIUninit:
 ; DIS-NEXT:                                      mflr 0
 ; DIS-NEXT:                                      stwu 1, -32(1)
+; DIS-NEXT:                                      stw 0, 40(1)
 ; DIS-NEXT:                                      mr 6, 3
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 0(2)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 23) .TIUninit[TC]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 4, 4(2)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 7, 0(2)
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 25) TIUninit[TC]
-; DIS-NEXT:                                      stw 0, 40(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 4(2)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 27) _$TLSML[TC]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               bla 0
-; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1)      .__tls_get_addr[PR]
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1)      .__tls_get_mod[PR]
+; DIS-NEXT:                                      add 3, 3, 7
 ; DIS-NEXT:                                      stw 6, 0(3)
 ; DIS-NEXT:                                      addi 1, 1, 32
 ; DIS-NEXT:                                      lwz 0, 8(1)
 ; DIS-NEXT:                                      mtlr 0
 ; DIS-NEXT:                                      blr
-; DIS:      00000030 (idx: 7) .loadsTGInit:
+; DIS:      00000040 (idx: 9) .loadsTGInit:
 ; DIS-NEXT:                                      mflr 0
 ; DIS-NEXT:                                      stwu 1, -32(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 8(2)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 27) .TGInit[TC]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 29) .TGInit[TC]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 4, 12(2)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 29) TGInit[TC]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 31) TGInit[TC]
 ; DIS-NEXT:                                      stw 0, 40(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               bla 0
-; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1)      .__tls_get_addr[PR]
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 3)      .__tls_get_addr[PR]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 4, 16(2)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 31) GInit[TC]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 33) GInit[TC]
 ; DIS-NEXT:                                      lwz 3, 0(3)
 ; DIS-NEXT:                                      lwz 4, 0(4)
 ; DIS-NEXT:                                      add 3, 4, 3
@@ -598,46 +620,46 @@ entry:
 ; DIS-NEXT:                                      lwz 0, 8(1)
 ; DIS-NEXT:                                      mtlr 0
 ; DIS-NEXT:                                      blr
-; DIS:      00000068 (idx: 11) const_ivar:
-; DIS-NEXT:       68: 00 00 00 06
+; DIS:      00000078 (idx: 13) const_ivar:
+; DIS-NEXT:       78: 00 00 00 06
 
 ; DIS:      Disassembly of section .data:
-; DIS:      0000006c  (idx: 15) GInit:
-; DIS-NEXT:       6c: 00 00 00 01
-; DIS:      00000070  (idx: 17) storesTIUninit[DS]:
-; DIS-NEXT:       70: 00 00 00 00
-; DIS-NEXT: 00000070: R_POS (idx: 5) .storesTIUninit
-; DIS-NEXT:       74: 00 00 00 88
-; DIS-NEXT: 00000074: R_POS (idx: 21) TOC[TC0]
-; DIS-NEXT:       78: 00 00 00 00
-; DIS:      0000007c  (idx: 19) loadsTGInit[DS]:
-; DIS-NEXT:       7c: 00 00 00 30
-; DIS-NEXT: 0000007c: R_POS (idx: 7) .loadsTGInit
-; DIS-NEXT:       80: 00 00 00 88
-; DIS-NEXT: 00000080: R_POS (idx: 21) TOC[TC0]
-; DIS-NEXT:       84: 00 00 00 00
-; DIS:      00000088  (idx: 23) .TIUninit[TC]:
+; DIS:      0000007c  (idx: 17) GInit:
+; DIS-NEXT:       7c: 00 00 00 01
+; DIS:      00000080  (idx: 19) storesTIUninit[DS]:
+; DIS-NEXT:       80: 00 00 00 00
+; DIS-NEXT: 00000080: R_POS (idx: 7) .storesTIUninit
+; DIS-NEXT:       84: 00 00 00 98
+; DIS-NEXT: 00000084: R_POS (idx: 23) TOC[TC0]
 ; DIS-NEXT:       88: 00 00 00 00
-; DIS-NEXT: 00000088: R_TLSM (idx: 37) TIUninit[UL]
-; DIS:      0000008c  (idx: 25) TIUninit[TC]:
-; DIS-NEXT:       8c: 00 00 00 04
-; DIS-NEXT: 0000008c: R_TLS (idx: 37) TIUninit[UL]
-; DIS:      00000090  (idx: 27) .TGInit[TC]:
-; DIS-NEXT:       90: 00 00 00 00
-; DIS-NEXT: 00000090: R_TLSM (idx: 35) TGInit
-; DIS:      00000094  (idx: 29) TGInit[TC]:
+; DIS:      0000008c  (idx: 21) loadsTGInit[DS]:
+; DIS-NEXT:       8c: 00 00 00 40
+; DIS-NEXT: 0000008c: R_POS (idx: 9) .loadsTGInit
+; DIS-NEXT:       90: 00 00 00 98
+; DIS-NEXT: 00000090: R_POS (idx: 23) TOC[TC0]
 ; DIS-NEXT:       94: 00 00 00 00
-; DIS-NEXT: 00000094: R_TLS (idx: 35) TGInit
-; DIS:      00000098  (idx: 31) GInit[TC]:
-; DIS-NEXT:       98: 00 00 00 6c
-; DIS-NEXT: 00000098: R_POS (idx: 15) GInit
+; DIS:      00000098  (idx: 25) TIUninit[TC]:
+; DIS-NEXT:       98: 00 00 00 00
+; DIS-NEXT: 00000098: R_TLS_LD (idx: 39) TIUninit[UL]
+; DIS:      0000009c  (idx: 27) _$TLSML[TC]:
+; DIS-NEXT:       9c: 00 00 00 00
+; DIS-NEXT: 0000009c: R_TLSML (idx: 27) _$TLSML[TC]
+; DIS:      000000a0  (idx: 29) .TGInit[TC]:
+; DIS-NEXT:       a0: 00 00 00 00
+; DIS-NEXT: 000000a0: R_TLSM (idx: 37) TGInit
+; DIS:      000000a4  (idx: 31) TGInit[TC]:
+; DIS-NEXT:       a4: 00 00 00 00
+; DIS-NEXT: 000000a4: R_TLS (idx: 37) TGInit
+; DIS:      000000a8  (idx: 33) GInit[TC]:
+; DIS-NEXT:       a8: 00 00 00 7c
+; DIS-NEXT: 000000a8: R_POS (idx: 17) GInit
 
 ; DIS:      Disassembly of section .tdata:
-; DIS:      00000000 (idx: 35) TGInit:
+; DIS:      00000000 (idx: 37) TGInit:
 ; DIS-NEXT:        0: 00 00 00 01
 
 ; DIS:      Disassembly of section .tbss:
-; DIS:      00000004 (idx: 37) TIUninit[UL]:
+; DIS:      00000004 (idx: 39) TIUninit[UL]:
 ; DIS-NEXT: ...
 
 attributes #0 = { nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" }

>From 5426d5c9698024087a70b318c017c2a4ee736aa5 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Thu, 14 Sep 2023 03:46:38 -0400
Subject: [PATCH 2/4] address comments

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  4 ++--
 llvm/lib/Target/PowerPC/PPCISelLowering.h     | 12 ++++++------
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       |  2 +-
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 12 +++++++-----
 4 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 9297e84cedd7375..d7a7dc46064771a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1771,8 +1771,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
-  case PPCISD::GET_TLS_MOD:
-    return "PPCISD::GET_TLS_MOD";
+  case PPCISD::GET_TLS_MOD_AIX:
+    return "PPCISD::GET_TLS_MOD_AIX";
   case PPCISD::GET_TPOINTER:    return "PPCISD::GET_TPOINTER";
   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
   case PPCISD::TLSGD_AIX:       return "PPCISD::TLSGD_AIX";
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index cffe5c73fcd6d01..ca2713384845f25 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -375,16 +375,16 @@ namespace llvm {
     /// G8RC and inputs are X3/X4).
     TLSGD_AIX,
 
-    /// %x3 = GET_TLS_MOD _$TLSML - For the AIX local-dynamic TLS model,
+    /// %x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model,
     /// produces a call to __tls_get_mod(_$TLSML\@ml).
-    GET_TLS_MOD,
+    GET_TLS_MOD_AIX,
 
     /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(variable offset)
     /// Op that internally creates TOC entry for the "_$TLSML" symbol, generates
-    /// GET_TLS_MOD node which will be expanded into a call to __tls_get_mod,
-    /// and then add the variable offset with the result from the call.
-    /// This node is used in both 32-bit and 64-bit modes. The only difference
-    /// is register class.
+    /// GET_TLS_MOD_AIX node which will be expanded into a call to
+    /// __tls_get_mod, and then add the variable offset with the result from the
+    /// call. This node is used in both 32-bit and 64-bit modes. The only
+    /// difference is register class.
     TLSLD_AIX,
 
     /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index a0b37a484de976b..88d0b2bc27c78d0 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -213,7 +213,7 @@ def PPCaddTls     : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
 def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
 def PPCaddiTlsgdL   : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
 def PPCgetTlsAddr   : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
-def PPCgetTlsMod   : SDNode<"PPCISD::GET_TLS_MOD", SDTIntUnaryOp>;
+def PPCgetTlsMod   : SDNode<"PPCISD::GET_TLS_MOD_AIX", SDTIntUnaryOp>;
 def PPCgetTpointer : SDNode<"PPCISD::GET_TPOINTER", SDTIntLeaf, []>;
 def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR",
                                SDTypeProfile<1, 3, [
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 32a75a32b9d87f6..339c8d4ad383e7d 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -160,8 +160,10 @@ namespace {
         if (IsAIX) {
           if (MI.getOpcode() == PPC::TLSLDAIX8 ||
               MI.getOpcode() == PPC::TLSLDAIX) {
-            // For Local-Dynamic
-            auto &Subtarget = MBB.getParent()->getSubtarget<PPCSubtarget>();
+            // For Local-Dynamic, the module handle is copied in r3. The copy is
+            // followed by GETtlsMOD32AIX/GETtlsMOD64AIX.
+            const PPCSubtarget &Subtarget =
+                MBB.getParent()->getSubtarget<PPCSubtarget>();
             bool IsLargeModel =
                 Subtarget.getTargetMachine().getCodeModel() == CodeModel::Large;
             Register ModuleHandleHReg;
@@ -190,9 +192,9 @@ namespace {
                 .addReg(GPR3)
                 .addReg(MI.getOperand(1).getReg());
           } else {
-            // For Global-Dynamic
-            // The variable offset and region handle are copied in r4 and r3.
-            // The copies are followed by GETtlsADDR32AIX/GETtlsADDR64AIX.
+            // For Global-Dynamic, the variable offset and region handle are
+            // copied in r4 and r3. The copies are followed by
+            // GETtlsADDR32AIX/GETtlsADDR64AIX.
             if (!IsTLSTPRelMI) {
               BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
                   .addReg(MI.getOperand(1).getReg());

>From 29d3f3088794a7ecc2272e417f81cbf1b99f4bf7 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Mon, 18 Sep 2023 04:03:32 -0400
Subject: [PATCH 3/4] [NFC] address comments.

---
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     | 9 +++++----
 llvm/lib/Target/PowerPC/PPCISelLowering.h     | 8 ++++----
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 2 +-
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 1c34dc3fefceaba..b88aece7596335c 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -677,10 +677,11 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
     // in R3, need to generate branch to .__tls_get_mod.
     Register VarOffsetReg = Subtarget->isPPC64() ? PPC::X4 : PPC::R4;
     (void)VarOffsetReg;
-    assert((MI->getNumExplicitOperands() < 3 ||
-            (MI->getOperand(2).isReg() &&
-             MI->getOperand(2).getReg() == VarOffsetReg)) &&
-           "GETtls[ld]ADDR[32] must read GPR4");
+    assert(MI->getOpcode() == PPC::GETtlsMOD32AIX ||
+           MI->getOpcode() == PPC::GETtlsMOD64AIX ||
+           (MI->getOperand(2).isReg() &&
+            MI->getOperand(2).getReg() == VarOffsetReg) &&
+               "GETtls[ld]ADDR[32] must read GPR4");
     EmitAIXTlsCallHelper(MI);
     return;
   }
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index ca2713384845f25..dde61ff7c1310be 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -370,20 +370,20 @@ namespace llvm {
     /// G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY
     /// Op that combines two register copies of TOC entries
     /// (region handle into R3 and variable offset into R4) followed by a
-    /// GET_TLS_ADDR node which will be expanded to a call to __tls_get_addr.
+    /// GET_TLS_ADDR node which will be expanded to a call to .__tls_get_addr.
     /// This node is used in 64-bit mode as well (in which case the result is
     /// G8RC and inputs are X3/X4).
     TLSGD_AIX,
 
     /// %x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model,
-    /// produces a call to __tls_get_mod(_$TLSML\@ml).
+    /// produces a call to .__tls_get_mod(_$TLSML\@ml).
     GET_TLS_MOD_AIX,
 
     /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(variable offset)
     /// Op that internally creates TOC entry for the "_$TLSML" symbol, generates
     /// GET_TLS_MOD_AIX node which will be expanded into a call to
-    /// __tls_get_mod, and then add the variable offset with the result from the
-    /// call. This node is used in both 32-bit and 64-bit modes. The only
+    /// .__tls_get_mod, and then add the variable offset with the result from
+    /// the call. This node is used in both 32-bit and 64-bit modes. The only
     /// difference is register class.
     TLSLD_AIX,
 
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 339c8d4ad383e7d..3fd57f4159d3cd8 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -186,7 +186,7 @@ namespace {
                             : Register(Subtarget.getTOCPointerRegister()));
             BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
                 .addReg(MHReg);
-            // The call to __tls_get_mod.
+            // The call to .__tls_get_mod.
             BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3);
             BuildMI(MBB, I, DL, TII->get(Is64Bit ? PPC::ADD8 : PPC::ADD4), GPR3)
                 .addReg(GPR3)

>From 06992e43b09be1404c0e8289e01ce65c6f36ee47 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Wed, 20 Sep 2023 03:44:10 -0400
Subject: [PATCH 4/4] [NFC] address comments

---
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp   | 18 +++++++++---------
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 14 ++++++--------
 2 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index b88aece7596335c..211a5290939b38d 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -671,17 +671,17 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
          "GETtls[ld]ADDR[32] must read GPR3");
 
   if (Subtarget->isAIXABI()) {
-    // On AIX, for TLSGD the variable offset should already be in R4 and the
-    // region handle should already be in R3, need to generate an absolute
-    // branch to .__tls_get_addr. For TLSLD the module handle should already be
-    // in R3, need to generate branch to .__tls_get_mod.
+    // For TLSGD, the variable offset should already be in R4 and the region
+    // handle should already be in R3, generate absolute branch to
+    // .__tls_get_addr. For TLSLD, the module handle should already be in R3,
+    // generate branch to .__tls_get_mod.
     Register VarOffsetReg = Subtarget->isPPC64() ? PPC::X4 : PPC::R4;
     (void)VarOffsetReg;
-    assert(MI->getOpcode() == PPC::GETtlsMOD32AIX ||
-           MI->getOpcode() == PPC::GETtlsMOD64AIX ||
-           (MI->getOperand(2).isReg() &&
-            MI->getOperand(2).getReg() == VarOffsetReg) &&
-               "GETtls[ld]ADDR[32] must read GPR4");
+    assert((MI->getOpcode() == PPC::GETtlsMOD32AIX ||
+            MI->getOpcode() == PPC::GETtlsMOD64AIX ||
+            (MI->getOperand(2).isReg() &&
+             MI->getOperand(2).getReg() == VarOffsetReg)) &&
+           "GETtls[ld]ADDR[32] must read GPR4");
     EmitAIXTlsCallHelper(MI);
     return;
   }
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index d7a7dc46064771a..38d7aad6ec517fb 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3427,14 +3427,12 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
     return DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, VariableOffset);
   }
 
-  // The Local-Exec, Initial-Exec, Local-Dynamic, and General-Dynamic TLS models
-  // are currently supported access models. If Local- or Initial-exec or
-  // local-dynamic is not possible or specified, all GlobalTLSAddress nodes are
-  // lowered using the general-dynamic model. We need to generate two TOC
-  // entries, one for the variable offset, one for the region handle. The global
-  // address for the TOC entry of the region handle is created with the
-  // MO_TLSGDM_FLAG flag and the global address for the TOC entry of the
-  // variable offset is created with MO_TLSGD_FLAG.
+  // If Local- or Initial-exec or Local-dynamic is not possible or specified,
+  // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
+  // need to generate two TOC entries, one for the variable offset, one for the
+  // region handle. The global address for the TOC entry of the region handle is
+  // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
+  // entry of the variable offset is created with MO_TLSGD_FLAG.
   SDValue VariableOffsetTGA =
       DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
   SDValue RegionHandleTGA =



More information about the llvm-commits mailing list