[llvm] [clang] [PowerPC] Support local-dynamic TLS relocation on AIX (PR #66316)

Felix via cfe-commits cfe-commits at lists.llvm.org
Thu Dec 7 03:49:27 PST 2023


https://github.com/orcguru updated https://github.com/llvm/llvm-project/pull/66316

>From adede7e11fb1dbf84a4c453194743b433ce4eb76 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Wed, 13 Sep 2023 22:21:11 -0400
Subject: [PATCH 01/21] [PowerPC] Support local-dynamic TLS relocation on AIX

---
 llvm/include/llvm/MC/MCExpr.h                 |   2 +
 llvm/lib/MC/MCExpr.cpp                        |   4 +
 .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp  |   9 +-
 .../MCTargetDesc/PPCXCOFFObjectWriter.cpp     |   4 +
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     |  90 ++++-
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  30 +-
 llvm/lib/Target/PowerPC/PPCISelLowering.h     |  14 +-
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td      |  13 +-
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       |  15 +-
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp |  73 +++-
 .../test/CodeGen/PowerPC/aix-tls-gd-double.ll |  90 +++--
 llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll   |  96 +++--
 .../CodeGen/PowerPC/aix-tls-gd-longlong.ll    | 298 +++++++-------
 .../CodeGen/PowerPC/aix-tls-local-dynamic.ll  | 364 ++++++++++++++++++
 .../PowerPC/aix-tls-xcoff-reloc-large.ll      | 286 +++++++-------
 .../CodeGen/PowerPC/aix-tls-xcoff-reloc.ll    | 288 +++++++-------
 16 files changed, 1149 insertions(+), 527 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll

diff --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h
index 67836292874f5..f3bc0491fd2f1 100644
--- a/llvm/include/llvm/MC/MCExpr.h
+++ b/llvm/include/llvm/MC/MCExpr.h
@@ -301,6 +301,8 @@ class MCSymbolRefExpr : public MCExpr {
     VK_PPC_AIX_TLSGDM,      // symbol@m
     VK_PPC_AIX_TLSIE,       // symbol@ie
     VK_PPC_AIX_TLSLE,       // symbol@le
+    VK_PPC_AIX_TLSLD,       // symbol@ld
+    VK_PPC_AIX_TLSML,       // symbol@ml
     VK_PPC_GOT_TLSLD,       // symbol@got@tlsld
     VK_PPC_GOT_TLSLD_LO,    // symbol@got@tlsld@l
     VK_PPC_GOT_TLSLD_HI,    // symbol@got@tlsld@h
diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index 73e6569f96e46..bc1bb9b806305 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -331,6 +331,10 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
     return "ie";
   case VK_PPC_AIX_TLSLE:
     return "le";
+  case VK_PPC_AIX_TLSLD:
+    return "ld";
+  case VK_PPC_AIX_TLSML:
+    return "ml";
   case VK_PPC_GOT_TLSLD: return "got@tlsld";
   case VK_PPC_GOT_TLSLD_LO: return "got@tlsld@l";
   case VK_PPC_GOT_TLSLD_HI: return "got@tlsld@h";
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index a804dd823daa4..22cd2fc03ef7c 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -231,12 +231,15 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer {
       MCSymbolXCOFF *TCSym =
           cast<MCSectionXCOFF>(Streamer.getCurrentSectionOnly())
               ->getQualNameSymbol();
-      // On AIX, we have a region handle (symbol@m) and the variable offset
-      // (symbol@{gd|ie|le}) for TLS variables, depending on the TLS model.
+      // On AIX, we have a region handle (symbol@m), module handle
+      // (__TLSML[TC]@ml) and the variable offset (symbol@{gd|ie|le|ld}) for TLS
+      // variables, depending on the TLS model.
       if (Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSIE ||
-          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLE)
+          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLE ||
+          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD ||
+          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML)
         OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << "@"
            << MCSymbolRefExpr::getVariantKindName(Kind) << '\n';
       else
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
index 065daf42fe6eb..f4998e9b9dcba 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
@@ -116,6 +116,10 @@ std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize(
       return {XCOFF::RelocationType::R_TLS_IE, SignAndSizeForFKData};
     case MCSymbolRefExpr::VK_PPC_AIX_TLSLE:
       return {XCOFF::RelocationType::R_TLS_LE, SignAndSizeForFKData};
+    case MCSymbolRefExpr::VK_PPC_AIX_TLSLD:
+      return {XCOFF::RelocationType::R_TLS_LD, SignAndSizeForFKData};
+    case MCSymbolRefExpr::VK_PPC_AIX_TLSML:
+      return {XCOFF::RelocationType::R_TLSML, SignAndSizeForFKData};
     case MCSymbolRefExpr::VK_None:
       return {XCOFF::RelocationType::R_POS, SignAndSizeForFKData};
     }
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 4b551bc51c4f0..1c197521f409a 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -613,12 +613,23 @@ void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) {
     EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP));
 }
 
-/// This helper function creates the TlsGetAddr MCSymbol for AIX. We will
-/// create the csect and use the qual-name symbol instead of creating just the
-/// external symbol.
+/// This helper function creates the TlsGetAddr/TlsGetMod MCSymbol for AIX. We
+/// will create the csect and use the qual-name symbol instead of creating just
+/// the external symbol.
 static MCSymbol *createMCSymbolForTlsGetAddr(MCContext &Ctx, unsigned MIOpc) {
-  StringRef SymName =
-      MIOpc == PPC::GETtlsTpointer32AIX ? ".__get_tpointer" : ".__tls_get_addr";
+  StringRef SymName;
+  switch (MIOpc) {
+  default:
+    SymName = ".__tls_get_addr";
+    break;
+  case PPC::GETtlsTpointer32AIX:
+    SymName = ".__get_tpointer";
+    break;
+  case PPC::GETtlsMOD32AIX:
+  case PPC::GETtlsMOD64AIX:
+    SymName = ".__tls_get_mod";
+    break;
+  }
   return Ctx
       .getXCOFFSection(SymName, SectionKind::getText(),
                        XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER))
@@ -660,14 +671,15 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
          "GETtls[ld]ADDR[32] must read GPR3");
 
   if (Subtarget->isAIXABI()) {
-    // On AIX, the variable offset should already be in R4 and the region handle
-    // should already be in R3.
-    // For TLSGD, which currently is the only supported access model, we only
-    // need to generate an absolute branch to .__tls_get_addr.
+    // On AIX, for TLSGD the variable offset should already be in R4 and the
+    // region handle should already be in R3, so we only need to generate an
+    // absolute branch to .__tls_get_addr. For TLSLD the module handle should
+    // already be in R3, so we only need to generate a branch to .__tls_get_mod.
     Register VarOffsetReg = Subtarget->isPPC64() ? PPC::X4 : PPC::R4;
     (void)VarOffsetReg;
-    assert(MI->getOperand(2).isReg() &&
-           MI->getOperand(2).getReg() == VarOffsetReg &&
+    assert((MI->getNumExplicitOperands() < 3 ||
+            (MI->getOperand(2).isReg() &&
+             MI->getOperand(2).getReg() == VarOffsetReg)) &&
            "GETtls[ld]ADDR[32] must read GPR4");
     EmitAIXTlsCallHelper(MI);
     return;
@@ -710,6 +722,8 @@ static MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO,
     return AP.GetJTISymbol(MO.getIndex());
   case MachineOperand::MO_BlockAddress:
     return AP.GetBlockAddressSymbol(MO.getBlockAddress());
+  case MachineOperand::MO_ExternalSymbol:
+    return AP.OutContext.getOrCreateSymbol(MO.getSymbolName());
   default:
     llvm_unreachable("Unexpected operand type to get symbol.");
   }
@@ -743,6 +757,16 @@ getTOCEntryTypeForMO(const MachineOperand &MO) {
     llvm_unreachable("Unexpected operand type to get TOC type.");
   }
 }
+
+// On AIX, TLS-local-dynamic requires that the symbol for the module handle be
+// named "_$TLSML". This symbol is a TOC symbol that references itself with
+// the ML relocation type, thus it has "[TC]" attached to its name.
+static inline bool isSpecialAIXSymbolTLSML(const MachineOperand &MO,
+                                           const bool IsAIX) {
+  return IsAIX && MO.isSymbol() &&
+         (std::strcmp(MO.getSymbolName(), "_$TLSML[TC]") == 0);
+}
+
 /// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
 /// the current output stream.
 ///
@@ -835,6 +859,15 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM;
     if (Flag == PPCII::MO_TLSGD_FLAG || Flag == PPCII::MO_GOT_TLSGD_PCREL_FLAG)
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD;
+    if (MO.getTargetFlags() & PPCII::MO_TLSLD_FLAG) {
+      if (isSpecialAIXSymbolTLSML(MO, IsAIX))
+        // FIXME: On AIX the ML relocation type is only valid for a reference to
+        // a TOC symbol from the symbol itself, and right now its only user is
+        // symbol "_$TLSML". Use symbol name to decide that R_TLSML is expected.
+        return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML;
+      if (IsAIX)
+        return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD;
+    }
     return MCSymbolRefExpr::VariantKind::VK_None;
   };
 
@@ -953,7 +986,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LWZ);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
+            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
            "Invalid operand for LWZtoc.");
 
     // Map the operand to its corresponding MCSymbol.
@@ -1042,7 +1076,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LD);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
+            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
            "Invalid operand!");
 
     // Map the operand to its corresponding MCSymbol.
@@ -1080,7 +1115,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::ADDIS);
 
     const MachineOperand &MO = MI->getOperand(2);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
+            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
            "Invalid operand for ADDIStocHA.");
 
     // Map the machine operand to its corresponding MCSymbol.
@@ -1113,7 +1149,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LWZ);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
+            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
            "Invalid operand for LWZtocL.");
 
     // Map the machine operand to its corresponding MCSymbol.
@@ -1145,7 +1182,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::ADDIS8);
 
     const MachineOperand &MO = MI->getOperand(2);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
+            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
            "Invalid operand for ADDIStocHA8!");
 
     const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
@@ -1155,7 +1193,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     const bool GlobalToc =
         MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal());
     if (GlobalToc || MO.isJTI() || MO.isBlockAddress() ||
-        (MO.isCPI() && TM.getCodeModel() == CodeModel::Large))
+        (MO.isCPI() && TM.getCodeModel() == CodeModel::Large) ||
+        isSpecialAIXSymbolTLSML(MO, IsAIX))
       MOSymbol = lookUpOrCreateTOCEntry(MOSymbol, getTOCEntryTypeForMO(MO), VK);
 
     VK = IsAIX ? MCSymbolRefExpr::VK_PPC_U : MCSymbolRefExpr::VK_PPC_TOC_HA;
@@ -1184,8 +1223,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LD);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() ||
-            MO.isBlockAddress()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
+            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
            "Invalid operand for LDtocL!");
 
     LLVM_DEBUG(assert(
@@ -1351,6 +1390,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
   case PPC::GETtlsADDRPCREL:
   case PPC::GETtlsADDR32AIX:
   case PPC::GETtlsADDR64AIX:
+  case PPC::GETtlsMOD32AIX:
+  case PPC::GETtlsMOD64AIX:
     // Transform: %r3 = GETtlsADDRNNAIX %r3, %r4 (for NN == 32/64).
     // Into: BLA .__tls_get_addr()
     // Unlike on Linux, there is no symbol or relocation needed for this call.
@@ -2719,6 +2760,15 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
       MCSymbol *S = OutContext.getOrCreateSymbol(Name);
       TCEntry = cast<MCSectionXCOFF>(
           getObjFileLowering().getSectionForTOCEntry(S, TM));
+    } else if (I.first.second ==
+               MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML) {
+      // AIX assembler expects TC storage-mapping class for the "_$TLSML"
+      // symbol.
+      MCSection *MCSect = getObjFileLowering().getContext().getXCOFFSection(
+          cast<MCSymbolXCOFF>(I.first.first)->getSymbolTableName(),
+          SectionKind::getData(),
+          XCOFF::CsectProperties(XCOFF::XMC_TC, XCOFF::XTY_SD));
+      TCEntry = cast<MCSectionXCOFF>(MCSect);
     } else {
       TCEntry = cast<MCSectionXCOFF>(
           getObjFileLowering().getSectionForTOCEntry(I.first.first, TM));
@@ -2835,6 +2885,8 @@ void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) {
 		 MMI->hasDebugInfo());
     break;
   }
+  case PPC::GETtlsMOD32AIX:
+  case PPC::GETtlsMOD64AIX:
   case PPC::GETtlsTpointer32AIX:
   case PPC::GETtlsADDR64AIX:
   case PPC::GETtlsADDR32AIX: {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index acaf98b62fd94..9d0430ea2e515 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1772,9 +1772,13 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
+  case PPCISD::GET_TLS_MOD:
+    return "PPCISD::GET_TLS_MOD";
   case PPCISD::GET_TPOINTER:    return "PPCISD::GET_TPOINTER";
   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
   case PPCISD::TLSGD_AIX:       return "PPCISD::TLSGD_AIX";
+  case PPCISD::TLSLD_AIX:
+    return "PPCISD::TLSLD_AIX";
   case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
   case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
   case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
@@ -3413,13 +3417,25 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
   }
 
-  // Only Local-Exec, Initial-Exec and General-Dynamic TLS models are currently
-  // supported models. If Local- or Initial-exec are not possible or specified,
-  // all GlobalTLSAddress nodes are lowered using the general-dynamic model.
-  // We need to generate two TOC entries, one for the variable offset, one for
-  // the region handle. The global address for the TOC entry of the region
-  // handle is created with the MO_TLSGDM_FLAG flag and the global address
-  // for the TOC entry of the variable offset is created with MO_TLSGD_FLAG.
+  if (Model == TLSModel::LocalDynamic) {
+    // For local-dynamic on AIX, we need to generate two TOC entries, one for
+    // the variable offset, the other for the module handle. The module handle
+    // is encapsulated inside the TLSLD_AIX pseudo node, and will be expanded by
+    // PPCTLSDynamicCall.
+    SDValue VariableOffsetTGA =
+        DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
+    SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
+    return DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, VariableOffset);
+  }
+
+  // The Local-Exec, Initial-Exec, Local-Dynamic, and General-Dynamic TLS models
+  // are currently supported access models. If Local- or Initial-exec or
+  // local-dynamic is not possible or specified, all GlobalTLSAddress nodes are
+  // lowered using the general-dynamic model. We need to generate two TOC
+  // entries, one for the variable offset, one for the region handle. The global
+  // address for the TOC entry of the region handle is created with the
+  // MO_TLSGDM_FLAG flag and the global address for the TOC entry of the
+  // variable offset is created with MO_TLSGD_FLAG.
   SDValue VariableOffsetTGA =
       DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
   SDValue RegionHandleTGA =
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index d8679dcf40180..7f80f70d699a6 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -370,11 +370,23 @@ namespace llvm {
     /// G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY
     /// Op that combines two register copies of TOC entries
     /// (region handle into R3 and variable offset into R4) followed by a
-    /// GET_TLS_ADDR node which will be expanded to a call to __get_tls_addr.
+    /// GET_TLS_ADDR node which will be expanded to a call to __tls_get_addr.
     /// This node is used in 64-bit mode as well (in which case the result is
     /// G8RC and inputs are X3/X4).
     TLSGD_AIX,
 
+    /// %x3 = GET_TLS_MOD _$TLSML - For the AIX local-dynamic TLS model,
+    /// produces a call to __tls_get_mod(_$TLSML\@ml).
+    GET_TLS_MOD,
+
+    /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(variable offset)
+    /// Op that internally creates TOC entry for the "_$TLSML" symbol, generates
+    /// GET_TLS_MOD node which will be expanded into a call to __tls_get_mod,
+    /// and then add the variable offset with the result from the call.
+    /// This node is used in both 32-bit and 64-bit modes. The only difference
+    /// is register class.
+    TLSLD_AIX,
+
     /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
     /// model, produces an ADDIS8 instruction that adds the GOT base
     /// register to sym\@got\@tlsld\@ha.
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 0322bb37b1fdf..e4fee9c2fff23 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1557,12 +1557,19 @@ def GETtlsldADDRPCREL : GETtlsldADDRPseudo <"#GETtlsldADDRPCREL">;
 // so we don't need to mark it with a size of 8 bytes. Finally, the assembly
 // manual mentions this exact set of registers as the clobbered set, others
 // are guaranteed not to be clobbered.
-let Defs = [X0,X4,X5,X11,LR8,CR0] in
+let Defs = [X0,X4,X5,X11,LR8,CR0] in {
 def GETtlsADDR64AIX :
   PPCEmitTimePseudo<(outs g8rc:$rD),(ins g8rc:$offset, g8rc:$handle),
                     "GETtlsADDR64AIX",
                     [(set i64:$rD,
                       (PPCgetTlsAddr i64:$offset, i64:$handle))]>, isPPC64;
+// On AIX, the call to __tls_get_mod needs one input in X3: the module handle.
+def GETtlsMOD64AIX :
+  PPCEmitTimePseudo<(outs g8rc:$rD),(ins g8rc:$handle),
+                    "GETtlsMOD64AIX",
+                    [(set i64:$rD,
+                      (PPCgetTlsMod i64:$handle))]>, isPPC64;
+}
 }
 
 // Combined op for ADDItlsgdL and GETtlsADDR, late expanded.  X3 and LR8
@@ -1595,6 +1602,10 @@ def TLSGDAIX8 :
                      "#TLSGDAIX8",
                      [(set i64:$rD,
                        (PPCTlsgdAIX i64:$offset, i64:$handle))]>;
+// This pseudo is expanded to one copy to put the module handle in R3, then call
+// GETtlsMOD64AIX, and then add variable offset to the output from the call.
+def TLSLDAIX8 : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$handle),
+                     "#TLSLDAIX8", [(set i64:$rD, (PPCTlsldAIX i64:$handle))]>;
 // Combined op for ADDItlsldL and GETtlsADDR, late expanded.  X3 and LR8
 // are true defines, while the rest of the Defs are clobbers.
 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index a97062e0c643f..7dedf17adae70 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -213,12 +213,14 @@ def PPCaddTls     : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
 def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
 def PPCaddiTlsgdL   : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
 def PPCgetTlsAddr   : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
+def PPCgetTlsMod   : SDNode<"PPCISD::GET_TLS_MOD", SDTIntUnaryOp>;
 def PPCgetTpointer : SDNode<"PPCISD::GET_TPOINTER", SDTIntLeaf, []>;
 def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR",
                                SDTypeProfile<1, 3, [
                                  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                                  SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>;
 def PPCTlsgdAIX     : SDNode<"PPCISD::TLSGD_AIX", SDTIntBinOp>;
+def PPCTlsldAIX     : SDNode<"PPCISD::TLSLD_AIX", SDTIntUnaryOp>;
 def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
 def PPCaddiTlsldL   : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
 def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
@@ -3245,11 +3247,16 @@ def GETtlsADDR32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$s
 // The rest of the Defs are the exact set of registers that will be clobbered by
 // the call.
 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
-    Defs = [R0,R4,R5,R11,LR,CR0] in
+    Defs = [R0,R4,R5,R11,LR,CR0] in {
 def GETtlsADDR32AIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$offset, gprc:$handle),
                           "GETtlsADDR32AIX",
                           [(set i32:$rD,
                             (PPCgetTlsAddr i32:$offset, i32:$handle))]>;
+def GETtlsMOD32AIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$handle),
+                          "GETtlsMOD32AIX",
+                          [(set i32:$rD,
+                            (PPCgetTlsMod i32:$handle))]>;
+}
 
 // For local-exec accesses on 32-bit AIX, a call to .__get_tpointer is
 // generated to retrieve the thread pointer. GETtlsTpointer32AIX clobbers both
@@ -3289,6 +3296,12 @@ def TLSGDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$offset, gprc:$handl
                           "#TLSGDAIX",
                           [(set i32:$rD,
                             (PPCTlsgdAIX i32:$offset, i32:$handle))]>;
+// This pseudo is expanded to one copy to put the module handle in R3, then call
+// GETtlsMOD32AIX, and then add variable offset to the output from the call.
+def TLSLDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$handle),
+                          "#TLSLDAIX",
+                          [(set i32:$rD,
+                            (PPCTlsldAIX i32:$handle))]>;
 // LR is a true define, while the rest of the Defs are clobbers.  R3 is
 // explicitly defined when this op is created, so not mentioned here.
 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 9518d5347065c..32a75a32b9d87 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -51,6 +51,11 @@ namespace {
       bool Is64Bit = MBB.getParent()->getSubtarget<PPCSubtarget>().isPPC64();
       bool IsAIX = MBB.getParent()->getSubtarget<PPCSubtarget>().isAIXABI();
       bool IsPCREL = false;
+      MachineFunction *MF = MBB.getParent();
+      MachineRegisterInfo &RegInfo = MF->getRegInfo();
+      const TargetRegisterClass *GPRNoZero =
+          Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
+                  : &PPC::GPRC_and_GPRC_NOR0RegClass;
 
       for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
            I != IE;) {
@@ -64,6 +69,8 @@ namespace {
             MI.getOpcode() != PPC::ADDItlsldLADDR &&
             MI.getOpcode() != PPC::ADDItlsgdLADDR32 &&
             MI.getOpcode() != PPC::ADDItlsldLADDR32 &&
+            MI.getOpcode() != PPC::TLSLDAIX &&
+            MI.getOpcode() != PPC::TLSLDAIX8 &&
             MI.getOpcode() != PPC::TLSGDAIX &&
             MI.getOpcode() != PPC::TLSGDAIX8 && !IsTLSTPRelMI && !IsPCREL) {
           // Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP
@@ -109,6 +116,12 @@ namespace {
           Opc1 = PPC::ADDItlsldL32;
           Opc2 = PPC::GETtlsldADDR32;
           break;
+        case PPC::TLSLDAIX:
+          Opc2 = PPC::GETtlsMOD32AIX;
+          break;
+        case PPC::TLSLDAIX8:
+          Opc2 = PPC::GETtlsMOD64AIX;
+          break;
         case PPC::TLSGDAIX8:
           // TLSGDAIX8 is expanded to two copies and GET_TLS_ADDR, so we only
           // set Opc2 here.
@@ -145,19 +158,55 @@ namespace {
                                                               .addImm(0);
 
         if (IsAIX) {
-          // The variable offset and region handle are copied in r4 and r3. The
-          // copies are followed by GETtlsADDR32AIX/GETtlsADDR64AIX.
-          if (!IsTLSTPRelMI) {
-            BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
-                .addReg(MI.getOperand(1).getReg());
+          if (MI.getOpcode() == PPC::TLSLDAIX8 ||
+              MI.getOpcode() == PPC::TLSLDAIX) {
+            // For Local-Dynamic
+            auto &Subtarget = MBB.getParent()->getSubtarget<PPCSubtarget>();
+            bool IsLargeModel =
+                Subtarget.getTargetMachine().getCodeModel() == CodeModel::Large;
+            Register ModuleHandleHReg;
+            unsigned LDTocOp =
+                Is64Bit ? (IsLargeModel ? PPC::LDtocL : PPC::LDtoc)
+                        : (IsLargeModel ? PPC::LWZtocL : PPC::LWZtoc);
+            if (IsLargeModel) {
+              ModuleHandleHReg = RegInfo.createVirtualRegister(GPRNoZero);
+              BuildMI(MBB, I, DL,
+                      TII->get(Is64Bit ? PPC::ADDIStocHA8 : PPC::ADDIStocHA),
+                      ModuleHandleHReg)
+                  .addReg(Subtarget.getTOCPointerRegister())
+                  .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG);
+            }
+            Register MHReg = RegInfo.createVirtualRegister(GPRNoZero);
+            BuildMI(MBB, I, DL, TII->get(LDTocOp), MHReg)
+                .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG)
+                .addReg(IsLargeModel
+                            ? ModuleHandleHReg
+                            : Register(Subtarget.getTOCPointerRegister()));
             BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
-                .addReg(MI.getOperand(2).getReg());
-            BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3).addReg(GPR4);
-          } else
-            // The opcode of GETtlsTpointer32AIX does not change, because later
-            // this instruction will be expanded into a call to .__get_tpointer,
-            // which will return the thread pointer into r3.
-            BuildMI(MBB, I, DL, TII->get(Opc2), GPR3);
+                .addReg(MHReg);
+            // The call to __tls_get_mod.
+            BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3);
+            BuildMI(MBB, I, DL, TII->get(Is64Bit ? PPC::ADD8 : PPC::ADD4), GPR3)
+                .addReg(GPR3)
+                .addReg(MI.getOperand(1).getReg());
+          } else {
+            // For Global-Dynamic
+            // The variable offset and region handle are copied in r4 and r3.
+            // The copies are followed by GETtlsADDR32AIX/GETtlsADDR64AIX.
+            if (!IsTLSTPRelMI) {
+              BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
+                  .addReg(MI.getOperand(1).getReg());
+              BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
+                  .addReg(MI.getOperand(2).getReg());
+              BuildMI(MBB, I, DL, TII->get(Opc2), GPR3)
+                  .addReg(GPR3)
+                  .addReg(GPR4);
+            } else
+              // The opcode of GETtlsTpointer32AIX does not change, because
+              // later this instruction will be expanded into a call to
+              // .__get_tpointer, which will return the thread pointer into r3.
+              BuildMI(MBB, I, DL, TII->get(Opc2), GPR3);
+          }
         } else {
           MachineInstr *Addi;
           if (IsPCREL) {
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
index c0ffb8154c691..13bef83cf50fd 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
@@ -156,10 +156,11 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsgdm) @TIInit
-; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsgd) @TIInit
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
+; SMALL32-NEXT:    lwz 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL32-NEXT:    lwz 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    add 3, 3, 6
 ; SMALL32-NEXT:    stfd 1, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
@@ -172,10 +173,11 @@ define void @storesTIInit(double %Val) #0 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4@u(2)
-; LARGE32-NEXT:    addis 4, L..C5@u(2)
-; LARGE32-NEXT:    lwz 3, L..C4@l(3)
-; LARGE32-NEXT:    lwz 4, L..C5@l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 6, L..C4@l(3)
+; LARGE32-NEXT:    addis 3, L..C5@u(2)
+; LARGE32-NEXT:    lwz 3, L..C5@l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 6
 ; LARGE32-NEXT:    stfd 1, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
@@ -186,10 +188,11 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsgdm) @TIInit
-; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsgd) @TIInit
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL64-NEXT:    ld 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL64-NEXT:    ld 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    add 3, 3, 6
 ; SMALL64-NEXT:    stfd 1, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -201,11 +204,12 @@ define void @storesTIInit(double %Val) #0 {
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C4@u(2)
-; LARGE64-NEXT:    addis 4, L..C5@u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C4@l(3)
-; LARGE64-NEXT:    ld 4, L..C5@l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
+; LARGE64-NEXT:    ld 6, L..C4@l(3)
+; LARGE64-NEXT:    addis 3, L..C5@u(2)
+; LARGE64-NEXT:    ld 3, L..C5@l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    add 3, 3, 6
 ; LARGE64-NEXT:    stfd 1, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -452,11 +456,12 @@ define double @loadsTIInit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsgdm) @TIInit
-; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsgd) @TIInit
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
+; SMALL32-NEXT:    lwz 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL32-NEXT:    lwz 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C8(2) # @GInit
+; SMALL32-NEXT:    add 3, 3, 6
 ; SMALL32-NEXT:    lfd 0, 0(3)
 ; SMALL32-NEXT:    lfd 1, 0(4)
 ; SMALL32-NEXT:    fadd 1, 0, 1
@@ -471,10 +476,11 @@ define double @loadsTIInit() #1 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4@u(2)
-; LARGE32-NEXT:    addis 4, L..C5@u(2)
-; LARGE32-NEXT:    lwz 3, L..C4@l(3)
-; LARGE32-NEXT:    lwz 4, L..C5@l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 6, L..C4@l(3)
+; LARGE32-NEXT:    addis 3, L..C5@u(2)
+; LARGE32-NEXT:    lwz 3, L..C5@l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 6
 ; LARGE32-NEXT:    lfd 0, 0(3)
 ; LARGE32-NEXT:    addis 3, L..C8@u(2)
 ; LARGE32-NEXT:    lwz 3, L..C8@l(3)
@@ -489,11 +495,12 @@ define double @loadsTIInit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsgdm) @TIInit
-; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsgd) @TIInit
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL64-NEXT:    ld 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL64-NEXT:    ld 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C8(2) # @GInit
+; SMALL64-NEXT:    add 3, 3, 6
 ; SMALL64-NEXT:    lfd 0, 0(3)
 ; SMALL64-NEXT:    lfd 1, 0(4)
 ; SMALL64-NEXT:    fadd 1, 0, 1
@@ -507,11 +514,12 @@ define double @loadsTIInit() #1 {
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C4@u(2)
-; LARGE64-NEXT:    addis 4, L..C5@u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C4@l(3)
-; LARGE64-NEXT:    ld 4, L..C5@l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
+; LARGE64-NEXT:    ld 6, L..C4@l(3)
+; LARGE64-NEXT:    addis 3, L..C5@u(2)
+; LARGE64-NEXT:    ld 3, L..C5@l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    add 3, 3, 6
 ; LARGE64-NEXT:    addis 4, L..C8@u(2)
 ; LARGE64-NEXT:    lfd 0, 0(3)
 ; LARGE64-NEXT:    ld 3, L..C8@l(4)
@@ -610,12 +618,16 @@ entry:
   ret double %add
 }
 
-; External symbol reference checks for .__tls_get_addr
+; External symbol reference checks for .__tls_get_addr/.__tls_get_mod
 
 ; SMALL32: .extern .__tls_get_addr[PR]
+; SMALL32: .extern .__tls_get_mod[PR]
 ; SMALL64: .extern .__tls_get_addr[PR]
+; SMALL64: .extern .__tls_get_mod[PR]
 ; LARGE32: .extern .__tls_get_addr[PR]
+; LARGE32: .extern .__tls_get_mod[PR]
 ; LARGE64: .extern .__tls_get_addr[PR]
+; LARGE64: .extern .__tls_get_mod[PR]
 
 ; TOC entry checks
 
@@ -629,9 +641,10 @@ entry:
 ; SMALL32-LABEL:  L..C3:
 ; SMALL32-NEXT:   .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL32-LABEL:  L..C4:
-; SMALL32-NEXT:   .tc .TIInit[TC],TIInit[TL]@m
+; SMALL32-NEXT:   .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL32-LABEL:  L..C5:
-; SMALL32-NEXT:   .tc TIInit[TC],TIInit[TL]@gd
+; SMALL32-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL32-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; SMALL32-LABEL:  L..C6:
 ; SMALL32-NEXT:   .tc .TWInit[TC],TWInit[TL]@m
 ; SMALL32-LABEL:  L..C7:
@@ -649,9 +662,10 @@ entry:
 ; LARGE32-LABEL:  L..C3:
 ; LARGE32-NEXT:   .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL:  L..C4:
-; LARGE32-NEXT:   .tc .TIInit[TE],TIInit[TL]@m
+; LARGE32-NEXT:   .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE32-LABEL:  L..C5:
-; LARGE32-NEXT:   .tc TIInit[TE],TIInit[TL]@gd
+; LARGE32-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE32-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; LARGE32-LABEL:  L..C6:
 ; LARGE32-NEXT:   .tc .TWInit[TE],TWInit[TL]@m
 ; LARGE32-LABEL:  L..C7:
@@ -669,9 +683,10 @@ entry:
 ; SMALL64-LABEL:  L..C3:
 ; SMALL64-NEXT:  .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL64-LABEL:  L..C4:
-; SMALL64-NEXT:  .tc .TIInit[TC],TIInit[TL]@m
+; SMALL64-NEXT:  .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL64-LABEL:  L..C5:
-; SMALL64-NEXT:  .tc TIInit[TC],TIInit[TL]@gd
+; SMALL64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; SMALL64-LABEL:  L..C6:
 ; SMALL64-NEXT:  .tc .TWInit[TC],TWInit[TL]@m
 ; SMALL64-LABEL:  L..C7:
@@ -689,9 +704,10 @@ entry:
 ; LARGE64-LABEL:  L..C3:
 ; LARGE64-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE64-LABEL:  L..C4:
-; LARGE64-NEXT:  .tc .TIInit[TE],TIInit[TL]@m
+; LARGE64-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE64-LABEL:  L..C5:
-; LARGE64-NEXT:  .tc TIInit[TE],TIInit[TL]@gd
+; LARGE64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; LARGE64-LABEL:  L..C6:
 ; LARGE64-NEXT:  .tc .TWInit[TE],TWInit[TL]@m
 ; LARGE64-LABEL:  L..C7:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
index 887c4521a4c90..dc75db43d38c7 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
@@ -163,11 +163,12 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    mr 6, 3
-; SMALL32-NEXT:    lwz 3, L..C4(2)
-; SMALL32-NEXT:    lwz 4, L..C5(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
+; SMALL32-NEXT:    mr 6, 3
+; SMALL32-NEXT:    lwz 7, L..C4(2)
+; SMALL32-NEXT:    lwz 3, L..C5(2)
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    add 3, 3, 7
 ; SMALL32-NEXT:    stw 6, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
@@ -181,10 +182,11 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 6, 3
 ; LARGE32-NEXT:    addis 3, L..C4@u(2)
-; LARGE32-NEXT:    addis 4, L..C5@u(2)
-; LARGE32-NEXT:    lwz 3, L..C4@l(3)
-; LARGE32-NEXT:    lwz 4, L..C5@l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 7, L..C4@l(3)
+; LARGE32-NEXT:    addis 3, L..C5@u(2)
+; LARGE32-NEXT:    lwz 3, L..C5@l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 7
 ; LARGE32-NEXT:    stw 6, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
@@ -195,11 +197,12 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    mr 6, 3
-; SMALL64-NEXT:    ld 3, L..C4(2)
-; SMALL64-NEXT:    ld 4, L..C5(2)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL64-NEXT:    mr 6, 3
+; SMALL64-NEXT:    ld 7, L..C4(2)
+; SMALL64-NEXT:    ld 3, L..C5(2)
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    add 3, 3, 7
 ; SMALL64-NEXT:    stw 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -212,11 +215,12 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C4@u(2)
-; LARGE64-NEXT:    addis 4, L..C5@u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C4@l(3)
-; LARGE64-NEXT:    ld 4, L..C5@l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
+; LARGE64-NEXT:    ld 7, L..C4@l(3)
+; LARGE64-NEXT:    addis 3, L..C5@u(2)
+; LARGE64-NEXT:    ld 3, L..C5@l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    add 3, 3, 7
 ; LARGE64-NEXT:    stw 6, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -467,11 +471,12 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2)
-; SMALL32-NEXT:    lwz 4, L..C5(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
+; SMALL32-NEXT:    lwz 6, L..C4(2)
+; SMALL32-NEXT:    lwz 3, L..C5(2)
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    add 3, 3, 6
 ; SMALL32-NEXT:    lwz 3, 0(3)
 ; SMALL32-NEXT:    lwz 4, 0(4)
 ; SMALL32-NEXT:    add 3, 4, 3
@@ -486,10 +491,11 @@ define i32 @loadsTIUninit() #1 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4@u(2)
-; LARGE32-NEXT:    addis 4, L..C5@u(2)
-; LARGE32-NEXT:    lwz 3, L..C4@l(3)
-; LARGE32-NEXT:    lwz 4, L..C5@l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 6, L..C4@l(3)
+; LARGE32-NEXT:    addis 3, L..C5@u(2)
+; LARGE32-NEXT:    lwz 3, L..C5@l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 6
 ; LARGE32-NEXT:    lwz 3, 0(3)
 ; LARGE32-NEXT:    addis 4, L..C8@u(2)
 ; LARGE32-NEXT:    lwz 4, L..C8@l(4)
@@ -504,11 +510,12 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C4(2)
-; SMALL64-NEXT:    ld 4, L..C5(2)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL64-NEXT:    ld 6, L..C4(2)
+; SMALL64-NEXT:    ld 3, L..C5(2)
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    add 3, 3, 6
 ; SMALL64-NEXT:    lwz 3, 0(3)
 ; SMALL64-NEXT:    lwz 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -522,14 +529,15 @@ define i32 @loadsTIUninit() #1 {
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C4@u(2)
-; LARGE64-NEXT:    addis 4, L..C5@u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C4@l(3)
-; LARGE64-NEXT:    ld 4, L..C5@l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
+; LARGE64-NEXT:    ld 6, L..C4@l(3)
+; LARGE64-NEXT:    addis 3, L..C5@u(2)
+; LARGE64-NEXT:    ld 3, L..C5@l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    addis 4, L..C8@u(2)
-; LARGE64-NEXT:    lwz 3, 0(3)
+; LARGE64-NEXT:    add 3, 3, 6
 ; LARGE64-NEXT:    ld 4, L..C8@l(4)
+; LARGE64-NEXT:    lwz 3, 0(3)
 ; LARGE64-NEXT:    lwz 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -625,12 +633,16 @@ entry:
   ret i32 %add
 }
 
-; External symbol reference checks for .__tls_get_addr
+; External symbol reference checks for .__tls_get_addr/.__tls_get_mod
 
 ; SMALL32: .extern .__tls_get_addr[PR]
+; SMALL32: .extern .__tls_get_mod[PR]
 ; SMALL64: .extern .__tls_get_addr[PR]
+; SMALL64: .extern .__tls_get_mod[PR]
 ; LARGE32: .extern .__tls_get_addr[PR]
+; LARGE32: .extern .__tls_get_mod[PR]
 ; LARGE64: .extern .__tls_get_addr[PR]
+; LARGE64: .extern .__tls_get_mod[PR]
 
 ; TOC entry checks
 
@@ -644,9 +656,10 @@ entry:
 ; SMALL32-LABEL: L..C3:
 ; SMALL32-NEXT:	 .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL32-LABEL: L..C4:
-; SMALL32-NEXT:	 .tc .TIUninit[TC],TIUninit[UL]@m
+; SMALL32-NEXT:	 .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL32-LABEL: L..C5:
-; SMALL32-NEXT:	 .tc TIUninit[TC],TIUninit[UL]@gd
+; SMALL32-NEXT:	 .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL32-NEXT:	 .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; SMALL32-LABEL: L..C6:
 ; SMALL32-NEXT:	 .tc .TWUninit[TC],TWUninit[TL]@m
 ; SMALL32-LABEL: L..C7:
@@ -664,9 +677,10 @@ entry:
 ; LARGE32-LABEL: L..C3:
 ; LARGE32-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL: L..C4:
-; LARGE32-NEXT:  .tc .TIUninit[TE],TIUninit[UL]@m
+; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE32-LABEL: L..C5:
-; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@gd
+; LARGE32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; LARGE32-LABEL: L..C6:
 ; LARGE32-NEXT:  .tc .TWUninit[TE],TWUninit[TL]@m
 ; LARGE32-LABEL: L..C7:
@@ -684,9 +698,10 @@ entry:
 ; SMALL64-LABEL:  L..C3:
 ; SMALL64-NEXT:   .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL64-LABEL:  L..C4:
-; SMALL64-NEXT:   .tc .TIUninit[TC],TIUninit[UL]@m
+; SMALL64-NEXT:   .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL64-LABEL:  L..C5:
-; SMALL64-NEXT:   .tc TIUninit[TC],TIUninit[UL]@gd
+; SMALL64-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL64-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; SMALL64-LABEL:  L..C6:
 ; SMALL64-NEXT:   .tc .TWUninit[TC],TWUninit[TL]@m
 ; SMALL64-LABEL:  L..C7:
@@ -704,9 +719,10 @@ entry:
 ; LARGE64-LABEL:  L..C3:
 ; LARGE64-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE64-LABEL:  L..C4:
-; LARGE64-NEXT:  .tc .TIUninit[TE],TIUninit[UL]@m
+; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE64-LABEL:  L..C5:
-; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@gd
+; LARGE64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; LARGE64-LABEL:  L..C6:
 ; LARGE64-NEXT:  .tc .TWUninit[TE],TWUninit[TL]@m
 ; LARGE64-LABEL:  L..C7:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
index 47813b59ba804..d19b5ad9b5e50 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
@@ -97,12 +97,13 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    mr 6, 4
-; SMALL32-NEXT:    mr 7, 3
-; SMALL32-NEXT:    lwz 3, L..C2(2)
-; SMALL32-NEXT:    lwz 4, L..C3(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
+; SMALL32-NEXT:    mr 7, 3
+; SMALL32-NEXT:    lwz 8, L..C2(2)
+; SMALL32-NEXT:    mr 6, 4
+; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    add 3, 3, 8
 ; SMALL32-NEXT:    stw 6, 4(3)
 ; SMALL32-NEXT:    stw 7, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
@@ -115,13 +116,14 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    mr 7, 3
+; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    addis 3, L..C2@u(2)
-; LARGE32-NEXT:    addis 4, L..C3@u(2)
-; LARGE32-NEXT:    lwz 3, L..C2@l(3)
-; LARGE32-NEXT:    lwz 4, L..C3@l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 8, L..C2@l(3)
+; LARGE32-NEXT:    addis 3, L..C3@u(2)
+; LARGE32-NEXT:    lwz 3, L..C3@l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 8
 ; LARGE32-NEXT:    stw 6, 4(3)
 ; LARGE32-NEXT:    stw 7, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
@@ -133,11 +135,12 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    mr 6, 3
-; SMALL64-NEXT:    ld 3, L..C2(2)
-; SMALL64-NEXT:    ld 4, L..C3(2)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL64-NEXT:    mr 6, 3
+; SMALL64-NEXT:    ld 7, L..C2(2)
+; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    add 3, 3, 7
 ; SMALL64-NEXT:    std 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -150,11 +153,12 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C2@u(2)
-; LARGE64-NEXT:    addis 4, L..C3@u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C2@l(3)
-; LARGE64-NEXT:    ld 4, L..C3@l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
+; LARGE64-NEXT:    ld 7, L..C2@l(3)
+; LARGE64-NEXT:    addis 3, L..C3@u(2)
+; LARGE64-NEXT:    ld 3, L..C3@l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    add 3, 3, 7
 ; LARGE64-NEXT:    std 6, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -171,12 +175,13 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    mr 6, 4
-; SMALL32-NEXT:    mr 7, 3
-; SMALL32-NEXT:    lwz 3, L..C4(2)
-; SMALL32-NEXT:    lwz 4, L..C5(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
+; SMALL32-NEXT:    mr 7, 3
+; SMALL32-NEXT:    lwz 8, L..C4(2)
+; SMALL32-NEXT:    mr 6, 4
+; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    add 3, 3, 8
 ; SMALL32-NEXT:    stw 6, 4(3)
 ; SMALL32-NEXT:    stw 7, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
@@ -189,13 +194,14 @@ define void @storesTIInit(i64 %Val) #0 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    mr 7, 3
+; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    addis 3, L..C4@u(2)
-; LARGE32-NEXT:    addis 4, L..C5@u(2)
-; LARGE32-NEXT:    lwz 3, L..C4@l(3)
-; LARGE32-NEXT:    lwz 4, L..C5@l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 8, L..C4@l(3)
+; LARGE32-NEXT:    addis 3, L..C3@u(2)
+; LARGE32-NEXT:    lwz 3, L..C3@l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 8
 ; LARGE32-NEXT:    stw 6, 4(3)
 ; LARGE32-NEXT:    stw 7, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
@@ -207,11 +213,12 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    mr 6, 3
-; SMALL64-NEXT:    ld 3, L..C4(2)
-; SMALL64-NEXT:    ld 4, L..C5(2)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL64-NEXT:    mr 6, 3
+; SMALL64-NEXT:    ld 7, L..C4(2)
+; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    add 3, 3, 7
 ; SMALL64-NEXT:    std 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -224,11 +231,12 @@ define void @storesTIInit(i64 %Val) #0 {
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C4@u(2)
-; LARGE64-NEXT:    addis 4, L..C5@u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C4@l(3)
-; LARGE64-NEXT:    ld 4, L..C5@l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
+; LARGE64-NEXT:    ld 7, L..C4@l(3)
+; LARGE64-NEXT:    addis 3, L..C3@u(2)
+; LARGE64-NEXT:    ld 3, L..C3@l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    add 3, 3, 7
 ; LARGE64-NEXT:    std 6, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -247,8 +255,8 @@ define void @storesTWInit(i64 %Val) #0 {
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    mr 6, 4
 ; SMALL32-NEXT:    mr 7, 3
-; SMALL32-NEXT:    lwz 3, L..C6(2)
-; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    lwz 3, L..C5(2)
+; SMALL32-NEXT:    lwz 4, L..C6(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL32-NEXT:    stw 6, 4(3)
@@ -265,10 +273,10 @@ define void @storesTWInit(i64 %Val) #0 {
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    mr 7, 3
-; LARGE32-NEXT:    addis 3, L..C6@u(2)
-; LARGE32-NEXT:    addis 4, L..C7@u(2)
-; LARGE32-NEXT:    lwz 3, L..C6@l(3)
-; LARGE32-NEXT:    lwz 4, L..C7@l(4)
+; LARGE32-NEXT:    addis 3, L..C5@u(2)
+; LARGE32-NEXT:    addis 4, L..C6@u(2)
+; LARGE32-NEXT:    lwz 3, L..C5@l(3)
+; LARGE32-NEXT:    lwz 4, L..C6@l(4)
 ; LARGE32-NEXT:    bla .__tls_get_addr[PR]
 ; LARGE32-NEXT:    stw 6, 4(3)
 ; LARGE32-NEXT:    stw 7, 0(3)
@@ -282,8 +290,8 @@ define void @storesTWInit(i64 %Val) #0 {
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    mr 6, 3
-; SMALL64-NEXT:    ld 3, L..C6(2)
-; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    ld 3, L..C5(2)
+; SMALL64-NEXT:    ld 4, L..C6(2)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL64-NEXT:    std 6, 0(3)
@@ -297,11 +305,11 @@ define void @storesTWInit(i64 %Val) #0 {
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    mr 6, 3
-; LARGE64-NEXT:    addis 3, L..C6@u(2)
-; LARGE64-NEXT:    addis 4, L..C7@u(2)
+; LARGE64-NEXT:    addis 3, L..C5@u(2)
+; LARGE64-NEXT:    addis 4, L..C6@u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C6@l(3)
-; LARGE64-NEXT:    ld 4, L..C7@l(4)
+; LARGE64-NEXT:    ld 3, L..C5@l(3)
+; LARGE64-NEXT:    ld 4, L..C6@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
 ; LARGE64-NEXT:    std 6, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -323,7 +331,7 @@ define i64 @loadsTGInit() #1 {
 ; SMALL32-NEXT:    lwz 4, L..C1(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    lwz 4, L..C7(2)
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -347,8 +355,8 @@ define i64 @loadsTGInit() #1 {
 ; LARGE32-NEXT:    bla .__tls_get_addr[PR]
 ; LARGE32-NEXT:    lwz 4, 4(3)
 ; LARGE32-NEXT:    lwz 3, 0(3)
-; LARGE32-NEXT:    addis 5, L..C8@u(2)
-; LARGE32-NEXT:    lwz 5, L..C8@l(5)
+; LARGE32-NEXT:    addis 5, L..C7@u(2)
+; LARGE32-NEXT:    lwz 5, L..C7@l(5)
 ; LARGE32-NEXT:    lwz 6, 4(5)
 ; LARGE32-NEXT:    lwz 5, 0(5)
 ; LARGE32-NEXT:    addc 4, 6, 4
@@ -366,7 +374,7 @@ define i64 @loadsTGInit() #1 {
 ; SMALL64-NEXT:    ld 4, L..C1(2)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    ld 4, L..C7(2)
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -385,9 +393,9 @@ define i64 @loadsTGInit() #1 {
 ; LARGE64-NEXT:    ld 3, L..C0@l(3)
 ; LARGE64-NEXT:    ld 4, L..C1@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
-; LARGE64-NEXT:    addis 4, L..C8@u(2)
+; LARGE64-NEXT:    addis 4, L..C7@u(2)
 ; LARGE64-NEXT:    ld 3, 0(3)
-; LARGE64-NEXT:    ld 4, L..C8@l(4)
+; LARGE64-NEXT:    ld 4, L..C7@l(4)
 ; LARGE64-NEXT:    ld 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -407,11 +415,12 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C2(2)
-; SMALL32-NEXT:    lwz 4, L..C3(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    lwz 6, L..C2(2)
+; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    add 3, 3, 6
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -429,14 +438,15 @@ define i64 @loadsTIUninit() #1 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C2@u(2)
-; LARGE32-NEXT:    addis 4, L..C3@u(2)
-; LARGE32-NEXT:    lwz 3, L..C2@l(3)
-; LARGE32-NEXT:    lwz 4, L..C3@l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 6, L..C2@l(3)
+; LARGE32-NEXT:    addis 3, L..C3@u(2)
+; LARGE32-NEXT:    lwz 3, L..C3@l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 6
 ; LARGE32-NEXT:    lwz 4, 4(3)
 ; LARGE32-NEXT:    lwz 3, 0(3)
-; LARGE32-NEXT:    addis 5, L..C8@u(2)
-; LARGE32-NEXT:    lwz 5, L..C8@l(5)
+; LARGE32-NEXT:    addis 5, L..C7@u(2)
+; LARGE32-NEXT:    lwz 5, L..C7@l(5)
 ; LARGE32-NEXT:    lwz 6, 4(5)
 ; LARGE32-NEXT:    lwz 5, 0(5)
 ; LARGE32-NEXT:    addc 4, 6, 4
@@ -450,11 +460,12 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C2(2)
-; SMALL64-NEXT:    ld 4, L..C3(2)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    ld 6, L..C2(2)
+; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    add 3, 3, 6
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -468,14 +479,15 @@ define i64 @loadsTIUninit() #1 {
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C2@u(2)
-; LARGE64-NEXT:    addis 4, L..C3@u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C2@l(3)
-; LARGE64-NEXT:    ld 4, L..C3@l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
-; LARGE64-NEXT:    addis 4, L..C8@u(2)
+; LARGE64-NEXT:    ld 6, L..C2@l(3)
+; LARGE64-NEXT:    addis 3, L..C3@u(2)
+; LARGE64-NEXT:    ld 3, L..C3@l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    addis 4, L..C7@u(2)
+; LARGE64-NEXT:    add 3, 3, 6
+; LARGE64-NEXT:    ld 4, L..C7@l(4)
 ; LARGE64-NEXT:    ld 3, 0(3)
-; LARGE64-NEXT:    ld 4, L..C8@l(4)
 ; LARGE64-NEXT:    ld 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -495,11 +507,12 @@ define i64 @loadsTIInit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2)
-; SMALL32-NEXT:    lwz 4, L..C5(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    lwz 6, L..C4(2)
+; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    add 3, 3, 6
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -517,14 +530,15 @@ define i64 @loadsTIInit() #1 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4@u(2)
-; LARGE32-NEXT:    addis 4, L..C5@u(2)
-; LARGE32-NEXT:    lwz 3, L..C4@l(3)
-; LARGE32-NEXT:    lwz 4, L..C5@l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 6, L..C4@l(3)
+; LARGE32-NEXT:    addis 3, L..C3@u(2)
+; LARGE32-NEXT:    lwz 3, L..C3@l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 6
 ; LARGE32-NEXT:    lwz 4, 4(3)
 ; LARGE32-NEXT:    lwz 3, 0(3)
-; LARGE32-NEXT:    addis 5, L..C8@u(2)
-; LARGE32-NEXT:    lwz 5, L..C8@l(5)
+; LARGE32-NEXT:    addis 5, L..C7@u(2)
+; LARGE32-NEXT:    lwz 5, L..C7@l(5)
 ; LARGE32-NEXT:    lwz 6, 4(5)
 ; LARGE32-NEXT:    lwz 5, 0(5)
 ; LARGE32-NEXT:    addc 4, 6, 4
@@ -538,11 +552,12 @@ define i64 @loadsTIInit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C4(2)
-; SMALL64-NEXT:    ld 4, L..C5(2)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    ld 6, L..C4(2)
+; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    add 3, 3, 6
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -556,14 +571,15 @@ define i64 @loadsTIInit() #1 {
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C4@u(2)
-; LARGE64-NEXT:    addis 4, L..C5@u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C4@l(3)
-; LARGE64-NEXT:    ld 4, L..C5@l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
-; LARGE64-NEXT:    addis 4, L..C8@u(2)
+; LARGE64-NEXT:    ld 6, L..C4@l(3)
+; LARGE64-NEXT:    addis 3, L..C3@u(2)
+; LARGE64-NEXT:    ld 3, L..C3@l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    addis 4, L..C7@u(2)
+; LARGE64-NEXT:    add 3, 3, 6
+; LARGE64-NEXT:    ld 4, L..C7@l(4)
 ; LARGE64-NEXT:    ld 3, 0(3)
-; LARGE64-NEXT:    ld 4, L..C8@l(4)
 ; LARGE64-NEXT:    ld 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -583,11 +599,11 @@ define i64 @loadsTWInit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C6(2)
-; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    lwz 3, L..C5(2)
+; SMALL32-NEXT:    lwz 4, L..C6(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    lwz 4, L..C7(2)
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -604,15 +620,15 @@ define i64 @loadsTWInit() #1 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    addis 3, L..C6@u(2)
-; LARGE32-NEXT:    addis 4, L..C7@u(2)
-; LARGE32-NEXT:    lwz 3, L..C6@l(3)
-; LARGE32-NEXT:    lwz 4, L..C7@l(4)
+; LARGE32-NEXT:    addis 3, L..C5@u(2)
+; LARGE32-NEXT:    addis 4, L..C6@u(2)
+; LARGE32-NEXT:    lwz 3, L..C5@l(3)
+; LARGE32-NEXT:    lwz 4, L..C6@l(4)
 ; LARGE32-NEXT:    bla .__tls_get_addr[PR]
 ; LARGE32-NEXT:    lwz 4, 4(3)
 ; LARGE32-NEXT:    lwz 3, 0(3)
-; LARGE32-NEXT:    addis 5, L..C8@u(2)
-; LARGE32-NEXT:    lwz 5, L..C8@l(5)
+; LARGE32-NEXT:    addis 5, L..C7@u(2)
+; LARGE32-NEXT:    lwz 5, L..C7@l(5)
 ; LARGE32-NEXT:    lwz 6, 4(5)
 ; LARGE32-NEXT:    lwz 5, 0(5)
 ; LARGE32-NEXT:    addc 4, 6, 4
@@ -626,11 +642,11 @@ define i64 @loadsTWInit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C6(2)
-; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    ld 3, L..C5(2)
+; SMALL64-NEXT:    ld 4, L..C6(2)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    ld 4, L..C7(2)
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -643,15 +659,15 @@ define i64 @loadsTWInit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    addis 3, L..C6@u(2)
-; LARGE64-NEXT:    addis 4, L..C7@u(2)
+; LARGE64-NEXT:    addis 3, L..C5@u(2)
+; LARGE64-NEXT:    addis 4, L..C6@u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C6@l(3)
-; LARGE64-NEXT:    ld 4, L..C7@l(4)
+; LARGE64-NEXT:    ld 3, L..C5@l(3)
+; LARGE64-NEXT:    ld 4, L..C6@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
-; LARGE64-NEXT:    addis 4, L..C8@u(2)
+; LARGE64-NEXT:    addis 4, L..C7@u(2)
 ; LARGE64-NEXT:    ld 3, 0(3)
-; LARGE64-NEXT:    ld 4, L..C8@l(4)
+; LARGE64-NEXT:    ld 4, L..C7@l(4)
 ; LARGE64-NEXT:    ld 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -665,12 +681,16 @@ entry:
   ret i64 %add
 }
 
-; External symbol reference checks for .__tls_get_addr
+; External symbol reference checks for .__tls_get_addr/.__tls_get_mod
 
 ; SMALL32: .extern .__tls_get_addr[PR]
+; SMALL32: .extern .__tls_get_mod[PR]
 ; SMALL64: .extern .__tls_get_addr[PR]
+; SMALL64: .extern .__tls_get_mod[PR]
 ; LARGE32: .extern .__tls_get_addr[PR]
+; LARGE32: .extern .__tls_get_mod[PR]
 ; LARGE64: .extern .__tls_get_addr[PR]
+; LARGE64: .extern .__tls_get_mod[PR]
 
 ; TOC entry checks
 
@@ -680,18 +700,17 @@ entry:
 ; SMALL32-LABEL:  L..C1:
 ; SMALL32-NEXT:  .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL32-LABEL:  L..C2:
-; SMALL32-NEXT:  .tc .TIUninit[TC],TIUninit[UL]@m
+; SMALL32-NEXT:  .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL32-LABEL:  L..C3:
-; SMALL32-NEXT:  .tc TIUninit[TC],TIUninit[UL]@gd
+; SMALL32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; SMALL32-LABEL:  L..C4:
-; SMALL32-NEXT:  .tc .TIInit[TC],TIInit[TL]@m
+; SMALL32-NEXT:  .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL32-LABEL:  L..C5:
-; SMALL32-NEXT:  .tc TIInit[TC],TIInit[TL]@gd
-; SMALL32-LABEL:  L..C6:
 ; SMALL32-NEXT:  .tc .TWInit[TC],TWInit[TL]@m
-; SMALL32-LABEL:  L..C7:
+; SMALL32-LABEL:  L..C6:
 ; SMALL32-NEXT:  .tc TWInit[TC],TWInit[TL]@gd
-; SMALL32-LABEL:  L..C8:
+; SMALL32-LABEL:  L..C7:
 ; SMALL32-NEXT:  .tc GInit[TC],GInit[RW]
 
 ; LARGE32-LABEL:  .toc
@@ -700,18 +719,17 @@ entry:
 ; LARGE32-LABEL:  L..C1:
 ; LARGE32-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL:  L..C2:
-; LARGE32-NEXT:  .tc .TIUninit[TE],TIUninit[UL]@m
+; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE32-LABEL:  L..C3:
-; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@gd
+; LARGE32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; LARGE32-LABEL:  L..C4:
-; LARGE32-NEXT:  .tc .TIInit[TE],TIInit[TL]@m
+; LARGE32-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE32-LABEL:  L..C5:
-; LARGE32-NEXT:  .tc TIInit[TE],TIInit[TL]@gd
-; LARGE32-LABEL:  L..C6:
 ; LARGE32-NEXT:  .tc .TWInit[TE],TWInit[TL]@m
-; LARGE32-LABEL:  L..C7:
+; LARGE32-LABEL:  L..C6:
 ; LARGE32-NEXT:  .tc TWInit[TE],TWInit[TL]@gd
-; LARGE32-LABEL:  L..C8:
+; LARGE32-LABEL:  L..C7:
 ; LARGE32-NEXT:  .tc GInit[TE],GInit[RW]
 
 ; SMALL64-LABEL:  .toc
@@ -720,18 +738,17 @@ entry:
 ; SMALL64-LABEL:  L..C1:
 ; SMALL64-NEXT:  .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL64-LABEL:  L..C2:
-; SMALL64-NEXT:  .tc .TIUninit[TC],TIUninit[UL]@m
+; SMALL64-NEXT:  .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL64-LABEL:  L..C3:
-; SMALL64-NEXT:  .tc TIUninit[TC],TIUninit[UL]@gd
+; SMALL64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; SMALL64-LABEL:  L..C4:
-; SMALL64-NEXT:  .tc .TIInit[TC],TIInit[TL]@m
+; SMALL64-NEXT:  .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL64-LABEL:  L..C5:
-; SMALL64-NEXT:  .tc TIInit[TC],TIInit[TL]@gd
-; SMALL64-LABEL:  L..C6:
 ; SMALL64-NEXT:  .tc .TWInit[TC],TWInit[TL]@m
-; SMALL64-LABEL:  L..C7:
+; SMALL64-LABEL:  L..C6:
 ; SMALL64-NEXT:  .tc TWInit[TC],TWInit[TL]@gd
-; SMALL64-LABEL:  L..C8:
+; SMALL64-LABEL:  L..C7:
 ; SMALL64-NEXT:  .tc GInit[TC],GInit[RW]
 
 ; LARGE64-LABEL:  .toc
@@ -740,18 +757,17 @@ entry:
 ; LARGE64-LABEL:  L..C1:
 ; LARGE64-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE64-LABEL:  L..C2:
-; LARGE64-NEXT:  .tc .TIUninit[TE],TIUninit[UL]@m
+; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE64-LABEL:  L..C3:
-; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@gd
+; LARGE64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; LARGE64-LABEL:  L..C4:
-; LARGE64-NEXT:  .tc .TIInit[TE],TIInit[TL]@m
+; LARGE64-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE64-LABEL:  L..C5:
-; LARGE64-NEXT:  .tc TIInit[TE],TIInit[TL]@gd
-; LARGE64-LABEL:  L..C6:
 ; LARGE64-NEXT:  .tc .TWInit[TE],TWInit[TL]@m
-; LARGE64-LABEL:  L..C7:
+; LARGE64-LABEL:  L..C6:
 ; LARGE64-NEXT:  .tc TWInit[TE],TWInit[TL]@gd
-; LARGE64-LABEL:  L..C8:
+; LARGE64-LABEL:  L..C7:
 ; LARGE64-NEXT:  .tc GInit[TE],GInit[RW]
 
 attributes #0 = { nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" }
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
new file mode 100644
index 0000000000000..9faa99b6eb2ba
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
@@ -0,0 +1,364 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
+; RUN:     --code-model=small < %s | FileCheck %s --check-prefixes=SMALL64,SMALL
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
+; RUN:     --code-model=large < %s | FileCheck %s --check-prefixes=LARGE64,LARGE
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \
+; RUN:     --code-model=small < %s | FileCheck %s --check-prefixes=SMALL32,SMALL
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \
+; RUN:     --code-model=large < %s | FileCheck %s --check-prefixes=LARGE32,LARGE
+
+@TGInit = thread_local(localdynamic) global i32 42, align 4
+@TGUninit = thread_local(localdynamic) global i32 0, align 4
+@TIInit = internal thread_local(localdynamic) global i32 42, align 4
+@TIUninit = internal thread_local(localdynamic) global i32 0, align 4
+@TWInit = weak thread_local(localdynamic) global i32 42, align 4
+@TWUninit = weak thread_local(localdynamic) global i32 0, align 4
+
+define i32 @loadTGInit() {
+; SMALL-LABEL:  loadTGInit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        lwz [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+;
+; LARGE-LABEL:  loadTGInit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        lwz [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGInit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define void @storeTGInit(i32 noundef signext %i) {
+; SMALL-LABEL:  storeTGInit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        stw [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+;
+; LARGE-LABEL:  storeTGInit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        stw [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGInit)
+  store i32 %i, ptr %0, align 4
+  ret void
+}
+
+define i32 @loadTGUninit() {
+; SMALL-LABEL:  loadTGUninit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        lwz [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+;
+; LARGE-LABEL:  loadTGUninit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        lwz [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGUninit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define void @storeTGUninit(i32 noundef signext %i) {
+; SMALL-LABEL:  storeTGUninit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        stw [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+;
+; LARGE-LABEL:  storeTGUninit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        stw [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGUninit)
+  store i32 %i, ptr %0, align 4
+  ret void
+}
+
+define i32 @loadTIInit() {
+; SMALL-LABEL:  loadTIInit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        lwz [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+;
+; LARGE-LABEL:  loadTIInit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        lwz [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIInit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define void @storeTIInit(i32 noundef signext %i) {
+; SMALL-LABEL:  storeTIInit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        stw [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+;
+; LARGE-LABEL:  storeTIInit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        stw [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIInit)
+  store i32 %i, ptr %0, align 4
+  ret void
+}
+
+define i32 @loadTIUninit() {
+; SMALL-LABEL:  loadTIUninit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        lwz [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+;
+; LARGE-LABEL:  loadTIUninit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        lwz [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIUninit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define void @storeTIUninit(i32 noundef signext %i) {
+; SMALL-LABEL:  storeTIUninit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        stw [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+;
+; LARGE-LABEL:  storeTIUninit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        stw [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIUninit)
+  store i32 %i, ptr %0, align 4
+  ret void
+}
+
+define i32 @loadTWInit() {
+; SMALL-LABEL:  loadTWInit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        lwz [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+;
+; LARGE-LABEL:  loadTWInit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        lwz [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWInit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define void @storeTWInit(i32 noundef signext %i) {
+; SMALL-LABEL:  storeTWInit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        stw [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+;
+; LARGE-LABEL:  storeTWInit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        stw [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWInit)
+  store i32 %i, ptr %0, align 4
+  ret void
+}
+
+define i32 @loadTWUninit() {
+; SMALL-LABEL:  loadTWUninit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        lwz [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+;
+; LARGE-LABEL:  loadTWUninit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        lwz [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWUninit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define void @storeTWUninit(i32 noundef signext %i) {
+; SMALL-LABEL:  storeTWUninit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        stw [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+;
+; LARGE-LABEL:  storeTWUninit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        stw [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWUninit)
+  store i32 %i, ptr %0, align 4
+  ret void
+}
+
+; SMALL:          .extern .__tls_get_mod[PR]
+; LARGE:          .extern .__tls_get_mod[PR]
+
+; SMALL:        [[TGInitL]]:
+; SMALL-NEXT:   .tc TGInit[TC],TGInit[TL]@ld
+; SMALL:        [[ModuleHandleL]]:
+; SMALL-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL:        [[TGUninitL]]:
+; SMALL-NEXT:   .tc TGUninit[TC],TGUninit[TL]@ld
+; SMALL:        [[TIInitL]]:
+; SMALL-NEXT:   .tc TIInit[TC],TIInit[TL]@ld
+; SMALL:        [[TIUninitL]]:
+; SMALL-NEXT:   .tc TIUninit[TC],TIUninit[UL]@ld
+; SMALL:        [[TWInitL]]:
+; SMALL-NEXT:   .tc TWInit[TC],TWInit[TL]@ld
+; SMALL:        [[TWUninitL]]:
+; SMALL-NEXT:   .tc TWUninit[TC],TWUninit[TL]@ld
+
+; LARGE:        [[TGInitL]]:
+; LARGE-NEXT:   .tc TGInit[TE],TGInit[TL]@ld
+; LARGE:        [[ModuleHandleL]]:
+; LARGE-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE:        [[TGUninitL]]:
+; LARGE-NEXT:   .tc TGUninit[TE],TGUninit[TL]@ld
+; LARGE:        [[TIInitL]]:
+; LARGE-NEXT:   .tc TIInit[TE],TIInit[TL]@ld
+; LARGE:        [[TIUninitL]]:
+; LARGE-NEXT:   .tc TIUninit[TE],TIUninit[UL]@ld
+; LARGE:        [[TWInitL]]:
+; LARGE-NEXT:   .tc TWInit[TE],TWInit[TL]@ld
+; LARGE:        [[TWUninitL]]:
+; LARGE-NEXT:   .tc TWUninit[TE],TWUninit[TL]@ld
+
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
index 2dae8ee96e20d..3e9f28965bbe3 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
@@ -5,6 +5,7 @@
 ; RUN: llvm-objdump -D -r --symbol-description %t.o | FileCheck --check-prefix=DIS %s
 
 @GInit = global double 1.000000e+00, align 8
+; @TIInit is local-dynamic indeed
 @TIInit = internal thread_local global i64 1, align 8
 @TWInit = weak thread_local global double 1.000000e+00, align 8
 
@@ -32,7 +33,7 @@ entry:
 ; RELOC-NEXT:   Section (index: 1) .text {
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x16
-; RELOC-NEXT:     Symbol: .TIInit (17)
+; RELOC-NEXT:     Symbol: TIInit (19)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -44,19 +45,19 @@ entry:
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
-; RELOC-NEXT:     Type: R_TOCU (0x30)
+; RELOC-NEXT:     Type: R_TOCL (0x31)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x1E
-; RELOC-NEXT:     Symbol: .TIInit (17)
+; RELOC-NEXT:     Symbol: _$TLSML (21)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
-; RELOC-NEXT:     Type: R_TOCL (0x31)
+; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x22
-; RELOC-NEXT:     Symbol: TIInit (19)
+; RELOC-NEXT:     Symbol: _$TLSML (21)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -64,63 +65,63 @@ entry:
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x24
-; RELOC-NEXT:     Symbol: .__tls_get_addr (1)
+; RELOC-NEXT:     Symbol: .__tls_get_mod (1)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 26
 ; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x4E
-; RELOC-NEXT:     Symbol: .TWInit (21)
+; RELOC-NEXT:     Virtual Address: 0x5E
+; RELOC-NEXT:     Symbol: .TWInit (23)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x52
-; RELOC-NEXT:     Symbol: TWInit (23)
+; RELOC-NEXT:     Virtual Address: 0x62
+; RELOC-NEXT:     Symbol: TWInit (25)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x56
-; RELOC-NEXT:     Symbol: .TWInit (21)
+; RELOC-NEXT:     Virtual Address: 0x66
+; RELOC-NEXT:     Symbol: .TWInit (23)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOCL (0x31)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x5A
-; RELOC-NEXT:     Symbol: TWInit (23)
+; RELOC-NEXT:     Virtual Address: 0x6A
+; RELOC-NEXT:     Symbol: TWInit (25)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOCL (0x31)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x5C
-; RELOC-NEXT:     Symbol: .__tls_get_addr (1)
+; RELOC-NEXT:     Virtual Address: 0x6C
+; RELOC-NEXT:     Symbol: .__tls_get_addr (3)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 26
 ; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x66
-; RELOC-NEXT:     Symbol: GInit (25)
+; RELOC-NEXT:     Virtual Address: 0x76
+; RELOC-NEXT:     Symbol: GInit (27)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x6A
-; RELOC-NEXT:     Symbol: GInit (25)
+; RELOC-NEXT:     Virtual Address: 0x7A
+; RELOC-NEXT:     Symbol: GInit (27)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -129,72 +130,72 @@ entry:
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Section (index: 2) .data {
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x90
-; RELOC-NEXT:   Symbol: .storesTIInit (5)
+; RELOC-NEXT:   Virtual Address: 0xA0
+; RELOC-NEXT:   Symbol: .storesTIInit (7)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x94
-; RELOC-NEXT:   Symbol: TOC (15)
+; RELOC-NEXT:   Virtual Address: 0xA4
+; RELOC-NEXT:   Symbol: TOC (17)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x9C
-; RELOC-NEXT:   Symbol: .loadsTWInit (7)
+; RELOC-NEXT:   Virtual Address: 0xAC
+; RELOC-NEXT:   Symbol: .loadsTWInit (9)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xA0
-; RELOC-NEXT:   Symbol: TOC (15)
+; RELOC-NEXT:   Virtual Address: 0xB0
+; RELOC-NEXT:   Symbol: TOC (17)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xA8
-; RELOC-NEXT:   Symbol: TIInit (27)
+; RELOC-NEXT:   Virtual Address: 0xB8
+; RELOC-NEXT:   Symbol: TIInit (29)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLSM (0x24)
+; RELOC-NEXT:   Type: R_TLS_LD (0x22)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xAC
-; RELOC-NEXT:   Symbol: TIInit (27)
+; RELOC-NEXT:   Virtual Address: 0xBC
+; RELOC-NEXT:   Symbol: _$TLSML (21)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLS (0x20)
+; RELOC-NEXT:   Type: R_TLSML (0x25)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xB0
-; RELOC-NEXT:   Symbol: TWInit (29)
+; RELOC-NEXT:   Virtual Address: 0xC0
+; RELOC-NEXT:   Symbol: TWInit (31)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_TLSM (0x24)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xB4
-; RELOC-NEXT:   Symbol: TWInit (29)
+; RELOC-NEXT:   Virtual Address: 0xC4
+; RELOC-NEXT:   Symbol: TWInit (31)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_TLS (0x20)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xB8
-; RELOC-NEXT:   Symbol: GInit (9)
+; RELOC-NEXT:   Virtual Address: 0xC8
+; RELOC-NEXT:   Symbol: GInit (11)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
@@ -220,7 +221,7 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 1
-; SYM-NEXT:     Name: .__tls_get_addr
+; SYM-NEXT:     Name: .__tls_get_mod
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: N_UNDEF
 ; SYM-NEXT:     Type: 0x0
@@ -240,15 +241,35 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 3
-; SYM-NEXT:     Name:
+; SYM-NEXT:     Name: .__tls_get_addr
+; SYM-NEXT:     Value (RelocatableAddress): 0x0
+; SYM-NEXT:     Section: N_UNDEF
+; SYM-NEXT:     Type: 0x0
+; SYM-NEXT:     StorageClass: C_EXT (0x2)
+; SYM-NEXT:     NumberOfAuxEntries: 1
+; SYM-NEXT:     CSECT Auxiliary Entry {
+; SYM-NEXT:       Index: 4
+; SYM-NEXT:       SectionLen: 0
+; SYM-NEXT:       ParameterHashIndex: 0x0
+; SYM-NEXT:       TypeChkSectNum: 0x0
+; SYM-NEXT:       SymbolAlignmentLog2: 0
+; SYM-NEXT:       SymbolType: XTY_ER (0x0)
+; SYM-NEXT:       StorageMappingClass: XMC_PR (0x0)
+; SYM-NEXT:       StabInfoIndex: 0x0
+; SYM-NEXT:       StabSectNum: 0x0
+; SYM-NEXT:     }
+; SYM-NEXT:   }
+; SYM-NEXT:   Symbol {
+; SYM-NEXT:     Index: 5
+; SYM-NEXT:     Name: 
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 4
-; SYM-NEXT:       SectionLen: 132
+; SYM-NEXT:       Index: 6
+; SYM-NEXT:       SectionLen: 148
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 5
@@ -259,7 +280,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 5
+; SYM-NEXT:     Index: 7
 ; SYM-NEXT:     Name: .storesTIInit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .text
@@ -267,8 +288,8 @@ entry:
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 6
-; SYM-NEXT:       ContainingCsectSymbolIndex: 3
+; SYM-NEXT:       Index: 8
+; SYM-NEXT:       ContainingCsectSymbolIndex: 5
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -279,16 +300,16 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 7
+; SYM-NEXT:     Index: 9
 ; SYM-NEXT:     Name: .loadsTWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x40
+; SYM-NEXT:     Value (RelocatableAddress): 0x50
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 8
-; SYM-NEXT:       ContainingCsectSymbolIndex: 3
+; SYM-NEXT:       Index: 10
+; SYM-NEXT:       ContainingCsectSymbolIndex: 5
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -299,15 +320,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 9
+; SYM-NEXT:     Index: 11
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x88
+; SYM-NEXT:     Value (RelocatableAddress): 0x98
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 10
+; SYM-NEXT:       Index: 12
 ; SYM-NEXT:       SectionLen: 8
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -319,15 +340,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 11
+; SYM-NEXT:     Index: 13
 ; SYM-NEXT:     Name: storesTIInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x90
+; SYM-NEXT:     Value (RelocatableAddress): 0xA0
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 12
+; SYM-NEXT:       Index: 14
 ; SYM-NEXT:       SectionLen: 12
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -339,15 +360,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 13
+; SYM-NEXT:     Index: 15
 ; SYM-NEXT:     Name: loadsTWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x9C
+; SYM-NEXT:     Value (RelocatableAddress): 0xAC
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 14
+; SYM-NEXT:       Index: 16
 ; SYM-NEXT:       SectionLen: 12
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -359,15 +380,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 15
+; SYM-NEXT:     Index: 17
 ; SYM-NEXT:     Name: TOC
-; SYM-NEXT:     Value (RelocatableAddress): 0xA8
+; SYM-NEXT:     Value (RelocatableAddress): 0xB8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 16
+; SYM-NEXT:       Index: 18
 ; SYM-NEXT:       SectionLen: 0
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -379,15 +400,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 17
-; SYM-NEXT:     Name: .TIInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xA8
+; SYM-NEXT:     Index: 19
+; SYM-NEXT:     Name: TIInit
+; SYM-NEXT:     Value (RelocatableAddress): 0xB8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 18
+; SYM-NEXT:       Index: 20
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -399,35 +420,35 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 19
-; SYM-NEXT:     Name: TIInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xAC
+; SYM-NEXT:     Index: 21
+; SYM-NEXT:     Name: _$TLSML
+; SYM-NEXT:     Value (RelocatableAddress): 0xBC
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 20
+; SYM-NEXT:       Index: 22
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 2
 ; SYM-NEXT:       SymbolType: XTY_SD (0x1)
-; SYM-NEXT:       StorageMappingClass: XMC_TE (0x16)
+; SYM-NEXT:       StorageMappingClass: XMC_TC (0x3)
 ; SYM-NEXT:       StabInfoIndex: 0x0
 ; SYM-NEXT:       StabSectNum: 0x0
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 21
+; SYM-NEXT:     Index: 23
 ; SYM-NEXT:     Name: .TWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xB0
+; SYM-NEXT:     Value (RelocatableAddress): 0xC0
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 22
+; SYM-NEXT:       Index: 24
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -439,15 +460,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 23
+; SYM-NEXT:     Index: 25
 ; SYM-NEXT:     Name: TWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xB4
+; SYM-NEXT:     Value (RelocatableAddress): 0xC4
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 24
+; SYM-NEXT:       Index: 26
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -459,15 +480,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 25
+; SYM-NEXT:     Index: 27
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xB8
+; SYM-NEXT:     Value (RelocatableAddress): 0xC8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 26
+; SYM-NEXT:       Index: 28
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -479,7 +500,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 27
+; SYM-NEXT:     Index: 29
 ; SYM-NEXT:     Name: TIInit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .tdata
@@ -487,7 +508,7 @@ entry:
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 28
+; SYM-NEXT:       Index: 30
 ; SYM-NEXT:       SectionLen: 8
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -499,7 +520,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 29
+; SYM-NEXT:     Index: 31
 ; SYM-NEXT:     Name: TWInit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x8
 ; SYM-NEXT:     Section: .tdata
@@ -507,7 +528,7 @@ entry:
 ; SYM-NEXT:     StorageClass: C_WEAKEXT (0x6F)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 30
+; SYM-NEXT:       Index: 32
 ; SYM-NEXT:       SectionLen: 8
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -522,47 +543,48 @@ entry:
 
 ; DIS:      {{.*}}aix-tls-xcoff-reloc-large.ll.tmp.o:	file format aixcoff-rs6000
 ; DIS:      Disassembly of section .text:
-; DIS:      00000000 (idx: 5) .storesTIInit:
+; DIS:      00000000 (idx: 7) .storesTIInit:
 ; DIS-NEXT:                                       mflr 0
 ; DIS-NEXT:                                       stwu 1, -32(1)
 ; DIS-NEXT:                                       stw 0, 40(1)
-; DIS-NEXT:                                       mr 6, 4
 ; DIS-NEXT:                                       mr 7, 3
+; DIS-NEXT:                                       mr 6, 4
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 17) .TIInit[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 4, 2, 0
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 19) TIInit[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 0(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 17) .TIInit[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 4, 4(4)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 8, 0(3)
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 19) TIInit[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 21) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 4(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 21) _$TLSML[TC]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0
-; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA  (idx: 1)      .__tls_get_addr[PR]
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA  (idx: 1)      .__tls_get_mod[PR]
+; DIS-NEXT:                                       add 3, 3, 8
 ; DIS-NEXT:                                       stw 6, 4(3)
 ; DIS-NEXT:                                       stw 7, 0(3)
 ; DIS-NEXT:                                       addi 1, 1, 32
 ; DIS-NEXT:                                       lwz 0, 8(1)
 ; DIS-NEXT:                                       mtlr 0
 ; DIS-NEXT:                                       blr
-; DIS:      00000040 (idx: 7) .loadsTWInit:
+; DIS:      00000050 (idx: 9) .loadsTWInit:
 ; DIS-NEXT:                                       mflr 0
 ; DIS-NEXT:                                       stwu 1, -32(1)
 ; DIS-NEXT:                                       stw 0, 40(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 21) .TWInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 23) .TWInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 4, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 23) TWInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 25) TWInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 8(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 21) .TWInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 23) .TWInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 4, 12(4)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 23) TWInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 25) TWInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0
-; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA  (idx: 1)      .__tls_get_addr[PR]
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA  (idx: 3)      .__tls_get_addr[PR]
 ; DIS-NEXT:                                       lfd 0, 0(3)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 25) GInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 27) GInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 16(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 25) GInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 27) GInit[TE]
 ; DIS-NEXT:                                       lfd 1, 0(3)
 ; DIS-NEXT:                                       fadd 1, 0, 1
 ; DIS-NEXT:                                       addi 1, 1, 32
@@ -571,42 +593,42 @@ entry:
 ; DIS-NEXT:                                       blr
 
 ; DIS:      Disassembly of section .data:
-; DIS:      00000088  (idx: 9) GInit[RW]:
-; DIS-NEXT:       88: 3f f0 00 00
-; DIS-NEXT:       8c: 00 00 00 00
-; DIS:      00000090  (idx: 11) storesTIInit[DS]:
-; DIS-NEXT:       90: 00 00 00 00
-; DIS-NEXT: 00000090: R_POS (idx: 5) .storesTIInit
-; DIS-NEXT:       94: 00 00 00 a8
-; DIS-NEXT: 00000094: R_POS (idx: 15) TOC[TC0]
-; DIS-NEXT:       98: 00 00 00 00
-; DIS:      0000009c  (idx: 13) loadsTWInit[DS]:
-; DIS-NEXT:       9c: 00 00 00 40
-; DIS-NEXT: 0000009c: R_POS (idx: 7) .loadsTWInit
-; DIS-NEXT:       a0: 00 00 00 a8
-; DIS-NEXT: 000000a0: R_POS (idx: 15) TOC[TC0]
-; DIS-NEXT:       a4: 00 00 00 00
-; DIS:      000000a8  (idx: 17) .TIInit[TE]:
+; DIS:      00000098  (idx: 11) GInit[RW]:
+; DIS-NEXT:       98: 3f f0 00 00
+; DIS-NEXT:       9c: 00 00 00 00
+; DIS:      000000a0  (idx: 13) storesTIInit[DS]:
+; DIS-NEXT:       a0: 00 00 00 00
+; DIS-NEXT: 000000a0: R_POS (idx: 7) .storesTIInit
+; DIS-NEXT:       a4: 00 00 00 b8
+; DIS-NEXT: 000000a4: R_POS (idx: 17) TOC[TC0]
 ; DIS-NEXT:       a8: 00 00 00 00
-; DIS-NEXT: 000000a8: R_TLSM (idx: 27) TIInit[TL]
-; DIS:      000000ac  (idx: 19) TIInit[TE]:
-; DIS-NEXT:       ac: 00 00 00 00
-; DIS-NEXT: 000000ac: R_TLS (idx: 27) TIInit[TL]
-; DIS:      000000b0  (idx: 21) .TWInit[TE]:
-; DIS-NEXT:       b0: 00 00 00 00
-; DIS-NEXT: 000000b0: R_TLSM (idx: 29) TWInit[TL]
-; DIS:      000000b4  (idx: 23) TWInit[TE]:
-; DIS-NEXT:       b4: 00 00 00 08
-; DIS-NEXT: 000000b4: R_TLS (idx: 29) TWInit[TL]
-; DIS:      000000b8  (idx: 25) GInit[TE]:
-; DIS-NEXT:       b8: 00 00 00 88
-; DIS-NEXT: 000000b8: R_POS (idx: 9) GInit[RW]
+; DIS:      000000ac  (idx: 15) loadsTWInit[DS]:
+; DIS-NEXT:       ac: 00 00 00 50
+; DIS-NEXT: 000000ac: R_POS (idx: 9) .loadsTWInit
+; DIS-NEXT:       b0: 00 00 00 b8
+; DIS-NEXT: 000000b0: R_POS (idx: 17) TOC[TC0]
+; DIS-NEXT:       b4: 00 00 00 00
+; DIS:      000000b8  (idx: 19) TIInit[TE]:
+; DIS-NEXT:       b8: 00 00 00 00
+; DIS-NEXT: 000000b8: R_TLS_LD (idx: 29) TIInit[TL]
+; DIS:      000000bc  (idx: 21) _$TLSML[TC]:
+; DIS-NEXT:       bc: 00 00 00 00
+; DIS-NEXT: 000000bc: R_TLSML (idx: 21) _$TLSML[TC]
+; DIS:      000000c0  (idx: 23) .TWInit[TE]:
+; DIS-NEXT:       c0: 00 00 00 00
+; DIS-NEXT: 000000c0: R_TLSM (idx: 31) TWInit[TL]
+; DIS:      000000c4  (idx: 25) TWInit[TE]:
+; DIS-NEXT:       c4: 00 00 00 08
+; DIS-NEXT: 000000c4: R_TLS (idx: 31) TWInit[TL]
+; DIS:      000000c8  (idx: 27) GInit[TE]:
+; DIS-NEXT:       c8: 00 00 00 98
+; DIS-NEXT: 000000c8: R_POS (idx: 11) GInit[RW]
 
 ; DIS:      Disassembly of section .tdata:
-; DIS:      00000000  (idx: 27) TIInit[TL]:
+; DIS:      00000000  (idx: 29) TIInit[TL]:
 ; DIS-NEXT:        0: 00 00 00 00
 ; DIS-NEXT:        4: 00 00 00 01
-; DIS:      00000008  (idx: 29) TWInit[TL]:
+; DIS:      00000008  (idx: 31) TWInit[TL]:
 ; DIS-NEXT:        8: 3f f0 00 00
 ; DIS-NEXT:        c: 00 00 00 00
 
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
index 0779686b54f3a..ae1dae7955914 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
@@ -7,6 +7,7 @@
 @const_ivar = constant i32 6, align 4
 @GInit = global i32 1, align 4
 @TGInit = thread_local global i32 1, align 4
+; @TIUninit (internal linkage) is accessed using the local-dynamic TLS model.
 @TIUninit = internal thread_local global i32 0, align 4
 
 ; Function Attrs: nofree norecurse nounwind willreturn writeonly
@@ -32,16 +33,16 @@ entry:
 ; RELOC-NEXT: Relocations [
 ; RELOC-NEXT:   Section (index: 1) .text {
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0xE
-; RELOC-NEXT:     Symbol: .TIUninit (23)
+; RELOC-NEXT:     Virtual Address: 0x12
+; RELOC-NEXT:     Symbol: TIUninit (25)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x12
-; RELOC-NEXT:     Symbol: TIUninit (25)
+; RELOC-NEXT:     Virtual Address: 0x16
+; RELOC-NEXT:     Symbol: _$TLSML (27)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -49,39 +50,39 @@ entry:
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x18
-; RELOC-NEXT:     Symbol: .__tls_get_addr (1)
+; RELOC-NEXT:     Symbol: .__tls_get_mod (1)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 26
 ; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x3A
-; RELOC-NEXT:     Symbol: .TGInit (27)
+; RELOC-NEXT:     Virtual Address: 0x4A
+; RELOC-NEXT:     Symbol: .TGInit (29)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x3E
-; RELOC-NEXT:     Symbol: TGInit (29)
+; RELOC-NEXT:     Virtual Address: 0x4E
+; RELOC-NEXT:     Symbol: TGInit (31)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x44
-; RELOC-NEXT:     Symbol: .__tls_get_addr (1)
+; RELOC-NEXT:     Virtual Address: 0x54
+; RELOC-NEXT:     Symbol: .__tls_get_addr (3)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 26
 ; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x4A
-; RELOC-NEXT:     Symbol: GInit (31)
+; RELOC-NEXT:     Virtual Address: 0x5A
+; RELOC-NEXT:     Symbol: GInit (33)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -90,72 +91,72 @@ entry:
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Section (index: 2) .data {
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x70
-; RELOC-NEXT:   Symbol: .storesTIUninit (5)
+; RELOC-NEXT:   Virtual Address: 0x80
+; RELOC-NEXT:   Symbol: .storesTIUninit (7)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x74
-; RELOC-NEXT:   Symbol: TOC (21)
+; RELOC-NEXT:   Virtual Address: 0x84
+; RELOC-NEXT:   Symbol: TOC (23)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x7C
-; RELOC-NEXT:   Symbol: .loadsTGInit (7)
+; RELOC-NEXT:   Virtual Address: 0x8C
+; RELOC-NEXT:   Symbol: .loadsTGInit (9)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x80
-; RELOC-NEXT:   Symbol: TOC (21)
+; RELOC-NEXT:   Virtual Address: 0x90
+; RELOC-NEXT:   Symbol: TOC (23)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x88
-; RELOC-NEXT:   Symbol: TIUninit (37)
+; RELOC-NEXT:   Virtual Address: 0x98
+; RELOC-NEXT:   Symbol: TIUninit (39)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLSM (0x24)
+; RELOC-NEXT:   Type: R_TLS_LD (0x22)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x8C
-; RELOC-NEXT:   Symbol: TIUninit (37)
+; RELOC-NEXT:   Virtual Address: 0x9C
+; RELOC-NEXT:   Symbol: _$TLSML (27)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLS (0x20)
+; RELOC-NEXT:   Type: R_TLSML (0x25)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x90
-; RELOC-NEXT:   Symbol: TGInit (35)
+; RELOC-NEXT:   Virtual Address: 0xA0
+; RELOC-NEXT:   Symbol: TGInit (37)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_TLSM (0x24)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x94
-; RELOC-NEXT:   Symbol: TGInit (35)
+; RELOC-NEXT:   Virtual Address: 0xA4
+; RELOC-NEXT:   Symbol: TGInit (37)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_TLS (0x20)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x98
-; RELOC-NEXT:   Symbol: GInit (15)
+; RELOC-NEXT:   Virtual Address: 0xA8
+; RELOC-NEXT:   Symbol: GInit (17)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
@@ -181,7 +182,7 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 1
-; SYM-NEXT:     Name: .__tls_get_addr
+; SYM-NEXT:     Name: .__tls_get_mod
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: N_UNDEF
 ; SYM-NEXT:     Type: 0x0
@@ -201,15 +202,35 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 3
-; SYM-NEXT:     Name:
+; SYM-NEXT:     Name: .__tls_get_addr
+; SYM-NEXT:     Value (RelocatableAddress): 0x0
+; SYM-NEXT:     Section: N_UNDEF
+; SYM-NEXT:     Type: 0x0
+; SYM-NEXT:     StorageClass: C_EXT (0x2)
+; SYM-NEXT:     NumberOfAuxEntries: 1
+; SYM-NEXT:     CSECT Auxiliary Entry {
+; SYM-NEXT:       Index: 4
+; SYM-NEXT:       SectionLen: 0
+; SYM-NEXT:       ParameterHashIndex: 0x0
+; SYM-NEXT:       TypeChkSectNum: 0x0
+; SYM-NEXT:       SymbolAlignmentLog2: 0
+; SYM-NEXT:       SymbolType: XTY_ER (0x0)
+; SYM-NEXT:       StorageMappingClass: XMC_PR (0x0)
+; SYM-NEXT:       StabInfoIndex: 0x0
+; SYM-NEXT:       StabSectNum: 0x0
+; SYM-NEXT:     }
+; SYM-NEXT:   }
+; SYM-NEXT:   Symbol {
+; SYM-NEXT:     Index: 5
+; SYM-NEXT:     Name: 
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 4
-; SYM-NEXT:       SectionLen: 104
+; SYM-NEXT:       Index: 6
+; SYM-NEXT:       SectionLen: 120
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 5
@@ -220,7 +241,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 5
+; SYM-NEXT:     Index: 7
 ; SYM-NEXT:     Name: .storesTIUninit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .text
@@ -228,8 +249,8 @@ entry:
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 6
-; SYM-NEXT:       ContainingCsectSymbolIndex: 3
+; SYM-NEXT:       Index: 8
+; SYM-NEXT:       ContainingCsectSymbolIndex: 5
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -240,16 +261,16 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 7
+; SYM-NEXT:     Index: 9
 ; SYM-NEXT:     Name: .loadsTGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x30
+; SYM-NEXT:     Value (RelocatableAddress): 0x40
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 8
-; SYM-NEXT:       ContainingCsectSymbolIndex: 3
+; SYM-NEXT:       Index: 10
+; SYM-NEXT:       ContainingCsectSymbolIndex: 5
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -260,15 +281,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 9
+; SYM-NEXT:     Index: 11
 ; SYM-NEXT:     Name: .rodata
-; SYM-NEXT:     Value (RelocatableAddress): 0x68
+; SYM-NEXT:     Value (RelocatableAddress): 0x78
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 10
+; SYM-NEXT:       Index: 12
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -280,16 +301,16 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 11
+; SYM-NEXT:     Index: 13
 ; SYM-NEXT:     Name: const_ivar
-; SYM-NEXT:     Value (RelocatableAddress): 0x68
+; SYM-NEXT:     Value (RelocatableAddress): 0x78
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 12
-; SYM-NEXT:       ContainingCsectSymbolIndex: 9
+; SYM-NEXT:       Index: 14
+; SYM-NEXT:       ContainingCsectSymbolIndex: 11
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -300,15 +321,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 13
+; SYM-NEXT:     Index: 15
 ; SYM-NEXT:     Name: .data
-; SYM-NEXT:     Value (RelocatableAddress): 0x6C
+; SYM-NEXT:     Value (RelocatableAddress): 0x7C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 14
+; SYM-NEXT:       Index: 16
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -320,16 +341,16 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 15
+; SYM-NEXT:     Index: 17
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x6C
+; SYM-NEXT:     Value (RelocatableAddress): 0x7C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 16
-; SYM-NEXT:       ContainingCsectSymbolIndex: 13
+; SYM-NEXT:       Index: 18
+; SYM-NEXT:       ContainingCsectSymbolIndex: 15
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -340,15 +361,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 17
+; SYM-NEXT:     Index: 19
 ; SYM-NEXT:     Name: storesTIUninit
-; SYM-NEXT:     Value (RelocatableAddress): 0x70
+; SYM-NEXT:     Value (RelocatableAddress): 0x80
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 18
+; SYM-NEXT:       Index: 20
 ; SYM-NEXT:       SectionLen: 12
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -360,15 +381,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 19
+; SYM-NEXT:     Index: 21
 ; SYM-NEXT:     Name: loadsTGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x7C
+; SYM-NEXT:     Value (RelocatableAddress): 0x8C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 20
+; SYM-NEXT:       Index: 22
 ; SYM-NEXT:       SectionLen: 12
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -380,15 +401,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 21
+; SYM-NEXT:     Index: 23
 ; SYM-NEXT:     Name: TOC
-; SYM-NEXT:     Value (RelocatableAddress): 0x88
+; SYM-NEXT:     Value (RelocatableAddress): 0x98
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 22
+; SYM-NEXT:       Index: 24
 ; SYM-NEXT:       SectionLen: 0
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -400,15 +421,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 23
-; SYM-NEXT:     Name: .TIUninit
-; SYM-NEXT:     Value (RelocatableAddress): 0x88
+; SYM-NEXT:     Index: 25
+; SYM-NEXT:     Name: TIUninit
+; SYM-NEXT:     Value (RelocatableAddress): 0x98
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 24
+; SYM-NEXT:       Index: 26
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -420,15 +441,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 25
-; SYM-NEXT:     Name: TIUninit
-; SYM-NEXT:     Value (RelocatableAddress): 0x8C
+; SYM-NEXT:     Index: 27
+; SYM-NEXT:     Name: _$TLSML
+; SYM-NEXT:     Value (RelocatableAddress): 0x9C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 26
+; SYM-NEXT:       Index: 28
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -440,15 +461,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 27
+; SYM-NEXT:     Index: 29
 ; SYM-NEXT:     Name: .TGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x90
+; SYM-NEXT:     Value (RelocatableAddress): 0xA0
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 28
+; SYM-NEXT:       Index: 30
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -460,15 +481,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 29
+; SYM-NEXT:     Index: 31
 ; SYM-NEXT:     Name: TGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x94
+; SYM-NEXT:     Value (RelocatableAddress): 0xA4
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 30
+; SYM-NEXT:       Index: 32
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -480,15 +501,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 31
+; SYM-NEXT:     Index: 33
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x98
+; SYM-NEXT:     Value (RelocatableAddress): 0xA8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 32
+; SYM-NEXT:       Index: 34
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -500,7 +521,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 33
+; SYM-NEXT:     Index: 35
 ; SYM-NEXT:     Name: .tdata
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .tdata
@@ -508,7 +529,7 @@ entry:
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 34
+; SYM-NEXT:       Index: 36
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -520,7 +541,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 35
+; SYM-NEXT:     Index: 37
 ; SYM-NEXT:     Name: TGInit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .tdata
@@ -528,8 +549,8 @@ entry:
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 36
-; SYM-NEXT:       ContainingCsectSymbolIndex: 33
+; SYM-NEXT:       Index: 38
+; SYM-NEXT:       ContainingCsectSymbolIndex: 35
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -540,7 +561,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 37
+; SYM-NEXT:     Index: 39
 ; SYM-NEXT:     Name: TIUninit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x4
 ; SYM-NEXT:     Section: .tbss
@@ -548,7 +569,7 @@ entry:
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 38
+; SYM-NEXT:       Index: 40
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -563,34 +584,35 @@ entry:
 
 ; DIS:      {{.*}}aix-tls-xcoff-reloc.ll.tmp.o:	file format aixcoff-rs6000
 ; DIS:      Disassembly of section .text:
-; DIS:      00000000 (idx: 5) .storesTIUninit:
+; DIS:      00000000 (idx: 7) .storesTIUninit:
 ; DIS-NEXT:                                      mflr 0
 ; DIS-NEXT:                                      stwu 1, -32(1)
+; DIS-NEXT:                                      stw 0, 40(1)
 ; DIS-NEXT:                                      mr 6, 3
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 0(2)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 23) .TIUninit[TC]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 4, 4(2)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 7, 0(2)
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 25) TIUninit[TC]
-; DIS-NEXT:                                      stw 0, 40(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 4(2)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 27) _$TLSML[TC]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               bla 0
-; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1)      .__tls_get_addr[PR]
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1)      .__tls_get_mod[PR]
+; DIS-NEXT:                                      add 3, 3, 7
 ; DIS-NEXT:                                      stw 6, 0(3)
 ; DIS-NEXT:                                      addi 1, 1, 32
 ; DIS-NEXT:                                      lwz 0, 8(1)
 ; DIS-NEXT:                                      mtlr 0
 ; DIS-NEXT:                                      blr
-; DIS:      00000030 (idx: 7) .loadsTGInit:
+; DIS:      00000040 (idx: 9) .loadsTGInit:
 ; DIS-NEXT:                                      mflr 0
 ; DIS-NEXT:                                      stwu 1, -32(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 8(2)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 27) .TGInit[TC]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 29) .TGInit[TC]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 4, 12(2)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 29) TGInit[TC]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 31) TGInit[TC]
 ; DIS-NEXT:                                      stw 0, 40(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               bla 0
-; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1)      .__tls_get_addr[PR]
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 3)      .__tls_get_addr[PR]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 4, 16(2)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 31) GInit[TC]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 33) GInit[TC]
 ; DIS-NEXT:                                      lwz 3, 0(3)
 ; DIS-NEXT:                                      lwz 4, 0(4)
 ; DIS-NEXT:                                      add 3, 4, 3
@@ -598,46 +620,46 @@ entry:
 ; DIS-NEXT:                                      lwz 0, 8(1)
 ; DIS-NEXT:                                      mtlr 0
 ; DIS-NEXT:                                      blr
-; DIS:      00000068 (idx: 11) const_ivar:
-; DIS-NEXT:       68: 00 00 00 06
+; DIS:      00000078 (idx: 13) const_ivar:
+; DIS-NEXT:       78: 00 00 00 06
 
 ; DIS:      Disassembly of section .data:
-; DIS:      0000006c  (idx: 15) GInit:
-; DIS-NEXT:       6c: 00 00 00 01
-; DIS:      00000070  (idx: 17) storesTIUninit[DS]:
-; DIS-NEXT:       70: 00 00 00 00
-; DIS-NEXT: 00000070: R_POS (idx: 5) .storesTIUninit
-; DIS-NEXT:       74: 00 00 00 88
-; DIS-NEXT: 00000074: R_POS (idx: 21) TOC[TC0]
-; DIS-NEXT:       78: 00 00 00 00
-; DIS:      0000007c  (idx: 19) loadsTGInit[DS]:
-; DIS-NEXT:       7c: 00 00 00 30
-; DIS-NEXT: 0000007c: R_POS (idx: 7) .loadsTGInit
-; DIS-NEXT:       80: 00 00 00 88
-; DIS-NEXT: 00000080: R_POS (idx: 21) TOC[TC0]
-; DIS-NEXT:       84: 00 00 00 00
-; DIS:      00000088  (idx: 23) .TIUninit[TC]:
+; DIS:      0000007c  (idx: 17) GInit:
+; DIS-NEXT:       7c: 00 00 00 01
+; DIS:      00000080  (idx: 19) storesTIUninit[DS]:
+; DIS-NEXT:       80: 00 00 00 00
+; DIS-NEXT: 00000080: R_POS (idx: 7) .storesTIUninit
+; DIS-NEXT:       84: 00 00 00 98
+; DIS-NEXT: 00000084: R_POS (idx: 23) TOC[TC0]
 ; DIS-NEXT:       88: 00 00 00 00
-; DIS-NEXT: 00000088: R_TLSM (idx: 37) TIUninit[UL]
-; DIS:      0000008c  (idx: 25) TIUninit[TC]:
-; DIS-NEXT:       8c: 00 00 00 04
-; DIS-NEXT: 0000008c: R_TLS (idx: 37) TIUninit[UL]
-; DIS:      00000090  (idx: 27) .TGInit[TC]:
-; DIS-NEXT:       90: 00 00 00 00
-; DIS-NEXT: 00000090: R_TLSM (idx: 35) TGInit
-; DIS:      00000094  (idx: 29) TGInit[TC]:
+; DIS:      0000008c  (idx: 21) loadsTGInit[DS]:
+; DIS-NEXT:       8c: 00 00 00 40
+; DIS-NEXT: 0000008c: R_POS (idx: 9) .loadsTGInit
+; DIS-NEXT:       90: 00 00 00 98
+; DIS-NEXT: 00000090: R_POS (idx: 23) TOC[TC0]
 ; DIS-NEXT:       94: 00 00 00 00
-; DIS-NEXT: 00000094: R_TLS (idx: 35) TGInit
-; DIS:      00000098  (idx: 31) GInit[TC]:
-; DIS-NEXT:       98: 00 00 00 6c
-; DIS-NEXT: 00000098: R_POS (idx: 15) GInit
+; DIS:      00000098  (idx: 25) TIUninit[TC]:
+; DIS-NEXT:       98: 00 00 00 00
+; DIS-NEXT: 00000098: R_TLS_LD (idx: 39) TIUninit[UL]
+; DIS:      0000009c  (idx: 27) _$TLSML[TC]:
+; DIS-NEXT:       9c: 00 00 00 00
+; DIS-NEXT: 0000009c: R_TLSML (idx: 27) _$TLSML[TC]
+; DIS:      000000a0  (idx: 29) .TGInit[TC]:
+; DIS-NEXT:       a0: 00 00 00 00
+; DIS-NEXT: 000000a0: R_TLSM (idx: 37) TGInit
+; DIS:      000000a4  (idx: 31) TGInit[TC]:
+; DIS-NEXT:       a4: 00 00 00 00
+; DIS-NEXT: 000000a4: R_TLS (idx: 37) TGInit
+; DIS:      000000a8  (idx: 33) GInit[TC]:
+; DIS-NEXT:       a8: 00 00 00 7c
+; DIS-NEXT: 000000a8: R_POS (idx: 17) GInit
 
 ; DIS:      Disassembly of section .tdata:
-; DIS:      00000000 (idx: 35) TGInit:
+; DIS:      00000000 (idx: 37) TGInit:
 ; DIS-NEXT:        0: 00 00 00 01
 
 ; DIS:      Disassembly of section .tbss:
-; DIS:      00000004 (idx: 37) TIUninit[UL]:
+; DIS:      00000004 (idx: 39) TIUninit[UL]:
 ; DIS-NEXT: ...
 
 attributes #0 = { nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" }

>From b55ae47c6725c0443ad480ecb17fa83830b54e48 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Thu, 14 Sep 2023 03:46:38 -0400
Subject: [PATCH 02/21] address comments

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  4 ++--
 llvm/lib/Target/PowerPC/PPCISelLowering.h     | 12 ++++++------
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       |  2 +-
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 12 +++++++-----
 4 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 9d0430ea2e515..87be6cc1f1be1 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1772,8 +1772,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
-  case PPCISD::GET_TLS_MOD:
-    return "PPCISD::GET_TLS_MOD";
+  case PPCISD::GET_TLS_MOD_AIX:
+    return "PPCISD::GET_TLS_MOD_AIX";
   case PPCISD::GET_TPOINTER:    return "PPCISD::GET_TPOINTER";
   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
   case PPCISD::TLSGD_AIX:       return "PPCISD::TLSGD_AIX";
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 7f80f70d699a6..3aaedb3efc0bd 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -375,16 +375,16 @@ namespace llvm {
     /// G8RC and inputs are X3/X4).
     TLSGD_AIX,
 
-    /// %x3 = GET_TLS_MOD _$TLSML - For the AIX local-dynamic TLS model,
+    /// %x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model,
     /// produces a call to __tls_get_mod(_$TLSML\@ml).
-    GET_TLS_MOD,
+    GET_TLS_MOD_AIX,
 
     /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(variable offset)
     /// Op that internally creates TOC entry for the "_$TLSML" symbol, generates
-    /// GET_TLS_MOD node which will be expanded into a call to __tls_get_mod,
-    /// and then add the variable offset with the result from the call.
-    /// This node is used in both 32-bit and 64-bit modes. The only difference
-    /// is register class.
+    /// GET_TLS_MOD_AIX node which will be expanded into a call to
+    /// __tls_get_mod, and then add the variable offset with the result from the
+    /// call. This node is used in both 32-bit and 64-bit modes. The only
+    /// difference is register class.
     TLSLD_AIX,
 
     /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 7dedf17adae70..c829b34f130b4 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -213,7 +213,7 @@ def PPCaddTls     : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
 def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
 def PPCaddiTlsgdL   : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
 def PPCgetTlsAddr   : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
-def PPCgetTlsMod   : SDNode<"PPCISD::GET_TLS_MOD", SDTIntUnaryOp>;
+def PPCgetTlsMod   : SDNode<"PPCISD::GET_TLS_MOD_AIX", SDTIntUnaryOp>;
 def PPCgetTpointer : SDNode<"PPCISD::GET_TPOINTER", SDTIntLeaf, []>;
 def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR",
                                SDTypeProfile<1, 3, [
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 32a75a32b9d87..339c8d4ad383e 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -160,8 +160,10 @@ namespace {
         if (IsAIX) {
           if (MI.getOpcode() == PPC::TLSLDAIX8 ||
               MI.getOpcode() == PPC::TLSLDAIX) {
-            // For Local-Dynamic
-            auto &Subtarget = MBB.getParent()->getSubtarget<PPCSubtarget>();
+            // For Local-Dynamic, the module handle is copied in r3. The copy is
+            // followed by GETtlsMOD32AIX/GETtlsMOD64AIX.
+            const PPCSubtarget &Subtarget =
+                MBB.getParent()->getSubtarget<PPCSubtarget>();
             bool IsLargeModel =
                 Subtarget.getTargetMachine().getCodeModel() == CodeModel::Large;
             Register ModuleHandleHReg;
@@ -190,9 +192,9 @@ namespace {
                 .addReg(GPR3)
                 .addReg(MI.getOperand(1).getReg());
           } else {
-            // For Global-Dynamic
-            // The variable offset and region handle are copied in r4 and r3.
-            // The copies are followed by GETtlsADDR32AIX/GETtlsADDR64AIX.
+            // For Global-Dynamic, the variable offset and region handle are
+            // copied in r4 and r3. The copies are followed by
+            // GETtlsADDR32AIX/GETtlsADDR64AIX.
             if (!IsTLSTPRelMI) {
               BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
                   .addReg(MI.getOperand(1).getReg());

>From 1282f6120b68887ebe65b15226e34df6b0cf19d3 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Mon, 18 Sep 2023 04:03:32 -0400
Subject: [PATCH 03/21] [NFC] address comments.

---
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     | 9 +++++----
 llvm/lib/Target/PowerPC/PPCISelLowering.h     | 8 ++++----
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 2 +-
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 1c197521f409a..6211abd5ad8c4 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -677,10 +677,11 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
     // in R3, need to generate branch to .__tls_get_mod.
     Register VarOffsetReg = Subtarget->isPPC64() ? PPC::X4 : PPC::R4;
     (void)VarOffsetReg;
-    assert((MI->getNumExplicitOperands() < 3 ||
-            (MI->getOperand(2).isReg() &&
-             MI->getOperand(2).getReg() == VarOffsetReg)) &&
-           "GETtls[ld]ADDR[32] must read GPR4");
+    assert(MI->getOpcode() == PPC::GETtlsMOD32AIX ||
+           MI->getOpcode() == PPC::GETtlsMOD64AIX ||
+           (MI->getOperand(2).isReg() &&
+            MI->getOperand(2).getReg() == VarOffsetReg) &&
+               "GETtls[ld]ADDR[32] must read GPR4");
     EmitAIXTlsCallHelper(MI);
     return;
   }
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 3aaedb3efc0bd..fed03835fd99a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -370,20 +370,20 @@ namespace llvm {
     /// G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY
     /// Op that combines two register copies of TOC entries
     /// (region handle into R3 and variable offset into R4) followed by a
-    /// GET_TLS_ADDR node which will be expanded to a call to __tls_get_addr.
+    /// GET_TLS_ADDR node which will be expanded to a call to .__tls_get_addr.
     /// This node is used in 64-bit mode as well (in which case the result is
     /// G8RC and inputs are X3/X4).
     TLSGD_AIX,
 
     /// %x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model,
-    /// produces a call to __tls_get_mod(_$TLSML\@ml).
+    /// produces a call to .__tls_get_mod(_$TLSML\@ml).
     GET_TLS_MOD_AIX,
 
     /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(variable offset)
     /// Op that internally creates TOC entry for the "_$TLSML" symbol, generates
     /// GET_TLS_MOD_AIX node which will be expanded into a call to
-    /// __tls_get_mod, and then add the variable offset with the result from the
-    /// call. This node is used in both 32-bit and 64-bit modes. The only
+    /// .__tls_get_mod, and then add the variable offset with the result from
+    /// the call. This node is used in both 32-bit and 64-bit modes. The only
     /// difference is register class.
     TLSLD_AIX,
 
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 339c8d4ad383e..3fd57f4159d3c 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -186,7 +186,7 @@ namespace {
                             : Register(Subtarget.getTOCPointerRegister()));
             BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
                 .addReg(MHReg);
-            // The call to __tls_get_mod.
+            // The call to .__tls_get_mod.
             BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3);
             BuildMI(MBB, I, DL, TII->get(Is64Bit ? PPC::ADD8 : PPC::ADD4), GPR3)
                 .addReg(GPR3)

>From 06d0508f571ded1c2a01d80bad553623f0d09235 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Wed, 20 Sep 2023 03:44:10 -0400
Subject: [PATCH 04/21] [NFC] address comments

---
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp   | 18 +++++++++---------
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 14 ++++++--------
 2 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 6211abd5ad8c4..dc2adc352b01d 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -671,17 +671,17 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
          "GETtls[ld]ADDR[32] must read GPR3");
 
   if (Subtarget->isAIXABI()) {
-    // On AIX, for TLSGD the variable offset should already be in R4 and the
-    // region handle should already be in R3, need to generate an absolute
-    // branch to .__tls_get_addr. For TLSLD the module handle should already be
-    // in R3, need to generate branch to .__tls_get_mod.
+    // For TLSGD, the variable offset should already be in R4 and the region
+    // handle should already be in R3, generate absolute branch to
+    // .__tls_get_addr. For TLSLD, the module handle should already be in R3,
+    // generate branch to .__tls_get_mod.
     Register VarOffsetReg = Subtarget->isPPC64() ? PPC::X4 : PPC::R4;
     (void)VarOffsetReg;
-    assert(MI->getOpcode() == PPC::GETtlsMOD32AIX ||
-           MI->getOpcode() == PPC::GETtlsMOD64AIX ||
-           (MI->getOperand(2).isReg() &&
-            MI->getOperand(2).getReg() == VarOffsetReg) &&
-               "GETtls[ld]ADDR[32] must read GPR4");
+    assert((MI->getOpcode() == PPC::GETtlsMOD32AIX ||
+            MI->getOpcode() == PPC::GETtlsMOD64AIX ||
+            (MI->getOperand(2).isReg() &&
+             MI->getOperand(2).getReg() == VarOffsetReg)) &&
+           "GETtls[ld]ADDR[32] must read GPR4");
     EmitAIXTlsCallHelper(MI);
     return;
   }
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 87be6cc1f1be1..8f6c57ae746c2 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3428,14 +3428,12 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
     return DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, VariableOffset);
   }
 
-  // The Local-Exec, Initial-Exec, Local-Dynamic, and General-Dynamic TLS models
-  // are currently supported access models. If Local- or Initial-exec or
-  // local-dynamic is not possible or specified, all GlobalTLSAddress nodes are
-  // lowered using the general-dynamic model. We need to generate two TOC
-  // entries, one for the variable offset, one for the region handle. The global
-  // address for the TOC entry of the region handle is created with the
-  // MO_TLSGDM_FLAG flag and the global address for the TOC entry of the
-  // variable offset is created with MO_TLSGD_FLAG.
+  // If Local- or Initial-exec or Local-dynamic is not possible or specified,
+  // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
+  // need to generate two TOC entries, one for the variable offset, one for the
+  // region handle. The global address for the TOC entry of the region handle is
+  // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
+  // entry of the variable offset is created with MO_TLSGD_FLAG.
   SDValue VariableOffsetTGA =
       DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
   SDValue RegionHandleTGA =

>From 1c66ebf8086cde1f26ecd033456e25fbafc0e9da Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Wed, 20 Sep 2023 23:09:46 -0400
Subject: [PATCH 05/21] [NFC] address comment

---
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index e4fee9c2fff23..a3590580777f3 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1563,7 +1563,7 @@ def GETtlsADDR64AIX :
                     "GETtlsADDR64AIX",
                     [(set i64:$rD,
                       (PPCgetTlsAddr i64:$offset, i64:$handle))]>, isPPC64;
-// On AIX, the call to __tls_get_mod need one input in X3 for the module handle.
+// On AIX, the call to .__tls_get_mod need one input in X3 for the module handle.
 def GETtlsMOD64AIX :
   PPCEmitTimePseudo<(outs g8rc:$rD),(ins g8rc:$handle),
                     "GETtlsMOD64AIX",

>From 9d4ba3aab211f23c99e6c0f588481eec3bb4620c Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Thu, 21 Sep 2023 23:03:58 -0400
Subject: [PATCH 06/21] Attempt to address comment: use r4 for LoadOffsetToc

However, it looks like the machine-scheduler is interfering and still
schedules LoadOffsetToc ahead of the .__tls_get_mod call.
---
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp |  40 ++++-
 .../test/CodeGen/PowerPC/aix-tls-gd-double.ll |  78 +++++----
 llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll   |  78 +++++----
 .../CodeGen/PowerPC/aix-tls-gd-longlong.ll    | 162 +++++++++---------
 .../CodeGen/PowerPC/aix-tls-local-dynamic.ll  | 128 +++++++-------
 .../PowerPC/aix-tls-xcoff-reloc-large.ll      |  49 +++---
 .../CodeGen/PowerPC/aix-tls-xcoff-reloc.ll    |  48 +++---
 7 files changed, 308 insertions(+), 275 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 3fd57f4159d3c..bb03f8f911c49 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -160,37 +160,61 @@ namespace {
         if (IsAIX) {
           if (MI.getOpcode() == PPC::TLSLDAIX8 ||
               MI.getOpcode() == PPC::TLSLDAIX) {
-            // For Local-Dynamic, the module handle is copied in r3. The copy is
-            // followed by GETtlsMOD32AIX/GETtlsMOD64AIX.
+            // For Local-Dynamic, need to swap the position of VarOffsetInst and
+            // MI, so that VarOffsetInst can use R/X4 to reduce register
+            // pressure.
             const PPCSubtarget &Subtarget =
                 MBB.getParent()->getSubtarget<PPCSubtarget>();
             bool IsLargeModel =
                 Subtarget.getTargetMachine().getCodeModel() == CodeModel::Large;
-            Register ModuleHandleHReg;
             unsigned LDTocOp =
                 Is64Bit ? (IsLargeModel ? PPC::LDtocL : PPC::LDtoc)
                         : (IsLargeModel ? PPC::LWZtocL : PPC::LWZtoc);
+            assert(RegInfo.hasOneDef(MI.getOperand(1).getReg()) &&
+                   "TLSLDAIX expects single def of its operand.");
+            MachineInstr *VarOffsetInst =
+                RegInfo.getOneDef(MI.getOperand(1).getReg())->getParent();
+            assert(VarOffsetInst->getOpcode() == LDTocOp &&
+                   "Unexpected LDTocOp.");
+            if (IsLargeModel) {
+              // Get the ADDIS instruction when using large model.
+              assert(RegInfo.hasOneDef(VarOffsetInst->getOperand(2).getReg()) &&
+                     "LDTocOp expects single def of its operand.");
+              VarOffsetInst =
+                  RegInfo.getOneDef(VarOffsetInst->getOperand(2).getReg())
+                      ->getParent();
+              assert(VarOffsetInst->getOpcode() ==
+                         (Is64Bit ? PPC::ADDIStocHA8 : PPC::ADDIStocHA) &&
+                     "Unexpected ADDIStocHA.");
+              // FIXME: machine-scheduler could schedule ADDIStocHA ahead of
+              // GETtlsMODAIX, and still has to use extra register.
+            }
+            Register ModuleHandleHReg;
             if (IsLargeModel) {
               ModuleHandleHReg = RegInfo.createVirtualRegister(GPRNoZero);
-              BuildMI(MBB, I, DL,
+              BuildMI(MBB, *VarOffsetInst, DL,
                       TII->get(Is64Bit ? PPC::ADDIStocHA8 : PPC::ADDIStocHA),
                       ModuleHandleHReg)
                   .addReg(Subtarget.getTOCPointerRegister())
                   .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG);
             }
             Register MHReg = RegInfo.createVirtualRegister(GPRNoZero);
-            BuildMI(MBB, I, DL, TII->get(LDTocOp), MHReg)
+            BuildMI(MBB, *VarOffsetInst, DL, TII->get(LDTocOp), MHReg)
                 .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG)
                 .addReg(IsLargeModel
                             ? ModuleHandleHReg
                             : Register(Subtarget.getTOCPointerRegister()));
-            BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
+            // The module handle is copied in r3.
+            BuildMI(MBB, *VarOffsetInst, DL, TII->get(TargetOpcode::COPY), GPR3)
                 .addReg(MHReg);
             // The call to .__tls_get_mod.
-            BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3);
+            BuildMI(MBB, *VarOffsetInst, DL, TII->get(Opc2), GPR3).addReg(GPR3);
+            // Copy VarOffset to R/X4
+            BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
+                .addReg(MI.getOperand(1).getReg());
             BuildMI(MBB, I, DL, TII->get(Is64Bit ? PPC::ADD8 : PPC::ADD4), GPR3)
                 .addReg(GPR3)
-                .addReg(MI.getOperand(1).getReg());
+                .addReg(GPR4);
           } else {
             // For Global-Dynamic, the variable offset and region handle are
             // copied in r4 and r3. The copies are followed by
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
index 13bef83cf50fd..ea1a2b8fe9ade 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
@@ -157,10 +157,10 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    lwz 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL32-NEXT:    lwz 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
-; SMALL32-NEXT:    add 3, 3, 6
+; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL32-NEXT:    add 3, 3, 4
 ; SMALL32-NEXT:    stfd 1, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
@@ -173,11 +173,12 @@ define void @storesTIInit(double %Val) #0 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 6, L..C4 at l(3)
-; LARGE32-NEXT:    addis 3, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
+; LARGE32-NEXT:    addis 4, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 6, L..C5 at l(4)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    add 3, 3, 6
+; LARGE32-NEXT:    mr 4, 6
+; LARGE32-NEXT:    add 3, 3, 4
 ; LARGE32-NEXT:    stfd 1, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
@@ -189,10 +190,10 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    ld 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL64-NEXT:    ld 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL64-NEXT:    add 3, 3, 6
+; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL64-NEXT:    add 3, 3, 4
 ; SMALL64-NEXT:    stfd 1, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -203,13 +204,13 @@ define void @storesTIInit(double %Val) #0 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    addis 3, L..C4 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 6, L..C4 at l(3)
-; LARGE64-NEXT:    addis 3, L..C5 at u(2)
-; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    addis 3, L..C4 at u(2)
+; LARGE64-NEXT:    addis 6, L..C5 at u(2)
+; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
-; LARGE64-NEXT:    add 3, 3, 6
+; LARGE64-NEXT:    ld 4, L..C5 at l(6)
+; LARGE64-NEXT:    add 3, 3, 4
 ; LARGE64-NEXT:    stfd 1, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -457,11 +458,11 @@ define double @loadsTIInit() #1 {
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    lwz 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL32-NEXT:    lwz 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL32-NEXT:    add 3, 3, 4
 ; SMALL32-NEXT:    lwz 4, L..C8(2) # @GInit
-; SMALL32-NEXT:    add 3, 3, 6
 ; SMALL32-NEXT:    lfd 0, 0(3)
 ; SMALL32-NEXT:    lfd 1, 0(4)
 ; SMALL32-NEXT:    fadd 1, 0, 1
@@ -476,11 +477,12 @@ define double @loadsTIInit() #1 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4@u(2)
-; LARGE32-NEXT:    lwz 6, L..C4@l(3)
-; LARGE32-NEXT:    addis 3, L..C5@u(2)
-; LARGE32-NEXT:    lwz 3, L..C5@l(3)
+; LARGE32-NEXT:    lwz 3, L..C4@l(3)
+; LARGE32-NEXT:    addis 4, L..C5@u(2)
+; LARGE32-NEXT:    lwz 6, L..C5@l(4)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    add 3, 3, 6
+; LARGE32-NEXT:    mr 4, 6
+; LARGE32-NEXT:    add 3, 3, 4
 ; LARGE32-NEXT:    lfd 0, 0(3)
 ; LARGE32-NEXT:    addis 3, L..C8@u(2)
 ; LARGE32-NEXT:    lwz 3, L..C8@l(3)
@@ -496,11 +498,11 @@ define double @loadsTIInit() #1 {
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    ld 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL64-NEXT:    ld 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL64-NEXT:    add 3, 3, 4
 ; SMALL64-NEXT:    ld 4, L..C8(2) # @GInit
-; SMALL64-NEXT:    add 3, 3, 6
 ; SMALL64-NEXT:    lfd 0, 0(3)
 ; SMALL64-NEXT:    lfd 1, 0(4)
 ; SMALL64-NEXT:    fadd 1, 0, 1
@@ -513,13 +515,13 @@ define double @loadsTIInit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    addis 3, L..C4@u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 6, L..C4@l(3)
-; LARGE64-NEXT:    addis 3, L..C5@u(2)
-; LARGE64-NEXT:    ld 3, L..C5@l(3)
+; LARGE64-NEXT:    addis 3, L..C4@u(2)
+; LARGE64-NEXT:    addis 6, L..C5@u(2)
+; LARGE64-NEXT:    ld 3, L..C4@l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
-; LARGE64-NEXT:    add 3, 3, 6
+; LARGE64-NEXT:    ld 4, L..C5@l(6)
+; LARGE64-NEXT:    add 3, 3, 4
 ; LARGE64-NEXT:    addis 4, L..C8@u(2)
 ; LARGE64-NEXT:    lfd 0, 0(3)
 ; LARGE64-NEXT:    ld 3, L..C8@l(4)
@@ -641,10 +643,10 @@ entry:
 ; SMALL32-LABEL:  L..C3:
 ; SMALL32-NEXT:   .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL32-LABEL:  L..C4:
-; SMALL32-NEXT:   .tc TIInit[TC],TIInit[TL]@ld
-; SMALL32-LABEL:  L..C5:
 ; SMALL32-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; SMALL32-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL32-LABEL:  L..C5:
+; SMALL32-NEXT:   .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL32-LABEL:  L..C6:
 ; SMALL32-NEXT:   .tc .TWInit[TC],TWInit[TL]@m
 ; SMALL32-LABEL:  L..C7:
@@ -662,10 +664,10 @@ entry:
 ; LARGE32-LABEL:  L..C3:
 ; LARGE32-NEXT:   .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL:  L..C4:
-; LARGE32-NEXT:   .tc TIInit[TE],TIInit[TL]@ld
-; LARGE32-LABEL:  L..C5:
 ; LARGE32-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE32-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE32-LABEL:  L..C5:
+; LARGE32-NEXT:   .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE32-LABEL:  L..C6:
 ; LARGE32-NEXT:   .tc .TWInit[TE],TWInit[TL]@m
 ; LARGE32-LABEL:  L..C7:
@@ -683,10 +685,10 @@ entry:
 ; SMALL64-LABEL:  L..C3:
 ; SMALL64-NEXT:  .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL64-LABEL:  L..C4:
-; SMALL64-NEXT:  .tc TIInit[TC],TIInit[TL]@ld
-; SMALL64-LABEL:  L..C5:
 ; SMALL64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; SMALL64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL64-LABEL:  L..C5:
+; SMALL64-NEXT:  .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL64-LABEL:  L..C6:
 ; SMALL64-NEXT:  .tc .TWInit[TC],TWInit[TL]@m
 ; SMALL64-LABEL:  L..C7:
@@ -704,10 +706,10 @@ entry:
 ; LARGE64-LABEL:  L..C3:
 ; LARGE64-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE64-LABEL:  L..C4:
-; LARGE64-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
-; LARGE64-LABEL:  L..C5:
 ; LARGE64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE64-LABEL:  L..C5:
+; LARGE64-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE64-LABEL:  L..C6:
 ; LARGE64-NEXT:  .tc .TWInit[TE],TWInit[TL]@m
 ; LARGE64-LABEL:  L..C7:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
index dc75db43d38c7..e1cfb6fd767cb 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
@@ -165,10 +165,10 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    mr 6, 3
-; SMALL32-NEXT:    lwz 7, L..C4(2)
-; SMALL32-NEXT:    lwz 3, L..C5(2)
+; SMALL32-NEXT:    lwz 3, L..C4(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
-; SMALL32-NEXT:    add 3, 3, 7
+; SMALL32-NEXT:    lwz 4, L..C5(2)
+; SMALL32-NEXT:    add 3, 3, 4
 ; SMALL32-NEXT:    stw 6, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
@@ -182,11 +182,12 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 6, 3
 ; LARGE32-NEXT:    addis 3, L..C4@u(2)
-; LARGE32-NEXT:    lwz 7, L..C4@l(3)
-; LARGE32-NEXT:    addis 3, L..C5@u(2)
-; LARGE32-NEXT:    lwz 3, L..C5@l(3)
+; LARGE32-NEXT:    lwz 3, L..C4@l(3)
+; LARGE32-NEXT:    addis 4, L..C5@u(2)
+; LARGE32-NEXT:    lwz 7, L..C5@l(4)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    add 3, 3, 7
+; LARGE32-NEXT:    mr 4, 7
+; LARGE32-NEXT:    add 3, 3, 4
 ; LARGE32-NEXT:    stw 6, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
@@ -199,10 +200,10 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    mr 6, 3
-; SMALL64-NEXT:    ld 7, L..C4(2)
-; SMALL64-NEXT:    ld 3, L..C5(2)
+; SMALL64-NEXT:    ld 3, L..C4(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL64-NEXT:    add 3, 3, 7
+; SMALL64-NEXT:    ld 4, L..C5(2)
+; SMALL64-NEXT:    add 3, 3, 4
 ; SMALL64-NEXT:    stw 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -213,14 +214,14 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C4@u(2)
-; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 7, L..C4@l(3)
-; LARGE64-NEXT:    addis 3, L..C5@u(2)
-; LARGE64-NEXT:    ld 3, L..C5@l(3)
+; LARGE64-NEXT:    addis 7, L..C5@u(2)
+; LARGE64-NEXT:    ld 3, L..C4@l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
-; LARGE64-NEXT:    add 3, 3, 7
+; LARGE64-NEXT:    ld 4, L..C5@l(7)
+; LARGE64-NEXT:    add 3, 3, 4
 ; LARGE64-NEXT:    stw 6, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -472,11 +473,11 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    lwz 6, L..C4(2)
-; SMALL32-NEXT:    lwz 3, L..C5(2)
+; SMALL32-NEXT:    lwz 3, L..C4(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    lwz 4, L..C5(2)
+; SMALL32-NEXT:    add 3, 3, 4
 ; SMALL32-NEXT:    lwz 4, L..C8(2)
-; SMALL32-NEXT:    add 3, 3, 6
 ; SMALL32-NEXT:    lwz 3, 0(3)
 ; SMALL32-NEXT:    lwz 4, 0(4)
 ; SMALL32-NEXT:    add 3, 4, 3
@@ -491,11 +492,12 @@ define i32 @loadsTIUninit() #1 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4@u(2)
-; LARGE32-NEXT:    lwz 6, L..C4@l(3)
-; LARGE32-NEXT:    addis 3, L..C5@u(2)
-; LARGE32-NEXT:    lwz 3, L..C5@l(3)
+; LARGE32-NEXT:    lwz 3, L..C4@l(3)
+; LARGE32-NEXT:    addis 4, L..C5@u(2)
+; LARGE32-NEXT:    lwz 6, L..C5@l(4)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    add 3, 3, 6
+; LARGE32-NEXT:    mr 4, 6
+; LARGE32-NEXT:    add 3, 3, 4
 ; LARGE32-NEXT:    lwz 3, 0(3)
 ; LARGE32-NEXT:    addis 4, L..C8@u(2)
 ; LARGE32-NEXT:    lwz 4, L..C8@l(4)
@@ -511,11 +513,11 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    ld 6, L..C4(2)
-; SMALL64-NEXT:    ld 3, L..C5(2)
+; SMALL64-NEXT:    ld 3, L..C4(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    ld 4, L..C5(2)
+; SMALL64-NEXT:    add 3, 3, 4
 ; SMALL64-NEXT:    ld 4, L..C8(2)
-; SMALL64-NEXT:    add 3, 3, 6
 ; SMALL64-NEXT:    lwz 3, 0(3)
 ; SMALL64-NEXT:    lwz 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -528,14 +530,14 @@ define i32 @loadsTIUninit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    addis 3, L..C4@u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 6, L..C4@l(3)
-; LARGE64-NEXT:    addis 3, L..C5@u(2)
-; LARGE64-NEXT:    ld 3, L..C5@l(3)
+; LARGE64-NEXT:    addis 3, L..C4@u(2)
+; LARGE64-NEXT:    addis 6, L..C5@u(2)
+; LARGE64-NEXT:    ld 3, L..C4@l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    ld 4, L..C5@l(6)
+; LARGE64-NEXT:    add 3, 3, 4
 ; LARGE64-NEXT:    addis 4, L..C8@u(2)
-; LARGE64-NEXT:    add 3, 3, 6
 ; LARGE64-NEXT:    ld 4, L..C8@l(4)
 ; LARGE64-NEXT:    lwz 3, 0(3)
 ; LARGE64-NEXT:    lwz 4, 0(4)
@@ -656,10 +658,10 @@ entry:
 ; SMALL32-LABEL: L..C3:
 ; SMALL32-NEXT:	 .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL32-LABEL: L..C4:
-; SMALL32-NEXT:	 .tc TIUninit[TC],TIUninit[UL]@ld
-; SMALL32-LABEL: L..C5:
 ; SMALL32-NEXT:	 .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; SMALL32-NEXT:	 .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL32-LABEL: L..C5:
+; SMALL32-NEXT:	 .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL32-LABEL: L..C6:
 ; SMALL32-NEXT:	 .tc .TWUninit[TC],TWUninit[TL]@m
 ; SMALL32-LABEL: L..C7:
@@ -677,10 +679,10 @@ entry:
 ; LARGE32-LABEL: L..C3:
 ; LARGE32-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL: L..C4:
-; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
-; LARGE32-LABEL: L..C5:
 ; LARGE32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE32-LABEL: L..C5:
+; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE32-LABEL: L..C6:
 ; LARGE32-NEXT:  .tc .TWUninit[TE],TWUninit[TL]@m
 ; LARGE32-LABEL: L..C7:
@@ -698,10 +700,10 @@ entry:
 ; SMALL64-LABEL:  L..C3:
 ; SMALL64-NEXT:   .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL64-LABEL:  L..C4:
-; SMALL64-NEXT:   .tc TIUninit[TC],TIUninit[UL]@ld
-; SMALL64-LABEL:  L..C5:
 ; SMALL64-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; SMALL64-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL64-LABEL:  L..C5:
+; SMALL64-NEXT:   .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL64-LABEL:  L..C6:
 ; SMALL64-NEXT:   .tc .TWUninit[TC],TWUninit[TL]@m
 ; SMALL64-LABEL:  L..C7:
@@ -719,10 +721,10 @@ entry:
 ; LARGE64-LABEL:  L..C3:
 ; LARGE64-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE64-LABEL:  L..C4:
-; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
-; LARGE64-LABEL:  L..C5:
 ; LARGE64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE64-LABEL:  L..C5:
+; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE64-LABEL:  L..C6:
 ; LARGE64-NEXT:  .tc .TWUninit[TE],TWUninit[TL]@m
 ; LARGE64-LABEL:  L..C7:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
index d19b5ad9b5e50..2e18d6c9bfd7e 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
@@ -99,11 +99,11 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    mr 7, 3
-; SMALL32-NEXT:    lwz 8, L..C2(2)
 ; SMALL32-NEXT:    mr 6, 4
-; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
-; SMALL32-NEXT:    add 3, 3, 8
+; SMALL32-NEXT:    lwz 4, L..C3(2)
+; SMALL32-NEXT:    add 3, 3, 4
 ; SMALL32-NEXT:    stw 6, 4(3)
 ; SMALL32-NEXT:    stw 7, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
@@ -119,11 +119,12 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; LARGE32-NEXT:    mr 7, 3
 ; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    addis 3, L..C2@u(2)
-; LARGE32-NEXT:    lwz 8, L..C2@l(3)
-; LARGE32-NEXT:    addis 3, L..C3@u(2)
-; LARGE32-NEXT:    lwz 3, L..C3@l(3)
+; LARGE32-NEXT:    lwz 3, L..C2@l(3)
+; LARGE32-NEXT:    addis 4, L..C3@u(2)
+; LARGE32-NEXT:    lwz 8, L..C3@l(4)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    add 3, 3, 8
+; LARGE32-NEXT:    mr 4, 8
+; LARGE32-NEXT:    add 3, 3, 4
 ; LARGE32-NEXT:    stw 6, 4(3)
 ; LARGE32-NEXT:    stw 7, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
@@ -137,10 +138,10 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    mr 6, 3
-; SMALL64-NEXT:    ld 7, L..C2(2)
-; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL64-NEXT:    add 3, 3, 7
+; SMALL64-NEXT:    ld 4, L..C3(2)
+; SMALL64-NEXT:    add 3, 3, 4
 ; SMALL64-NEXT:    std 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -151,14 +152,14 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C2@u(2)
-; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 7, L..C2@l(3)
-; LARGE64-NEXT:    addis 3, L..C3@u(2)
-; LARGE64-NEXT:    ld 3, L..C3@l(3)
+; LARGE64-NEXT:    addis 7, L..C3@u(2)
+; LARGE64-NEXT:    ld 3, L..C2@l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
-; LARGE64-NEXT:    add 3, 3, 7
+; LARGE64-NEXT:    ld 4, L..C3@l(7)
+; LARGE64-NEXT:    add 3, 3, 4
 ; LARGE64-NEXT:    std 6, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -177,11 +178,11 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    mr 7, 3
-; SMALL32-NEXT:    lwz 8, L..C4(2)
 ; SMALL32-NEXT:    mr 6, 4
-; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
-; SMALL32-NEXT:    add 3, 3, 8
+; SMALL32-NEXT:    lwz 4, L..C4(2)
+; SMALL32-NEXT:    add 3, 3, 4
 ; SMALL32-NEXT:    stw 6, 4(3)
 ; SMALL32-NEXT:    stw 7, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
@@ -196,12 +197,13 @@ define void @storesTIInit(i64 %Val) #0 {
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 7, 3
 ; LARGE32-NEXT:    mr 6, 4
-; LARGE32-NEXT:    addis 3, L..C4@u(2)
-; LARGE32-NEXT:    lwz 8, L..C4@l(3)
-; LARGE32-NEXT:    addis 3, L..C3@u(2)
-; LARGE32-NEXT:    lwz 3, L..C3@l(3)
+; LARGE32-NEXT:    addis 3, L..C2@u(2)
+; LARGE32-NEXT:    lwz 3, L..C2@l(3)
+; LARGE32-NEXT:    addis 4, L..C4@u(2)
+; LARGE32-NEXT:    lwz 8, L..C4@l(4)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    add 3, 3, 8
+; LARGE32-NEXT:    mr 4, 8
+; LARGE32-NEXT:    add 3, 3, 4
 ; LARGE32-NEXT:    stw 6, 4(3)
 ; LARGE32-NEXT:    stw 7, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
@@ -215,10 +217,10 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    mr 6, 3
-; SMALL64-NEXT:    ld 7, L..C4(2)
-; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL64-NEXT:    add 3, 3, 7
+; SMALL64-NEXT:    ld 4, L..C4(2)
+; SMALL64-NEXT:    add 3, 3, 4
 ; SMALL64-NEXT:    std 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -229,14 +231,14 @@ define void @storesTIInit(i64 %Val) #0 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    mr 6, 3
-; LARGE64-NEXT:    addis 3, L..C4@u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 7, L..C4@l(3)
-; LARGE64-NEXT:    addis 3, L..C3@u(2)
-; LARGE64-NEXT:    ld 3, L..C3@l(3)
+; LARGE64-NEXT:    mr 6, 3
+; LARGE64-NEXT:    addis 3, L..C2@u(2)
+; LARGE64-NEXT:    addis 7, L..C4@u(2)
+; LARGE64-NEXT:    ld 3, L..C2@l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
-; LARGE64-NEXT:    add 3, 3, 7
+; LARGE64-NEXT:    ld 4, L..C4@l(7)
+; LARGE64-NEXT:    add 3, 3, 4
 ; LARGE64-NEXT:    std 6, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -331,7 +333,7 @@ define i64 @loadsTGInit() #1 {
 ; SMALL32-NEXT:    lwz 4, L..C1(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    lwz 4, L..C7(2) # @GInit
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -374,7 +376,7 @@ define i64 @loadsTGInit() #1 {
 ; SMALL64-NEXT:    ld 4, L..C1(2)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    ld 4, L..C7(2) # @GInit
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -416,11 +418,11 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    lwz 6, L..C2(2)
-; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
-; SMALL32-NEXT:    lwz 4, L..C7(2)
-; SMALL32-NEXT:    add 3, 3, 6
+; SMALL32-NEXT:    lwz 4, L..C3(2)
+; SMALL32-NEXT:    add 3, 3, 4
+; SMALL32-NEXT:    lwz 4, L..C7(2) # @GInit
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -438,11 +440,12 @@ define i64 @loadsTIUninit() #1 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C2@u(2)
-; LARGE32-NEXT:    lwz 6, L..C2@l(3)
-; LARGE32-NEXT:    addis 3, L..C3@u(2)
-; LARGE32-NEXT:    lwz 3, L..C3@l(3)
+; LARGE32-NEXT:    lwz 3, L..C2@l(3)
+; LARGE32-NEXT:    addis 4, L..C3@u(2)
+; LARGE32-NEXT:    lwz 6, L..C3@l(4)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    add 3, 3, 6
+; LARGE32-NEXT:    mr 4, 6
+; LARGE32-NEXT:    add 3, 3, 4
 ; LARGE32-NEXT:    lwz 4, 4(3)
 ; LARGE32-NEXT:    lwz 3, 0(3)
 ; LARGE32-NEXT:    addis 5, L..C7@u(2)
@@ -461,11 +464,11 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    ld 6, L..C2(2)
-; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL64-NEXT:    ld 4, L..C7(2)
-; SMALL64-NEXT:    add 3, 3, 6
+; SMALL64-NEXT:    ld 4, L..C3(2)
+; SMALL64-NEXT:    add 3, 3, 4
+; SMALL64-NEXT:    ld 4, L..C7(2) # @GInit
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -478,14 +481,14 @@ define i64 @loadsTIUninit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    addis 3, L..C2@u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 6, L..C2@l(3)
-; LARGE64-NEXT:    addis 3, L..C3@u(2)
-; LARGE64-NEXT:    ld 3, L..C3@l(3)
+; LARGE64-NEXT:    addis 3, L..C2@u(2)
+; LARGE64-NEXT:    addis 6, L..C3@u(2)
+; LARGE64-NEXT:    ld 3, L..C2@l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    ld 4, L..C3@l(6)
+; LARGE64-NEXT:    add 3, 3, 4
 ; LARGE64-NEXT:    addis 4, L..C7@u(2)
-; LARGE64-NEXT:    add 3, 3, 6
 ; LARGE64-NEXT:    ld 4, L..C7@l(4)
 ; LARGE64-NEXT:    ld 3, 0(3)
 ; LARGE64-NEXT:    ld 4, 0(4)
@@ -508,11 +511,11 @@ define i64 @loadsTIInit() #1 {
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    lwz 6, L..C4(2)
-; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
-; SMALL32-NEXT:    lwz 4, L..C7(2)
-; SMALL32-NEXT:    add 3, 3, 6
+; SMALL32-NEXT:    lwz 4, L..C4(2)
+; SMALL32-NEXT:    add 3, 3, 4
+; SMALL32-NEXT:    lwz 4, L..C7(2) # @GInit
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -529,12 +532,13 @@ define i64 @loadsTIInit() #1 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    addis 3, L..C4@u(2)
-; LARGE32-NEXT:    lwz 6, L..C4@l(3)
-; LARGE32-NEXT:    addis 3, L..C3@u(2)
-; LARGE32-NEXT:    lwz 3, L..C3@l(3)
+; LARGE32-NEXT:    addis 3, L..C2@u(2)
+; LARGE32-NEXT:    lwz 3, L..C2@l(3)
+; LARGE32-NEXT:    addis 4, L..C4@u(2)
+; LARGE32-NEXT:    lwz 6, L..C4@l(4)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    add 3, 3, 6
+; LARGE32-NEXT:    mr 4, 6
+; LARGE32-NEXT:    add 3, 3, 4
 ; LARGE32-NEXT:    lwz 4, 4(3)
 ; LARGE32-NEXT:    lwz 3, 0(3)
 ; LARGE32-NEXT:    addis 5, L..C7@u(2)
@@ -553,11 +557,11 @@ define i64 @loadsTIInit() #1 {
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    ld 6, L..C4(2)
-; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL64-NEXT:    ld 4, L..C7(2)
-; SMALL64-NEXT:    add 3, 3, 6
+; SMALL64-NEXT:    ld 4, L..C4(2)
+; SMALL64-NEXT:    add 3, 3, 4
+; SMALL64-NEXT:    ld 4, L..C7(2) # @GInit
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -570,14 +574,14 @@ define i64 @loadsTIInit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    addis 3, L..C4@u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 6, L..C4@l(3)
-; LARGE64-NEXT:    addis 3, L..C3@u(2)
-; LARGE64-NEXT:    ld 3, L..C3@l(3)
+; LARGE64-NEXT:    addis 3, L..C2@u(2)
+; LARGE64-NEXT:    addis 6, L..C4@u(2)
+; LARGE64-NEXT:    ld 3, L..C2@l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    ld 4, L..C4@l(6)
+; LARGE64-NEXT:    add 3, 3, 4
 ; LARGE64-NEXT:    addis 4, L..C7@u(2)
-; LARGE64-NEXT:    add 3, 3, 6
 ; LARGE64-NEXT:    ld 4, L..C7@l(4)
 ; LARGE64-NEXT:    ld 3, 0(3)
 ; LARGE64-NEXT:    ld 4, 0(4)
@@ -603,7 +607,7 @@ define i64 @loadsTWInit() #1 {
 ; SMALL32-NEXT:    lwz 4, L..C6(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    lwz 4, L..C7(2) # @GInit
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -646,7 +650,7 @@ define i64 @loadsTWInit() #1 {
 ; SMALL64-NEXT:    ld 4, L..C6(2)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    ld 4, L..C7(2) # @GInit
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -700,10 +704,10 @@ entry:
 ; SMALL32-LABEL:  L..C1:
 ; SMALL32-NEXT:  .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL32-LABEL:  L..C2:
-; SMALL32-NEXT:  .tc TIUninit[TC],TIUninit[UL]@ld
-; SMALL32-LABEL:  L..C3:
 ; SMALL32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; SMALL32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL32-LABEL:  L..C3:
+; SMALL32-NEXT:  .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL32-LABEL:  L..C4:
 ; SMALL32-NEXT:  .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL32-LABEL:  L..C5:
@@ -719,10 +723,10 @@ entry:
 ; LARGE32-LABEL:  L..C1:
 ; LARGE32-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL:  L..C2:
-; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
-; LARGE32-LABEL:  L..C3:
 ; LARGE32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE32-LABEL:  L..C3:
+; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE32-LABEL:  L..C4:
 ; LARGE32-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE32-LABEL:  L..C5:
@@ -738,10 +742,10 @@ entry:
 ; SMALL64-LABEL:  L..C1:
 ; SMALL64-NEXT:  .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL64-LABEL:  L..C2:
-; SMALL64-NEXT:  .tc TIUninit[TC],TIUninit[UL]@ld
-; SMALL64-LABEL:  L..C3:
 ; SMALL64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; SMALL64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL64-LABEL:  L..C3:
+; SMALL64-NEXT:  .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL64-LABEL:  L..C4:
 ; SMALL64-NEXT:  .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL64-LABEL:  L..C5:
@@ -757,10 +761,10 @@ entry:
 ; LARGE64-LABEL:  L..C1:
 ; LARGE64-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE64-LABEL:  L..C2:
-; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
-; LARGE64-LABEL:  L..C3:
 ; LARGE64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE64-LABEL:  L..C3:
+; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE64-LABEL:  L..C4:
 ; LARGE64-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE64-LABEL:  L..C5:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
index 9faa99b6eb2ba..0513f8de61620 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
@@ -16,22 +16,22 @@
 
 define i32 @loadTGInit() {
 ; SMALL-LABEL:  loadTGInit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        lwz [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
 ;
 ; LARGE-LABEL:  loadTGInit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        lwz [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
 entry:
@@ -42,22 +42,22 @@ entry:
 
 define void @storeTGInit(i32 noundef signext %i) {
 ; SMALL-LABEL:  storeTGInit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        stw [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
 ;
 ; LARGE-LABEL:  storeTGInit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        stw [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
 entry:
@@ -68,22 +68,22 @@ entry:
 
 define i32 @loadTGUninit() {
 ; SMALL-LABEL:  loadTGUninit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        lwz [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
 ;
 ; LARGE-LABEL:  loadTGUninit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        lwz [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
 entry:
@@ -94,22 +94,22 @@ entry:
 
 define void @storeTGUninit(i32 noundef signext %i) {
 ; SMALL-LABEL:  storeTGUninit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        stw [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
 ;
 ; LARGE-LABEL:  storeTGUninit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        stw [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
 entry:
@@ -120,22 +120,22 @@ entry:
 
 define i32 @loadTIInit() {
 ; SMALL-LABEL:  loadTIInit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        lwz [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
 ;
 ; LARGE-LABEL:  loadTIInit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        lwz [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
 entry:
@@ -146,22 +146,22 @@ entry:
 
 define void @storeTIInit(i32 noundef signext %i) {
 ; SMALL-LABEL:  storeTIInit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        stw [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
 ;
 ; LARGE-LABEL:  storeTIInit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        stw [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
 entry:
@@ -172,22 +172,22 @@ entry:
 
 define i32 @loadTIUninit() {
 ; SMALL-LABEL:  loadTIUninit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        lwz [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
 ;
 ; LARGE-LABEL:  loadTIUninit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        lwz [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
 entry:
@@ -198,22 +198,22 @@ entry:
 
 define void @storeTIUninit(i32 noundef signext %i) {
 ; SMALL-LABEL:  storeTIUninit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        stw [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
 ;
 ; LARGE-LABEL:  storeTIUninit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        stw [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
 entry:
@@ -224,22 +224,22 @@ entry:
 
 define i32 @loadTWInit() {
 ; SMALL-LABEL:  loadTWInit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        lwz [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
 ;
 ; LARGE-LABEL:  loadTWInit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        lwz [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
 entry:
@@ -250,22 +250,22 @@ entry:
 
 define void @storeTWInit(i32 noundef signext %i) {
 ; SMALL-LABEL:  storeTWInit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        stw [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
 ;
 ; LARGE-LABEL:  storeTWInit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        stw [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
 entry:
@@ -276,22 +276,22 @@ entry:
 
 define i32 @loadTWUninit() {
 ; SMALL-LABEL:  loadTWUninit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        lwz [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
 ;
 ; LARGE-LABEL:  loadTWUninit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        lwz [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
 entry:
@@ -302,22 +302,22 @@ entry:
 
 define void @storeTWUninit(i32 noundef signext %i) {
 ; SMALL-LABEL:  storeTWUninit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        stw [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
 ;
 ; LARGE-LABEL:  storeTWUninit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        stw [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
 entry:
@@ -329,11 +329,11 @@ entry:
 ; SMALL:          .extern .__tls_get_mod[PR]
 ; LARGE:          .extern .__tls_get_mod[PR]
 
-; SMALL:        [[TGInitL]]:
-; SMALL-NEXT:   .tc TGInit[TC],TGInit[TL]@ld
 ; SMALL:        [[ModuleHandleL]]:
 ; SMALL-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; SMALL-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL:        [[TGInitL]]:
+; SMALL-NEXT:   .tc TGInit[TC],TGInit[TL]@ld
 ; SMALL:        [[TGUninitL]]:
 ; SMALL-NEXT:   .tc TGUninit[TC],TGUninit[TL]@ld
 ; SMALL:        [[TIInitL]]:
@@ -345,11 +345,11 @@ entry:
 ; SMALL:        [[TWUninitL]]:
 ; SMALL-NEXT:   .tc TWUninit[TC],TWUninit[TL]@ld
 
-; LARGE:        [[TGInitL]]:
-; LARGE-NEXT:   .tc TGInit[TE],TGInit[TL]@ld
 ; LARGE:        [[ModuleHandleL]]:
 ; LARGE-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE:        [[TGInitL]]:
+; LARGE-NEXT:   .tc TGInit[TE],TGInit[TL]@ld
 ; LARGE:        [[TGUninitL]]:
 ; LARGE-NEXT:   .tc TGUninit[TE],TGUninit[TL]@ld
 ; LARGE:        [[TIInitL]]:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
index 3e9f28965bbe3..6eb1b325e9806 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
@@ -33,7 +33,7 @@ entry:
 ; RELOC-NEXT:   Section (index: 1) .text {
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x16
-; RELOC-NEXT:     Symbol: TIInit (19)
+; RELOC-NEXT:     Symbol: _$TLSML (19)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -41,7 +41,7 @@ entry:
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x1A
-; RELOC-NEXT:     Symbol: TIInit (19)
+; RELOC-NEXT:     Symbol: _$TLSML (19)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -49,7 +49,7 @@ entry:
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x1E
-; RELOC-NEXT:     Symbol: _$TLSML (21)
+; RELOC-NEXT:     Symbol: TIInit (21)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -57,7 +57,7 @@ entry:
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x22
-; RELOC-NEXT:     Symbol: _$TLSML (21)
+; RELOC-NEXT:     Symbol: TIInit (21)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -163,19 +163,19 @@ entry:
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
 ; RELOC-NEXT:   Virtual Address: 0xB8
-; RELOC-NEXT:   Symbol: TIInit (29)
+; RELOC-NEXT:   Symbol: _$TLSML (19)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLS_LD (0x22)
+; RELOC-NEXT:   Type: R_TLSML (0x25)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
 ; RELOC-NEXT:   Virtual Address: 0xBC
-; RELOC-NEXT:   Symbol: _$TLSML (21)
+; RELOC-NEXT:   Symbol: TIInit (29)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLSML (0x25)
+; RELOC-NEXT:   Type: R_TLS_LD (0x22)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
 ; RELOC-NEXT:   Virtual Address: 0xC0
@@ -401,7 +401,7 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 19
-; SYM-NEXT:     Name: TIInit
+; SYM-NEXT:     Name: _$TLSML
 ; SYM-NEXT:     Value (RelocatableAddress): 0xB8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
@@ -414,14 +414,14 @@ entry:
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 2
 ; SYM-NEXT:       SymbolType: XTY_SD (0x1)
-; SYM-NEXT:       StorageMappingClass: XMC_TE (0x16)
+; SYM-NEXT:       StorageMappingClass: XMC_TC (0x3)
 ; SYM-NEXT:       StabInfoIndex: 0x0
 ; SYM-NEXT:       StabSectNum: 0x0
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 21
-; SYM-NEXT:     Name: _$TLSML
+; SYM-NEXT:     Name: TIInit
 ; SYM-NEXT:     Value (RelocatableAddress): 0xBC
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
@@ -434,7 +434,7 @@ entry:
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 2
 ; SYM-NEXT:       SymbolType: XTY_SD (0x1)
-; SYM-NEXT:       StorageMappingClass: XMC_TC (0x3)
+; SYM-NEXT:       StorageMappingClass: XMC_TE (0x16)
 ; SYM-NEXT:       StabInfoIndex: 0x0
 ; SYM-NEXT:       StabSectNum: 0x0
 ; SYM-NEXT:     }
@@ -550,16 +550,17 @@ entry:
 ; DIS-NEXT:                                       mr 7, 3
 ; DIS-NEXT:                                       mr 6, 4
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 19) TIInit[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 8, 0(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 19) TIInit[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 21) _$TLSML[TC]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 4(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 21) _$TLSML[TC]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 19) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 0(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 19) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 4, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 21) TIInit[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 8, 4(4)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 21) TIInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0
 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA  (idx: 1)      .__tls_get_mod[PR]
-; DIS-NEXT:                                       add 3, 3, 8
+; DIS-NEXT:                                       mr 4, 8
+; DIS-NEXT:                                       add 3, 3, 4
 ; DIS-NEXT:                                       stw 6, 4(3)
 ; DIS-NEXT:                                       stw 7, 0(3)
 ; DIS-NEXT:                                       addi 1, 1, 32
@@ -608,12 +609,12 @@ entry:
 ; DIS-NEXT:       b0: 00 00 00 b8
 ; DIS-NEXT: 000000b0: R_POS (idx: 17) TOC[TC0]
 ; DIS-NEXT:       b4: 00 00 00 00
-; DIS:      000000b8  (idx: 19) TIInit[TE]:
+; DIS:      000000b8  (idx: 19) _$TLSML[TC]:
 ; DIS-NEXT:       b8: 00 00 00 00
-; DIS-NEXT: 000000b8: R_TLS_LD (idx: 29) TIInit[TL]
-; DIS:      000000bc  (idx: 21) _$TLSML[TC]:
+; DIS-NEXT: 000000b8: R_TLSML (idx: 19) _$TLSML[TC]
+; DIS:      000000bc  (idx: 21) TIInit[TE]:
 ; DIS-NEXT:       bc: 00 00 00 00
-; DIS-NEXT: 000000bc: R_TLSML (idx: 21) _$TLSML[TC]
+; DIS-NEXT: 000000bc: R_TLS_LD (idx: 29) TIInit[TL]
 ; DIS:      000000c0  (idx: 23) .TWInit[TE]:
 ; DIS-NEXT:       c0: 00 00 00 00
 ; DIS-NEXT: 000000c0: R_TLSM (idx: 31) TWInit[TL]
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
index ae1dae7955914..dedd36d8ffc51 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
@@ -34,27 +34,27 @@ entry:
 ; RELOC-NEXT:   Section (index: 1) .text {
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x12
-; RELOC-NEXT:     Symbol: TIUninit (25)
+; RELOC-NEXT:     Symbol: _$TLSML (25)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x16
-; RELOC-NEXT:     Symbol: _$TLSML (27)
+; RELOC-NEXT:     Virtual Address: 0x14
+; RELOC-NEXT:     Symbol: .__tls_get_mod (1)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
-; RELOC-NEXT:     Length: 16
-; RELOC-NEXT:     Type: R_TOC (0x3)
+; RELOC-NEXT:     Length: 26
+; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x18
-; RELOC-NEXT:     Symbol: .__tls_get_mod (1)
+; RELOC-NEXT:     Virtual Address: 0x1A
+; RELOC-NEXT:     Symbol: TIUninit (27)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
-; RELOC-NEXT:     Length: 26
-; RELOC-NEXT:     Type: R_RBA (0x18)
+; RELOC-NEXT:     Length: 16
+; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x4A
@@ -124,19 +124,19 @@ entry:
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
 ; RELOC-NEXT:   Virtual Address: 0x98
-; RELOC-NEXT:   Symbol: TIUninit (39)
+; RELOC-NEXT:   Symbol: _$TLSML (25)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLS_LD (0x22)
+; RELOC-NEXT:   Type: R_TLSML (0x25)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
 ; RELOC-NEXT:   Virtual Address: 0x9C
-; RELOC-NEXT:   Symbol: _$TLSML (27)
+; RELOC-NEXT:   Symbol: TIUninit (39)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLSML (0x25)
+; RELOC-NEXT:   Type: R_TLS_LD (0x22)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
 ; RELOC-NEXT:   Virtual Address: 0xA0
@@ -422,7 +422,7 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 25
-; SYM-NEXT:     Name: TIUninit
+; SYM-NEXT:     Name: _$TLSML
 ; SYM-NEXT:     Value (RelocatableAddress): 0x98
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
@@ -442,7 +442,7 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 27
-; SYM-NEXT:     Name: _$TLSML
+; SYM-NEXT:     Name: TIUninit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x9C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
@@ -589,13 +589,13 @@ entry:
 ; DIS-NEXT:                                      stwu 1, -32(1)
 ; DIS-NEXT:                                      stw 0, 40(1)
 ; DIS-NEXT:                                      mr 6, 3
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 7, 0(2)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 25) TIUninit[TC]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 4(2)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 27) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 0(2)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 25) _$TLSML[TC]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               bla 0
 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1)      .__tls_get_mod[PR]
-; DIS-NEXT:                                      add 3, 3, 7
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 4, 4(2)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 27) TIUninit[TC]
+; DIS-NEXT:                                      add 3, 3, 4
 ; DIS-NEXT:                                      stw 6, 0(3)
 ; DIS-NEXT:                                      addi 1, 1, 32
 ; DIS-NEXT:                                      lwz 0, 8(1)
@@ -638,12 +638,12 @@ entry:
 ; DIS-NEXT:       90: 00 00 00 98
 ; DIS-NEXT: 00000090: R_POS (idx: 23) TOC[TC0]
 ; DIS-NEXT:       94: 00 00 00 00
-; DIS:      00000098  (idx: 25) TIUninit[TC]:
+; DIS:      00000098  (idx: 25) _$TLSML[TC]:
 ; DIS-NEXT:       98: 00 00 00 00
-; DIS-NEXT: 00000098: R_TLS_LD (idx: 39) TIUninit[UL]
-; DIS:      0000009c  (idx: 27) _$TLSML[TC]:
+; DIS-NEXT: 00000098: R_TLSML (idx: 25) _$TLSML[TC]
+; DIS:      0000009c  (idx: 27) TIUninit[TC]:
 ; DIS-NEXT:       9c: 00 00 00 00
-; DIS-NEXT: 0000009c: R_TLSML (idx: 27) _$TLSML[TC]
+; DIS-NEXT: 0000009c: R_TLS_LD (idx: 39) TIUninit[UL]
 ; DIS:      000000a0  (idx: 29) .TGInit[TC]:
 ; DIS-NEXT:       a0: 00 00 00 00
 ; DIS-NEXT: 000000a0: R_TLSM (idx: 37) TGInit

>From e970b907020cfe58bf88af0e0cb92b0f18b0b3da Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Thu, 21 Sep 2023 23:40:30 -0400
Subject: [PATCH 07/21] Remove TLS local-dynamic mode guards for AIX.

---
 clang/include/clang/Basic/DiagnosticDriverKinds.td | 1 -
 clang/lib/Frontend/CompilerInvocation.cpp          | 8 --------
 clang/lib/Sema/SemaDeclAttr.cpp                    | 6 ------
 clang/test/CodeGen/PowerPC/aix-tls-model.cpp       | 9 ++++++---
 clang/test/Sema/aix-attr-tls_model.c               | 2 +-
 5 files changed, 7 insertions(+), 19 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 676f1a62b49dd..cc173d13d6180 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -678,7 +678,6 @@ def err_drv_cannot_mix_options : Error<"cannot specify '%1' along with '%0'">;
 def err_drv_invalid_object_mode : Error<
   "OBJECT_MODE setting %0 is not recognized and is not a valid setting">;
 
-def err_aix_unsupported_tls_model : Error<"TLS model '%0' is not yet supported on AIX">;
 def err_roptr_requires_data_sections: Error<"-mxcoff-roptr is supported only with -fdata-sections">;
 def err_roptr_cannot_build_shared: Error<"-mxcoff-roptr is not supported with -shared">;
 
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 56de0f75928ca..a009881b7ac0a 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1989,14 +1989,6 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
     Opts.LinkBitcodeFiles.push_back(F);
   }
 
-  if (Arg *A = Args.getLastArg(OPT_ftlsmodel_EQ)) {
-    if (T.isOSAIX()) {
-      StringRef Name = A->getValue();
-      if (Name == "local-dynamic")
-        Diags.Report(diag::err_aix_unsupported_tls_model) << Name;
-    }
-  }
-
   if (Arg *A = Args.getLastArg(OPT_fdenormal_fp_math_EQ)) {
     StringRef Val = A->getValue();
     Opts.FPDenormalMode = llvm::parseDenormalFPAttribute(Val);
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index a345978bb8701..642a9940cb5fb 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -2053,12 +2053,6 @@ static void handleTLSModelAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
     return;
   }
 
-  if (S.Context.getTargetInfo().getTriple().isOSAIX() &&
-      Model == "local-dynamic") {
-    S.Diag(LiteralLoc, diag::err_aix_attr_unsupported_tls_model) << Model;
-    return;
-  }
-
   D->addAttr(::new (S.Context) TLSModelAttr(S.Context, AL, Model));
 }
 
diff --git a/clang/test/CodeGen/PowerPC/aix-tls-model.cpp b/clang/test/CodeGen/PowerPC/aix-tls-model.cpp
index 9fdd6855a89ee..cd0a08aa9a3b7 100644
--- a/clang/test/CodeGen/PowerPC/aix-tls-model.cpp
+++ b/clang/test/CodeGen/PowerPC/aix-tls-model.cpp
@@ -1,11 +1,11 @@
 // RUN: %clang_cc1 %s -triple powerpc-unknown-aix -target-cpu pwr8 -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-GD
 // RUN: %clang_cc1 %s -triple powerpc-unknown-aix -target-cpu pwr8 -ftls-model=global-dynamic -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-GD
-// RUN: not %clang_cc1 %s -triple powerpc-unknown-aix -target-cpu pwr8 -ftls-model=local-dynamic -emit-llvm 2>&1 | FileCheck %s -check-prefix=CHECK-LD-ERROR
+// RUN: %clang_cc1 %s -triple powerpc-unknown-aix -target-cpu pwr8 -ftls-model=local-dynamic -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-LD
 // RUN: %clang_cc1 %s -triple powerpc-unknown-aix -target-cpu pwr8 -ftls-model=initial-exec -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-IE
 // RUN: %clang_cc1 %s -triple powerpc-unknown-aix -target-cpu pwr8 -ftls-model=local-exec -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-LE
 // RUN: %clang_cc1 %s -triple powerpc64-unknown-aix -target-cpu pwr8 -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-GD
 // RUN: %clang_cc1 %s -triple powerpc64-unknown-aix -target-cpu pwr8 -ftls-model=global-dynamic -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-GD
-// RUN: not %clang_cc1 %s -triple powerpc64-unknown-aix -target-cpu pwr8 -ftls-model=local-dynamic -emit-llvm 2>&1 | FileCheck %s -check-prefix=CHECK-LD-ERROR
+// RUN: %clang_cc1 %s -triple powerpc64-unknown-aix -target-cpu pwr8 -ftls-model=local-dynamic -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-LD
 // RUN: %clang_cc1 %s -triple powerpc64-unknown-aix -target-cpu pwr8 -ftls-model=initial-exec -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-IE
 // RUN: %clang_cc1 %s -triple powerpc64-unknown-aix -target-cpu pwr8 -ftls-model=local-exec -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-LE
 
@@ -21,7 +21,10 @@ int f() {
 // CHECK-GD: @z2 ={{.*}} global i32 0
 // CHECK-GD: @x ={{.*}} thread_local global i32 0
 // CHECK-GD: @_ZZ1fvE1y = internal thread_local global i32 0
-// CHECK-LD-ERROR:  error: TLS model 'local-dynamic' is not yet supported on AIX
+// CHECK-LD: @z1 ={{.*}} global i32 0
+// CHECK-LD: @z2 ={{.*}} global i32 0
+// CHECK-LD: @x ={{.*}} thread_local(localdynamic) global i32 0
+// CHECK-LD: @_ZZ1fvE1y = internal thread_local(localdynamic) global i32 0
 // CHECK-IE: @z1 ={{.*}} global i32 0
 // CHECK-IE: @z2 ={{.*}} global i32 0
 // CHECK-IE: @x ={{.*}} thread_local(initialexec) global i32 0
diff --git a/clang/test/Sema/aix-attr-tls_model.c b/clang/test/Sema/aix-attr-tls_model.c
index 9c22d6cceed81..7c2047bced939 100644
--- a/clang/test/Sema/aix-attr-tls_model.c
+++ b/clang/test/Sema/aix-attr-tls_model.c
@@ -6,6 +6,6 @@
 #endif
 
 static __thread int y __attribute((tls_model("global-dynamic"))); // no-warning
-static __thread int y __attribute((tls_model("local-dynamic"))); // expected-error {{TLS model 'local-dynamic' is not yet supported on AIX}}
+static __thread int y __attribute((tls_model("local-dynamic"))); // expected-no-diagnostics
 static __thread int y __attribute((tls_model("initial-exec"))); // no-warning
 static __thread int y __attribute((tls_model("local-exec"))); // no-warning

>From 369868660a330a3a2433c8c06a00ed022d61ddcc Mon Sep 17 00:00:00 2001
From: tingwang <tingwang at tingwangs-MBP.lan>
Date: Sun, 24 Sep 2023 13:11:06 +0800
Subject: [PATCH 08/21] Fixed issues raised by comments and incorporated
 suggested changes. (1) Removed TLSLDAIX argument so that duplicated nodes can
 be eliminated. (2) Try to reuse clobber r4 by moving TLSLDAIX ahead of
 LoadOffsetToc node. (3) Add FIXME comments. (4) Add test case to show
 duplicated .__tls_get_mod can be eliminated.

The two test cases below have not been updated yet due to an environment
issue. I will fix them in the next update.
Failed Tests:
  LLVM :: CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
  LLVM :: CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
---
 .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp  |   9 +-
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     |  28 ++-
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  21 +-
 llvm/lib/Target/PowerPC/PPCISelLowering.h     |   4 +-
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td      |  10 +-
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       |   8 +-
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 123 +++++++-----
 .../test/CodeGen/PowerPC/aix-tls-gd-double.ll |  58 +++---
 llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll   |  58 +++---
 .../CodeGen/PowerPC/aix-tls-gd-longlong.ll    | 164 +++++++---------
 .../CodeGen/PowerPC/aix-tls-local-dynamic.ll  | 184 ++++++++++--------
 11 files changed, 348 insertions(+), 319 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 22cd2fc03ef7c..00dda306a584c 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -231,9 +231,12 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer {
       MCSymbolXCOFF *TCSym =
           cast<MCSectionXCOFF>(Streamer.getCurrentSectionOnly())
               ->getQualNameSymbol();
-      // On AIX, we have a region handle (symbol at m), module handle
-      // (__TLSML[TC]@ml) and the variable offset (symbol@{gd|ie|le|ld}) for TLS
-      // variables, depending on the TLS model.
+      // On AIX, we have TLS variable offsets (symbol@{gd|ie|le|ld}) depending
+      // on the TLS access method (or model). For the general-dynamic access
+      // method, we also have a region handle (symbol@m) for each variable. For
+      // local-dynamic, we have a module handle (__TLSML[TC]@ml) for all
+      // variables. For local-exec and initial-exec, we have a thread pointer,
+      // in r13 for 64-bit mode and returned by .__get_tpointer for 32-bit mode.
       if (Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSIE ||
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index dc2adc352b01d..099299bff618f 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -672,9 +672,9 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
 
   if (Subtarget->isAIXABI()) {
     // For TLSGD, the variable offset should already be in R4 and the region
-    // handle should already be in R3, generate absolute branch to
-    // .__tls_get_addr. For TLSLD, the module handle should already be in R3,
-    // generate branch to .__tls_get_mod.
+    // handle should already be in R3. We generate an absolute branch to
+    // .__tls_get_addr. For TLSLD, the module handle should already be in R3.
+    // We generate an absolute branch to .__tls_get_mod.
     Register VarOffsetReg = Subtarget->isPPC64() ? PPC::X4 : PPC::R4;
     (void)VarOffsetReg;
     assert((MI->getOpcode() == PPC::GETtlsMOD32AIX ||
@@ -759,9 +759,10 @@ getTOCEntryTypeForMO(const MachineOperand &MO) {
   }
 }
 
-// On AIX, TLS-local-dynamic requires that symbol for the module handle must
+// FIXME: find alternative approach to get rid of this hack.
+// On AIX, TLS-local-dynamic requires that the symbol for the module handle must
 // have the name "_$TLSML". This symbol is used as one TOC symbol reference
-// itself with ML relocation type, thus it has "[TC]" attached to its name.
+// itself with an ML relocation type, thus it has "[TC]" attached to its name.
 static inline bool isSpecialAIXSymbolTLSML(const MachineOperand &MO,
                                            const bool IsAIX) {
   return IsAIX && MO.isSymbol() &&
@@ -862,9 +863,11 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD;
     if (MO.getTargetFlags() & PPCII::MO_TLSLD_FLAG) {
       if (isSpecialAIXSymbolTLSML(MO, IsAIX))
-        // FIXME: On AIX the ML relocation type is only valid for a reference to
-        // a TOC symbol from the symbol itself, and right now its only user is
-        // symbol "_$TLSML". Use symbol name to decide that R_TLSML is expected.
+        // FIXME: Due to the size limit of MachineOperand::SubReg_TargetFlags,
+        // MO_TLSLD_FLAG is reused here in place of a dedicated MO_TLSLDM_FLAG.
+        // On AIX the ML relocation type is only valid for a reference to a TOC
+        // symbol from the symbol itself, and right now its only user is the
+        // symbol "_$TLSML". Use its name to decide that R_TLSML is expected.
         return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML;
       if (IsAIX)
         return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD;
@@ -1385,14 +1388,17 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
                    .addExpr(SymGotTlsGD));
     return;
   }
+  case PPC::GETtlsMOD32AIX:
+  case PPC::GETtlsMOD64AIX:
+    // Transform: %r3 = GETtlsMODNNAIX %r3 (for NN == 32/64).
+    // Into: BLA .__tls_get_mod()
+    // Input parameter is a module handle (__TLSML[TC]@ml) for all variables.
   case PPC::GETtlsADDR:
     // Transform: %x3 = GETtlsADDR %x3, @sym
     // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsgd)
   case PPC::GETtlsADDRPCREL:
   case PPC::GETtlsADDR32AIX:
   case PPC::GETtlsADDR64AIX:
-  case PPC::GETtlsMOD32AIX:
-  case PPC::GETtlsMOD64AIX:
     // Transform: %r3 = GETtlsADDRNNAIX %r3, %r4 (for NN == 32/64).
     // Into: BLA .__tls_get_addr()
     // Unlike on Linux, there is no symbol or relocation needed for this call.
@@ -2888,6 +2894,8 @@ void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) {
   }
   case PPC::GETtlsMOD32AIX:
   case PPC::GETtlsMOD64AIX:
+    // A reference to .__tls_get_mod is unknown to the assembler so we need to
+    // emit an external symbol reference.
   case PPC::GETtlsTpointer32AIX:
   case PPC::GETtlsADDR64AIX:
   case PPC::GETtlsADDR32AIX: {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 8f6c57ae746c2..86c88748d6be6 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1772,13 +1772,11 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
-  case PPCISD::GET_TLS_MOD_AIX:
-    return "PPCISD::GET_TLS_MOD_AIX";
+  case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
   case PPCISD::GET_TPOINTER:    return "PPCISD::GET_TPOINTER";
   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
   case PPCISD::TLSGD_AIX:       return "PPCISD::TLSGD_AIX";
-  case PPCISD::TLSLD_AIX:
-    return "PPCISD::TLSLD_AIX";
+  case PPCISD::TLSLD_AIX:       return "PPCISD::TLSLD_AIX";
   case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
   case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
   case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
@@ -3418,14 +3416,19 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
   }
 
   if (Model == TLSModel::LocalDynamic) {
-    // For local-dynamic on AIX, we need to generate two TOC entries, one for
-    // the variable offset, the other for the module handle. The module handle
-    // is encapsulated inside the TLSLD_AIX pseudo node, and will be expanded by
-    // PPCTLSDynamicCall.
+    // For local-dynamic on AIX, we need to generate one TOC entry for each
+    // variable offset and a single module-handle TOC entry for the entire file.
+
+    // We do not (1) create a GV node or (2) call getTOCEntry for the
+    // module-handle, because the module-handle should not be materialized (i.e.
+    // there should be no symbol-table entry referring to the module-handle).
+    // Instead, we create a reference to __TLSML[TC]@ml in PPCTLSDynamicCall
+    // when processing the TLSLD_AIX pseudo node.
+    SDValue ModuleHandle = DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT);
     SDValue VariableOffsetTGA =
         DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
     SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
-    return DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, VariableOffset);
+    return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
   }
 
   // If Local- or Initial-exec or Local-dynamic is not possible or specified,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index fed03835fd99a..f09a624fe7235 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -381,8 +381,8 @@ namespace llvm {
 
     /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(variable offset)
     /// Op that internally creates TOC entry for the "_$TLSML" symbol, generates
-    /// GET_TLS_MOD_AIX node which will be expanded into a call to
-    /// .__tls_get_mod, and then add the variable offset with the result from
+    /// a GET_TLS_MOD_AIX node which will be expanded into a call to
+    /// .__tls_get_mod, and then adds the variable offset with the result from
     /// the call. This node is used in both 32-bit and 64-bit modes. The only
     /// difference is register class.
     TLSLD_AIX,
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index a3590580777f3..e4a6e19dabd34 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1563,7 +1563,7 @@ def GETtlsADDR64AIX :
                     "GETtlsADDR64AIX",
                     [(set i64:$rD,
                       (PPCgetTlsAddr i64:$offset, i64:$handle))]>, isPPC64;
-// On AIX, the call to .__tls_get_mod need one input in X3 for the module handle.
+// On AIX, the call to .__tls_get_mod needs one input in X3 for the module handle.
 def GETtlsMOD64AIX :
   PPCEmitTimePseudo<(outs g8rc:$rD),(ins g8rc:$handle),
                     "GETtlsMOD64AIX",
@@ -1602,10 +1602,10 @@ def TLSGDAIX8 :
                      "#TLSGDAIX8",
                      [(set i64:$rD,
                        (PPCTlsgdAIX i64:$offset, i64:$handle))]>;
-// This pseudo is expanded to one copy to put the module handle in R3, then call
-// GETtlsMOD64AIX, and then add variable offset to the output from the call.
-def TLSLDAIX8 : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$handle),
-                     "#TLSLDAIX8", [(set i64:$rD, (PPCTlsldAIX i64:$handle))]>;
+// This pseudo is expanded to one copy to put the module handle in R3, followed
+// by the call to GETtlsMOD64AIX. The variable offset is added separately.
+def TLSLDAIX8 : PPCEmitTimePseudo<(outs g8rc:$rD), (ins),
+                     "#TLSLDAIX8", [(set i64:$rD, (PPCTlsldAIX))]>;
 // Combined op for ADDItlsldL and GETtlsADDR, late expanded.  X3 and LR8
 // are true defines, while the rest of the Defs are clobbers.
 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index c829b34f130b4..30837e8735cf8 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -220,7 +220,7 @@ def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR",
                                  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                                  SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>;
 def PPCTlsgdAIX     : SDNode<"PPCISD::TLSGD_AIX", SDTIntBinOp>;
-def PPCTlsldAIX     : SDNode<"PPCISD::TLSLD_AIX", SDTIntUnaryOp>;
+def PPCTlsldAIX     : SDNode<"PPCISD::TLSLD_AIX", SDTIntLeaf>;
 def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
 def PPCaddiTlsldL   : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
 def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
@@ -3298,10 +3298,8 @@ def TLSGDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$offset, gprc:$handl
                             (PPCTlsgdAIX i32:$offset, i32:$handle))]>;
 // This pseudo is expanded to one copy to put the module handle in R3, then call
 // GETtlsMOD32AIX, and then add variable offset to the output from the call.
-def TLSLDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$handle),
-                          "#TLSLDAIX",
-                          [(set i32:$rD,
-                            (PPCTlsldAIX i32:$handle))]>;
+def TLSLDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins),
+                          "#TLSLDAIX", [(set i32:$rD, (PPCTlsldAIX))]>;
 // LR is a true define, while the rest of the Defs are clobbers.  R3 is
 // explicitly defined when this op is created, so not mentioned here.
 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index bb03f8f911c49..ba3f9f04697d7 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -92,7 +92,9 @@ namespace {
         Register InReg = PPC::NoRegister;
         Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
         Register GPR4 = Is64Bit ? PPC::X4 : PPC::R4;
-        if (!IsPCREL && !IsTLSTPRelMI)
+        if (!IsPCREL && !IsTLSTPRelMI &&
+            !(MI.getOpcode() == PPC::TLSLDAIX8 ||
+              MI.getOpcode() == PPC::TLSLDAIX))
           InReg = MI.getOperand(1).getReg();
         DebugLoc DL = MI.getDebugLoc();
 
@@ -160,9 +162,9 @@ namespace {
         if (IsAIX) {
           if (MI.getOpcode() == PPC::TLSLDAIX8 ||
               MI.getOpcode() == PPC::TLSLDAIX) {
-            // For Local-Dynamic, need to swap the position of VarOffsetInst and
-            // MI, so that VarOffsetInst can use R/X4 to reduce register
-            // pressure.
+            // It is better to put the TLSLDAIX node before the LoadOffsetToc
+            // node, so that LoadOffsetToc can use the clobbered r4/r5. Search
+            // for the first paired LoadOffsetToc node within the same BB.
             const PPCSubtarget &Subtarget =
                 MBB.getParent()->getSubtarget<PPCSubtarget>();
             bool IsLargeModel =
@@ -170,69 +172,94 @@ namespace {
             unsigned LDTocOp =
                 Is64Bit ? (IsLargeModel ? PPC::LDtocL : PPC::LDtoc)
                         : (IsLargeModel ? PPC::LWZtocL : PPC::LWZtoc);
-            assert(RegInfo.hasOneDef(MI.getOperand(1).getReg()) &&
-                   "TLSLDAIX expects single def of its operand.");
-            MachineInstr *VarOffsetInst =
-                RegInfo.getOneDef(MI.getOperand(1).getReg())->getParent();
-            assert(VarOffsetInst->getOpcode() == LDTocOp &&
-                   "Unexpected LDTocOp.");
-            if (IsLargeModel) {
-              // Get the ADDIS instruction when using large model.
-              assert(RegInfo.hasOneDef(VarOffsetInst->getOperand(2).getReg()) &&
-                     "LDTocOp expects single def of its operand.");
-              VarOffsetInst =
-                  RegInfo.getOneDef(VarOffsetInst->getOperand(2).getReg())
-                      ->getParent();
-              assert(VarOffsetInst->getOpcode() ==
-                         (Is64Bit ? PPC::ADDIStocHA8 : PPC::ADDIStocHA) &&
-                     "Unexpected ADDIStocHA.");
-              // FIXME: machine-scheduler could schedule ADDIStocHA ahead of
-              // GETtlsMODAIX, and still has to use extra register.
+            MachineBasicBlock::iterator Anchor = I;
+            if (!RegInfo.use_empty(OutReg)) {
+              std::set<MachineInstr *> Uses;
+              // Collect all instructions that use OutReg
+              for (MachineOperand &MO : RegInfo.use_operands(OutReg)) {
+                if (Uses.count(MO.getParent()))
+                  continue;
+                Uses.insert(MO.getParent());
+              }
+              // Find the first Add within current BB.
+              MachineBasicBlock::iterator UseIter = MBB.begin();
+              for (MachineBasicBlock::iterator AE = MBB.end(); UseIter != AE;
+                   ++UseIter)
+                if (Uses.count(&*UseIter))
+                  break;
+
+              if (UseIter != MBB.end()) {
+                // Get the instruction that defines the other used register
+                // operand of UseIter. The match pattern is that: UseIter has
+                // exactly one used-operand defined by LDTocOp
+                // (LDtocL/LDtoc/LWZtocL/LWZtoc).
+                MachineInstr *LoadOffsetToc = nullptr;
+                int MatchCount = 0;
+                for (MachineOperand &MO : UseIter->operands()) {
+                  if (MO.isReg() && MO.isUse()) {
+                    if (RegInfo.hasOneDef(MO.getReg())) {
+                      if (RegInfo.getOneDef(MO.getReg())
+                              ->getParent()
+                              ->getOpcode() == LDTocOp) {
+                        LoadOffsetToc =
+                            RegInfo.getOneDef(MO.getReg())->getParent();
+                        ++MatchCount;
+                      }
+                    } else {
+                      // FIXME: analyze this scenario if there is one.
+                      MatchCount = 0;
+                      break;
+                    }
+                  }
+                }
+                // Get the iterator.
+                if (MatchCount == 1 && LoadOffsetToc) {
+                  Anchor = MBB.begin();
+                  for (MachineBasicBlock::iterator AE = MBB.end(); Anchor != AE;
+                       ++Anchor)
+                    if (&*Anchor == LoadOffsetToc)
+                      break;
+
+                  if (Anchor == MBB.end())
+                    Anchor = I;
+                }
+              }
             }
+
+            // Generate instructions that refer to the "_$TLSML" symbol.
             Register ModuleHandleHReg;
             if (IsLargeModel) {
               ModuleHandleHReg = RegInfo.createVirtualRegister(GPRNoZero);
-              BuildMI(MBB, *VarOffsetInst, DL,
+              BuildMI(MBB, Anchor, DL,
                       TII->get(Is64Bit ? PPC::ADDIStocHA8 : PPC::ADDIStocHA),
                       ModuleHandleHReg)
                   .addReg(Subtarget.getTOCPointerRegister())
                   .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG);
             }
             Register MHReg = RegInfo.createVirtualRegister(GPRNoZero);
-            BuildMI(MBB, *VarOffsetInst, DL, TII->get(LDTocOp), MHReg)
+            BuildMI(MBB, Anchor, DL, TII->get(LDTocOp), MHReg)
                 .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG)
                 .addReg(IsLargeModel
                             ? ModuleHandleHReg
                             : Register(Subtarget.getTOCPointerRegister()));
             // The module handle is copied in r3.
-            BuildMI(MBB, *VarOffsetInst, DL, TII->get(TargetOpcode::COPY), GPR3)
+            BuildMI(MBB, Anchor, DL, TII->get(TargetOpcode::COPY), GPR3)
                 .addReg(MHReg);
             // The call to .__tls_get_mod.
-            BuildMI(MBB, *VarOffsetInst, DL, TII->get(Opc2), GPR3).addReg(GPR3);
-            // Copy VarOffset to R/X4
+            BuildMI(MBB, Anchor, DL, TII->get(Opc2), GPR3).addReg(GPR3);
+          } else if (!IsTLSTPRelMI) {
+            // The variable offset and region handle are copied in r4 and r3.
+            // The copies are followed by GETtlsADDR32AIX/GETtlsADDR64AIX.
             BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
                 .addReg(MI.getOperand(1).getReg());
-            BuildMI(MBB, I, DL, TII->get(Is64Bit ? PPC::ADD8 : PPC::ADD4), GPR3)
-                .addReg(GPR3)
-                .addReg(GPR4);
-          } else {
-            // For Global-Dynamic, the variable offset and region handle are
-            // copied in r4 and r3. The copies are followed by
-            // GETtlsADDR32AIX/GETtlsADDR64AIX.
-            if (!IsTLSTPRelMI) {
-              BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
-                  .addReg(MI.getOperand(1).getReg());
-              BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
-                  .addReg(MI.getOperand(2).getReg());
-              BuildMI(MBB, I, DL, TII->get(Opc2), GPR3)
-                  .addReg(GPR3)
-                  .addReg(GPR4);
-            } else
-              // The opcode of GETtlsTpointer32AIX does not change, because
-              // later this instruction will be expanded into a call to
-              // .__get_tpointer, which will return the thread pointer into r3.
-              BuildMI(MBB, I, DL, TII->get(Opc2), GPR3);
-          }
+            BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
+                .addReg(MI.getOperand(2).getReg());
+            BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3).addReg(GPR4);
+          } else
+            // The opcode of GETtlsTpointer32AIX does not change, because later
+            // this instruction will be expanded into a call to .__get_tpointer,
+            // which will return the thread pointer into r3.
+            BuildMI(MBB, I, DL, TII->get(Opc2), GPR3);
         } else {
           MachineInstr *Addi;
           if (IsPCREL) {
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
index ea1a2b8fe9ade..84ddb83bef457 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
@@ -160,8 +160,7 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL32-NEXT:    add 3, 3, 4
-; SMALL32-NEXT:    stfd 1, 0(3)
+; SMALL32-NEXT:    stfdx 1, 3, 4
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
 ; SMALL32-NEXT:    mtlr 0
@@ -172,14 +171,12 @@ define void @storesTIInit(double %Val) #0 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 6, L..C5 at l(4)
+; LARGE32-NEXT:    addis 6, L..C4 at u(2)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    mr 4, 6
-; LARGE32-NEXT:    add 3, 3, 4
-; LARGE32-NEXT:    stfd 1, 0(3)
+; LARGE32-NEXT:    lwz 4, L..C4 at l(6)
+; LARGE32-NEXT:    stfdx 1, 3, 4
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
 ; LARGE32-NEXT:    mtlr 0
@@ -193,8 +190,7 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL64-NEXT:    add 3, 3, 4
-; SMALL64-NEXT:    stfd 1, 0(3)
+; SMALL64-NEXT:    stfdx 1, 3, 4
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
 ; SMALL64-NEXT:    mtlr 0
@@ -210,8 +206,7 @@ define void @storesTIInit(double %Val) #0 {
 ; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    ld 4, L..C5 at l(6)
-; LARGE64-NEXT:    add 3, 3, 4
-; LARGE64-NEXT:    stfd 1, 0(3)
+; LARGE64-NEXT:    stfdx 1, 3, 4
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
 ; LARGE64-NEXT:    mtlr 0
@@ -461,10 +456,9 @@ define double @loadsTIInit() #1 {
 ; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL32-NEXT:    add 3, 3, 4
-; SMALL32-NEXT:    lwz 4, L..C8(2) # @GInit
-; SMALL32-NEXT:    lfd 0, 0(3)
-; SMALL32-NEXT:    lfd 1, 0(4)
+; SMALL32-NEXT:    lwz 5, L..C8(2) # @GInit
+; SMALL32-NEXT:    lfdx 0, 3, 4
+; SMALL32-NEXT:    lfd 1, 0(5)
 ; SMALL32-NEXT:    fadd 1, 0, 1
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
@@ -476,14 +470,12 @@ define double @loadsTIInit() #1 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 6, L..C5 at l(4)
+; LARGE32-NEXT:    addis 6, L..C4 at u(2)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    mr 4, 6
-; LARGE32-NEXT:    add 3, 3, 4
-; LARGE32-NEXT:    lfd 0, 0(3)
+; LARGE32-NEXT:    lwz 4, L..C4 at l(6)
+; LARGE32-NEXT:    lfdx 0, 3, 4
 ; LARGE32-NEXT:    addis 3, L..C8 at u(2)
 ; LARGE32-NEXT:    lwz 3, L..C8 at l(3)
 ; LARGE32-NEXT:    lfd 1, 0(3)
@@ -501,10 +493,9 @@ define double @loadsTIInit() #1 {
 ; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL64-NEXT:    add 3, 3, 4
-; SMALL64-NEXT:    ld 4, L..C8(2) # @GInit
-; SMALL64-NEXT:    lfd 0, 0(3)
-; SMALL64-NEXT:    lfd 1, 0(4)
+; SMALL64-NEXT:    ld 5, L..C8(2) # @GInit
+; SMALL64-NEXT:    lfdx 0, 3, 4
+; SMALL64-NEXT:    lfd 1, 0(5)
 ; SMALL64-NEXT:    fadd 1, 0, 1
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -521,10 +512,9 @@ define double @loadsTIInit() #1 {
 ; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    ld 4, L..C5 at l(6)
-; LARGE64-NEXT:    add 3, 3, 4
-; LARGE64-NEXT:    addis 4, L..C8 at u(2)
-; LARGE64-NEXT:    lfd 0, 0(3)
-; LARGE64-NEXT:    ld 3, L..C8 at l(4)
+; LARGE64-NEXT:    addis 5, L..C8 at u(2)
+; LARGE64-NEXT:    lfdx 0, 3, 4
+; LARGE64-NEXT:    ld 3, L..C8 at l(5)
 ; LARGE64-NEXT:    lfd 1, 0(3)
 ; LARGE64-NEXT:    fadd 1, 0, 1
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -664,10 +654,10 @@ entry:
 ; LARGE32-LABEL:  L..C3:
 ; LARGE32-NEXT:   .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL:  L..C4:
+; LARGE32-NEXT:   .tc TIInit[TE],TIInit[TL]@ld
+; LARGE32-LABEL:  L..C5:
 ; LARGE32-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE32-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
-; LARGE32-LABEL:  L..C5:
-; LARGE32-NEXT:   .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE32-LABEL:  L..C6:
 ; LARGE32-NEXT:   .tc .TWInit[TE],TWInit[TL]@m
 ; LARGE32-LABEL:  L..C7:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
index e1cfb6fd767cb..3001a915da485 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
@@ -168,8 +168,7 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL32-NEXT:    lwz 3, L..C4(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2)
-; SMALL32-NEXT:    add 3, 3, 4
-; SMALL32-NEXT:    stw 6, 0(3)
+; SMALL32-NEXT:    stwx 6, 3, 4
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
 ; SMALL32-NEXT:    mtlr 0
@@ -181,14 +180,12 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 6, 3
-; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 7, L..C5 at l(4)
+; LARGE32-NEXT:    addis 7, L..C4 at u(2)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    mr 4, 7
-; LARGE32-NEXT:    add 3, 3, 4
-; LARGE32-NEXT:    stw 6, 0(3)
+; LARGE32-NEXT:    lwz 4, L..C4 at l(7)
+; LARGE32-NEXT:    stwx 6, 3, 4
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
 ; LARGE32-NEXT:    mtlr 0
@@ -203,8 +200,7 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL64-NEXT:    ld 3, L..C4(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2)
-; SMALL64-NEXT:    add 3, 3, 4
-; SMALL64-NEXT:    stw 6, 0(3)
+; SMALL64-NEXT:    stwx 6, 3, 4
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
 ; SMALL64-NEXT:    mtlr 0
@@ -221,8 +217,7 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    ld 4, L..C5 at l(7)
-; LARGE64-NEXT:    add 3, 3, 4
-; LARGE64-NEXT:    stw 6, 0(3)
+; LARGE64-NEXT:    stwx 6, 3, 4
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
 ; LARGE64-NEXT:    mtlr 0
@@ -476,10 +471,9 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL32-NEXT:    lwz 3, L..C4(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2)
-; SMALL32-NEXT:    add 3, 3, 4
-; SMALL32-NEXT:    lwz 4, L..C8(2)
-; SMALL32-NEXT:    lwz 3, 0(3)
-; SMALL32-NEXT:    lwz 4, 0(4)
+; SMALL32-NEXT:    lwz 5, L..C8(2)
+; SMALL32-NEXT:    lwzx 3, 3, 4
+; SMALL32-NEXT:    lwz 4, 0(5)
 ; SMALL32-NEXT:    add 3, 4, 3
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
@@ -491,14 +485,12 @@ define i32 @loadsTIUninit() #1 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 6, L..C5 at l(4)
+; LARGE32-NEXT:    addis 6, L..C4 at u(2)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    mr 4, 6
-; LARGE32-NEXT:    add 3, 3, 4
-; LARGE32-NEXT:    lwz 3, 0(3)
+; LARGE32-NEXT:    lwz 4, L..C4 at l(6)
+; LARGE32-NEXT:    lwzx 3, 3, 4
 ; LARGE32-NEXT:    addis 4, L..C8 at u(2)
 ; LARGE32-NEXT:    lwz 4, L..C8 at l(4)
 ; LARGE32-NEXT:    lwz 4, 0(4)
@@ -516,10 +508,9 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL64-NEXT:    ld 3, L..C4(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2)
-; SMALL64-NEXT:    add 3, 3, 4
-; SMALL64-NEXT:    ld 4, L..C8(2)
-; SMALL64-NEXT:    lwz 3, 0(3)
-; SMALL64-NEXT:    lwz 4, 0(4)
+; SMALL64-NEXT:    ld 5, L..C8(2)
+; SMALL64-NEXT:    lwzx 3, 3, 4
+; SMALL64-NEXT:    lwz 4, 0(5)
 ; SMALL64-NEXT:    add 3, 4, 3
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -536,10 +527,9 @@ define i32 @loadsTIUninit() #1 {
 ; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    ld 4, L..C5 at l(6)
-; LARGE64-NEXT:    add 3, 3, 4
-; LARGE64-NEXT:    addis 4, L..C8 at u(2)
-; LARGE64-NEXT:    ld 4, L..C8 at l(4)
-; LARGE64-NEXT:    lwz 3, 0(3)
+; LARGE64-NEXT:    addis 5, L..C8 at u(2)
+; LARGE64-NEXT:    lwzx 3, 3, 4
+; LARGE64-NEXT:    ld 4, L..C8 at l(5)
 ; LARGE64-NEXT:    lwz 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -679,10 +669,10 @@ entry:
 ; LARGE32-LABEL: L..C3:
 ; LARGE32-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL: L..C4:
+; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
+; LARGE32-LABEL: L..C5:
 ; LARGE32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
-; LARGE32-LABEL: L..C5:
-; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE32-LABEL: L..C6:
 ; LARGE32-NEXT:  .tc .TWUninit[TE],TWUninit[TL]@m
 ; LARGE32-LABEL: L..C7:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
index 2e18d6c9bfd7e..136c62ff509f9 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
@@ -103,9 +103,8 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C3(2)
-; SMALL32-NEXT:    add 3, 3, 4
+; SMALL32-NEXT:    stwux 7, 3, 4
 ; SMALL32-NEXT:    stw 6, 4(3)
-; SMALL32-NEXT:    stw 7, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
 ; SMALL32-NEXT:    mtlr 0
@@ -118,15 +117,13 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 7, 3
 ; LARGE32-NEXT:    mr 6, 4
-; LARGE32-NEXT:    addis 3, L..C2 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C2 at l(3)
-; LARGE32-NEXT:    addis 4, L..C3 at u(2)
-; LARGE32-NEXT:    lwz 8, L..C3 at l(4)
+; LARGE32-NEXT:    addis 8, L..C2 at u(2)
+; LARGE32-NEXT:    addis 3, L..C3 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    mr 4, 8
-; LARGE32-NEXT:    add 3, 3, 4
+; LARGE32-NEXT:    lwz 4, L..C2 at l(8)
+; LARGE32-NEXT:    stwux 7, 3, 4
 ; LARGE32-NEXT:    stw 6, 4(3)
-; LARGE32-NEXT:    stw 7, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
 ; LARGE32-NEXT:    mtlr 0
@@ -141,8 +138,7 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C3(2)
-; SMALL64-NEXT:    add 3, 3, 4
-; SMALL64-NEXT:    std 6, 0(3)
+; SMALL64-NEXT:    stdx 6, 3, 4
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
 ; SMALL64-NEXT:    mtlr 0
@@ -159,8 +155,7 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; LARGE64-NEXT:    ld 3, L..C2@l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    ld 4, L..C3@l(7)
-; LARGE64-NEXT:    add 3, 3, 4
-; LARGE64-NEXT:    std 6, 0(3)
+; LARGE64-NEXT:    stdx 6, 3, 4
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
 ; LARGE64-NEXT:    mtlr 0
@@ -182,9 +177,8 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C4(2)
-; SMALL32-NEXT:    add 3, 3, 4
+; SMALL32-NEXT:    stwux 7, 3, 4
 ; SMALL32-NEXT:    stw 6, 4(3)
-; SMALL32-NEXT:    stw 7, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
 ; SMALL32-NEXT:    mtlr 0
@@ -197,15 +191,13 @@ define void @storesTIInit(i64 %Val) #0 {
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 7, 3
 ; LARGE32-NEXT:    mr 6, 4
-; LARGE32-NEXT:    addis 3, L..C2@u(2)
-; LARGE32-NEXT:    lwz 3, L..C2@l(3)
-; LARGE32-NEXT:    addis 4, L..C4@u(2)
-; LARGE32-NEXT:    lwz 8, L..C4@l(4)
+; LARGE32-NEXT:    addis 8, L..C4@u(2)
+; LARGE32-NEXT:    addis 3, L..C3@u(2)
+; LARGE32-NEXT:    lwz 3, L..C3@l(3)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    mr 4, 8
-; LARGE32-NEXT:    add 3, 3, 4
+; LARGE32-NEXT:    lwz 4, L..C4@l(8)
+; LARGE32-NEXT:    stwux 7, 3, 4
 ; LARGE32-NEXT:    stw 6, 4(3)
-; LARGE32-NEXT:    stw 7, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
 ; LARGE32-NEXT:    mtlr 0
@@ -220,8 +212,7 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C4(2)
-; SMALL64-NEXT:    add 3, 3, 4
-; SMALL64-NEXT:    std 6, 0(3)
+; SMALL64-NEXT:    stdx 6, 3, 4
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
 ; SMALL64-NEXT:    mtlr 0
@@ -238,8 +229,7 @@ define void @storesTIInit(i64 %Val) #0 {
 ; LARGE64-NEXT:    ld 3, L..C2@l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    ld 4, L..C4@l(7)
-; LARGE64-NEXT:    add 3, 3, 4
-; LARGE64-NEXT:    std 6, 0(3)
+; LARGE64-NEXT:    stdx 6, 3, 4
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
 ; LARGE64-NEXT:    mtlr 0
@@ -333,7 +323,7 @@ define i64 @loadsTGInit() #1 {
 ; SMALL32-NEXT:    lwz 4, L..C1(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C7(2) # @GInit
+; SMALL32-NEXT:    lwz 4, L..C7(2)
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -376,7 +366,7 @@ define i64 @loadsTGInit() #1 {
 ; SMALL64-NEXT:    ld 4, L..C1(2)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C7(2) # @GInit
+; SMALL64-NEXT:    ld 4, L..C7(2)
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -421,14 +411,13 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C3(2)
-; SMALL32-NEXT:    add 3, 3, 4
-; SMALL32-NEXT:    lwz 4, L..C7(2) # @GInit
-; SMALL32-NEXT:    lwz 5, 4(3)
-; SMALL32-NEXT:    lwz 6, 4(4)
-; SMALL32-NEXT:    lwz 3, 0(3)
-; SMALL32-NEXT:    lwz 7, 0(4)
-; SMALL32-NEXT:    addc 4, 6, 5
-; SMALL32-NEXT:    adde 3, 7, 3
+; SMALL32-NEXT:    lwz 5, L..C7(2)
+; SMALL32-NEXT:    lwzux 6, 3, 4
+; SMALL32-NEXT:    lwz 4, 4(5)
+; SMALL32-NEXT:    lwz 3, 4(3)
+; SMALL32-NEXT:    lwz 5, 0(5)
+; SMALL32-NEXT:    addc 4, 4, 3
+; SMALL32-NEXT:    adde 3, 5, 6
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
 ; SMALL32-NEXT:    mtlr 0
@@ -439,21 +428,19 @@ define i64 @loadsTIUninit() #1 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    addis 3, L..C2@u(2)
-; LARGE32-NEXT:    lwz 3, L..C2@l(3)
-; LARGE32-NEXT:    addis 4, L..C3@u(2)
-; LARGE32-NEXT:    lwz 6, L..C3@l(4)
+; LARGE32-NEXT:    addis 6, L..C2@u(2)
+; LARGE32-NEXT:    addis 3, L..C3@u(2)
+; LARGE32-NEXT:    lwz 3, L..C3@l(3)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    mr 4, 6
-; LARGE32-NEXT:    add 3, 3, 4
-; LARGE32-NEXT:    lwz 4, 4(3)
-; LARGE32-NEXT:    lwz 3, 0(3)
-; LARGE32-NEXT:    addis 5, L..C7@u(2)
-; LARGE32-NEXT:    lwz 5, L..C7@l(5)
-; LARGE32-NEXT:    lwz 6, 4(5)
-; LARGE32-NEXT:    lwz 5, 0(5)
-; LARGE32-NEXT:    addc 4, 6, 4
-; LARGE32-NEXT:    adde 3, 5, 3
+; LARGE32-NEXT:    lwz 4, L..C2@l(6)
+; LARGE32-NEXT:    lwzux 5, 3, 4
+; LARGE32-NEXT:    lwz 3, 4(3)
+; LARGE32-NEXT:    addis 4, L..C7@u(2)
+; LARGE32-NEXT:    lwz 4, L..C7@l(4)
+; LARGE32-NEXT:    lwz 6, 4(4)
+; LARGE32-NEXT:    lwz 7, 0(4)
+; LARGE32-NEXT:    addc 4, 6, 3
+; LARGE32-NEXT:    adde 3, 7, 5
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
 ; LARGE32-NEXT:    mtlr 0
@@ -467,10 +454,9 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C3(2)
-; SMALL64-NEXT:    add 3, 3, 4
-; SMALL64-NEXT:    ld 4, L..C7(2) # @GInit
-; SMALL64-NEXT:    ld 3, 0(3)
-; SMALL64-NEXT:    ld 4, 0(4)
+; SMALL64-NEXT:    ld 5, L..C7(2)
+; SMALL64-NEXT:    ldx 3, 3, 4
+; SMALL64-NEXT:    ld 4, 0(5)
 ; SMALL64-NEXT:    add 3, 4, 3
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -487,10 +473,9 @@ define i64 @loadsTIUninit() #1 {
 ; LARGE64-NEXT:    ld 3, L..C2@l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    ld 4, L..C3@l(6)
-; LARGE64-NEXT:    add 3, 3, 4
-; LARGE64-NEXT:    addis 4, L..C7@u(2)
-; LARGE64-NEXT:    ld 4, L..C7@l(4)
-; LARGE64-NEXT:    ld 3, 0(3)
+; LARGE64-NEXT:    addis 5, L..C7@u(2)
+; LARGE64-NEXT:    ldx 3, 3, 4
+; LARGE64-NEXT:    ld 4, L..C7@l(5)
 ; LARGE64-NEXT:    ld 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -514,14 +499,13 @@ define i64 @loadsTIInit() #1 {
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C4(2)
-; SMALL32-NEXT:    add 3, 3, 4
-; SMALL32-NEXT:    lwz 4, L..C7(2) # @GInit
-; SMALL32-NEXT:    lwz 5, 4(3)
-; SMALL32-NEXT:    lwz 6, 4(4)
-; SMALL32-NEXT:    lwz 3, 0(3)
-; SMALL32-NEXT:    lwz 7, 0(4)
-; SMALL32-NEXT:    addc 4, 6, 5
-; SMALL32-NEXT:    adde 3, 7, 3
+; SMALL32-NEXT:    lwz 5, L..C7(2)
+; SMALL32-NEXT:    lwzux 6, 3, 4
+; SMALL32-NEXT:    lwz 4, 4(5)
+; SMALL32-NEXT:    lwz 3, 4(3)
+; SMALL32-NEXT:    lwz 5, 0(5)
+; SMALL32-NEXT:    addc 4, 4, 3
+; SMALL32-NEXT:    adde 3, 5, 6
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
 ; SMALL32-NEXT:    mtlr 0
@@ -532,21 +516,19 @@ define i64 @loadsTIInit() #1 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    addis 3, L..C2@u(2)
-; LARGE32-NEXT:    lwz 3, L..C2@l(3)
-; LARGE32-NEXT:    addis 4, L..C4@u(2)
-; LARGE32-NEXT:    lwz 6, L..C4@l(4)
+; LARGE32-NEXT:    addis 6, L..C4@u(2)
+; LARGE32-NEXT:    addis 3, L..C3@u(2)
+; LARGE32-NEXT:    lwz 3, L..C3@l(3)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    mr 4, 6
-; LARGE32-NEXT:    add 3, 3, 4
-; LARGE32-NEXT:    lwz 4, 4(3)
-; LARGE32-NEXT:    lwz 3, 0(3)
-; LARGE32-NEXT:    addis 5, L..C7@u(2)
-; LARGE32-NEXT:    lwz 5, L..C7@l(5)
-; LARGE32-NEXT:    lwz 6, 4(5)
-; LARGE32-NEXT:    lwz 5, 0(5)
-; LARGE32-NEXT:    addc 4, 6, 4
-; LARGE32-NEXT:    adde 3, 5, 3
+; LARGE32-NEXT:    lwz 4, L..C4@l(6)
+; LARGE32-NEXT:    lwzux 5, 3, 4
+; LARGE32-NEXT:    lwz 3, 4(3)
+; LARGE32-NEXT:    addis 4, L..C7@u(2)
+; LARGE32-NEXT:    lwz 4, L..C7@l(4)
+; LARGE32-NEXT:    lwz 6, 4(4)
+; LARGE32-NEXT:    lwz 7, 0(4)
+; LARGE32-NEXT:    addc 4, 6, 3
+; LARGE32-NEXT:    adde 3, 7, 5
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
 ; LARGE32-NEXT:    mtlr 0
@@ -560,10 +542,9 @@ define i64 @loadsTIInit() #1 {
 ; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C4(2)
-; SMALL64-NEXT:    add 3, 3, 4
-; SMALL64-NEXT:    ld 4, L..C7(2) # @GInit
-; SMALL64-NEXT:    ld 3, 0(3)
-; SMALL64-NEXT:    ld 4, 0(4)
+; SMALL64-NEXT:    ld 5, L..C7(2)
+; SMALL64-NEXT:    ldx 3, 3, 4
+; SMALL64-NEXT:    ld 4, 0(5)
 ; SMALL64-NEXT:    add 3, 4, 3
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -580,10 +561,9 @@ define i64 @loadsTIInit() #1 {
 ; LARGE64-NEXT:    ld 3, L..C2@l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    ld 4, L..C4@l(6)
-; LARGE64-NEXT:    add 3, 3, 4
-; LARGE64-NEXT:    addis 4, L..C7@u(2)
-; LARGE64-NEXT:    ld 4, L..C7@l(4)
-; LARGE64-NEXT:    ld 3, 0(3)
+; LARGE64-NEXT:    addis 5, L..C7@u(2)
+; LARGE64-NEXT:    ldx 3, 3, 4
+; LARGE64-NEXT:    ld 4, L..C7@l(5)
 ; LARGE64-NEXT:    ld 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -607,7 +587,7 @@ define i64 @loadsTWInit() #1 {
 ; SMALL32-NEXT:    lwz 4, L..C6(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C7(2) # @GInit
+; SMALL32-NEXT:    lwz 4, L..C7(2)
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -650,7 +630,7 @@ define i64 @loadsTWInit() #1 {
 ; SMALL64-NEXT:    ld 4, L..C6(2)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C7(2) # @GInit
+; SMALL64-NEXT:    ld 4, L..C7(2)
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -723,10 +703,10 @@ entry:
 ; LARGE32-LABEL:  L..C1:
 ; LARGE32-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL:  L..C2:
+; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
+; LARGE32-LABEL:  L..C3:
 ; LARGE32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
-; LARGE32-LABEL:  L..C3:
-; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE32-LABEL:  L..C4:
 ; LARGE32-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE32-LABEL:  L..C5:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
index 0513f8de61620..f7cd73ae609b3 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
@@ -6,6 +6,10 @@
 ; RUN:     --code-model=small < %s | FileCheck %s --check-prefixes=SMALL32,SMALL
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \
 ; RUN:     --code-model=large < %s | FileCheck %s --check-prefixes=LARGE32,LARGE
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
+; RUN:     --code-model=small -O0 < %s | FileCheck %s --check-prefixes=WITHDUP
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
+; RUN:     --code-model=small -O1 < %s | FileCheck %s --check-prefixes=NODUP
 
 @TGInit = thread_local(localdynamic) global i32 42, align 4
 @TGUninit = thread_local(localdynamic) global i32 0, align 4
@@ -13,6 +17,8 @@
 @TIUninit = internal thread_local(localdynamic) global i32 0, align 4
 @TWInit = weak thread_local(localdynamic) global i32 42, align 4
 @TWUninit = weak thread_local(localdynamic) global i32 0, align 4
+@x = thread_local(localdynamic) global i32 42, align 4
+@y = thread_local(localdynamic) global i32 42, align 4
 
 define i32 @loadTGInit() {
 ; SMALL-LABEL:  loadTGInit:
@@ -21,19 +27,18 @@ define i32 @loadTGInit() {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
-; SMALL:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        lwz [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+; SMALL:        lwzx [[TGInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  loadTGInit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        lwz [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+; LARGE:        lwzx [[TGInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGInit)
   %1 = load i32, ptr %0, align 4
@@ -47,19 +52,18 @@ define void @storeTGInit(i32 noundef signext %i) {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
-; SMALL:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        stw [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+; SMALL:        stwx [[TGInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  storeTGInit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        stw [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+; LARGE:        stwx [[TGInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGInit)
   store i32 %i, ptr %0, align 4
@@ -73,19 +77,18 @@ define i32 @loadTGUninit() {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
-; SMALL:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        lwz [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+; SMALL:        lwzx [[TGInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  loadTGUninit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        lwz [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+; LARGE:        lwzx [[TGUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGUninit)
   %1 = load i32, ptr %0, align 4
@@ -99,19 +102,18 @@ define void @storeTGUninit(i32 noundef signext %i) {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
-; SMALL:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        stw [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+; SMALL:        stwx [[TGUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  storeTGUninit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        stw [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+; LARGE:        stwx [[TGUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGUninit)
   store i32 %i, ptr %0, align 4
@@ -125,19 +127,18 @@ define i32 @loadTIInit() {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
-; SMALL:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        lwz [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+; SMALL:        lwzx [[TIInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  loadTIInit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        lwz [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+; LARGE:        lwzx [[TIInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIInit)
   %1 = load i32, ptr %0, align 4
@@ -151,19 +152,18 @@ define void @storeTIInit(i32 noundef signext %i) {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
-; SMALL:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        stw [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+; SMALL:        stwx [[TIInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  storeTIInit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        stw [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+; LARGE:        stwx [[TIInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIInit)
   store i32 %i, ptr %0, align 4
@@ -177,19 +177,18 @@ define i32 @loadTIUninit() {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
-; SMALL:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        lwz [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+; SMALL:        lwzx [[TIUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  loadTIUninit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        lwz [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+; LARGE:        lwzx [[TIUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIUninit)
   %1 = load i32, ptr %0, align 4
@@ -203,19 +202,18 @@ define void @storeTIUninit(i32 noundef signext %i) {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
-; SMALL:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        stw [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+; SMALL:        stwx [[TIUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  storeTIUninit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        stw [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+; LARGE:        stwx [[TIUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIUninit)
   store i32 %i, ptr %0, align 4
@@ -229,19 +227,18 @@ define i32 @loadTWInit() {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
-; SMALL:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        lwz [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+; SMALL:        lwzx [[TWInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  loadTWInit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        lwz [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+; LARGE:        lwzx [[TWInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWInit)
   %1 = load i32, ptr %0, align 4
@@ -255,19 +252,18 @@ define void @storeTWInit(i32 noundef signext %i) {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
-; SMALL:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        stw [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+; SMALL:        stwx [[TWInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  storeTWInit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        stw [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+; LARGE:        stwx [[TWInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWInit)
   store i32 %i, ptr %0, align 4
@@ -281,19 +277,18 @@ define i32 @loadTWUninit() {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
-; SMALL:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        lwz [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+; SMALL:        lwzx [[TWUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  loadTWUninit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        lwz [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+; LARGE:        lwzx [[TWUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWUninit)
   %1 = load i32, ptr %0, align 4
@@ -307,25 +302,53 @@ define void @storeTWUninit(i32 noundef signext %i) {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
-; SMALL:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        stw [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+; SMALL:        stwx [[TWUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  storeTWUninit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        stw [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+; LARGE:        stwx [[TWUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWUninit)
   store i32 %i, ptr %0, align 4
   ret void
 }
 
+define i32 @DedupTlsGetMod() #0 {
+; WITHDUP-LABEL:  DedupTlsGetMod:
+; WITHDUP:        ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; WITHDUP-NEXT:   bla .__tls_get_mod[PR]
+; WITHDUP-NEXT:   ld [[OffsetXR:[0-9]+]], [[X:L..C[0-9]+]](2)
+; WITHDUP:        ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; WITHDUP-NEXT:   bla .__tls_get_mod[PR]
+; WITHDUP-NEXT:   ld [[OffsetYR:[0-9]+]], [[Y:L..C[0-9]+]](2)
+; WITHDUP-LABEL:  L..DedupTlsGetMod0:
+;
+; NODUP-LABEL:  DedupTlsGetMod:
+; NODUP:        ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; NODUP-NEXT:   bla .__tls_get_mod[PR]
+; NODUP-NEXT:   ld [[OffsetXR:[0-9]+]], [[X:L..C[0-9]+]](2)
+; NODUP-NEXT:   ld [[OffsetYR:[0-9]+]], [[Y:L..C[0-9]+]](2)
+; NODUP-NEXT:   lwzx [[XValR:[0-9]+]], [[ModuleHandleR]], [[OffsetXR]]
+; NODUP-NEXT:   lwzx [[YValR:[0-9]+]], [[ModuleHandleR]], [[OffsetYR]]
+; NODUP-LABEL:  L..DedupTlsGetMod0:
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, ptr %retval, align 4
+  %0 = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x)
+  %1 = load i32, ptr %0, align 4
+  %2 = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @y)
+  %3 = load i32, ptr %2, align 4
+  %add = add nsw i32 %1, %3
+  ret i32 %add
+}
+
 ; SMALL:          .extern .__tls_get_mod[PR]
 ; LARGE:          .extern .__tls_get_mod[PR]
 
@@ -345,11 +368,18 @@ entry:
 ; SMALL:        [[TWUninitL]]:
 ; SMALL-NEXT:   .tc TWUninit[TC],TWUninit[TL]@ld
 
-; LARGE:        [[ModuleHandleL]]:
-; LARGE-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
-; LARGE-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
-; LARGE:        [[TGInitL]]:
-; LARGE-NEXT:   .tc TGInit[TE],TGInit[TL]@ld
+; LARGE64:        [[ModuleHandleL]]:
+; LARGE64-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE64-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE64:        [[TGInitL]]:
+; LARGE64-NEXT:   .tc TGInit[TE],TGInit[TL]@ld
+;
+; LARGE32:        [[TGInitL]]:
+; LARGE32-NEXT:   .tc TGInit[TE],TGInit[TL]@ld
+; LARGE32:        [[ModuleHandleL]]:
+; LARGE32-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE32-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+;
 ; LARGE:        [[TGUninitL]]:
 ; LARGE-NEXT:   .tc TGUninit[TE],TGUninit[TL]@ld
 ; LARGE:        [[TIInitL]]:

>From 610c6ee1efcc487123aaa8c6bf084ba932c602c6 Mon Sep 17 00:00:00 2001
From: tingwang <tingwang at tingwangs-MacBook-Pro.local>
Date: Mon, 25 Sep 2023 08:52:45 +0800
Subject: [PATCH 09/21] (1) Use GPR3 directly in LoadModuleHandle. (2) Update
 comments.

---
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td      | 3 +--
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       | 3 +--
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 8 ++------
 3 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index e4a6e19dabd34..b51af57b796f8 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1602,8 +1602,7 @@ def TLSGDAIX8 :
                      "#TLSGDAIX8",
                      [(set i64:$rD,
                        (PPCTlsgdAIX i64:$offset, i64:$handle))]>;
-// This pseudo is expanded to one copy to put the module handle in R3, the call
-// to GETtlsMOD64AIX, and then adds the variable offset to the output from the call.
+// This pseudo is expanded to load module-handle in X3, and the call to GETtlsMOD64AIX.
 def TLSLDAIX8 : PPCEmitTimePseudo<(outs g8rc:$rD), (ins),
                      "#TLSLDAIX8", [(set i64:$rD, (PPCTlsldAIX))]>;
 // Combined op for ADDItlsldL and GETtlsADDR, late expanded.  X3 and LR8
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 30837e8735cf8..2abb58ae59bdc 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -3296,8 +3296,7 @@ def TLSGDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$offset, gprc:$handl
                           "#TLSGDAIX",
                           [(set i32:$rD,
                             (PPCTlsgdAIX i32:$offset, i32:$handle))]>;
-// This pseudo is expanded to one copy to put the module handle in R3, then call
-// GETtlsMOD32AIX, and then add variable offset to the output from the call.
+// This pseudo is expanded to load module-handle in R3, and the call to GETtlsMOD32AIX.
 def TLSLDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins),
                           "#TLSLDAIX", [(set i32:$rD, (PPCTlsldAIX))]>;
 // LR is a true define, while the rest of the Defs are clobbers.  R3 is
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index ba3f9f04697d7..71825dca77bcf 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -226,7 +226,7 @@ namespace {
               }
             }
 
-            // Generate instructions refer to the "_$TLSML" symbol
+            // Generate instructions to load module-handle.
             Register ModuleHandleHReg;
             if (IsLargeModel) {
               ModuleHandleHReg = RegInfo.createVirtualRegister(GPRNoZero);
@@ -236,15 +236,11 @@ namespace {
                   .addReg(Subtarget.getTOCPointerRegister())
                   .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG);
             }
-            Register MHReg = RegInfo.createVirtualRegister(GPRNoZero);
-            BuildMI(MBB, Anchor, DL, TII->get(LDTocOp), MHReg)
+            BuildMI(MBB, Anchor, DL, TII->get(LDTocOp), GPR3)
                 .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG)
                 .addReg(IsLargeModel
                             ? ModuleHandleHReg
                             : Register(Subtarget.getTOCPointerRegister()));
-            // The module handle is copied in r3.
-            BuildMI(MBB, Anchor, DL, TII->get(TargetOpcode::COPY), GPR3)
-                .addReg(MHReg);
             // The call to .__tls_get_mod.
             BuildMI(MBB, Anchor, DL, TII->get(Opc2), GPR3).addReg(GPR3);
           } else if (!IsTLSTPRelMI) {

>From 8917876529d212be5cdd37a9efe7fe36f9478cbd Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Sun, 24 Sep 2023 23:31:34 -0400
Subject: [PATCH 10/21] update lit case

---
 .../PowerPC/aix-tls-xcoff-reloc-large.ll      | 174 +++++++++---------
 .../CodeGen/PowerPC/aix-tls-xcoff-reloc.ll    | 117 ++++++------
 2 files changed, 144 insertions(+), 147 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
index 6eb1b325e9806..70d1e8a592e5d 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
@@ -33,7 +33,7 @@ entry:
 ; RELOC-NEXT:   Section (index: 1) .text {
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x16
-; RELOC-NEXT:     Symbol: _$TLSML (19)
+; RELOC-NEXT:     Symbol: TIInit (19)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -41,30 +41,22 @@ entry:
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x1A
-; RELOC-NEXT:     Symbol: _$TLSML (19)
-; RELOC-NEXT:     IsSigned: No
-; RELOC-NEXT:     FixupBitValue: 0
-; RELOC-NEXT:     Length: 16
-; RELOC-NEXT:     Type: R_TOCL (0x31)
-; RELOC-NEXT:   }
-; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x1E
-; RELOC-NEXT:     Symbol: TIInit (21)
+; RELOC-NEXT:     Symbol: _$TLSML (21)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x22
-; RELOC-NEXT:     Symbol: TIInit (21)
+; RELOC-NEXT:     Virtual Address: 0x1E
+; RELOC-NEXT:     Symbol: _$TLSML (21)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOCL (0x31)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x24
+; RELOC-NEXT:     Virtual Address: 0x20
 ; RELOC-NEXT:     Symbol: .__tls_get_mod (1)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -72,7 +64,15 @@ entry:
 ; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x5E
+; RELOC-NEXT:     Virtual Address: 0x26
+; RELOC-NEXT:     Symbol: TIInit (19)
+; RELOC-NEXT:     IsSigned: No
+; RELOC-NEXT:     FixupBitValue: 0
+; RELOC-NEXT:     Length: 16
+; RELOC-NEXT:     Type: R_TOCL (0x31)
+; RELOC-NEXT:   }
+; RELOC-NEXT:   Relocation {
+; RELOC-NEXT:     Virtual Address: 0x4E
 ; RELOC-NEXT:     Symbol: .TWInit (23)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -80,7 +80,7 @@ entry:
 ; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x62
+; RELOC-NEXT:     Virtual Address: 0x52
 ; RELOC-NEXT:     Symbol: TWInit (25)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -88,7 +88,7 @@ entry:
 ; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x66
+; RELOC-NEXT:     Virtual Address: 0x56
 ; RELOC-NEXT:     Symbol: .TWInit (23)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -96,7 +96,7 @@ entry:
 ; RELOC-NEXT:     Type: R_TOCL (0x31)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x6A
+; RELOC-NEXT:     Virtual Address: 0x5A
 ; RELOC-NEXT:     Symbol: TWInit (25)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -104,7 +104,7 @@ entry:
 ; RELOC-NEXT:     Type: R_TOCL (0x31)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x6C
+; RELOC-NEXT:     Virtual Address: 0x5C
 ; RELOC-NEXT:     Symbol: .__tls_get_addr (3)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -112,7 +112,7 @@ entry:
 ; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x76
+; RELOC-NEXT:     Virtual Address: 0x66
 ; RELOC-NEXT:     Symbol: GInit (27)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -120,7 +120,7 @@ entry:
 ; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x7A
+; RELOC-NEXT:     Virtual Address: 0x6A
 ; RELOC-NEXT:     Symbol: GInit (27)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -130,7 +130,7 @@ entry:
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Section (index: 2) .data {
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xA0
+; RELOC-NEXT:   Virtual Address: 0x90
 ; RELOC-NEXT:   Symbol: .storesTIInit (7)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -138,7 +138,7 @@ entry:
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xA4
+; RELOC-NEXT:   Virtual Address: 0x94
 ; RELOC-NEXT:   Symbol: TOC (17)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -146,7 +146,7 @@ entry:
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xAC
+; RELOC-NEXT:   Virtual Address: 0x9C
 ; RELOC-NEXT:   Symbol: .loadsTWInit (9)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -154,7 +154,7 @@ entry:
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xB0
+; RELOC-NEXT:   Virtual Address: 0xA0
 ; RELOC-NEXT:   Symbol: TOC (17)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -162,23 +162,23 @@ entry:
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xB8
-; RELOC-NEXT:   Symbol: _$TLSML (19)
+; RELOC-NEXT:   Virtual Address: 0xA8
+; RELOC-NEXT:   Symbol: TIInit (29)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLSML (0x25)
+; RELOC-NEXT:   Type: R_TLS_LD (0x22)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xBC
-; RELOC-NEXT:   Symbol: TIInit (29)
+; RELOC-NEXT:   Virtual Address: 0xAC
+; RELOC-NEXT:   Symbol: _$TLSML (21)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLS_LD (0x22)
+; RELOC-NEXT:   Type: R_TLSML (0x25)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xC0
+; RELOC-NEXT:   Virtual Address: 0xB0
 ; RELOC-NEXT:   Symbol: TWInit (31)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -186,7 +186,7 @@ entry:
 ; RELOC-NEXT:   Type: R_TLSM (0x24)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xC4
+; RELOC-NEXT:   Virtual Address: 0xB4
 ; RELOC-NEXT:   Symbol: TWInit (31)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -194,7 +194,7 @@ entry:
 ; RELOC-NEXT:   Type: R_TLS (0x20)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xC8
+; RELOC-NEXT:   Virtual Address: 0xB8
 ; RELOC-NEXT:   Symbol: GInit (11)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -269,7 +269,7 @@ entry:
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
 ; SYM-NEXT:       Index: 6
-; SYM-NEXT:       SectionLen: 148
+; SYM-NEXT:       SectionLen: 132
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 5
@@ -302,7 +302,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 9
 ; SYM-NEXT:     Name: .loadsTWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x50
+; SYM-NEXT:     Value (RelocatableAddress): 0x40
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -322,7 +322,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 11
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x98
+; SYM-NEXT:     Value (RelocatableAddress): 0x88
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -342,7 +342,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 13
 ; SYM-NEXT:     Name: storesTIInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xA0
+; SYM-NEXT:     Value (RelocatableAddress): 0x90
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -362,7 +362,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 15
 ; SYM-NEXT:     Name: loadsTWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xAC
+; SYM-NEXT:     Value (RelocatableAddress): 0x9C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -382,7 +382,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 17
 ; SYM-NEXT:     Name: TOC
-; SYM-NEXT:     Value (RelocatableAddress): 0xB8
+; SYM-NEXT:     Value (RelocatableAddress): 0xA8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -401,8 +401,8 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 19
-; SYM-NEXT:     Name: _$TLSML
-; SYM-NEXT:     Value (RelocatableAddress): 0xB8
+; SYM-NEXT:     Name: TIInit
+; SYM-NEXT:     Value (RelocatableAddress): 0xA8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -414,15 +414,15 @@ entry:
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 2
 ; SYM-NEXT:       SymbolType: XTY_SD (0x1)
-; SYM-NEXT:       StorageMappingClass: XMC_TC (0x3)
+; SYM-NEXT:       StorageMappingClass: XMC_TE (0x16)
 ; SYM-NEXT:       StabInfoIndex: 0x0
 ; SYM-NEXT:       StabSectNum: 0x0
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 21
-; SYM-NEXT:     Name: TIInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xBC
+; SYM-NEXT:     Name: _$TLSML
+; SYM-NEXT:     Value (RelocatableAddress): 0xAC
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -434,7 +434,7 @@ entry:
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 2
 ; SYM-NEXT:       SymbolType: XTY_SD (0x1)
-; SYM-NEXT:       StorageMappingClass: XMC_TE (0x16)
+; SYM-NEXT:       StorageMappingClass: XMC_TC (0x3)
 ; SYM-NEXT:       StabInfoIndex: 0x0
 ; SYM-NEXT:       StabSectNum: 0x0
 ; SYM-NEXT:     }
@@ -442,7 +442,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 23
 ; SYM-NEXT:     Name: .TWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xC0
+; SYM-NEXT:     Value (RelocatableAddress): 0xB0
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -462,7 +462,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 25
 ; SYM-NEXT:     Name: TWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xC4
+; SYM-NEXT:     Value (RelocatableAddress): 0xB4
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -482,7 +482,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 27
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xC8
+; SYM-NEXT:     Value (RelocatableAddress): 0xB8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -549,25 +549,23 @@ entry:
 ; DIS-NEXT:                                       stw 0, 40(1)
 ; DIS-NEXT:                                       mr 7, 3
 ; DIS-NEXT:                                       mr 6, 4
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 8, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 19) TIInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 19) _$TLSML[TC]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 0(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 19) _$TLSML[TC]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 4, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 21) TIInit[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 8, 4(4)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 21) TIInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 21) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 4(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 21) _$TLSML[TC]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0
 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA  (idx: 1)      .__tls_get_mod[PR]
-; DIS-NEXT:                                       mr 4, 8
-; DIS-NEXT:                                       add 3, 3, 4
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 4, 0(8)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 19) TIInit[TE]
+; DIS-NEXT:                                       stwux 7, 3, 4
 ; DIS-NEXT:                                       stw 6, 4(3)
-; DIS-NEXT:                                       stw 7, 0(3)
 ; DIS-NEXT:                                       addi 1, 1, 32
 ; DIS-NEXT:                                       lwz 0, 8(1)
 ; DIS-NEXT:                                       mtlr 0
 ; DIS-NEXT:                                       blr
-; DIS:      00000050 (idx: 9) .loadsTWInit:
+; DIS:      00000040 (idx: 9) .loadsTWInit:
 ; DIS-NEXT:                                       mflr 0
 ; DIS-NEXT:                                       stwu 1, -32(1)
 ; DIS-NEXT:                                       stw 0, 40(1)
@@ -594,36 +592,36 @@ entry:
 ; DIS-NEXT:                                       blr
 
 ; DIS:      Disassembly of section .data:
-; DIS:      00000098  (idx: 11) GInit[RW]:
-; DIS-NEXT:       98: 3f f0 00 00
-; DIS-NEXT:       9c: 00 00 00 00
-; DIS:      000000a0  (idx: 13) storesTIInit[DS]:
-; DIS-NEXT:       a0: 00 00 00 00
-; DIS-NEXT: 000000a0: R_POS (idx: 7) .storesTIInit
-; DIS-NEXT:       a4: 00 00 00 b8
-; DIS-NEXT: 000000a4: R_POS (idx: 17) TOC[TC0]
+; DIS:      00000088  (idx: 11) GInit[RW]:
+; DIS-NEXT:       88: 3f f0 00 00
+; DIS-NEXT:       8c: 00 00 00 00
+; DIS:      00000090  (idx: 13) storesTIInit[DS]:
+; DIS-NEXT:       90: 00 00 00 00
+; DIS-NEXT: 00000090: R_POS (idx: 7) .storesTIInit
+; DIS-NEXT:       94: 00 00 00 a8
+; DIS-NEXT: 00000094: R_POS (idx: 17) TOC[TC0]
+; DIS-NEXT:       98: 00 00 00 00
+; DIS:      0000009c  (idx: 15) loadsTWInit[DS]:
+; DIS-NEXT:       9c: 00 00 00 40
+; DIS-NEXT: 0000009c: R_POS (idx: 9) .loadsTWInit
+; DIS-NEXT:       a0: 00 00 00 a8
+; DIS-NEXT: 000000a0: R_POS (idx: 17) TOC[TC0]
+; DIS-NEXT:       a4: 00 00 00 00
+; DIS:      000000a8  (idx: 19) TIInit[TE]:
 ; DIS-NEXT:       a8: 00 00 00 00
-; DIS:      000000ac  (idx: 15) loadsTWInit[DS]:
-; DIS-NEXT:       ac: 00 00 00 50
-; DIS-NEXT: 000000ac: R_POS (idx: 9) .loadsTWInit
-; DIS-NEXT:       b0: 00 00 00 b8
-; DIS-NEXT: 000000b0: R_POS (idx: 17) TOC[TC0]
-; DIS-NEXT:       b4: 00 00 00 00
-; DIS:      000000b8  (idx: 19) _$TLSML[TC]:
-; DIS-NEXT:       b8: 00 00 00 00
-; DIS-NEXT: 000000b8: R_TLSML (idx: 19) _$TLSML[TC]
-; DIS:      000000bc  (idx: 21) TIInit[TE]:
-; DIS-NEXT:       bc: 00 00 00 00
-; DIS-NEXT: 000000bc: R_TLS_LD (idx: 29) TIInit[TL]
-; DIS:      000000c0  (idx: 23) .TWInit[TE]:
-; DIS-NEXT:       c0: 00 00 00 00
-; DIS-NEXT: 000000c0: R_TLSM (idx: 31) TWInit[TL]
-; DIS:      000000c4  (idx: 25) TWInit[TE]:
-; DIS-NEXT:       c4: 00 00 00 08
-; DIS-NEXT: 000000c4: R_TLS (idx: 31) TWInit[TL]
-; DIS:      000000c8  (idx: 27) GInit[TE]:
-; DIS-NEXT:       c8: 00 00 00 98
-; DIS-NEXT: 000000c8: R_POS (idx: 11) GInit[RW]
+; DIS-NEXT: 000000a8: R_TLS_LD (idx: 29) TIInit[TL]
+; DIS:      000000ac  (idx: 21) _$TLSML[TC]:
+; DIS-NEXT:       ac: 00 00 00 00
+; DIS-NEXT: 000000ac: R_TLSML (idx: 21) _$TLSML[TC]
+; DIS:      000000b0  (idx: 23) .TWInit[TE]:
+; DIS-NEXT:       b0: 00 00 00 00
+; DIS-NEXT: 000000b0: R_TLSM (idx: 31) TWInit[TL]
+; DIS:      000000b4  (idx: 25) TWInit[TE]:
+; DIS-NEXT:       b4: 00 00 00 08
+; DIS-NEXT: 000000b4: R_TLS (idx: 31) TWInit[TL]
+; DIS:      000000b8  (idx: 27) GInit[TE]:
+; DIS-NEXT:       b8: 00 00 00 88
+; DIS-NEXT: 000000b8: R_POS (idx: 11) GInit[RW]
 
 ; DIS:      Disassembly of section .tdata:
 ; DIS:      00000000  (idx: 29) TIInit[TL]:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
index dedd36d8ffc51..c8a737281840a 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
@@ -57,7 +57,7 @@ entry:
 ; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x4A
+; RELOC-NEXT:     Virtual Address: 0x3A
 ; RELOC-NEXT:     Symbol: .TGInit (29)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -65,7 +65,7 @@ entry:
 ; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x4E
+; RELOC-NEXT:     Virtual Address: 0x3E
 ; RELOC-NEXT:     Symbol: TGInit (31)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -73,7 +73,7 @@ entry:
 ; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x54
+; RELOC-NEXT:     Virtual Address: 0x44
 ; RELOC-NEXT:     Symbol: .__tls_get_addr (3)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -81,7 +81,7 @@ entry:
 ; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x5A
+; RELOC-NEXT:     Virtual Address: 0x4A
 ; RELOC-NEXT:     Symbol: GInit (33)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -91,7 +91,7 @@ entry:
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Section (index: 2) .data {
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x80
+; RELOC-NEXT:   Virtual Address: 0x70
 ; RELOC-NEXT:   Symbol: .storesTIUninit (7)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -99,7 +99,7 @@ entry:
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x84
+; RELOC-NEXT:   Virtual Address: 0x74
 ; RELOC-NEXT:   Symbol: TOC (23)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -107,7 +107,7 @@ entry:
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x8C
+; RELOC-NEXT:   Virtual Address: 0x7C
 ; RELOC-NEXT:   Symbol: .loadsTGInit (9)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -115,7 +115,7 @@ entry:
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x90
+; RELOC-NEXT:   Virtual Address: 0x80
 ; RELOC-NEXT:   Symbol: TOC (23)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -123,7 +123,7 @@ entry:
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x98
+; RELOC-NEXT:   Virtual Address: 0x88
 ; RELOC-NEXT:   Symbol: _$TLSML (25)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -131,7 +131,7 @@ entry:
 ; RELOC-NEXT:   Type: R_TLSML (0x25)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x9C
+; RELOC-NEXT:   Virtual Address: 0x8C
 ; RELOC-NEXT:   Symbol: TIUninit (39)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -139,7 +139,7 @@ entry:
 ; RELOC-NEXT:   Type: R_TLS_LD (0x22)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xA0
+; RELOC-NEXT:   Virtual Address: 0x90
 ; RELOC-NEXT:   Symbol: TGInit (37)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -147,7 +147,7 @@ entry:
 ; RELOC-NEXT:   Type: R_TLSM (0x24)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xA4
+; RELOC-NEXT:   Virtual Address: 0x94
 ; RELOC-NEXT:   Symbol: TGInit (37)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -155,7 +155,7 @@ entry:
 ; RELOC-NEXT:   Type: R_TLS (0x20)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xA8
+; RELOC-NEXT:   Virtual Address: 0x98
 ; RELOC-NEXT:   Symbol: GInit (17)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -230,7 +230,7 @@ entry:
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
 ; SYM-NEXT:       Index: 6
-; SYM-NEXT:       SectionLen: 120
+; SYM-NEXT:       SectionLen: 104
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 5
@@ -263,7 +263,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 9
 ; SYM-NEXT:     Name: .loadsTGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x40
+; SYM-NEXT:     Value (RelocatableAddress): 0x30
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -283,7 +283,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 11
 ; SYM-NEXT:     Name: .rodata
-; SYM-NEXT:     Value (RelocatableAddress): 0x78
+; SYM-NEXT:     Value (RelocatableAddress): 0x68
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -303,7 +303,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 13
 ; SYM-NEXT:     Name: const_ivar
-; SYM-NEXT:     Value (RelocatableAddress): 0x78
+; SYM-NEXT:     Value (RelocatableAddress): 0x68
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -323,7 +323,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 15
 ; SYM-NEXT:     Name: .data
-; SYM-NEXT:     Value (RelocatableAddress): 0x7C
+; SYM-NEXT:     Value (RelocatableAddress): 0x6C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -343,7 +343,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 17
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x7C
+; SYM-NEXT:     Value (RelocatableAddress): 0x6C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -363,7 +363,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 19
 ; SYM-NEXT:     Name: storesTIUninit
-; SYM-NEXT:     Value (RelocatableAddress): 0x80
+; SYM-NEXT:     Value (RelocatableAddress): 0x70
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -383,7 +383,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 21
 ; SYM-NEXT:     Name: loadsTGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x8C
+; SYM-NEXT:     Value (RelocatableAddress): 0x7C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -403,7 +403,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 23
 ; SYM-NEXT:     Name: TOC
-; SYM-NEXT:     Value (RelocatableAddress): 0x98
+; SYM-NEXT:     Value (RelocatableAddress): 0x88
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -423,7 +423,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 25
 ; SYM-NEXT:     Name: _$TLSML
-; SYM-NEXT:     Value (RelocatableAddress): 0x98
+; SYM-NEXT:     Value (RelocatableAddress): 0x88
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -443,7 +443,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 27
 ; SYM-NEXT:     Name: TIUninit
-; SYM-NEXT:     Value (RelocatableAddress): 0x9C
+; SYM-NEXT:     Value (RelocatableAddress): 0x8C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -463,7 +463,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 29
 ; SYM-NEXT:     Name: .TGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xA0
+; SYM-NEXT:     Value (RelocatableAddress): 0x90
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -483,7 +483,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 31
 ; SYM-NEXT:     Name: TGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xA4
+; SYM-NEXT:     Value (RelocatableAddress): 0x94
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -503,7 +503,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 33
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xA8
+; SYM-NEXT:     Value (RelocatableAddress): 0x98
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -595,13 +595,12 @@ entry:
 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1)      .__tls_get_mod[PR]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 4, 4(2)
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 27) TIUninit[TC]
-; DIS-NEXT:                                      add 3, 3, 4
-; DIS-NEXT:                                      stw 6, 0(3)
+; DIS-NEXT:                                      stwx 6, 3, 4
 ; DIS-NEXT:                                      addi 1, 1, 32
 ; DIS-NEXT:                                      lwz 0, 8(1)
 ; DIS-NEXT:                                      mtlr 0
 ; DIS-NEXT:                                      blr
-; DIS:      00000040 (idx: 9) .loadsTGInit:
+; DIS:      00000030 (idx: 9) .loadsTGInit:
 ; DIS-NEXT:                                      mflr 0
 ; DIS-NEXT:                                      stwu 1, -32(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 8(2)
@@ -620,39 +619,39 @@ entry:
 ; DIS-NEXT:                                      lwz 0, 8(1)
 ; DIS-NEXT:                                      mtlr 0
 ; DIS-NEXT:                                      blr
-; DIS:      00000078 (idx: 13) const_ivar:
-; DIS-NEXT:       78: 00 00 00 06
+; DIS:      00000068 (idx: 13) const_ivar:
+; DIS-NEXT:       68: 00 00 00 06
 
 ; DIS:      Disassembly of section .data:
-; DIS:      0000007c  (idx: 17) GInit:
-; DIS-NEXT:       7c: 00 00 00 01
-; DIS:      00000080  (idx: 19) storesTIUninit[DS]:
-; DIS-NEXT:       80: 00 00 00 00
-; DIS-NEXT: 00000080: R_POS (idx: 7) .storesTIUninit
-; DIS-NEXT:       84: 00 00 00 98
-; DIS-NEXT: 00000084: R_POS (idx: 23) TOC[TC0]
+; DIS:      0000006c  (idx: 17) GInit:
+; DIS-NEXT:       6c: 00 00 00 01
+; DIS:      00000070  (idx: 19) storesTIUninit[DS]:
+; DIS-NEXT:       70: 00 00 00 00
+; DIS-NEXT: 00000070: R_POS (idx: 7) .storesTIUninit
+; DIS-NEXT:       74: 00 00 00 88
+; DIS-NEXT: 00000074: R_POS (idx: 23) TOC[TC0]
+; DIS-NEXT:       78: 00 00 00 00
+; DIS:      0000007c  (idx: 21) loadsTGInit[DS]:
+; DIS-NEXT:       7c: 00 00 00 30
+; DIS-NEXT: 0000007c: R_POS (idx: 9) .loadsTGInit
+; DIS-NEXT:       80: 00 00 00 88
+; DIS-NEXT: 00000080: R_POS (idx: 23) TOC[TC0]
+; DIS-NEXT:       84: 00 00 00 00
+; DIS:      00000088  (idx: 25) _$TLSML[TC]:
 ; DIS-NEXT:       88: 00 00 00 00
-; DIS:      0000008c  (idx: 21) loadsTGInit[DS]:
-; DIS-NEXT:       8c: 00 00 00 40
-; DIS-NEXT: 0000008c: R_POS (idx: 9) .loadsTGInit
-; DIS-NEXT:       90: 00 00 00 98
-; DIS-NEXT: 00000090: R_POS (idx: 23) TOC[TC0]
+; DIS-NEXT: 00000088: R_TLSML (idx: 25) _$TLSML[TC]
+; DIS:      0000008c  (idx: 27) TIUninit[TC]:
+; DIS-NEXT:       8c: 00 00 00 00
+; DIS-NEXT: 0000008c: R_TLS_LD (idx: 39) TIUninit[UL]
+; DIS:      00000090  (idx: 29) .TGInit[TC]:
+; DIS-NEXT:       90: 00 00 00 00
+; DIS-NEXT: 00000090: R_TLSM (idx: 37) TGInit
+; DIS:      00000094  (idx: 31) TGInit[TC]:
 ; DIS-NEXT:       94: 00 00 00 00
-; DIS:      00000098  (idx: 25) _$TLSML[TC]:
-; DIS-NEXT:       98: 00 00 00 00
-; DIS-NEXT: 00000098: R_TLSML (idx: 25) _$TLSML[TC]
-; DIS:      0000009c  (idx: 27) TIUninit[TC]:
-; DIS-NEXT:       9c: 00 00 00 00
-; DIS-NEXT: 0000009c: R_TLS_LD (idx: 39) TIUninit[UL]
-; DIS:      000000a0  (idx: 29) .TGInit[TC]:
-; DIS-NEXT:       a0: 00 00 00 00
-; DIS-NEXT: 000000a0: R_TLSM (idx: 37) TGInit
-; DIS:      000000a4  (idx: 31) TGInit[TC]:
-; DIS-NEXT:       a4: 00 00 00 00
-; DIS-NEXT: 000000a4: R_TLS (idx: 37) TGInit
-; DIS:      000000a8  (idx: 33) GInit[TC]:
-; DIS-NEXT:       a8: 00 00 00 7c
-; DIS-NEXT: 000000a8: R_POS (idx: 17) GInit
+; DIS-NEXT: 00000094: R_TLS (idx: 37) TGInit
+; DIS:      00000098  (idx: 33) GInit[TC]:
+; DIS-NEXT:       98: 00 00 00 6c
+; DIS-NEXT: 00000098: R_POS (idx: 17) GInit
 
 ; DIS:      Disassembly of section .tdata:
 ; DIS:      00000000 (idx: 37) TGInit:

>From 0e4aa9724d4ba103f72581681509fa3e669aa2bb Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Mon, 25 Sep 2023 09:43:41 -0400
Subject: [PATCH 11/21] Fix obj mode var access.

---
 llvm/lib/MC/XCOFFObjectWriter.cpp             |   3 +-
 .../PowerPC/aix-tls-ld-xcoff-reloc-large.ll   | 328 ++++++++++++++++++
 .../CodeGen/PowerPC/aix-tls-xcoff-reloc.ll    |   2 +-
 3 files changed, 331 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-tls-ld-xcoff-reloc-large.ll

diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index 343e2fc877bc3..a898a35a057e6 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -698,7 +698,8 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
   if (Type == XCOFF::RelocationType::R_POS ||
       Type == XCOFF::RelocationType::R_TLS ||
       Type == XCOFF::RelocationType::R_TLS_LE ||
-      Type == XCOFF::RelocationType::R_TLS_IE)
+      Type == XCOFF::RelocationType::R_TLS_IE ||
+      Type == XCOFF::RelocationType::R_TLS_LD)
     // The FixedValue should be symbol's virtual address in this object file
     // plus any constant value that we might get.
     FixedValue = getVirtualAddress(SymA, SymASec) + Target.getConstant();
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-ld-xcoff-reloc-large.ll b/llvm/test/CodeGen/PowerPC/aix-tls-ld-xcoff-reloc-large.ll
new file mode 100644
index 0000000000000..657f6187bcd7f
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-ld-xcoff-reloc-large.ll
@@ -0,0 +1,328 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
+; RUN:     -xcoff-traceback-table=false --code-model=large -filetype=obj -o %t.o < %s
+; RUN: llvm-readobj --relocs --expand-relocs %t.o | FileCheck --check-prefix=RELOC %s
+; RUN: llvm-readobj --syms %t.o | FileCheck --check-prefix=SYM %s
+; RUN: llvm-objdump -D -r --symbol-description %t.o | FileCheck --check-prefix=DIS %s
+
+@ThreadLocalVarInit = thread_local(localdynamic) global i64 1, align 8
+@IThreadLocalVarUninit = internal thread_local(localdynamic) global i64 0, align 8
+@IThreadLocalVarUninit2 = internal thread_local(localdynamic) global i64 0, align 8
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
+
+define void @storeITLUninit(i64 noundef %x) {
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit)
+  store i64 %x, ptr %0, align 8
+  ret void
+}
+
+define i64 @loadTLInit() {
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
+  %1 = load i64, ptr %0, align 8
+  ret i64 %1
+}
+
+define signext i64 @loadTLUninit() {
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit)
+  store i64 1, ptr %0, align 8
+  %1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit2)
+  %2 = load i64, ptr %1, align 8
+  %add = add nsw i64 %2, 1
+  ret i64 %add
+}
+
+; RELOC:      File: {{.*}}aix-tls-ld-xcoff-reloc-large.ll.tmp.o
+; RELOC-NEXT: Format: aix5coff64-rs6000
+; RELOC-NEXT: Arch: powerpc64
+; RELOC-NEXT: AddressSize: 64bit
+; RELOC-NEXT: Relocations [
+; RELOC:      Virtual Address: 0x12
+; RELOC-NEXT:      Symbol: _$TLSML (19)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCU (0x30)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x16
+; RELOC-NEXT:      Symbol: IThreadLocalVarUninit (21)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCU (0x30)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x1A
+; RELOC-NEXT:      Symbol: _$TLSML (19)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCL (0x31)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x22
+; RELOC-NEXT:      Symbol: IThreadLocalVarUninit (21)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCL (0x31)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x4E
+; RELOC-NEXT:      Symbol: _$TLSML (19)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCU (0x30)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x52
+; RELOC-NEXT:      Symbol: ThreadLocalVarInit (23)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCU (0x30)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x56
+; RELOC-NEXT:      Symbol: _$TLSML (19)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCL (0x31)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x5E
+; RELOC-NEXT:      Symbol: ThreadLocalVarInit (23)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCL (0x31)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x8E
+; RELOC-NEXT:      Symbol: _$TLSML (19)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCU (0x30)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x92
+; RELOC-NEXT:      Symbol: IThreadLocalVarUninit (21)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCU (0x30)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x96
+; RELOC-NEXT:      Symbol: _$TLSML (19)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCL (0x31)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x9E
+; RELOC-NEXT:      Symbol: IThreadLocalVarUninit (21)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCL (0x31)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0xAA
+; RELOC-NEXT:      Symbol: IThreadLocalVarUninit2 (25)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCU (0x30)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0xAE
+; RELOC-NEXT:      Symbol: IThreadLocalVarUninit2 (25)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCL (0x31)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x110
+; RELOC-NEXT:      Symbol: _$TLSML (19)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 64
+; RELOC-NEXT:      Type: R_TLSML (0x25)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x118
+; RELOC-NEXT:      Symbol: IThreadLocalVarUninit (29)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 64
+; RELOC-NEXT:      Type: R_TLS_LD (0x22)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x120
+; RELOC-NEXT:      Symbol: ThreadLocalVarInit (27)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 64
+; RELOC-NEXT:      Type: R_TLS_LD (0x22)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x128
+; RELOC-NEXT:      Symbol: IThreadLocalVarUninit2 (31)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 64
+; RELOC-NEXT:      Type: R_TLS_LD (0x22)
+; RELOC-NEXT:    }
+
+; SYM:      File: {{.*}}aix-tls-ld-xcoff-reloc-large.ll.tmp.o
+; SYM-NEXT: Format: aix5coff64-rs6000
+; SYM-NEXT: Arch: powerpc64
+; SYM-NEXT: AddressSize: 64bit
+; SYM-NEXT: Symbols [
+; SYM:    Index: 19
+; SYM-NEXT:    Name: _$TLSML
+; SYM-NEXT:    Value (RelocatableAddress): 0x110
+; SYM-NEXT:    Section: .data
+; SYM-NEXT:    Type: 0x0
+; SYM-NEXT:    StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT:    NumberOfAuxEntries: 1
+; SYM-NEXT:    CSECT Auxiliary Entry {
+; SYM-NEXT:      Index: 20
+; SYM-NEXT:      SectionLen: 8
+; SYM-NEXT:      ParameterHashIndex: 0x0
+; SYM-NEXT:      TypeChkSectNum: 0x0
+; SYM-NEXT:      SymbolAlignmentLog2: 3
+; SYM-NEXT:      SymbolType: XTY_SD (0x1)
+; SYM-NEXT:      StorageMappingClass: XMC_TC (0x3)
+; SYM-NEXT:      Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:    }
+; SYM-NEXT:  }
+; SYM:    Index: 21
+; SYM-NEXT:    Name: IThreadLocalVarUninit
+; SYM-NEXT:    Value (RelocatableAddress): 0x118
+; SYM-NEXT:    Section: .data
+; SYM-NEXT:    Type: 0x0
+; SYM-NEXT:    StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT:    NumberOfAuxEntries: 1
+; SYM-NEXT:    CSECT Auxiliary Entry {
+; SYM-NEXT:      Index: 22
+; SYM-NEXT:      SectionLen: 8
+; SYM-NEXT:      ParameterHashIndex: 0x0
+; SYM-NEXT:      TypeChkSectNum: 0x0
+; SYM-NEXT:      SymbolAlignmentLog2: 3
+; SYM-NEXT:      SymbolType: XTY_SD (0x1)
+; SYM-NEXT:      StorageMappingClass: XMC_TE (0x16)
+; SYM-NEXT:      Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:    }
+; SYM-NEXT:  }
+; SYM:    Index: 23
+; SYM-NEXT:    Name: ThreadLocalVarInit
+; SYM-NEXT:    Value (RelocatableAddress): 0x120
+; SYM-NEXT:    Section: .data
+; SYM-NEXT:    Type: 0x0
+; SYM-NEXT:    StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT:    NumberOfAuxEntries: 1
+; SYM-NEXT:    CSECT Auxiliary Entry {
+; SYM-NEXT:      Index: 24
+; SYM-NEXT:      SectionLen: 8
+; SYM-NEXT:      ParameterHashIndex: 0x0
+; SYM-NEXT:      TypeChkSectNum: 0x0
+; SYM-NEXT:      SymbolAlignmentLog2: 3
+; SYM-NEXT:      SymbolType: XTY_SD (0x1)
+; SYM-NEXT:      StorageMappingClass: XMC_TE (0x16)
+; SYM-NEXT:      Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:    }
+; SYM-NEXT:  }
+; SYM:    Index: 25
+; SYM-NEXT:    Name: IThreadLocalVarUninit2
+; SYM-NEXT:    Value (RelocatableAddress): 0x128
+; SYM-NEXT:    Section: .data
+; SYM-NEXT:    Type: 0x0
+; SYM-NEXT:    StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT:    NumberOfAuxEntries: 1
+; SYM-NEXT:    CSECT Auxiliary Entry {
+; SYM-NEXT:      Index: 26
+; SYM-NEXT:      SectionLen: 8
+; SYM-NEXT:      ParameterHashIndex: 0x0
+; SYM-NEXT:      TypeChkSectNum: 0x0
+; SYM-NEXT:      SymbolAlignmentLog2: 3
+; SYM-NEXT:      SymbolType: XTY_SD (0x1)
+; SYM-NEXT:      StorageMappingClass: XMC_TE (0x16)
+; SYM-NEXT:      Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:    }
+; SYM-NEXT:  }
+; SYM:    Index: 27
+; SYM-NEXT:    Name: ThreadLocalVarInit
+; SYM-NEXT:    Value (RelocatableAddress): 0x0
+; SYM-NEXT:    Section: .tdata
+; SYM-NEXT:    Type: 0x0
+; SYM-NEXT:    StorageClass: C_EXT (0x2)
+; SYM-NEXT:    NumberOfAuxEntries: 1
+; SYM-NEXT:    CSECT Auxiliary Entry {
+; SYM-NEXT:      Index: 28
+; SYM-NEXT:      SectionLen: 8
+; SYM-NEXT:      ParameterHashIndex: 0x0
+; SYM-NEXT:      TypeChkSectNum: 0x0
+; SYM-NEXT:      SymbolAlignmentLog2: 3
+; SYM-NEXT:      SymbolType: XTY_SD (0x1)
+; SYM-NEXT:      StorageMappingClass: XMC_TL (0x14)
+; SYM-NEXT:      Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:    }
+; SYM-NEXT:  }
+; SYM:    Index: 29
+; SYM-NEXT:    Name: IThreadLocalVarUninit
+; SYM-NEXT:    Value (RelocatableAddress): 0x8
+; SYM-NEXT:    Section: .tbss
+; SYM-NEXT:    Type: 0x0
+; SYM-NEXT:    StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT:    NumberOfAuxEntries: 1
+; SYM-NEXT:    CSECT Auxiliary Entry {
+; SYM-NEXT:      Index: 30
+; SYM-NEXT:      SectionLen: 8
+; SYM-NEXT:      ParameterHashIndex: 0x0
+; SYM-NEXT:      TypeChkSectNum: 0x0
+; SYM-NEXT:      SymbolAlignmentLog2: 3
+; SYM-NEXT:      SymbolType: XTY_CM (0x3)
+; SYM-NEXT:      StorageMappingClass: XMC_UL (0x15)
+; SYM-NEXT:      Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:    }
+; SYM-NEXT:  }
+; SYM:    Index: 31
+; SYM-NEXT:    Name: IThreadLocalVarUninit2
+; SYM-NEXT:    Value (RelocatableAddress): 0x10
+; SYM-NEXT:    Section: .tbss
+; SYM-NEXT:    Type: 0x0
+; SYM-NEXT:    StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT:    NumberOfAuxEntries: 1
+; SYM-NEXT:    CSECT Auxiliary Entry {
+; SYM-NEXT:      Index: 32
+; SYM-NEXT:      SectionLen: 8
+; SYM-NEXT:      ParameterHashIndex: 0x0
+; SYM-NEXT:      TypeChkSectNum: 0x0
+; SYM-NEXT:      SymbolAlignmentLog2: 3
+; SYM-NEXT:      SymbolType: XTY_CM (0x3)
+; SYM-NEXT:      StorageMappingClass: XMC_UL (0x15)
+; SYM-NEXT:      Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:    }
+; SYM-NEXT:  }
+
+; DIS:      {{.*}}aix-tls-ld-xcoff-reloc-large.ll.tmp.o:	file format aix5coff64-rs6000
+; DIS:      Disassembly of section .data:
+; DIS:      0000000000000110 (idx: 19) _$TLSML[TC]:
+; DIS-NEXT:     110: 00 00 00 00
+; DIS-NEXT:     0000000000000110:  R_TLSML	(idx: 19) _$TLSML[TC]
+; DIS-NEXT:     114: 00 00 00 00
+; DIS:      0000000000000118 (idx: 21) IThreadLocalVarUninit[TE]:
+; DIS-NEXT:     118: 00 00 00 00
+; DIS-NEXT:     0000000000000118:  R_TLS_LD	(idx: 29) IThreadLocalVarUninit[UL]
+; DIS-NEXT:     11c: 00 00 00 08
+; DIS:      0000000000000120 (idx: 23) ThreadLocalVarInit[TE]:
+; DIS-NEXT:     120: 00 00 00 00
+; DIS-NEXT:     0000000000000120:  R_TLS_LD	(idx: 27) ThreadLocalVarInit[TL]
+; DIS-NEXT:     124: 00 00 00 00
+; DIS:      0000000000000128 (idx: 25) IThreadLocalVarUninit2[TE]:
+; DIS-NEXT:     128: 00 00 00 00
+; DIS-NEXT:     0000000000000128:  R_TLS_LD	(idx: 31) IThreadLocalVarUninit2[UL]
+; DIS-NEXT:     12c: 00 00 00 10
+
+; DIS:      Disassembly of section .tdata:
+; DIS:      0000000000000000 (idx: 27) ThreadLocalVarInit[TL]:
+; DIS-NEXT:        0: 00 00 00 00
+; DIS-NEXT:        4: 00 00 00 01
+
+; DIS:      Disassembly of section .tbss:
+; DIS:      0000000000000008 (idx: 29) IThreadLocalVarUninit[UL]:
+; DIS-NEXT: ...
+; DIS:      0000000000000010 (idx: 31) IThreadLocalVarUninit2[UL]:
+; DIS-NEXT: ...
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
index c8a737281840a..20110aff8ce65 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
@@ -641,7 +641,7 @@ entry:
 ; DIS-NEXT:       88: 00 00 00 00
 ; DIS-NEXT: 00000088: R_TLSML (idx: 25) _$TLSML[TC]
 ; DIS:      0000008c  (idx: 27) TIUninit[TC]:
-; DIS-NEXT:       8c: 00 00 00 00
+; DIS-NEXT:       8c: 00 00 00 04
 ; DIS-NEXT: 0000008c: R_TLS_LD (idx: 39) TIUninit[UL]
 ; DIS:      00000090  (idx: 29) .TGInit[TC]:
 ; DIS-NEXT:       90: 00 00 00 00

>From a35796b0188858bc224e20300df27948f4aac9bd Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Wed, 27 Sep 2023 02:57:07 -0400
Subject: [PATCH 12/21] [NFC] Incorporate comments.

---
 .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp  |  7 ++--
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     |  5 +--
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   | 15 +++++----
 llvm/lib/Target/PowerPC/PPCISelLowering.h     |  4 +--
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       |  2 +-
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 33 +++++++++----------
 6 files changed, 34 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 00dda306a584c..5b8f7f0661904 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -234,9 +234,10 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer {
       // On AIX, we have TLS variable offsets (symbol@({gd|ie|le|ld}) depending
       // on the TLS access method (or model). For the general-dynamic access
       // method, we also have region handle (symbol@m) for each variable. For
-      // local-dynamic, a module handle (__TLSML[TC]@ml) for all variables. For
-      // local-exec and initial-exec, we have a thread pointer, in r13 for
-      // 64-bit mode and returned by .__get_tpointer for 32-bit mode.
+      // local-dynamic, there is a module handle (__TLSML[TC]@ml) for all
+      // variables. Finally for local-exec and initial-exec, we have a thread
+      // pointer, in r13 for 64-bit mode and returned by .__get_tpointer for
+      // 32-bit mode.
       if (Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSIE ||
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 099299bff618f..34231802ab7c4 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -867,7 +867,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
         // hacked this flag which should have been named MO_TLSLDM_FLAG: on AIX
         // the ML relocation type is only valid for a reference to a TOC symbol
         // from the symbol itself, and right now its only user is the symbol
-        // "_$TLSML". Use symbol name to decide that R_TLSML is expected.
+        // "_$TLSML". The symbol name is used to decide that R_TLSML is
+        // expected.
         return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML;
       if (IsAIX)
         return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD;
@@ -2769,7 +2770,7 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
           getObjFileLowering().getSectionForTOCEntry(S, TM));
     } else if (I.first.second ==
                MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML) {
-      // AIX assembler expects TC storage-mapping class for the "_$TLSML"
+      // AIX assembler expects the TC storage-mapping class for the "_$TLSML"
       // symbol.
       MCSection *MCSect = getObjFileLowering().getContext().getXCOFFSection(
           cast<MCSymbolXCOFF>(I.first.first)->getSymbolTableName(),
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 86c88748d6be6..10116c83565a4 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3417,13 +3417,14 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
 
   if (Model == TLSModel::LocalDynamic) {
     // For local-dynamic on AIX, we need to generate one TOC entry for each
-    // variable offset, and single module-handle TOC entry for the entire file.
-
-    // We are not (1) create GV node (2) call getTOCEntry for the module-handle
-    // due to the reason that the module-handle should not be materialized (i.e.
-    // there should be no symbol-table entry referring to the module-handle).
-    // Instead we will create reference to __TLSML[TC]@ml in PPCTLSDynamicCall
-    // when processing the TLSLD_AIX pseudo node.
+    // variable offset, and a single module-handle TOC entry for the entire
+    // file.
+
+    // We are not able to (1) create a GV node, and (2) call getTOCEntry for the
+    // module-handle due to the reason that the module-handle should not be
+    // materialized (i.e. there should be no symbol-table entry referring to the
+    // module-handle). Instead we will create reference to __TLSML[TC]@ml in
+    // PPCTLSDynamicCall when processing the TLSLD_AIX pseudo node.
     SDValue ModuleHandle = DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT);
     SDValue VariableOffsetTGA =
         DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index f09a624fe7235..7a4200fab81e1 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -380,8 +380,8 @@ namespace llvm {
     GET_TLS_MOD_AIX,
 
     /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(variable offset)
-    /// Op that internally creates TOC entry for the "_$TLSML" symbol, generates
-    /// a GET_TLS_MOD_AIX node which will be expanded into a call to
+    /// Op that internally creates a TOC entry for the "_$TLSML" symbol, and
+    /// generates a GET_TLS_MOD_AIX node which will be expanded into a call to
     /// .__tls_get_mod, and then adds the variable offset with the result from
     /// the call. This node is used in both 32-bit and 64-bit modes. The only
     /// difference is register class.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 2abb58ae59bdc..4eb27b98842fd 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -3296,7 +3296,7 @@ def TLSGDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$offset, gprc:$handl
                           "#TLSGDAIX",
                           [(set i32:$rD,
                             (PPCTlsgdAIX i32:$offset, i32:$handle))]>;
-// This pseudo is expanded to load module-handle in R3, and the call to GETtlsMOD32AIX.
+// This pseudo is expanded to load the module-handle in R3, and the call to GETtlsMOD32AIX.
 def TLSLDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins),
                           "#TLSLDAIX", [(set i32:$rD, (PPCTlsldAIX))]>;
 // LR is a true define, while the rest of the Defs are clobbers.  R3 is
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 71825dca77bcf..8a839af1f2eb7 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -48,8 +48,12 @@ namespace {
     bool processBlock(MachineBasicBlock &MBB) {
       bool Changed = false;
       bool NeedFence = true;
-      bool Is64Bit = MBB.getParent()->getSubtarget<PPCSubtarget>().isPPC64();
-      bool IsAIX = MBB.getParent()->getSubtarget<PPCSubtarget>().isAIXABI();
+      const PPCSubtarget &Subtarget =
+          MBB.getParent()->getSubtarget<PPCSubtarget>();
+      bool Is64Bit = Subtarget.isPPC64();
+      bool IsAIX = Subtarget.isAIXABI();
+      bool IsLargeModel =
+          Subtarget.getTargetMachine().getCodeModel() == CodeModel::Large;
       bool IsPCREL = false;
       MachineFunction *MF = MBB.getParent();
       MachineRegisterInfo &RegInfo = MF->getRegInfo();
@@ -64,15 +68,16 @@ namespace {
         // There are a number of slight differences in code generation
         // when we call .__get_tpointer (32-bit AIX TLS).
         bool IsTLSTPRelMI = MI.getOpcode() == PPC::GETtlsTpointer32AIX;
+        bool IsTLSLDAIXMI = (MI.getOpcode() == PPC::TLSLDAIX8 ||
+                             MI.getOpcode() == PPC::TLSLDAIX);
 
         if (MI.getOpcode() != PPC::ADDItlsgdLADDR &&
             MI.getOpcode() != PPC::ADDItlsldLADDR &&
             MI.getOpcode() != PPC::ADDItlsgdLADDR32 &&
             MI.getOpcode() != PPC::ADDItlsldLADDR32 &&
-            MI.getOpcode() != PPC::TLSLDAIX &&
-            MI.getOpcode() != PPC::TLSLDAIX8 &&
             MI.getOpcode() != PPC::TLSGDAIX &&
-            MI.getOpcode() != PPC::TLSGDAIX8 && !IsTLSTPRelMI && !IsPCREL) {
+            MI.getOpcode() != PPC::TLSGDAIX8 && !IsTLSTPRelMI && !IsPCREL &&
+            !IsTLSLDAIXMI) {
           // Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP
           // as scheduling fences, we skip creating fences if we already
           // have existing ADJCALLSTACKDOWN/UP to avoid nesting,
@@ -92,9 +97,7 @@ namespace {
         Register InReg = PPC::NoRegister;
         Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
         Register GPR4 = Is64Bit ? PPC::X4 : PPC::R4;
-        if (!IsPCREL && !IsTLSTPRelMI &&
-            !(MI.getOpcode() == PPC::TLSLDAIX8 ||
-              MI.getOpcode() == PPC::TLSLDAIX))
+        if (!IsPCREL && !IsTLSTPRelMI && !IsTLSLDAIXMI)
           InReg = MI.getOperand(1).getReg();
         DebugLoc DL = MI.getDebugLoc();
 
@@ -160,15 +163,10 @@ namespace {
                                                               .addImm(0);
 
         if (IsAIX) {
-          if (MI.getOpcode() == PPC::TLSLDAIX8 ||
-              MI.getOpcode() == PPC::TLSLDAIX) {
+          if (IsTLSLDAIXMI) {
             // It is better to put TLSLDAIX node before LoadOffsetToc node,
             // because LoadOffsetToc node can use clobbers r4/r5. Search for the
             // first paired LoadOffsetToc node within the same BB.
-            const PPCSubtarget &Subtarget =
-                MBB.getParent()->getSubtarget<PPCSubtarget>();
-            bool IsLargeModel =
-                Subtarget.getTargetMachine().getCodeModel() == CodeModel::Large;
             unsigned LDTocOp =
                 Is64Bit ? (IsLargeModel ? PPC::LDtocL : PPC::LDtoc)
                         : (IsLargeModel ? PPC::LWZtocL : PPC::LWZtoc);
@@ -197,12 +195,13 @@ namespace {
                 int MatchCount = 0;
                 for (MachineOperand &MO : UseIter->operands()) {
                   if (MO.isReg() && MO.isUse()) {
-                    if (RegInfo.hasOneDef(MO.getReg())) {
-                      if (RegInfo.getOneDef(MO.getReg())
+                    Register OffsetReg = MO.getReg();
+                    if (RegInfo.hasOneDef(OffsetReg)) {
+                      if (RegInfo.getOneDef(OffsetReg)
                               ->getParent()
                               ->getOpcode() == LDTocOp) {
                         LoadOffsetToc =
-                            RegInfo.getOneDef(MO.getReg())->getParent();
+                            RegInfo.getOneDef(OffsetReg)->getParent();
                         ++MatchCount;
                       }
                     } else {

>From 139fa72f41fe4893c7c9dba43d8621cb09eb3083 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Wed, 27 Sep 2023 21:11:49 -0400
Subject: [PATCH 13/21] [NFC] Add FIXME to highlight existing issue: The
 "_$TLSML" symbol did not lower through getTOCEntry().

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 10116c83565a4..74f433da4ab07 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3420,10 +3420,10 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
     // variable offset, and a single module-handle TOC entry for the entire
     // file.
 
-    // We are not able to (1) create a GV node, and (2) call getTOCEntry for the
-    // module-handle due to the reason that the module-handle should not be
-    // materialized (i.e. there should be no symbol-table entry referring to the
-    // module-handle). Instead we will create reference to __TLSML[TC]@ml in
+    // FIXME: We are not able to (1) create a GV node, and (2) call getTOCEntry
+    // for the module-handle due to the reason that the module-handle should not
+    // be materialized (i.e. there should be no symbol-table entry referring to
+    // the module-handle). Instead we will create reference to __TLSML[TC]@ml in
     // PPCTLSDynamicCall when processing the TLSLD_AIX pseudo node.
     SDValue ModuleHandle = DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT);
     SDValue VariableOffsetTGA =

>From 51f08e43f506c6ef7e57b4478078bd9cac3073f0 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Wed, 25 Oct 2023 21:07:42 -0400
Subject: [PATCH 14/21] Address the following FIXME: The "_$TLSML" symbol did
 not lower through getTOCEntry().

The pseudo node "TLSLDAIX" now takes the _$TLSML GV node, and the
ppc-tls-dynamic-call pass is updated to fine tune the relative order
between the LoadOffset@toc node and the .__tls_get_mod node.
---
 llvm/lib/MC/XCOFFObjectWriter.cpp             |  14 +++
 .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp  |   6 +-
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     |  43 +++----
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  17 ++-
 llvm/lib/Target/PowerPC/PPCISelLowering.h     |   2 +-
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td      |   6 +-
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       |   8 +-
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 105 ++++++++----------
 .../test/CodeGen/PowerPC/aix-tls-gd-double.ll |  20 ++--
 llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll   |  20 ++--
 .../CodeGen/PowerPC/aix-tls-gd-longlong.ll    |  36 +++---
 .../PowerPC/aix-tls-ld-xcoff-reloc-large.ll   |  12 +-
 .../CodeGen/PowerPC/aix-tls-local-dynamic.ll  |   2 +-
 .../CodeGen/PowerPC/aix-tls-xcoff-reloc.ll    |   4 +-
 14 files changed, 145 insertions(+), 150 deletions(-)

diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index a898a35a057e6..cfb04b8e6b7af 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -649,6 +649,14 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
                                          uint64_t &FixedValue) {
   auto getIndex = [this](const MCSymbol *Sym,
                          const MCSectionXCOFF *ContainingCsect) {
+    // Fixup relocation flag for AIX TLS local-dynamic mode.
+    if (Sym->getName().equals("_Renamed..5f24__TLSML[UA]")) {
+      for (auto Iter : SymbolIndexMap)
+        if (Iter.first->getName().equals("_Renamed..5f24__TLSML[TC]"))
+          return Iter.second;
+      llvm_unreachable("For AIX TLS local-dynamic mode: "
+                       "_Renamed..5f24__TLSML[TC] not found.");
+    }
     // If we could not find the symbol directly in SymbolIndexMap, this symbol
     // could either be a temporary symbol or an undefined symbol. In this case,
     // we would need to have the relocation reference its csect instead.
@@ -1146,6 +1154,9 @@ void XCOFFObjectWriter::writeSymbolTable(const MCAsmLayout &Layout) {
                      /*NumberOfAuxEntries=*/0);
 
   for (const auto &Csect : UndefinedCsects) {
+    // AIX does not need to emit for the _$TLSML symbol.
+    if (Csect.getSymbolTableName().equals(StringRef("_$TLSML")))
+      continue;
     writeSymbolEntryForControlSection(Csect, XCOFF::ReservedSectionNum::N_UNDEF,
                                       Csect.MCSec->getStorageClass());
   }
@@ -1361,6 +1372,9 @@ void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) {
 
   // Calculate indices for undefined symbols.
   for (auto &Csect : UndefinedCsects) {
+    // AIX does not need to emit for the _$TLSML symbol.
+    if (Csect.getSymbolTableName().equals(StringRef("_$TLSML")))
+      continue;
     Csect.Size = 0;
     Csect.Address = 0;
     Csect.SymbolTableIndex = SymbolTableIndex;
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 5b8f7f0661904..cc50a29d67a72 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -242,10 +242,12 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer {
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSIE ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLE ||
-          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD ||
-          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML)
+          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD)
         OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << "@"
            << MCSymbolRefExpr::getVariantKindName(Kind) << '\n';
+      else if (Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML)
+        OS << "\t.tc " << TCSym->getName() << "," << TCSym->getName() << "@"
+           << MCSymbolRefExpr::getVariantKindName(Kind) << '\n';
       else
         OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << '\n';
 
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 34231802ab7c4..032bab255665b 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -723,8 +723,6 @@ static MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO,
     return AP.GetJTISymbol(MO.getIndex());
   case MachineOperand::MO_BlockAddress:
     return AP.GetBlockAddressSymbol(MO.getBlockAddress());
-  case MachineOperand::MO_ExternalSymbol:
-    return AP.OutContext.getOrCreateSymbol(MO.getSymbolName());
   default:
     llvm_unreachable("Unexpected operand type to get symbol.");
   }
@@ -758,17 +756,6 @@ getTOCEntryTypeForMO(const MachineOperand &MO) {
     llvm_unreachable("Unexpected operand type to get TOC type.");
   }
 }
-
-// FIXME: find alternative approach to get rid of this hack.
-// On AIX, TLS-local-dynamic requires that the symbol for the module handle must
-// have the name "_$TLSML". This symbol is used as one TOC symbol reference
-// itself with an ML relocation type, thus it has "[TC]" attached to its name.
-static inline bool isSpecialAIXSymbolTLSML(const MachineOperand &MO,
-                                           const bool IsAIX) {
-  return IsAIX && MO.isSymbol() &&
-         (std::strcmp(MO.getSymbolName(), "_$TLSML[TC]") == 0);
-}
-
 /// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
 /// the current output stream.
 ///
@@ -862,13 +849,15 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     if (Flag == PPCII::MO_TLSGD_FLAG || Flag == PPCII::MO_GOT_TLSGD_PCREL_FLAG)
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD;
     if (MO.getTargetFlags() & PPCII::MO_TLSLD_FLAG) {
-      if (isSpecialAIXSymbolTLSML(MO, IsAIX))
+      if (IsAIX && MO.isGlobal() &&
+          (MO.getGlobal()->getName().equals("_$TLSML")))
         // FIXME: Due to the size limit of MachineOperand::SubReg_TargetFlags,
         // hacked this flag which should have been named MO_TLSLDM_FLAG: on AIX
         // the ML relocation type is only valid for a reference to a TOC symbol
         // from the symbol itself, and right now its only user is the symbol
         // "_$TLSML". The symbol name is used to decide that R_TLSML is
         // expected.
+        // FIX this once #69695 committed.
         return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML;
       if (IsAIX)
         return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD;
@@ -991,8 +980,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LWZ);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
-            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
            "Invalid operand for LWZtoc.");
 
     // Map the operand to its corresponding MCSymbol.
@@ -1081,8 +1069,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LD);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
-            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
            "Invalid operand!");
 
     // Map the operand to its corresponding MCSymbol.
@@ -1120,8 +1107,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::ADDIS);
 
     const MachineOperand &MO = MI->getOperand(2);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
-            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
            "Invalid operand for ADDIStocHA.");
 
     // Map the machine operand to its corresponding MCSymbol.
@@ -1154,8 +1140,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LWZ);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
-            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
            "Invalid operand for LWZtocL.");
 
     // Map the machine operand to its corresponding MCSymbol.
@@ -1187,8 +1172,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::ADDIS8);
 
     const MachineOperand &MO = MI->getOperand(2);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
-            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
            "Invalid operand for ADDIStocHA8!");
 
     const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
@@ -1198,8 +1182,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     const bool GlobalToc =
         MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal());
     if (GlobalToc || MO.isJTI() || MO.isBlockAddress() ||
-        (MO.isCPI() && TM.getCodeModel() == CodeModel::Large) ||
-        isSpecialAIXSymbolTLSML(MO, IsAIX))
+        (MO.isCPI() && TM.getCodeModel() == CodeModel::Large))
       MOSymbol = lookUpOrCreateTOCEntry(MOSymbol, getTOCEntryTypeForMO(MO), VK);
 
     VK = IsAIX ? MCSymbolRefExpr::VK_PPC_U : MCSymbolRefExpr::VK_PPC_TOC_HA;
@@ -1228,8 +1211,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LD);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
-            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() ||
+            MO.isBlockAddress()) &&
            "Invalid operand for LDtocL!");
 
     LLVM_DEBUG(assert(
@@ -2098,6 +2081,10 @@ void PPCAIXAsmPrinter::emitLinkage(const GlobalValue *GV,
     }
   }
 
+  // Do not emit _$TLSML symbol.
+  if (GVSym->getName().equals(StringRef("_Renamed..5f24__TLSML[UA]")))
+    return;
+
   OutStreamer->emitXCOFFSymbolLinkageWithVisibility(GVSym, LinkageAttr,
                                                     VisibilityAttr);
 }
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 74f433da4ab07..9209b95dde08d 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3420,14 +3420,19 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
     // variable offset, and a single module-handle TOC entry for the entire
     // file.
 
-    // FIXME: We are not able to (1) create a GV node, and (2) call getTOCEntry
-    // for the module-handle due to the reason that the module-handle should not
-    // be materialized (i.e. there should be no symbol-table entry referring to
-    // the module-handle). Instead we will create reference to __TLSML[TC]@ml in
-    // PPCTLSDynamicCall when processing the TLSLD_AIX pseudo node.
-    SDValue ModuleHandle = DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT);
     SDValue VariableOffsetTGA =
         DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
+    Module *M = DAG.getMachineFunction().getFunction().getParent();
+    GlobalVariable *TLSGV =
+        dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
+            StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
+    assert(TLSGV && "Not able to create GV for _$TLSML.");
+    // FIXME: create MO_TLSML_FLAG once #69695 committed.
+    SDValue ModuleHandleTGA =
+        DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
+    SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
+    SDValue ModuleHandle =
+        DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
     SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
     return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
   }
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 7a4200fab81e1..2b6fcbc93f57e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -384,7 +384,7 @@ namespace llvm {
     /// generates a GET_TLS_MOD_AIX node which will be expanded into a call to
     /// .__tls_get_mod, and then adds the variable offset with the result from
     /// the call. This node is used in both 32-bit and 64-bit modes. The only
-    /// difference is register class.
+    /// difference is the register class.
     TLSLD_AIX,
 
     /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index b51af57b796f8..2949d58ab6647 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1602,9 +1602,9 @@ def TLSGDAIX8 :
                      "#TLSGDAIX8",
                      [(set i64:$rD,
                        (PPCTlsgdAIX i64:$offset, i64:$handle))]>;
-// This pseudo is expanded to load module-handle in X3, and the call to GETtlsMOD64AIX.
-def TLSLDAIX8 : PPCEmitTimePseudo<(outs g8rc:$rD), (ins),
-                     "#TLSLDAIX8", [(set i64:$rD, (PPCTlsldAIX))]>;
+// This pseudo is expanded to the call to GETtlsMOD64AIX.
+def TLSLDAIX8 : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$handle),
+                     "#TLSLDAIX8", [(set i64:$rD, (PPCTlsldAIX i64:$handle))]>;
 // Combined op for ADDItlsldL and GETtlsADDR, late expanded.  X3 and LR8
 // are true defines, while the rest of the Defs are clobbers.
 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 4eb27b98842fd..e1afc85a7c736 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -220,7 +220,7 @@ def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR",
                                  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                                  SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>;
 def PPCTlsgdAIX     : SDNode<"PPCISD::TLSGD_AIX", SDTIntBinOp>;
-def PPCTlsldAIX     : SDNode<"PPCISD::TLSLD_AIX", SDTIntLeaf>;
+def PPCTlsldAIX     : SDNode<"PPCISD::TLSLD_AIX", SDTIntUnaryOp>;
 def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
 def PPCaddiTlsldL   : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
 def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
@@ -3296,9 +3296,9 @@ def TLSGDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$offset, gprc:$handl
                           "#TLSGDAIX",
                           [(set i32:$rD,
                             (PPCTlsgdAIX i32:$offset, i32:$handle))]>;
-// This pseudo is expanded to load the module-handle in R3, and the call to GETtlsMOD32AIX.
-def TLSLDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins),
-                          "#TLSLDAIX", [(set i32:$rD, (PPCTlsldAIX))]>;
+// This pseudo is expanded to the call to GETtlsMOD32AIX.
+def TLSLDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$handle),
+                          "#TLSLDAIX", [(set i32:$rD, (PPCTlsldAIX i32:$handle))]>;
 // LR is a true define, while the rest of the Defs are clobbers.  R3 is
 // explicitly defined when this op is created, so not mentioned here.
 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 8a839af1f2eb7..a2843ca0a350d 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -57,9 +57,6 @@ namespace {
       bool IsPCREL = false;
       MachineFunction *MF = MBB.getParent();
       MachineRegisterInfo &RegInfo = MF->getRegInfo();
-      const TargetRegisterClass *GPRNoZero =
-          Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
-                  : &PPC::GPRC_and_GPRC_NOR0RegClass;
 
       for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
            I != IE;) {
@@ -97,7 +94,7 @@ namespace {
         Register InReg = PPC::NoRegister;
         Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
         Register GPR4 = Is64Bit ? PPC::X4 : PPC::R4;
-        if (!IsPCREL && !IsTLSTPRelMI && !IsTLSLDAIXMI)
+        if (!IsPCREL && !IsTLSTPRelMI)
           InReg = MI.getOperand(1).getReg();
         DebugLoc DL = MI.getDebugLoc();
 
@@ -164,84 +161,74 @@ namespace {
 
         if (IsAIX) {
           if (IsTLSLDAIXMI) {
-            // It is better to put TLSLDAIX node before LoadOffsetToc node,
-            // because LoadOffsetToc node can use clobbers r4/r5. Search for the
-            // first paired LoadOffsetToc node within the same BB.
+            // The relative order between the LoadOffset@toc node, and the
+            // .__tls_get_mod node is being tuned here. It is better to put the
+            // LoadOffset@toc node after the call, since the LoadOffset@toc node
+            // can use clobbers r4/r5. Search for the pattern of two Load@toc
+            // nodes, and then move the LoadOffset@toc node right before the
+            // node that uses the OutReg of the .__tls_get_mod node.
             unsigned LDTocOp =
                 Is64Bit ? (IsLargeModel ? PPC::LDtocL : PPC::LDtoc)
                         : (IsLargeModel ? PPC::LWZtocL : PPC::LWZtoc);
-            MachineBasicBlock::iterator Anchor = I;
             if (!RegInfo.use_empty(OutReg)) {
               std::set<MachineInstr *> Uses;
-              // Collect all instructions that use OutReg
-              for (MachineOperand &MO : RegInfo.use_operands(OutReg)) {
-                if (Uses.count(MO.getParent()))
-                  continue;
+              // Collect all instructions that use the OutReg.
+              for (MachineOperand &MO : RegInfo.use_operands(OutReg))
                 Uses.insert(MO.getParent());
-              }
-              // Find the first Add within current BB.
+              // Find the first user (e.g.: lwax/stfdx) within the current BB.
               MachineBasicBlock::iterator UseIter = MBB.begin();
-              for (MachineBasicBlock::iterator AE = MBB.end(); UseIter != AE;
+              for (MachineBasicBlock::iterator IE = MBB.end(); UseIter != IE;
                    ++UseIter)
                 if (Uses.count(&*UseIter))
                   break;
 
               if (UseIter != MBB.end()) {
-                // Get the instruction that defines the other used register
-                // operand of UseIter. The match pattern is that: UseIter has
-                // exactly one used-operand defined by LDTocOp
-                // (LDtocL/LDtoc/LWZtocL/LWZtoc).
-                MachineInstr *LoadOffsetToc = nullptr;
-                int MatchCount = 0;
-                for (MachineOperand &MO : UseIter->operands()) {
+                // Collect associated Load@toc nodes.
+                std::set<MachineInstr *> LoadFromTocs;
+                for (MachineOperand &MO : UseIter->operands())
                   if (MO.isReg() && MO.isUse()) {
-                    Register OffsetReg = MO.getReg();
-                    if (RegInfo.hasOneDef(OffsetReg)) {
-                      if (RegInfo.getOneDef(OffsetReg)
-                              ->getParent()
-                              ->getOpcode() == LDTocOp) {
-                        LoadOffsetToc =
-                            RegInfo.getOneDef(OffsetReg)->getParent();
-                        ++MatchCount;
-                      }
+                    if (RegInfo.hasOneDef(MO.getReg())) {
+                      MachineInstr *Temp =
+                          RegInfo.getOneDef(MO.getReg())->getParent();
+                      if (Temp == &MI && RegInfo.hasOneDef(InReg))
+                        Temp = RegInfo.getOneDef(InReg)->getParent();
+                      if (Temp->getOpcode() == LDTocOp)
+                        LoadFromTocs.insert(Temp);
                     } else {
                       // FIXME: analyze this scenario if there is one.
-                      MatchCount = 0;
+                      LoadFromTocs.clear();
                       break;
                     }
                   }
-                }
-                // Get the iterator.
-                if (MatchCount == 1 && LoadOffsetToc) {
-                  Anchor = MBB.begin();
-                  for (MachineBasicBlock::iterator AE = MBB.end(); Anchor != AE;
-                       ++Anchor)
-                    if (&*Anchor == LoadOffsetToc)
-                      break;
 
-                  if (Anchor == MBB.end())
-                    Anchor = I;
+                // Check the two Load@toc: one should be _$TLSML, and the other
+                // will be moved before the node that uses the OutReg of the
+                // .__tls_get_mod node.
+                if (LoadFromTocs.size() == 2) {
+                  MachineBasicBlock::iterator TLSMLIter = MBB.end();
+                  MachineBasicBlock::iterator OffsetIter = MBB.end();
+                  for (MachineBasicBlock::iterator I = MBB.begin(),
+                                                   IE = MBB.end();
+                       I != IE; ++I)
+                    if (LoadFromTocs.count(&*I)) {
+                      if (I->getOperand(1).isGlobal() &&
+                          I->getOperand(1).getGlobal()->getName().equals(
+                              "_$TLSML"))
+                        TLSMLIter = I;
+                      else
+                        OffsetIter = I;
+                    }
+                  if (TLSMLIter != MBB.end() && OffsetIter != MBB.end())
+                    OffsetIter->moveBefore(&*UseIter);
                 }
               }
             }
-
-            // Generate instructions to load module-handle.
-            Register ModuleHandleHReg;
-            if (IsLargeModel) {
-              ModuleHandleHReg = RegInfo.createVirtualRegister(GPRNoZero);
-              BuildMI(MBB, Anchor, DL,
-                      TII->get(Is64Bit ? PPC::ADDIStocHA8 : PPC::ADDIStocHA),
-                      ModuleHandleHReg)
-                  .addReg(Subtarget.getTOCPointerRegister())
-                  .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG);
-            }
-            BuildMI(MBB, Anchor, DL, TII->get(LDTocOp), GPR3)
-                .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG)
-                .addReg(IsLargeModel
-                            ? ModuleHandleHReg
-                            : Register(Subtarget.getTOCPointerRegister()));
+            // The module-handle is copied in r3. The copy is followed by
+            // GETtlsMOD32AIX/GETtlsMOD64AIX.
+            BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
+                .addReg(InReg);
             // The call to .__tls_get_mod.
-            BuildMI(MBB, Anchor, DL, TII->get(Opc2), GPR3).addReg(GPR3);
+            BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3);
           } else if (!IsTLSTPRelMI) {
             // The variable offset and region handle are copied in r4 and r3.
             // The copies are followed by GETtlsADDR32AIX/GETtlsADDR64AIX.
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
index 84ddb83bef457..194273738c649 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
@@ -156,8 +156,8 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
+; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) @"_$TLSML"
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
 ; SMALL32-NEXT:    stfdx 1, 3, 4
@@ -186,8 +186,8 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
+; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) @"_$TLSML"
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
 ; SMALL64-NEXT:    stfdx 1, 3, 4
@@ -200,8 +200,8 @@ define void @storesTIInit(double %Val) #0 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 3, L..C4@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 6, L..C5@u(2)
 ; LARGE64-NEXT:    ld 3, L..C4@l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
@@ -452,13 +452,13 @@ define double @loadsTIInit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
+; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) @"_$TLSML"
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL32-NEXT:    lwz 5, L..C8(2) # @GInit
 ; SMALL32-NEXT:    lfdx 0, 3, 4
-; SMALL32-NEXT:    lfd 1, 0(5)
+; SMALL32-NEXT:    lwz 3, L..C8(2) # @GInit
+; SMALL32-NEXT:    lfd 1, 0(3)
 ; SMALL32-NEXT:    fadd 1, 0, 1
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
@@ -489,13 +489,13 @@ define double @loadsTIInit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
+; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) @"_$TLSML"
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL64-NEXT:    ld 5, L..C8(2) # @GInit
 ; SMALL64-NEXT:    lfdx 0, 3, 4
-; SMALL64-NEXT:    lfd 1, 0(5)
+; SMALL64-NEXT:    ld 3, L..C8(2) # @GInit
+; SMALL64-NEXT:    lfd 1, 0(3)
 ; SMALL64-NEXT:    fadd 1, 0, 1
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -506,8 +506,8 @@ define double @loadsTIInit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 3, L..C4@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 6, L..C5@u(2)
 ; LARGE64-NEXT:    ld 3, L..C4@l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
index 3001a915da485..bbb8e04b67b95 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
@@ -163,9 +163,9 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    mr 6, 3
 ; SMALL32-NEXT:    lwz 3, L..C4(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2)
 ; SMALL32-NEXT:    stwx 6, 3, 4
@@ -195,9 +195,9 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    mr 6, 3
 ; SMALL64-NEXT:    ld 3, L..C4(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2)
 ; SMALL64-NEXT:    stwx 6, 3, 4
@@ -210,9 +210,9 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C4@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 7, L..C5@u(2)
 ; LARGE64-NEXT:    ld 3, L..C4@l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
@@ -467,13 +467,13 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    lwz 3, L..C4(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2)
-; SMALL32-NEXT:    lwz 5, L..C8(2)
 ; SMALL32-NEXT:    lwzx 3, 3, 4
-; SMALL32-NEXT:    lwz 4, 0(5)
+; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    lwz 4, 0(4)
 ; SMALL32-NEXT:    add 3, 4, 3
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
@@ -504,13 +504,13 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    ld 3, L..C4(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2)
-; SMALL64-NEXT:    ld 5, L..C8(2)
 ; SMALL64-NEXT:    lwzx 3, 3, 4
-; SMALL64-NEXT:    lwz 4, 0(5)
+; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    lwz 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -521,8 +521,8 @@ define i32 @loadsTIUninit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 3, L..C4@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 6, L..C5@u(2)
 ; LARGE64-NEXT:    ld 3, L..C4@l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
index 136c62ff509f9..ff087a2144488 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
@@ -97,10 +97,10 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    mr 7, 3
-; SMALL32-NEXT:    mr 6, 4
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
+; SMALL32-NEXT:    stw 0, 40(1)
+; SMALL32-NEXT:    mr 6, 4
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C3(2)
 ; SMALL32-NEXT:    stwux 7, 3, 4
@@ -133,9 +133,9 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    mr 6, 3
 ; SMALL64-NEXT:    ld 3, L..C2(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C3(2)
 ; SMALL64-NEXT:    stdx 6, 3, 4
@@ -148,9 +148,9 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C2@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 7, L..C3@u(2)
 ; LARGE64-NEXT:    ld 3, L..C2@l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
@@ -171,10 +171,10 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    mr 7, 3
-; SMALL32-NEXT:    mr 6, 4
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
+; SMALL32-NEXT:    stw 0, 40(1)
+; SMALL32-NEXT:    mr 6, 4
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C4(2)
 ; SMALL32-NEXT:    stwux 7, 3, 4
@@ -207,9 +207,9 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    mr 6, 3
 ; SMALL64-NEXT:    ld 3, L..C2(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C4(2)
 ; SMALL64-NEXT:    stdx 6, 3, 4
@@ -222,9 +222,9 @@ define void @storesTIInit(i64 %Val) #0 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C2@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 7, L..C4@u(2)
 ; LARGE64-NEXT:    ld 3, L..C2@l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
@@ -407,8 +407,8 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C3(2)
 ; SMALL32-NEXT:    lwz 5, L..C7(2)
@@ -450,13 +450,13 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    ld 3, L..C2(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C3(2)
-; SMALL64-NEXT:    ld 5, L..C7(2)
 ; SMALL64-NEXT:    ldx 3, 3, 4
-; SMALL64-NEXT:    ld 4, 0(5)
+; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -467,8 +467,8 @@ define i64 @loadsTIUninit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 3, L..C2@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 6, L..C3@u(2)
 ; LARGE64-NEXT:    ld 3, L..C2@l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
@@ -495,8 +495,8 @@ define i64 @loadsTIInit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C4(2)
 ; SMALL32-NEXT:    lwz 5, L..C7(2)
@@ -538,13 +538,13 @@ define i64 @loadsTIInit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    ld 3, L..C2(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C4(2)
-; SMALL64-NEXT:    ld 5, L..C7(2)
 ; SMALL64-NEXT:    ldx 3, 3, 4
-; SMALL64-NEXT:    ld 4, 0(5)
+; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -555,8 +555,8 @@ define i64 @loadsTIInit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 3, L..C2@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 6, L..C4@u(2)
 ; LARGE64-NEXT:    ld 3, L..C2@l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-ld-xcoff-reloc-large.ll b/llvm/test/CodeGen/PowerPC/aix-tls-ld-xcoff-reloc-large.ll
index 657f6187bcd7f..d1ca04a7ace83 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-ld-xcoff-reloc-large.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-ld-xcoff-reloc-large.ll
@@ -38,14 +38,14 @@ entry:
 ; RELOC-NEXT: Arch: powerpc64
 ; RELOC-NEXT: AddressSize: 64bit
 ; RELOC-NEXT: Relocations [
-; RELOC:      Virtual Address: 0x12
+; RELOC:      Virtual Address: 0xE
 ; RELOC-NEXT:      Symbol: _$TLSML (19)
 ; RELOC-NEXT:      IsSigned: No
 ; RELOC-NEXT:      FixupBitValue: 0
 ; RELOC-NEXT:      Length: 16
 ; RELOC-NEXT:      Type: R_TOCU (0x30)
 ; RELOC-NEXT:    }
-; RELOC:      Virtual Address: 0x16
+; RELOC:      Virtual Address: 0x12
 ; RELOC-NEXT:      Symbol: IThreadLocalVarUninit (21)
 ; RELOC-NEXT:      IsSigned: No
 ; RELOC-NEXT:      FixupBitValue: 0
@@ -66,14 +66,14 @@ entry:
 ; RELOC-NEXT:      Length: 16
 ; RELOC-NEXT:      Type: R_TOCL (0x31)
 ; RELOC-NEXT:    }
-; RELOC:      Virtual Address: 0x4E
+; RELOC:      Virtual Address: 0x4A
 ; RELOC-NEXT:      Symbol: _$TLSML (19)
 ; RELOC-NEXT:      IsSigned: No
 ; RELOC-NEXT:      FixupBitValue: 0
 ; RELOC-NEXT:      Length: 16
 ; RELOC-NEXT:      Type: R_TOCU (0x30)
 ; RELOC-NEXT:    }
-; RELOC:      Virtual Address: 0x52
+; RELOC:      Virtual Address: 0x4E
 ; RELOC-NEXT:      Symbol: ThreadLocalVarInit (23)
 ; RELOC-NEXT:      IsSigned: No
 ; RELOC-NEXT:      FixupBitValue: 0
@@ -94,14 +94,14 @@ entry:
 ; RELOC-NEXT:      Length: 16
 ; RELOC-NEXT:      Type: R_TOCL (0x31)
 ; RELOC-NEXT:    }
-; RELOC:      Virtual Address: 0x8E
+; RELOC:      Virtual Address: 0x8A
 ; RELOC-NEXT:      Symbol: _$TLSML (19)
 ; RELOC-NEXT:      IsSigned: No
 ; RELOC-NEXT:      FixupBitValue: 0
 ; RELOC-NEXT:      Length: 16
 ; RELOC-NEXT:      Type: R_TOCU (0x30)
 ; RELOC-NEXT:    }
-; RELOC:      Virtual Address: 0x92
+; RELOC:      Virtual Address: 0x8E
 ; RELOC-NEXT:      Symbol: IThreadLocalVarUninit (21)
 ; RELOC-NEXT:      IsSigned: No
 ; RELOC-NEXT:      FixupBitValue: 0
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
index f7cd73ae609b3..c745e17636da1 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
@@ -327,7 +327,7 @@ define i32 @DedupTlsGetMod() #0 {
 ; WITHDUP-NEXT:   ld [[OffsetXR:[0-9]+]], [[X:L..C[0-9]+]](2)
 ; WITHDUP:        ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; WITHDUP-NEXT:   bla .__tls_get_mod[PR]
-; WITHDUP-NEXT:   ld [[OffsetYR:[0-9]+]], [[Y:L..C[0-9]+]](2)
+; WITHDUP:        ld [[OffsetYR:[0-9]+]], [[Y:L..C[0-9]+]](2)
 ; WITHDUP-LABEL:  L..DedupTlsGetMod0:
 ;
 ; NODUP-LABEL:  DedupTlsGetMod:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
index 20110aff8ce65..d578920aa858c 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
@@ -33,7 +33,7 @@ entry:
 ; RELOC-NEXT: Relocations [
 ; RELOC-NEXT:   Section (index: 1) .text {
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x12
+; RELOC-NEXT:     Virtual Address: 0xE
 ; RELOC-NEXT:     Symbol: _$TLSML (25)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -587,10 +587,10 @@ entry:
 ; DIS:      00000000 (idx: 7) .storesTIUninit:
 ; DIS-NEXT:                                      mflr 0
 ; DIS-NEXT:                                      stwu 1, -32(1)
-; DIS-NEXT:                                      stw 0, 40(1)
 ; DIS-NEXT:                                      mr 6, 3
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 0(2)
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 25) _$TLSML[TC]
+; DIS-NEXT:                                      stw 0, 40(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               bla 0
 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1)      .__tls_get_mod[PR]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 4, 4(2)

>From fd2d1ec5d38bb3834502dffbc9eaf0c8d30d6219 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Thu, 26 Oct 2023 03:42:38 -0400
Subject: [PATCH 15/21] Address comment: use == to compare StringRef names

---
 llvm/lib/MC/XCOFFObjectWriter.cpp             | 8 ++++----
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     | 5 ++---
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 3 +--
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index cfb04b8e6b7af..5efc8ac841ef4 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -650,9 +650,9 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
   auto getIndex = [this](const MCSymbol *Sym,
                          const MCSectionXCOFF *ContainingCsect) {
     // Fixup relocation flag for AIX TLS local-dynamic mode.
-    if (Sym->getName().equals("_Renamed..5f24__TLSML[UA]")) {
+    if (Sym->getName() == "_Renamed..5f24__TLSML[UA]") {
       for (auto Iter : SymbolIndexMap)
-        if (Iter.first->getName().equals("_Renamed..5f24__TLSML[TC]"))
+        if (Iter.first->getName() == "_Renamed..5f24__TLSML[TC]")
           return Iter.second;
       llvm_unreachable("For AIX TLS local-dynamic mode: "
                        "_Renamed..5f24__TLSML[TC] not found.");
@@ -1155,7 +1155,7 @@ void XCOFFObjectWriter::writeSymbolTable(const MCAsmLayout &Layout) {
 
   for (const auto &Csect : UndefinedCsects) {
     // AIX does not need to emit for the _$TLSML symbol.
-    if (Csect.getSymbolTableName().equals(StringRef("_$TLSML")))
+    if (Csect.getSymbolTableName() == "_$TLSML")
       continue;
     writeSymbolEntryForControlSection(Csect, XCOFF::ReservedSectionNum::N_UNDEF,
                                       Csect.MCSec->getStorageClass());
@@ -1373,7 +1373,7 @@ void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) {
   // Calculate indices for undefined symbols.
   for (auto &Csect : UndefinedCsects) {
     // AIX does not need to emit for the _$TLSML symbol.
-    if (Csect.getSymbolTableName().equals(StringRef("_$TLSML")))
+    if (Csect.getSymbolTableName() == "_$TLSML")
       continue;
     Csect.Size = 0;
     Csect.Address = 0;
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 032bab255665b..8e46af5d68c88 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -849,8 +849,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     if (Flag == PPCII::MO_TLSGD_FLAG || Flag == PPCII::MO_GOT_TLSGD_PCREL_FLAG)
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD;
     if (MO.getTargetFlags() & PPCII::MO_TLSLD_FLAG) {
-      if (IsAIX && MO.isGlobal() &&
-          (MO.getGlobal()->getName().equals("_$TLSML")))
+      if (IsAIX && MO.isGlobal() && MO.getGlobal()->getName() == "_$TLSML")
         // FIXME: Due to the size limit of MachineOperand::SubReg_TargetFlags,
         // hacked this flag which should have been named MO_TLSLDM_FLAG: on AIX
         // the ML relocation type is only valid for a reference to a TOC symbol
@@ -2082,7 +2081,7 @@ void PPCAIXAsmPrinter::emitLinkage(const GlobalValue *GV,
   }
 
   // Do not emit _$TLSML symbol.
-  if (GVSym->getName().equals(StringRef("_Renamed..5f24__TLSML[UA]")))
+  if (GVSym->getName() == "_Renamed..5f24__TLSML[UA]")
     return;
 
   OutStreamer->emitXCOFFSymbolLinkageWithVisibility(GVSym, LinkageAttr,
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index a2843ca0a350d..452a7a2dcf416 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -212,8 +212,7 @@ namespace {
                        I != IE; ++I)
                     if (LoadFromTocs.count(&*I)) {
                       if (I->getOperand(1).isGlobal() &&
-                          I->getOperand(1).getGlobal()->getName().equals(
-                              "_$TLSML"))
+                          I->getOperand(1).getGlobal()->getName() == "_$TLSML")
                         TLSMLIter = I;
                       else
                         OffsetIter = I;

>From d903739b76682e7f87a3c1af48434e949343d726 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Fri, 27 Oct 2023 10:34:55 -0400
Subject: [PATCH 16/21] Correct SMC setting for the _$TLSML symbol

---
 llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 7 +++++++
 llvm/lib/MC/XCOFFObjectWriter.cpp                 | 8 --------
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp         | 2 +-
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 143a4951c1361..6e86f29b8eb5d 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -2392,6 +2392,13 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForExternalReference(
   SmallString<128> Name;
   getNameWithPrefix(Name, GO, TM);
 
+  // AIX TLS local-dynamic requires the setting for the specific symbol name.
+  if (GO->hasName() && GO->getName() == "_$TLSML") {
+    return getContext().getXCOFFSection(
+        Name, SectionKind::getData(),
+        XCOFF::CsectProperties(XCOFF::XMC_TC, XCOFF::XTY_SD));
+  }
+
   XCOFF::StorageMappingClass SMC =
       isa<Function>(GO) ? XCOFF::XMC_DS : XCOFF::XMC_UA;
   if (GO->isThreadLocal())
diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index 5efc8ac841ef4..98a4d8481dc9d 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -649,14 +649,6 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
                                          uint64_t &FixedValue) {
   auto getIndex = [this](const MCSymbol *Sym,
                          const MCSectionXCOFF *ContainingCsect) {
-    // Fixup relocation flag for AIX TLS local-dynamic mode.
-    if (Sym->getName() == "_Renamed..5f24__TLSML[UA]") {
-      for (auto Iter : SymbolIndexMap)
-        if (Iter.first->getName() == "_Renamed..5f24__TLSML[TC]")
-          return Iter.second;
-      llvm_unreachable("For AIX TLS local-dynamic mode: "
-                       "_Renamed..5f24__TLSML[TC] not found.");
-    }
     // If we could not find the symbol directly in SymbolIndexMap, this symbol
     // could either be a temporary symbol or an undefined symbol. In this case,
     // we would need to have the relocation reference its csect instead.
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 8e46af5d68c88..0fbe20c553c3f 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -2081,7 +2081,7 @@ void PPCAIXAsmPrinter::emitLinkage(const GlobalValue *GV,
   }
 
   // Do not emit _$TLSML symbol.
-  if (GVSym->getName() == "_Renamed..5f24__TLSML[UA]")
+  if (GVSym->getName() == "_Renamed..5f24__TLSML[TC]")
     return;
 
   OutStreamer->emitXCOFFSymbolLinkageWithVisibility(GVSym, LinkageAttr,

>From 651cf01e9f62984dbf4475475a2a79f0f48c47a1 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Fri, 27 Oct 2023 10:52:51 -0400
Subject: [PATCH 17/21] Simplify and add check

---
 llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 6 ++----
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp                | 3 ++-
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index cc50a29d67a72..5b8f7f0661904 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -242,12 +242,10 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer {
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSIE ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLE ||
-          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD)
+          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD ||
+          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML)
         OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << "@"
            << MCSymbolRefExpr::getVariantKindName(Kind) << '\n';
-      else if (Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML)
-        OS << "\t.tc " << TCSym->getName() << "," << TCSym->getName() << "@"
-           << MCSymbolRefExpr::getVariantKindName(Kind) << '\n';
       else
         OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << '\n';
 
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 0fbe20c553c3f..51f9f5d3b14ea 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -849,7 +849,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     if (Flag == PPCII::MO_TLSGD_FLAG || Flag == PPCII::MO_GOT_TLSGD_PCREL_FLAG)
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD;
     if (MO.getTargetFlags() & PPCII::MO_TLSLD_FLAG) {
-      if (IsAIX && MO.isGlobal() && MO.getGlobal()->getName() == "_$TLSML")
+      if (IsAIX && MO.isGlobal() && MO.getGlobal()->hasName() &&
+          MO.getGlobal()->getName() == "_$TLSML")
         // FIXME: Due to the size limit of MachineOperand::SubReg_TargetFlags,
         // hacked this flag which should have been named MO_TLSLDM_FLAG: on AIX
         // the ML relocation type is only valid for a reference to a TOC symbol

>From 3f2bbfd679f2fb6066481bb9b7e2a47bcdbca807 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Fri, 27 Oct 2023 11:01:32 -0400
Subject: [PATCH 18/21] misc

---
 llvm/lib/MC/XCOFFObjectWriter.cpp         | 4 ++--
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index 98a4d8481dc9d..4c89ddefe74f7 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -1146,7 +1146,7 @@ void XCOFFObjectWriter::writeSymbolTable(const MCAsmLayout &Layout) {
                      /*NumberOfAuxEntries=*/0);
 
   for (const auto &Csect : UndefinedCsects) {
-    // AIX does not need to emit for the _$TLSML symbol.
+    // AIX does not need to emit the _$TLSML symbol.
     if (Csect.getSymbolTableName() == "_$TLSML")
       continue;
     writeSymbolEntryForControlSection(Csect, XCOFF::ReservedSectionNum::N_UNDEF,
@@ -1364,7 +1364,7 @@ void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) {
 
   // Calculate indices for undefined symbols.
   for (auto &Csect : UndefinedCsects) {
-    // AIX does not need to emit for the _$TLSML symbol.
+    // AIX does not need to emit the _$TLSML symbol.
     if (Csect.getSymbolTableName() == "_$TLSML")
       continue;
     Csect.Size = 0;
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 51f9f5d3b14ea..ea7ff78434d71 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -2081,7 +2081,7 @@ void PPCAIXAsmPrinter::emitLinkage(const GlobalValue *GV,
     }
   }
 
-  // Do not emit _$TLSML symbol.
+  // Do not emit the _$TLSML symbol.
   if (GVSym->getName() == "_Renamed..5f24__TLSML[TC]")
     return;
 

>From 6258bf03d5a746605971329eb6339a9b637cb85e Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Mon, 30 Oct 2023 04:44:54 -0400
Subject: [PATCH 19/21] Simplify logic by moving the XMC_TC setting for the
 _$TLSML symbol into getSectionForTOCEntry()

---
 llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 6 +++++-
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp         | 9 ---------
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 6e86f29b8eb5d..e8c674be0a6a7 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -2652,10 +2652,14 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry(
     const MCSymbol *Sym, const TargetMachine &TM) const {
   // Use TE storage-mapping class when large code model is enabled so that
   // the chance of needing -bbigtoc is decreased.
+  // The "_$TLSML" symbol for TLS local-dynamic mode requires XMC_TC.
   return getContext().getXCOFFSection(
       cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(), SectionKind::getData(),
       XCOFF::CsectProperties(
-          TM.getCodeModel() == CodeModel::Large ? XCOFF::XMC_TE : XCOFF::XMC_TC,
+          (TM.getCodeModel() == CodeModel::Large &&
+           cast<MCSymbolXCOFF>(Sym)->getSymbolTableName() != "_$TLSML")
+              ? XCOFF::XMC_TE
+              : XCOFF::XMC_TC,
           XCOFF::XTY_SD));
 }
 
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index ea7ff78434d71..b792a91fa8b2e 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -2755,15 +2755,6 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
       MCSymbol *S = OutContext.getOrCreateSymbol(Name);
       TCEntry = cast<MCSectionXCOFF>(
           getObjFileLowering().getSectionForTOCEntry(S, TM));
-    } else if (I.first.second ==
-               MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML) {
-      // AIX assembler expects the TC storage-mapping class for the "_$TLSML"
-      // symbol.
-      MCSection *MCSect = getObjFileLowering().getContext().getXCOFFSection(
-          cast<MCSymbolXCOFF>(I.first.first)->getSymbolTableName(),
-          SectionKind::getData(),
-          XCOFF::CsectProperties(XCOFF::XMC_TC, XCOFF::XTY_SD));
-      TCEntry = cast<MCSectionXCOFF>(MCSect);
     } else {
       TCEntry = cast<MCSectionXCOFF>(
           getObjFileLowering().getSectionForTOCEntry(I.first.first, TM));

>From bcca7aed63f334effa230d5e421cb0344a087ba4 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Mon, 30 Oct 2023 04:50:47 -0400
Subject: [PATCH 20/21] Add check to make sure GV's name is defined before
 access

---
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 452a7a2dcf416..93701d340f800 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -212,6 +212,7 @@ namespace {
                        I != IE; ++I)
                     if (LoadFromTocs.count(&*I)) {
                       if (I->getOperand(1).isGlobal() &&
+                          I->getOperand(1).getGlobal()->hasName() &&
                           I->getOperand(1).getGlobal()->getName() == "_$TLSML")
                         TLSMLIter = I;
                       else

>From 9f2f897c0a8f3d878e92d785520fe75ed198c65a Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Mon, 30 Oct 2023 20:40:27 -0400
Subject: [PATCH 21/21] update

---
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp   | 1 -
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 1 -
 2 files changed, 2 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index b792a91fa8b2e..c56b8ae8f4dba 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -857,7 +857,6 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
         // from the symbol itself, and right now its only user is the symbol
         // "_$TLSML". The symbol name is used to decide that R_TLSML is
         // expected.
-        // FIX this once #69695 committed.
         return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML;
       if (IsAIX)
         return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 9209b95dde08d..571b6312a89b9 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3427,7 +3427,6 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
         dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
             StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
     assert(TLSGV && "Not able to create GV for _$TLSML.");
-    // FIXME: create MO_TLSML_FLAG once #69695 committed.
     SDValue ModuleHandleTGA =
         DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
     SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);



More information about the cfe-commits mailing list