[clang] [llvm] [PowerPC] Support local-dynamic TLS relocation on AIX (PR #66316)

Felix via cfe-commits cfe-commits at lists.llvm.org
Wed Feb 7 05:41:48 PST 2024


https://github.com/orcguru updated https://github.com/llvm/llvm-project/pull/66316

>From f7dd67a5f0ddcd87fba7052634cc79bb9cf39229 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Wed, 13 Sep 2023 22:21:11 -0400
Subject: [PATCH 01/27] [PowerPC] Support local-dynamic TLS relocation on AIX

---
 llvm/include/llvm/MC/MCExpr.h                 |   2 +
 llvm/lib/MC/MCExpr.cpp                        |   4 +
 .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp  |   9 +-
 .../MCTargetDesc/PPCXCOFFObjectWriter.cpp     |   4 +
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     |  90 ++++-
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  30 +-
 llvm/lib/Target/PowerPC/PPCISelLowering.h     |  14 +-
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td      |  13 +-
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       |  15 +-
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp |  73 +++-
 .../test/CodeGen/PowerPC/aix-tls-gd-double.ll |  90 +++--
 llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll   |  96 +++--
 .../CodeGen/PowerPC/aix-tls-gd-longlong.ll    | 298 +++++++-------
 .../CodeGen/PowerPC/aix-tls-local-dynamic.ll  | 364 ++++++++++++++++++
 .../PowerPC/aix-tls-xcoff-reloc-large.ll      | 286 +++++++-------
 .../CodeGen/PowerPC/aix-tls-xcoff-reloc.ll    | 288 +++++++-------
 16 files changed, 1149 insertions(+), 527 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll

diff --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h
index 67836292874f5f..f3bc0491fd2f19 100644
--- a/llvm/include/llvm/MC/MCExpr.h
+++ b/llvm/include/llvm/MC/MCExpr.h
@@ -301,6 +301,8 @@ class MCSymbolRefExpr : public MCExpr {
     VK_PPC_AIX_TLSGDM,      // symbol at m
     VK_PPC_AIX_TLSIE,       // symbol at ie
     VK_PPC_AIX_TLSLE,       // symbol at le
+    VK_PPC_AIX_TLSLD,       // symbol at ld
+    VK_PPC_AIX_TLSML,       // symbol at ml
     VK_PPC_GOT_TLSLD,       // symbol at got@tlsld
     VK_PPC_GOT_TLSLD_LO,    // symbol at got@tlsld at l
     VK_PPC_GOT_TLSLD_HI,    // symbol at got@tlsld at h
diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index 73e6569f96e463..bc1bb9b8063054 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -331,6 +331,10 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
     return "ie";
   case VK_PPC_AIX_TLSLE:
     return "le";
+  case VK_PPC_AIX_TLSLD:
+    return "ld";
+  case VK_PPC_AIX_TLSML:
+    return "ml";
   case VK_PPC_GOT_TLSLD: return "got at tlsld";
   case VK_PPC_GOT_TLSLD_LO: return "got at tlsld@l";
   case VK_PPC_GOT_TLSLD_HI: return "got at tlsld@h";
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index a804dd823daa4d..22cd2fc03ef7c3 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -231,12 +231,15 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer {
       MCSymbolXCOFF *TCSym =
           cast<MCSectionXCOFF>(Streamer.getCurrentSectionOnly())
               ->getQualNameSymbol();
-      // On AIX, we have a region handle (symbol at m) and the variable offset
-      // (symbol@{gd|ie|le}) for TLS variables, depending on the TLS model.
+      // On AIX, we have a region handle (symbol at m), module handle
+      // (__TLSML[TC]@ml) and the variable offset (symbol@{gd|ie|le|ld}) for TLS
+      // variables, depending on the TLS model.
       if (Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSIE ||
-          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLE)
+          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLE ||
+          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD ||
+          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML)
         OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << "@"
            << MCSymbolRefExpr::getVariantKindName(Kind) << '\n';
       else
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
index 065daf42fe6eb0..f4998e9b9dcba8 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
@@ -116,6 +116,10 @@ std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize(
       return {XCOFF::RelocationType::R_TLS_IE, SignAndSizeForFKData};
     case MCSymbolRefExpr::VK_PPC_AIX_TLSLE:
       return {XCOFF::RelocationType::R_TLS_LE, SignAndSizeForFKData};
+    case MCSymbolRefExpr::VK_PPC_AIX_TLSLD:
+      return {XCOFF::RelocationType::R_TLS_LD, SignAndSizeForFKData};
+    case MCSymbolRefExpr::VK_PPC_AIX_TLSML:
+      return {XCOFF::RelocationType::R_TLSML, SignAndSizeForFKData};
     case MCSymbolRefExpr::VK_None:
       return {XCOFF::RelocationType::R_POS, SignAndSizeForFKData};
     }
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 4b551bc51c4f0e..1c197521f409a1 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -613,12 +613,23 @@ void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) {
     EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP));
 }
 
-/// This helper function creates the TlsGetAddr MCSymbol for AIX. We will
-/// create the csect and use the qual-name symbol instead of creating just the
-/// external symbol.
+/// This helper function creates the TlsGetAddr/TlsGetMod MCSymbol for AIX. We
+/// will create the csect and use the qual-name symbol instead of creating just
+/// the external symbol.
 static MCSymbol *createMCSymbolForTlsGetAddr(MCContext &Ctx, unsigned MIOpc) {
-  StringRef SymName =
-      MIOpc == PPC::GETtlsTpointer32AIX ? ".__get_tpointer" : ".__tls_get_addr";
+  StringRef SymName;
+  switch (MIOpc) {
+  default:
+    SymName = ".__tls_get_addr";
+    break;
+  case PPC::GETtlsTpointer32AIX:
+    SymName = ".__get_tpointer";
+    break;
+  case PPC::GETtlsMOD32AIX:
+  case PPC::GETtlsMOD64AIX:
+    SymName = ".__tls_get_mod";
+    break;
+  }
   return Ctx
       .getXCOFFSection(SymName, SectionKind::getText(),
                        XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER))
@@ -660,14 +671,15 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
          "GETtls[ld]ADDR[32] must read GPR3");
 
   if (Subtarget->isAIXABI()) {
-    // On AIX, the variable offset should already be in R4 and the region handle
-    // should already be in R3.
-    // For TLSGD, which currently is the only supported access model, we only
-    // need to generate an absolute branch to .__tls_get_addr.
+    // On AIX, for TLSGD the variable offset should already be in R4 and the
+    // region handle should already be in R3, need to generate an absolute
+    // branch to .__tls_get_addr. For TLSLD the module handle should already be
+    // in R3, need to generate branch to .__tls_get_mod.
     Register VarOffsetReg = Subtarget->isPPC64() ? PPC::X4 : PPC::R4;
     (void)VarOffsetReg;
-    assert(MI->getOperand(2).isReg() &&
-           MI->getOperand(2).getReg() == VarOffsetReg &&
+    assert((MI->getNumExplicitOperands() < 3 ||
+            (MI->getOperand(2).isReg() &&
+             MI->getOperand(2).getReg() == VarOffsetReg)) &&
            "GETtls[ld]ADDR[32] must read GPR4");
     EmitAIXTlsCallHelper(MI);
     return;
@@ -710,6 +722,8 @@ static MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO,
     return AP.GetJTISymbol(MO.getIndex());
   case MachineOperand::MO_BlockAddress:
     return AP.GetBlockAddressSymbol(MO.getBlockAddress());
+  case MachineOperand::MO_ExternalSymbol:
+    return AP.OutContext.getOrCreateSymbol(MO.getSymbolName());
   default:
     llvm_unreachable("Unexpected operand type to get symbol.");
   }
@@ -743,6 +757,16 @@ getTOCEntryTypeForMO(const MachineOperand &MO) {
     llvm_unreachable("Unexpected operand type to get TOC type.");
   }
 }
+
+// On AIX, TLS-local-dynamic requires that symbol for the module handle must
+// have the name "_$TLSML". This symbol is used as one TOC symbol reference
+// itself with ML relocation type, thus it has "[TC]" attached to its name.
+static inline bool isSpecialAIXSymbolTLSML(const MachineOperand &MO,
+                                           const bool IsAIX) {
+  return IsAIX && MO.isSymbol() &&
+         (std::strcmp(MO.getSymbolName(), "_$TLSML[TC]") == 0);
+}
+
 /// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
 /// the current output stream.
 ///
@@ -835,6 +859,15 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM;
     if (Flag == PPCII::MO_TLSGD_FLAG || Flag == PPCII::MO_GOT_TLSGD_PCREL_FLAG)
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD;
+    if (MO.getTargetFlags() & PPCII::MO_TLSLD_FLAG) {
+      if (isSpecialAIXSymbolTLSML(MO, IsAIX))
+        // FIXME: On AIX the ML relocation type is only valid for a reference to
+        // a TOC symbol from the symbol itself, and right now its only user is
+        // symbol "_$TLSML". Use symbol name to decide that R_TLSML is expected.
+        return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML;
+      if (IsAIX)
+        return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD;
+    }
     return MCSymbolRefExpr::VariantKind::VK_None;
   };
 
@@ -953,7 +986,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LWZ);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
+            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
            "Invalid operand for LWZtoc.");
 
     // Map the operand to its corresponding MCSymbol.
@@ -1042,7 +1076,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LD);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
+            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
            "Invalid operand!");
 
     // Map the operand to its corresponding MCSymbol.
@@ -1080,7 +1115,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::ADDIS);
 
     const MachineOperand &MO = MI->getOperand(2);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
+            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
            "Invalid operand for ADDIStocHA.");
 
     // Map the machine operand to its corresponding MCSymbol.
@@ -1113,7 +1149,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LWZ);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
+            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
            "Invalid operand for LWZtocL.");
 
     // Map the machine operand to its corresponding MCSymbol.
@@ -1145,7 +1182,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::ADDIS8);
 
     const MachineOperand &MO = MI->getOperand(2);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
+            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
            "Invalid operand for ADDIStocHA8!");
 
     const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
@@ -1155,7 +1193,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     const bool GlobalToc =
         MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal());
     if (GlobalToc || MO.isJTI() || MO.isBlockAddress() ||
-        (MO.isCPI() && TM.getCodeModel() == CodeModel::Large))
+        (MO.isCPI() && TM.getCodeModel() == CodeModel::Large) ||
+        isSpecialAIXSymbolTLSML(MO, IsAIX))
       MOSymbol = lookUpOrCreateTOCEntry(MOSymbol, getTOCEntryTypeForMO(MO), VK);
 
     VK = IsAIX ? MCSymbolRefExpr::VK_PPC_U : MCSymbolRefExpr::VK_PPC_TOC_HA;
@@ -1184,8 +1223,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LD);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() ||
-            MO.isBlockAddress()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
+            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
            "Invalid operand for LDtocL!");
 
     LLVM_DEBUG(assert(
@@ -1351,6 +1390,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
   case PPC::GETtlsADDRPCREL:
   case PPC::GETtlsADDR32AIX:
   case PPC::GETtlsADDR64AIX:
+  case PPC::GETtlsMOD32AIX:
+  case PPC::GETtlsMOD64AIX:
     // Transform: %r3 = GETtlsADDRNNAIX %r3, %r4 (for NN == 32/64).
     // Into: BLA .__tls_get_addr()
     // Unlike on Linux, there is no symbol or relocation needed for this call.
@@ -2719,6 +2760,15 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
       MCSymbol *S = OutContext.getOrCreateSymbol(Name);
       TCEntry = cast<MCSectionXCOFF>(
           getObjFileLowering().getSectionForTOCEntry(S, TM));
+    } else if (I.first.second ==
+               MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML) {
+      // AIX assembler expects TC storage-mapping class for the "_$TLSML"
+      // symbol.
+      MCSection *MCSect = getObjFileLowering().getContext().getXCOFFSection(
+          cast<MCSymbolXCOFF>(I.first.first)->getSymbolTableName(),
+          SectionKind::getData(),
+          XCOFF::CsectProperties(XCOFF::XMC_TC, XCOFF::XTY_SD));
+      TCEntry = cast<MCSectionXCOFF>(MCSect);
     } else {
       TCEntry = cast<MCSectionXCOFF>(
           getObjFileLowering().getSectionForTOCEntry(I.first.first, TM));
@@ -2835,6 +2885,8 @@ void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) {
 		 MMI->hasDebugInfo());
     break;
   }
+  case PPC::GETtlsMOD32AIX:
+  case PPC::GETtlsMOD64AIX:
   case PPC::GETtlsTpointer32AIX:
   case PPC::GETtlsADDR64AIX:
   case PPC::GETtlsADDR32AIX: {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index acaf98b62fd94d..9d0430ea2e515a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1772,9 +1772,13 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
+  case PPCISD::GET_TLS_MOD:
+    return "PPCISD::GET_TLS_MOD";
   case PPCISD::GET_TPOINTER:    return "PPCISD::GET_TPOINTER";
   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
   case PPCISD::TLSGD_AIX:       return "PPCISD::TLSGD_AIX";
+  case PPCISD::TLSLD_AIX:
+    return "PPCISD::TLSLD_AIX";
   case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
   case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
   case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
@@ -3413,13 +3417,25 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
   }
 
-  // Only Local-Exec, Initial-Exec and General-Dynamic TLS models are currently
-  // supported models. If Local- or Initial-exec are not possible or specified,
-  // all GlobalTLSAddress nodes are lowered using the general-dynamic model.
-  // We need to generate two TOC entries, one for the variable offset, one for
-  // the region handle. The global address for the TOC entry of the region
-  // handle is created with the MO_TLSGDM_FLAG flag and the global address
-  // for the TOC entry of the variable offset is created with MO_TLSGD_FLAG.
+  if (Model == TLSModel::LocalDynamic) {
+    // For local-dynamic on AIX, we need to generate two TOC entries, one for
+    // the variable offset, the other for the module handle. The module handle
+    // is encapsulated inside the TLSLD_AIX pseudo node, and will be expanded by
+    // PPCTLSDynamicCall.
+    SDValue VariableOffsetTGA =
+        DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
+    SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
+    return DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, VariableOffset);
+  }
+
+  // The Local-Exec, Initial-Exec, Local-Dynamic, and General-Dynamic TLS models
+  // are currently supported access models. If Local- or Initial-exec or
+  // local-dynamic is not possible or specified, all GlobalTLSAddress nodes are
+  // lowered using the general-dynamic model. We need to generate two TOC
+  // entries, one for the variable offset, one for the region handle. The global
+  // address for the TOC entry of the region handle is created with the
+  // MO_TLSGDM_FLAG flag and the global address for the TOC entry of the
+  // variable offset is created with MO_TLSGD_FLAG.
   SDValue VariableOffsetTGA =
       DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
   SDValue RegionHandleTGA =
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index d8679dcf401808..7f80f70d699a6b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -370,11 +370,23 @@ namespace llvm {
     /// G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY
     /// Op that combines two register copies of TOC entries
     /// (region handle into R3 and variable offset into R4) followed by a
-    /// GET_TLS_ADDR node which will be expanded to a call to __get_tls_addr.
+    /// GET_TLS_ADDR node which will be expanded to a call to __tls_get_addr.
     /// This node is used in 64-bit mode as well (in which case the result is
     /// G8RC and inputs are X3/X4).
     TLSGD_AIX,
 
+    /// %x3 = GET_TLS_MOD _$TLSML - For the AIX local-dynamic TLS model,
+    /// produces a call to __tls_get_mod(_$TLSML\@ml).
+    GET_TLS_MOD,
+
+    /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(variable offset)
+    /// Op that internally creates TOC entry for the "_$TLSML" symbol, generates
+    /// GET_TLS_MOD node which will be expanded into a call to __tls_get_mod,
+    /// and then add the variable offset with the result from the call.
+    /// This node is used in both 32-bit and 64-bit modes. The only difference
+    /// is register class.
+    TLSLD_AIX,
+
     /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
     /// model, produces an ADDIS8 instruction that adds the GOT base
     /// register to sym\@got\@tlsld\@ha.
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 0322bb37b1fdf8..e4fee9c2fff23e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1557,12 +1557,19 @@ def GETtlsldADDRPCREL : GETtlsldADDRPseudo <"#GETtlsldADDRPCREL">;
 // so we don't need to mark it with a size of 8 bytes. Finally, the assembly
 // manual mentions this exact set of registers as the clobbered set, others
 // are guaranteed not to be clobbered.
-let Defs = [X0,X4,X5,X11,LR8,CR0] in
+let Defs = [X0,X4,X5,X11,LR8,CR0] in {
 def GETtlsADDR64AIX :
   PPCEmitTimePseudo<(outs g8rc:$rD),(ins g8rc:$offset, g8rc:$handle),
                     "GETtlsADDR64AIX",
                     [(set i64:$rD,
                       (PPCgetTlsAddr i64:$offset, i64:$handle))]>, isPPC64;
+// On AIX, the call to __tls_get_mod need one input in X3 for the module handle.
+def GETtlsMOD64AIX :
+  PPCEmitTimePseudo<(outs g8rc:$rD),(ins g8rc:$handle),
+                    "GETtlsMOD64AIX",
+                    [(set i64:$rD,
+                      (PPCgetTlsMod i64:$handle))]>, isPPC64;
+}
 }
 
 // Combined op for ADDItlsgdL and GETtlsADDR, late expanded.  X3 and LR8
@@ -1595,6 +1602,10 @@ def TLSGDAIX8 :
                      "#TLSGDAIX8",
                      [(set i64:$rD,
                        (PPCTlsgdAIX i64:$offset, i64:$handle))]>;
+// This pseudo is expanded to one copy to put the module handle in R3, then call
+// GETtlsMOD64AIX, and then add variable offset to the output from the call.
+def TLSLDAIX8 : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$handle),
+                     "#TLSLDAIX8", [(set i64:$rD, (PPCTlsldAIX i64:$handle))]>;
 // Combined op for ADDItlsldL and GETtlsADDR, late expanded.  X3 and LR8
 // are true defines, while the rest of the Defs are clobbers.
 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 6199785206b2f7..3d7e64d29fe83a 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -213,12 +213,14 @@ def PPCaddTls     : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
 def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
 def PPCaddiTlsgdL   : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
 def PPCgetTlsAddr   : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
+def PPCgetTlsMod   : SDNode<"PPCISD::GET_TLS_MOD", SDTIntUnaryOp>;
 def PPCgetTpointer : SDNode<"PPCISD::GET_TPOINTER", SDTIntLeaf, []>;
 def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR",
                                SDTypeProfile<1, 3, [
                                  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                                  SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>;
 def PPCTlsgdAIX     : SDNode<"PPCISD::TLSGD_AIX", SDTIntBinOp>;
+def PPCTlsldAIX     : SDNode<"PPCISD::TLSLD_AIX", SDTIntUnaryOp>;
 def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
 def PPCaddiTlsldL   : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
 def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
@@ -3245,11 +3247,16 @@ def GETtlsADDR32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$s
 // The rest of the Defs are the exact set of registers that will be clobbered by
 // the call.
 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
-    Defs = [R0,R4,R5,R11,LR,CR0] in
+    Defs = [R0,R4,R5,R11,LR,CR0] in {
 def GETtlsADDR32AIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$offset, gprc:$handle),
                           "GETtlsADDR32AIX",
                           [(set i32:$rD,
                             (PPCgetTlsAddr i32:$offset, i32:$handle))]>;
+def GETtlsMOD32AIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$handle),
+                          "GETtlsMOD32AIX",
+                          [(set i32:$rD,
+                            (PPCgetTlsMod i32:$handle))]>;
+}
 
 // For local-exec accesses on 32-bit AIX, a call to .__get_tpointer is
 // generated to retrieve the thread pointer. GETtlsTpointer32AIX clobbers both
@@ -3289,6 +3296,12 @@ def TLSGDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$offset, gprc:$handl
                           "#TLSGDAIX",
                           [(set i32:$rD,
                             (PPCTlsgdAIX i32:$offset, i32:$handle))]>;
+// This pseudo is expanded to one copy to put the module handle in R3, then call
+// GETtlsMOD32AIX, and then add variable offset to the output from the call.
+def TLSLDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$handle),
+                          "#TLSLDAIX",
+                          [(set i32:$rD,
+                            (PPCTlsldAIX i32:$handle))]>;
 // LR is a true define, while the rest of the Defs are clobbers.  R3 is
 // explicitly defined when this op is created, so not mentioned here.
 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 9518d5347065cc..32a75a32b9d87f 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -51,6 +51,11 @@ namespace {
       bool Is64Bit = MBB.getParent()->getSubtarget<PPCSubtarget>().isPPC64();
       bool IsAIX = MBB.getParent()->getSubtarget<PPCSubtarget>().isAIXABI();
       bool IsPCREL = false;
+      MachineFunction *MF = MBB.getParent();
+      MachineRegisterInfo &RegInfo = MF->getRegInfo();
+      const TargetRegisterClass *GPRNoZero =
+          Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
+                  : &PPC::GPRC_and_GPRC_NOR0RegClass;
 
       for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
            I != IE;) {
@@ -64,6 +69,8 @@ namespace {
             MI.getOpcode() != PPC::ADDItlsldLADDR &&
             MI.getOpcode() != PPC::ADDItlsgdLADDR32 &&
             MI.getOpcode() != PPC::ADDItlsldLADDR32 &&
+            MI.getOpcode() != PPC::TLSLDAIX &&
+            MI.getOpcode() != PPC::TLSLDAIX8 &&
             MI.getOpcode() != PPC::TLSGDAIX &&
             MI.getOpcode() != PPC::TLSGDAIX8 && !IsTLSTPRelMI && !IsPCREL) {
           // Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP
@@ -109,6 +116,12 @@ namespace {
           Opc1 = PPC::ADDItlsldL32;
           Opc2 = PPC::GETtlsldADDR32;
           break;
+        case PPC::TLSLDAIX:
+          Opc2 = PPC::GETtlsMOD32AIX;
+          break;
+        case PPC::TLSLDAIX8:
+          Opc2 = PPC::GETtlsMOD64AIX;
+          break;
         case PPC::TLSGDAIX8:
           // TLSGDAIX8 is expanded to two copies and GET_TLS_ADDR, so we only
           // set Opc2 here.
@@ -145,19 +158,55 @@ namespace {
                                                               .addImm(0);
 
         if (IsAIX) {
-          // The variable offset and region handle are copied in r4 and r3. The
-          // copies are followed by GETtlsADDR32AIX/GETtlsADDR64AIX.
-          if (!IsTLSTPRelMI) {
-            BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
-                .addReg(MI.getOperand(1).getReg());
+          if (MI.getOpcode() == PPC::TLSLDAIX8 ||
+              MI.getOpcode() == PPC::TLSLDAIX) {
+            // For Local-Dynamic
+            auto &Subtarget = MBB.getParent()->getSubtarget<PPCSubtarget>();
+            bool IsLargeModel =
+                Subtarget.getTargetMachine().getCodeModel() == CodeModel::Large;
+            Register ModuleHandleHReg;
+            unsigned LDTocOp =
+                Is64Bit ? (IsLargeModel ? PPC::LDtocL : PPC::LDtoc)
+                        : (IsLargeModel ? PPC::LWZtocL : PPC::LWZtoc);
+            if (IsLargeModel) {
+              ModuleHandleHReg = RegInfo.createVirtualRegister(GPRNoZero);
+              BuildMI(MBB, I, DL,
+                      TII->get(Is64Bit ? PPC::ADDIStocHA8 : PPC::ADDIStocHA),
+                      ModuleHandleHReg)
+                  .addReg(Subtarget.getTOCPointerRegister())
+                  .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG);
+            }
+            Register MHReg = RegInfo.createVirtualRegister(GPRNoZero);
+            BuildMI(MBB, I, DL, TII->get(LDTocOp), MHReg)
+                .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG)
+                .addReg(IsLargeModel
+                            ? ModuleHandleHReg
+                            : Register(Subtarget.getTOCPointerRegister()));
             BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
-                .addReg(MI.getOperand(2).getReg());
-            BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3).addReg(GPR4);
-          } else
-            // The opcode of GETtlsTpointer32AIX does not change, because later
-            // this instruction will be expanded into a call to .__get_tpointer,
-            // which will return the thread pointer into r3.
-            BuildMI(MBB, I, DL, TII->get(Opc2), GPR3);
+                .addReg(MHReg);
+            // The call to __tls_get_mod.
+            BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3);
+            BuildMI(MBB, I, DL, TII->get(Is64Bit ? PPC::ADD8 : PPC::ADD4), GPR3)
+                .addReg(GPR3)
+                .addReg(MI.getOperand(1).getReg());
+          } else {
+            // For Global-Dynamic
+            // The variable offset and region handle are copied in r4 and r3.
+            // The copies are followed by GETtlsADDR32AIX/GETtlsADDR64AIX.
+            if (!IsTLSTPRelMI) {
+              BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
+                  .addReg(MI.getOperand(1).getReg());
+              BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
+                  .addReg(MI.getOperand(2).getReg());
+              BuildMI(MBB, I, DL, TII->get(Opc2), GPR3)
+                  .addReg(GPR3)
+                  .addReg(GPR4);
+            } else
+              // The opcode of GETtlsTpointer32AIX does not change, because
+              // later this instruction will be expanded into a call to
+              // .__get_tpointer, which will return the thread pointer into r3.
+              BuildMI(MBB, I, DL, TII->get(Opc2), GPR3);
+          }
         } else {
           MachineInstr *Addi;
           if (IsPCREL) {
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
index c0ffb8154c6917..13bef83cf50fd7 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
@@ -156,10 +156,11 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsgdm) @TIInit
-; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsgd) @TIInit
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
+; SMALL32-NEXT:    lwz 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL32-NEXT:    lwz 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    add 3, 3, 6
 ; SMALL32-NEXT:    stfd 1, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
@@ -172,10 +173,11 @@ define void @storesTIInit(double %Val) #0 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C5 at l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 6, L..C4 at l(3)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 6
 ; LARGE32-NEXT:    stfd 1, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
@@ -186,10 +188,11 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsgdm) @TIInit
-; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsgd) @TIInit
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL64-NEXT:    ld 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL64-NEXT:    ld 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    add 3, 3, 6
 ; SMALL64-NEXT:    stfd 1, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -201,11 +204,12 @@ define void @storesTIInit(double %Val) #0 {
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
-; LARGE64-NEXT:    addis 4, L..C5 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C4 at l(3)
-; LARGE64-NEXT:    ld 4, L..C5 at l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
+; LARGE64-NEXT:    ld 6, L..C4 at l(3)
+; LARGE64-NEXT:    addis 3, L..C5 at u(2)
+; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    add 3, 3, 6
 ; LARGE64-NEXT:    stfd 1, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -452,11 +456,12 @@ define double @loadsTIInit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsgdm) @TIInit
-; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsgd) @TIInit
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
+; SMALL32-NEXT:    lwz 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL32-NEXT:    lwz 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C8(2) # @GInit
+; SMALL32-NEXT:    add 3, 3, 6
 ; SMALL32-NEXT:    lfd 0, 0(3)
 ; SMALL32-NEXT:    lfd 1, 0(4)
 ; SMALL32-NEXT:    fadd 1, 0, 1
@@ -471,10 +476,11 @@ define double @loadsTIInit() #1 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C5 at l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 6, L..C4 at l(3)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 6
 ; LARGE32-NEXT:    lfd 0, 0(3)
 ; LARGE32-NEXT:    addis 3, L..C8 at u(2)
 ; LARGE32-NEXT:    lwz 3, L..C8 at l(3)
@@ -489,11 +495,12 @@ define double @loadsTIInit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsgdm) @TIInit
-; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsgd) @TIInit
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL64-NEXT:    ld 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL64-NEXT:    ld 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C8(2) # @GInit
+; SMALL64-NEXT:    add 3, 3, 6
 ; SMALL64-NEXT:    lfd 0, 0(3)
 ; SMALL64-NEXT:    lfd 1, 0(4)
 ; SMALL64-NEXT:    fadd 1, 0, 1
@@ -507,11 +514,12 @@ define double @loadsTIInit() #1 {
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
-; LARGE64-NEXT:    addis 4, L..C5 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C4 at l(3)
-; LARGE64-NEXT:    ld 4, L..C5 at l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
+; LARGE64-NEXT:    ld 6, L..C4 at l(3)
+; LARGE64-NEXT:    addis 3, L..C5 at u(2)
+; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    add 3, 3, 6
 ; LARGE64-NEXT:    addis 4, L..C8 at u(2)
 ; LARGE64-NEXT:    lfd 0, 0(3)
 ; LARGE64-NEXT:    ld 3, L..C8 at l(4)
@@ -610,12 +618,16 @@ entry:
   ret double %add
 }
 
-; External symbol reference checks for .__tls_get_addr
+; External symbol reference checks for .__tls_get_addr/.__tls_get_mod
 
 ; SMALL32: .extern .__tls_get_addr[PR]
+; SMALL32: .extern .__tls_get_mod[PR]
 ; SMALL64: .extern .__tls_get_addr[PR]
+; SMALL64: .extern .__tls_get_mod[PR]
 ; LARGE32: .extern .__tls_get_addr[PR]
+; LARGE32: .extern .__tls_get_mod[PR]
 ; LARGE64: .extern .__tls_get_addr[PR]
+; LARGE64: .extern .__tls_get_mod[PR]
 
 ; TOC entry checks
 
@@ -629,9 +641,10 @@ entry:
 ; SMALL32-LABEL:  L..C3:
 ; SMALL32-NEXT:   .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL32-LABEL:  L..C4:
-; SMALL32-NEXT:   .tc .TIInit[TC],TIInit[TL]@m
+; SMALL32-NEXT:   .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL32-LABEL:  L..C5:
-; SMALL32-NEXT:   .tc TIInit[TC],TIInit[TL]@gd
+; SMALL32-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL32-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; SMALL32-LABEL:  L..C6:
 ; SMALL32-NEXT:   .tc .TWInit[TC],TWInit[TL]@m
 ; SMALL32-LABEL:  L..C7:
@@ -649,9 +662,10 @@ entry:
 ; LARGE32-LABEL:  L..C3:
 ; LARGE32-NEXT:   .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL:  L..C4:
-; LARGE32-NEXT:   .tc .TIInit[TE],TIInit[TL]@m
+; LARGE32-NEXT:   .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE32-LABEL:  L..C5:
-; LARGE32-NEXT:   .tc TIInit[TE],TIInit[TL]@gd
+; LARGE32-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE32-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; LARGE32-LABEL:  L..C6:
 ; LARGE32-NEXT:   .tc .TWInit[TE],TWInit[TL]@m
 ; LARGE32-LABEL:  L..C7:
@@ -669,9 +683,10 @@ entry:
 ; SMALL64-LABEL:  L..C3:
 ; SMALL64-NEXT:  .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL64-LABEL:  L..C4:
-; SMALL64-NEXT:  .tc .TIInit[TC],TIInit[TL]@m
+; SMALL64-NEXT:  .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL64-LABEL:  L..C5:
-; SMALL64-NEXT:  .tc TIInit[TC],TIInit[TL]@gd
+; SMALL64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; SMALL64-LABEL:  L..C6:
 ; SMALL64-NEXT:  .tc .TWInit[TC],TWInit[TL]@m
 ; SMALL64-LABEL:  L..C7:
@@ -689,9 +704,10 @@ entry:
 ; LARGE64-LABEL:  L..C3:
 ; LARGE64-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE64-LABEL:  L..C4:
-; LARGE64-NEXT:  .tc .TIInit[TE],TIInit[TL]@m
+; LARGE64-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE64-LABEL:  L..C5:
-; LARGE64-NEXT:  .tc TIInit[TE],TIInit[TL]@gd
+; LARGE64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; LARGE64-LABEL:  L..C6:
 ; LARGE64-NEXT:  .tc .TWInit[TE],TWInit[TL]@m
 ; LARGE64-LABEL:  L..C7:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
index 887c4521a4c90a..dc75db43d38c70 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
@@ -163,11 +163,12 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    mr 6, 3
-; SMALL32-NEXT:    lwz 3, L..C4(2)
-; SMALL32-NEXT:    lwz 4, L..C5(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
+; SMALL32-NEXT:    mr 6, 3
+; SMALL32-NEXT:    lwz 7, L..C4(2)
+; SMALL32-NEXT:    lwz 3, L..C5(2)
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    add 3, 3, 7
 ; SMALL32-NEXT:    stw 6, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
@@ -181,10 +182,11 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 6, 3
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C5 at l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 7, L..C4 at l(3)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 7
 ; LARGE32-NEXT:    stw 6, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
@@ -195,11 +197,12 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    mr 6, 3
-; SMALL64-NEXT:    ld 3, L..C4(2)
-; SMALL64-NEXT:    ld 4, L..C5(2)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL64-NEXT:    mr 6, 3
+; SMALL64-NEXT:    ld 7, L..C4(2)
+; SMALL64-NEXT:    ld 3, L..C5(2)
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    add 3, 3, 7
 ; SMALL64-NEXT:    stw 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -212,11 +215,12 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
-; LARGE64-NEXT:    addis 4, L..C5 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C4 at l(3)
-; LARGE64-NEXT:    ld 4, L..C5 at l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
+; LARGE64-NEXT:    ld 7, L..C4 at l(3)
+; LARGE64-NEXT:    addis 3, L..C5 at u(2)
+; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    add 3, 3, 7
 ; LARGE64-NEXT:    stw 6, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -467,11 +471,12 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2)
-; SMALL32-NEXT:    lwz 4, L..C5(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
+; SMALL32-NEXT:    lwz 6, L..C4(2)
+; SMALL32-NEXT:    lwz 3, L..C5(2)
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    add 3, 3, 6
 ; SMALL32-NEXT:    lwz 3, 0(3)
 ; SMALL32-NEXT:    lwz 4, 0(4)
 ; SMALL32-NEXT:    add 3, 4, 3
@@ -486,10 +491,11 @@ define i32 @loadsTIUninit() #1 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C5 at l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 6, L..C4 at l(3)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 6
 ; LARGE32-NEXT:    lwz 3, 0(3)
 ; LARGE32-NEXT:    addis 4, L..C8 at u(2)
 ; LARGE32-NEXT:    lwz 4, L..C8 at l(4)
@@ -504,11 +510,12 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C4(2)
-; SMALL64-NEXT:    ld 4, L..C5(2)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL64-NEXT:    ld 6, L..C4(2)
+; SMALL64-NEXT:    ld 3, L..C5(2)
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    add 3, 3, 6
 ; SMALL64-NEXT:    lwz 3, 0(3)
 ; SMALL64-NEXT:    lwz 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -522,14 +529,15 @@ define i32 @loadsTIUninit() #1 {
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
-; LARGE64-NEXT:    addis 4, L..C5 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C4 at l(3)
-; LARGE64-NEXT:    ld 4, L..C5 at l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
+; LARGE64-NEXT:    ld 6, L..C4 at l(3)
+; LARGE64-NEXT:    addis 3, L..C5 at u(2)
+; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    addis 4, L..C8 at u(2)
-; LARGE64-NEXT:    lwz 3, 0(3)
+; LARGE64-NEXT:    add 3, 3, 6
 ; LARGE64-NEXT:    ld 4, L..C8 at l(4)
+; LARGE64-NEXT:    lwz 3, 0(3)
 ; LARGE64-NEXT:    lwz 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -625,12 +633,16 @@ entry:
   ret i32 %add
 }
 
-; External symbol reference checks for .__tls_get_addr
+; External symbol reference checks for .__tls_get_addr/.__tls_get_mod
 
 ; SMALL32: .extern .__tls_get_addr[PR]
+; SMALL32: .extern .__tls_get_mod[PR]
 ; SMALL64: .extern .__tls_get_addr[PR]
+; SMALL64: .extern .__tls_get_mod[PR]
 ; LARGE32: .extern .__tls_get_addr[PR]
+; LARGE32: .extern .__tls_get_mod[PR]
 ; LARGE64: .extern .__tls_get_addr[PR]
+; LARGE64: .extern .__tls_get_mod[PR]
 
 ; TOC entry checks
 
@@ -644,9 +656,10 @@ entry:
 ; SMALL32-LABEL: L..C3:
 ; SMALL32-NEXT:	 .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL32-LABEL: L..C4:
-; SMALL32-NEXT:	 .tc .TIUninit[TC],TIUninit[UL]@m
+; SMALL32-NEXT:	 .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL32-LABEL: L..C5:
-; SMALL32-NEXT:	 .tc TIUninit[TC],TIUninit[UL]@gd
+; SMALL32-NEXT:	 .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL32-NEXT:	 .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; SMALL32-LABEL: L..C6:
 ; SMALL32-NEXT:	 .tc .TWUninit[TC],TWUninit[TL]@m
 ; SMALL32-LABEL: L..C7:
@@ -664,9 +677,10 @@ entry:
 ; LARGE32-LABEL: L..C3:
 ; LARGE32-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL: L..C4:
-; LARGE32-NEXT:  .tc .TIUninit[TE],TIUninit[UL]@m
+; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE32-LABEL: L..C5:
-; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@gd
+; LARGE32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; LARGE32-LABEL: L..C6:
 ; LARGE32-NEXT:  .tc .TWUninit[TE],TWUninit[TL]@m
 ; LARGE32-LABEL: L..C7:
@@ -684,9 +698,10 @@ entry:
 ; SMALL64-LABEL:  L..C3:
 ; SMALL64-NEXT:   .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL64-LABEL:  L..C4:
-; SMALL64-NEXT:   .tc .TIUninit[TC],TIUninit[UL]@m
+; SMALL64-NEXT:   .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL64-LABEL:  L..C5:
-; SMALL64-NEXT:   .tc TIUninit[TC],TIUninit[UL]@gd
+; SMALL64-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL64-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; SMALL64-LABEL:  L..C6:
 ; SMALL64-NEXT:   .tc .TWUninit[TC],TWUninit[TL]@m
 ; SMALL64-LABEL:  L..C7:
@@ -704,9 +719,10 @@ entry:
 ; LARGE64-LABEL:  L..C3:
 ; LARGE64-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE64-LABEL:  L..C4:
-; LARGE64-NEXT:  .tc .TIUninit[TE],TIUninit[UL]@m
+; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE64-LABEL:  L..C5:
-; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@gd
+; LARGE64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; LARGE64-LABEL:  L..C6:
 ; LARGE64-NEXT:  .tc .TWUninit[TE],TWUninit[TL]@m
 ; LARGE64-LABEL:  L..C7:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
index 47813b59ba804c..d19b5ad9b5e50a 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
@@ -97,12 +97,13 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    mr 6, 4
-; SMALL32-NEXT:    mr 7, 3
-; SMALL32-NEXT:    lwz 3, L..C2(2)
-; SMALL32-NEXT:    lwz 4, L..C3(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
+; SMALL32-NEXT:    mr 7, 3
+; SMALL32-NEXT:    lwz 8, L..C2(2)
+; SMALL32-NEXT:    mr 6, 4
+; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    add 3, 3, 8
 ; SMALL32-NEXT:    stw 6, 4(3)
 ; SMALL32-NEXT:    stw 7, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
@@ -115,13 +116,14 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    mr 7, 3
+; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    addis 3, L..C2 at u(2)
-; LARGE32-NEXT:    addis 4, L..C3 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C2 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C3 at l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 8, L..C2 at l(3)
+; LARGE32-NEXT:    addis 3, L..C3 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 8
 ; LARGE32-NEXT:    stw 6, 4(3)
 ; LARGE32-NEXT:    stw 7, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
@@ -133,11 +135,12 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    mr 6, 3
-; SMALL64-NEXT:    ld 3, L..C2(2)
-; SMALL64-NEXT:    ld 4, L..C3(2)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL64-NEXT:    mr 6, 3
+; SMALL64-NEXT:    ld 7, L..C2(2)
+; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    add 3, 3, 7
 ; SMALL64-NEXT:    std 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -150,11 +153,12 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C2 at u(2)
-; LARGE64-NEXT:    addis 4, L..C3 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C2 at l(3)
-; LARGE64-NEXT:    ld 4, L..C3 at l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
+; LARGE64-NEXT:    ld 7, L..C2 at l(3)
+; LARGE64-NEXT:    addis 3, L..C3 at u(2)
+; LARGE64-NEXT:    ld 3, L..C3 at l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    add 3, 3, 7
 ; LARGE64-NEXT:    std 6, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -171,12 +175,13 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    mr 6, 4
-; SMALL32-NEXT:    mr 7, 3
-; SMALL32-NEXT:    lwz 3, L..C4(2)
-; SMALL32-NEXT:    lwz 4, L..C5(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
+; SMALL32-NEXT:    mr 7, 3
+; SMALL32-NEXT:    lwz 8, L..C4(2)
+; SMALL32-NEXT:    mr 6, 4
+; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    add 3, 3, 8
 ; SMALL32-NEXT:    stw 6, 4(3)
 ; SMALL32-NEXT:    stw 7, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
@@ -189,13 +194,14 @@ define void @storesTIInit(i64 %Val) #0 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    mr 7, 3
+; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C5 at l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 8, L..C4 at l(3)
+; LARGE32-NEXT:    addis 3, L..C3 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 8
 ; LARGE32-NEXT:    stw 6, 4(3)
 ; LARGE32-NEXT:    stw 7, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
@@ -207,11 +213,12 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    mr 6, 3
-; SMALL64-NEXT:    ld 3, L..C4(2)
-; SMALL64-NEXT:    ld 4, L..C5(2)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL64-NEXT:    mr 6, 3
+; SMALL64-NEXT:    ld 7, L..C4(2)
+; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    add 3, 3, 7
 ; SMALL64-NEXT:    std 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -224,11 +231,12 @@ define void @storesTIInit(i64 %Val) #0 {
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
-; LARGE64-NEXT:    addis 4, L..C5 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C4 at l(3)
-; LARGE64-NEXT:    ld 4, L..C5 at l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
+; LARGE64-NEXT:    ld 7, L..C4 at l(3)
+; LARGE64-NEXT:    addis 3, L..C3 at u(2)
+; LARGE64-NEXT:    ld 3, L..C3 at l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    add 3, 3, 7
 ; LARGE64-NEXT:    std 6, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -247,8 +255,8 @@ define void @storesTWInit(i64 %Val) #0 {
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    mr 6, 4
 ; SMALL32-NEXT:    mr 7, 3
-; SMALL32-NEXT:    lwz 3, L..C6(2)
-; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    lwz 3, L..C5(2)
+; SMALL32-NEXT:    lwz 4, L..C6(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL32-NEXT:    stw 6, 4(3)
@@ -265,10 +273,10 @@ define void @storesTWInit(i64 %Val) #0 {
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    mr 7, 3
-; LARGE32-NEXT:    addis 3, L..C6 at u(2)
-; LARGE32-NEXT:    addis 4, L..C7 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C6 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C7 at l(4)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    addis 4, L..C6 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    lwz 4, L..C6 at l(4)
 ; LARGE32-NEXT:    bla .__tls_get_addr[PR]
 ; LARGE32-NEXT:    stw 6, 4(3)
 ; LARGE32-NEXT:    stw 7, 0(3)
@@ -282,8 +290,8 @@ define void @storesTWInit(i64 %Val) #0 {
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    mr 6, 3
-; SMALL64-NEXT:    ld 3, L..C6(2)
-; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    ld 3, L..C5(2)
+; SMALL64-NEXT:    ld 4, L..C6(2)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL64-NEXT:    std 6, 0(3)
@@ -297,11 +305,11 @@ define void @storesTWInit(i64 %Val) #0 {
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    mr 6, 3
-; LARGE64-NEXT:    addis 3, L..C6 at u(2)
-; LARGE64-NEXT:    addis 4, L..C7 at u(2)
+; LARGE64-NEXT:    addis 3, L..C5 at u(2)
+; LARGE64-NEXT:    addis 4, L..C6 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C6 at l(3)
-; LARGE64-NEXT:    ld 4, L..C7 at l(4)
+; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    ld 4, L..C6 at l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
 ; LARGE64-NEXT:    std 6, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -323,7 +331,7 @@ define i64 @loadsTGInit() #1 {
 ; SMALL32-NEXT:    lwz 4, L..C1(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    lwz 4, L..C7(2)
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -347,8 +355,8 @@ define i64 @loadsTGInit() #1 {
 ; LARGE32-NEXT:    bla .__tls_get_addr[PR]
 ; LARGE32-NEXT:    lwz 4, 4(3)
 ; LARGE32-NEXT:    lwz 3, 0(3)
-; LARGE32-NEXT:    addis 5, L..C8 at u(2)
-; LARGE32-NEXT:    lwz 5, L..C8 at l(5)
+; LARGE32-NEXT:    addis 5, L..C7 at u(2)
+; LARGE32-NEXT:    lwz 5, L..C7 at l(5)
 ; LARGE32-NEXT:    lwz 6, 4(5)
 ; LARGE32-NEXT:    lwz 5, 0(5)
 ; LARGE32-NEXT:    addc 4, 6, 4
@@ -366,7 +374,7 @@ define i64 @loadsTGInit() #1 {
 ; SMALL64-NEXT:    ld 4, L..C1(2)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    ld 4, L..C7(2)
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -385,9 +393,9 @@ define i64 @loadsTGInit() #1 {
 ; LARGE64-NEXT:    ld 3, L..C0 at l(3)
 ; LARGE64-NEXT:    ld 4, L..C1 at l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
-; LARGE64-NEXT:    addis 4, L..C8 at u(2)
+; LARGE64-NEXT:    addis 4, L..C7 at u(2)
 ; LARGE64-NEXT:    ld 3, 0(3)
-; LARGE64-NEXT:    ld 4, L..C8 at l(4)
+; LARGE64-NEXT:    ld 4, L..C7 at l(4)
 ; LARGE64-NEXT:    ld 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -407,11 +415,12 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C2(2)
-; SMALL32-NEXT:    lwz 4, L..C3(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    lwz 6, L..C2(2)
+; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    add 3, 3, 6
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -429,14 +438,15 @@ define i64 @loadsTIUninit() #1 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C2 at u(2)
-; LARGE32-NEXT:    addis 4, L..C3 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C2 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C3 at l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 6, L..C2 at l(3)
+; LARGE32-NEXT:    addis 3, L..C3 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 6
 ; LARGE32-NEXT:    lwz 4, 4(3)
 ; LARGE32-NEXT:    lwz 3, 0(3)
-; LARGE32-NEXT:    addis 5, L..C8 at u(2)
-; LARGE32-NEXT:    lwz 5, L..C8 at l(5)
+; LARGE32-NEXT:    addis 5, L..C7 at u(2)
+; LARGE32-NEXT:    lwz 5, L..C7 at l(5)
 ; LARGE32-NEXT:    lwz 6, 4(5)
 ; LARGE32-NEXT:    lwz 5, 0(5)
 ; LARGE32-NEXT:    addc 4, 6, 4
@@ -450,11 +460,12 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C2(2)
-; SMALL64-NEXT:    ld 4, L..C3(2)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    ld 6, L..C2(2)
+; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    add 3, 3, 6
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -468,14 +479,15 @@ define i64 @loadsTIUninit() #1 {
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C2 at u(2)
-; LARGE64-NEXT:    addis 4, L..C3 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C2 at l(3)
-; LARGE64-NEXT:    ld 4, L..C3 at l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
-; LARGE64-NEXT:    addis 4, L..C8 at u(2)
+; LARGE64-NEXT:    ld 6, L..C2 at l(3)
+; LARGE64-NEXT:    addis 3, L..C3 at u(2)
+; LARGE64-NEXT:    ld 3, L..C3 at l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    addis 4, L..C7 at u(2)
+; LARGE64-NEXT:    add 3, 3, 6
+; LARGE64-NEXT:    ld 4, L..C7 at l(4)
 ; LARGE64-NEXT:    ld 3, 0(3)
-; LARGE64-NEXT:    ld 4, L..C8 at l(4)
 ; LARGE64-NEXT:    ld 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -495,11 +507,12 @@ define i64 @loadsTIInit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2)
-; SMALL32-NEXT:    lwz 4, L..C5(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    lwz 6, L..C4(2)
+; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    add 3, 3, 6
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -517,14 +530,15 @@ define i64 @loadsTIInit() #1 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C5 at l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 6, L..C4 at l(3)
+; LARGE32-NEXT:    addis 3, L..C3 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 6
 ; LARGE32-NEXT:    lwz 4, 4(3)
 ; LARGE32-NEXT:    lwz 3, 0(3)
-; LARGE32-NEXT:    addis 5, L..C8 at u(2)
-; LARGE32-NEXT:    lwz 5, L..C8 at l(5)
+; LARGE32-NEXT:    addis 5, L..C7 at u(2)
+; LARGE32-NEXT:    lwz 5, L..C7 at l(5)
 ; LARGE32-NEXT:    lwz 6, 4(5)
 ; LARGE32-NEXT:    lwz 5, 0(5)
 ; LARGE32-NEXT:    addc 4, 6, 4
@@ -538,11 +552,12 @@ define i64 @loadsTIInit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C4(2)
-; SMALL64-NEXT:    ld 4, L..C5(2)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    ld 6, L..C4(2)
+; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    add 3, 3, 6
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -556,14 +571,15 @@ define i64 @loadsTIInit() #1 {
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
-; LARGE64-NEXT:    addis 4, L..C5 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C4 at l(3)
-; LARGE64-NEXT:    ld 4, L..C5 at l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
-; LARGE64-NEXT:    addis 4, L..C8 at u(2)
+; LARGE64-NEXT:    ld 6, L..C4 at l(3)
+; LARGE64-NEXT:    addis 3, L..C3 at u(2)
+; LARGE64-NEXT:    ld 3, L..C3 at l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    addis 4, L..C7 at u(2)
+; LARGE64-NEXT:    add 3, 3, 6
+; LARGE64-NEXT:    ld 4, L..C7 at l(4)
 ; LARGE64-NEXT:    ld 3, 0(3)
-; LARGE64-NEXT:    ld 4, L..C8 at l(4)
 ; LARGE64-NEXT:    ld 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -583,11 +599,11 @@ define i64 @loadsTWInit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C6(2)
-; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    lwz 3, L..C5(2)
+; SMALL32-NEXT:    lwz 4, L..C6(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    lwz 4, L..C7(2)
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -604,15 +620,15 @@ define i64 @loadsTWInit() #1 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    addis 3, L..C6 at u(2)
-; LARGE32-NEXT:    addis 4, L..C7 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C6 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C7 at l(4)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    addis 4, L..C6 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    lwz 4, L..C6 at l(4)
 ; LARGE32-NEXT:    bla .__tls_get_addr[PR]
 ; LARGE32-NEXT:    lwz 4, 4(3)
 ; LARGE32-NEXT:    lwz 3, 0(3)
-; LARGE32-NEXT:    addis 5, L..C8 at u(2)
-; LARGE32-NEXT:    lwz 5, L..C8 at l(5)
+; LARGE32-NEXT:    addis 5, L..C7 at u(2)
+; LARGE32-NEXT:    lwz 5, L..C7 at l(5)
 ; LARGE32-NEXT:    lwz 6, 4(5)
 ; LARGE32-NEXT:    lwz 5, 0(5)
 ; LARGE32-NEXT:    addc 4, 6, 4
@@ -626,11 +642,11 @@ define i64 @loadsTWInit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C6(2)
-; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    ld 3, L..C5(2)
+; SMALL64-NEXT:    ld 4, L..C6(2)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    ld 4, L..C7(2)
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -643,15 +659,15 @@ define i64 @loadsTWInit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    addis 3, L..C6 at u(2)
-; LARGE64-NEXT:    addis 4, L..C7 at u(2)
+; LARGE64-NEXT:    addis 3, L..C5 at u(2)
+; LARGE64-NEXT:    addis 4, L..C6 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C6 at l(3)
-; LARGE64-NEXT:    ld 4, L..C7 at l(4)
+; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    ld 4, L..C6 at l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
-; LARGE64-NEXT:    addis 4, L..C8 at u(2)
+; LARGE64-NEXT:    addis 4, L..C7 at u(2)
 ; LARGE64-NEXT:    ld 3, 0(3)
-; LARGE64-NEXT:    ld 4, L..C8 at l(4)
+; LARGE64-NEXT:    ld 4, L..C7 at l(4)
 ; LARGE64-NEXT:    ld 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -665,12 +681,16 @@ entry:
   ret i64 %add
 }
 
-; External symbol reference checks for .__tls_get_addr
+; External symbol reference checks for .__tls_get_addr/.__tls_get_mod
 
 ; SMALL32: .extern .__tls_get_addr[PR]
+; SMALL32: .extern .__tls_get_mod[PR]
 ; SMALL64: .extern .__tls_get_addr[PR]
+; SMALL64: .extern .__tls_get_mod[PR]
 ; LARGE32: .extern .__tls_get_addr[PR]
+; LARGE32: .extern .__tls_get_mod[PR]
 ; LARGE64: .extern .__tls_get_addr[PR]
+; LARGE64: .extern .__tls_get_mod[PR]
 
 ; TOC entry checks
 
@@ -680,18 +700,17 @@ entry:
 ; SMALL32-LABEL:  L..C1:
 ; SMALL32-NEXT:  .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL32-LABEL:  L..C2:
-; SMALL32-NEXT:  .tc .TIUninit[TC],TIUninit[UL]@m
+; SMALL32-NEXT:  .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL32-LABEL:  L..C3:
-; SMALL32-NEXT:  .tc TIUninit[TC],TIUninit[UL]@gd
+; SMALL32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; SMALL32-LABEL:  L..C4:
-; SMALL32-NEXT:  .tc .TIInit[TC],TIInit[TL]@m
+; SMALL32-NEXT:  .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL32-LABEL:  L..C5:
-; SMALL32-NEXT:  .tc TIInit[TC],TIInit[TL]@gd
-; SMALL32-LABEL:  L..C6:
 ; SMALL32-NEXT:  .tc .TWInit[TC],TWInit[TL]@m
-; SMALL32-LABEL:  L..C7:
+; SMALL32-LABEL:  L..C6:
 ; SMALL32-NEXT:  .tc TWInit[TC],TWInit[TL]@gd
-; SMALL32-LABEL:  L..C8:
+; SMALL32-LABEL:  L..C7:
 ; SMALL32-NEXT:  .tc GInit[TC],GInit[RW]
 
 ; LARGE32-LABEL:  .toc
@@ -700,18 +719,17 @@ entry:
 ; LARGE32-LABEL:  L..C1:
 ; LARGE32-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL:  L..C2:
-; LARGE32-NEXT:  .tc .TIUninit[TE],TIUninit[UL]@m
+; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE32-LABEL:  L..C3:
-; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@gd
+; LARGE32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; LARGE32-LABEL:  L..C4:
-; LARGE32-NEXT:  .tc .TIInit[TE],TIInit[TL]@m
+; LARGE32-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE32-LABEL:  L..C5:
-; LARGE32-NEXT:  .tc TIInit[TE],TIInit[TL]@gd
-; LARGE32-LABEL:  L..C6:
 ; LARGE32-NEXT:  .tc .TWInit[TE],TWInit[TL]@m
-; LARGE32-LABEL:  L..C7:
+; LARGE32-LABEL:  L..C6:
 ; LARGE32-NEXT:  .tc TWInit[TE],TWInit[TL]@gd
-; LARGE32-LABEL:  L..C8:
+; LARGE32-LABEL:  L..C7:
 ; LARGE32-NEXT:  .tc GInit[TE],GInit[RW]
 
 ; SMALL64-LABEL:  .toc
@@ -720,18 +738,17 @@ entry:
 ; SMALL64-LABEL:  L..C1:
 ; SMALL64-NEXT:  .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL64-LABEL:  L..C2:
-; SMALL64-NEXT:  .tc .TIUninit[TC],TIUninit[UL]@m
+; SMALL64-NEXT:  .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL64-LABEL:  L..C3:
-; SMALL64-NEXT:  .tc TIUninit[TC],TIUninit[UL]@gd
+; SMALL64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; SMALL64-LABEL:  L..C4:
-; SMALL64-NEXT:  .tc .TIInit[TC],TIInit[TL]@m
+; SMALL64-NEXT:  .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL64-LABEL:  L..C5:
-; SMALL64-NEXT:  .tc TIInit[TC],TIInit[TL]@gd
-; SMALL64-LABEL:  L..C6:
 ; SMALL64-NEXT:  .tc .TWInit[TC],TWInit[TL]@m
-; SMALL64-LABEL:  L..C7:
+; SMALL64-LABEL:  L..C6:
 ; SMALL64-NEXT:  .tc TWInit[TC],TWInit[TL]@gd
-; SMALL64-LABEL:  L..C8:
+; SMALL64-LABEL:  L..C7:
 ; SMALL64-NEXT:  .tc GInit[TC],GInit[RW]
 
 ; LARGE64-LABEL:  .toc
@@ -740,18 +757,17 @@ entry:
 ; LARGE64-LABEL:  L..C1:
 ; LARGE64-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE64-LABEL:  L..C2:
-; LARGE64-NEXT:  .tc .TIUninit[TE],TIUninit[UL]@m
+; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE64-LABEL:  L..C3:
-; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@gd
+; LARGE64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; LARGE64-LABEL:  L..C4:
-; LARGE64-NEXT:  .tc .TIInit[TE],TIInit[TL]@m
+; LARGE64-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE64-LABEL:  L..C5:
-; LARGE64-NEXT:  .tc TIInit[TE],TIInit[TL]@gd
-; LARGE64-LABEL:  L..C6:
 ; LARGE64-NEXT:  .tc .TWInit[TE],TWInit[TL]@m
-; LARGE64-LABEL:  L..C7:
+; LARGE64-LABEL:  L..C6:
 ; LARGE64-NEXT:  .tc TWInit[TE],TWInit[TL]@gd
-; LARGE64-LABEL:  L..C8:
+; LARGE64-LABEL:  L..C7:
 ; LARGE64-NEXT:  .tc GInit[TE],GInit[RW]
 
 attributes #0 = { nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" }
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
new file mode 100644
index 00000000000000..9faa99b6eb2ba7
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
@@ -0,0 +1,364 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
+; RUN:     --code-model=small < %s | FileCheck %s --check-prefixes=SMALL64,SMALL
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
+; RUN:     --code-model=large < %s | FileCheck %s --check-prefixes=LARGE64,LARGE
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \
+; RUN:     --code-model=small < %s | FileCheck %s --check-prefixes=SMALL32,SMALL
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \
+; RUN:     --code-model=large < %s | FileCheck %s --check-prefixes=LARGE32,LARGE
+
+ at TGInit = thread_local(localdynamic) global i32 42, align 4
+ at TGUninit = thread_local(localdynamic) global i32 0, align 4
+ at TIInit = internal thread_local(localdynamic) global i32 42, align 4
+ at TIUninit = internal thread_local(localdynamic) global i32 0, align 4
+ at TWInit = weak thread_local(localdynamic) global i32 42, align 4
+ at TWUninit = weak thread_local(localdynamic) global i32 0, align 4
+
+define i32 @loadTGInit() {
+; SMALL-LABEL:  loadTGInit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        lwz [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+;
+; LARGE-LABEL:  loadTGInit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        lwz [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGInit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define void @storeTGInit(i32 noundef signext %i) {
+; SMALL-LABEL:  storeTGInit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        stw [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+;
+; LARGE-LABEL:  storeTGInit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        stw [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGInit)
+  store i32 %i, ptr %0, align 4
+  ret void
+}
+
+define i32 @loadTGUninit() {
+; SMALL-LABEL:  loadTGUninit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        lwz [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+;
+; LARGE-LABEL:  loadTGUninit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        lwz [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGUninit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define void @storeTGUninit(i32 noundef signext %i) {
+; SMALL-LABEL:  storeTGUninit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        stw [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+;
+; LARGE-LABEL:  storeTGUninit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        stw [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGUninit)
+  store i32 %i, ptr %0, align 4
+  ret void
+}
+
+define i32 @loadTIInit() {
+; SMALL-LABEL:  loadTIInit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        lwz [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+;
+; LARGE-LABEL:  loadTIInit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        lwz [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIInit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define void @storeTIInit(i32 noundef signext %i) {
+; SMALL-LABEL:  storeTIInit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        stw [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+;
+; LARGE-LABEL:  storeTIInit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        stw [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIInit)
+  store i32 %i, ptr %0, align 4
+  ret void
+}
+
+define i32 @loadTIUninit() {
+; SMALL-LABEL:  loadTIUninit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        lwz [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+;
+; LARGE-LABEL:  loadTIUninit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        lwz [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIUninit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define void @storeTIUninit(i32 noundef signext %i) {
+; SMALL-LABEL:  storeTIUninit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        stw [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+;
+; LARGE-LABEL:  storeTIUninit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        stw [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIUninit)
+  store i32 %i, ptr %0, align 4
+  ret void
+}
+
+define i32 @loadTWInit() {
+; SMALL-LABEL:  loadTWInit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        lwz [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+;
+; LARGE-LABEL:  loadTWInit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        lwz [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWInit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define void @storeTWInit(i32 noundef signext %i) {
+; SMALL-LABEL:  storeTWInit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        stw [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+;
+; LARGE-LABEL:  storeTWInit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        stw [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWInit)
+  store i32 %i, ptr %0, align 4
+  ret void
+}
+
+define i32 @loadTWUninit() {
+; SMALL-LABEL:  loadTWUninit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        lwz [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+;
+; LARGE-LABEL:  loadTWUninit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        lwz [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWUninit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define void @storeTWUninit(i32 noundef signext %i) {
+; SMALL-LABEL:  storeTWUninit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        stw [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+;
+; LARGE-LABEL:  storeTWUninit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        stw [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWUninit)
+  store i32 %i, ptr %0, align 4
+  ret void
+}
+
+; SMALL:          .extern .__tls_get_mod[PR]
+; LARGE:          .extern .__tls_get_mod[PR]
+
+; SMALL:        [[TGInitL]]:
+; SMALL-NEXT:   .tc TGInit[TC],TGInit[TL]@ld
+; SMALL:        [[ModuleHandleL]]:
+; SMALL-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL:        [[TGUninitL]]:
+; SMALL-NEXT:   .tc TGUninit[TC],TGUninit[TL]@ld
+; SMALL:        [[TIInitL]]:
+; SMALL-NEXT:   .tc TIInit[TC],TIInit[TL]@ld
+; SMALL:        [[TIUninitL]]:
+; SMALL-NEXT:   .tc TIUninit[TC],TIUninit[UL]@ld
+; SMALL:        [[TWInitL]]:
+; SMALL-NEXT:   .tc TWInit[TC],TWInit[TL]@ld
+; SMALL:        [[TWUninitL]]:
+; SMALL-NEXT:   .tc TWUninit[TC],TWUninit[TL]@ld
+
+; LARGE:        [[TGInitL]]:
+; LARGE-NEXT:   .tc TGInit[TE],TGInit[TL]@ld
+; LARGE:        [[ModuleHandleL]]:
+; LARGE-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE:        [[TGUninitL]]:
+; LARGE-NEXT:   .tc TGUninit[TE],TGUninit[TL]@ld
+; LARGE:        [[TIInitL]]:
+; LARGE-NEXT:   .tc TIInit[TE],TIInit[TL]@ld
+; LARGE:        [[TIUninitL]]:
+; LARGE-NEXT:   .tc TIUninit[TE],TIUninit[UL]@ld
+; LARGE:        [[TWInitL]]:
+; LARGE-NEXT:   .tc TWInit[TE],TWInit[TL]@ld
+; LARGE:        [[TWUninitL]]:
+; LARGE-NEXT:   .tc TWUninit[TE],TWUninit[TL]@ld
+
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
index 2dae8ee96e20d1..3e9f28965bbe3d 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
@@ -5,6 +5,7 @@
 ; RUN: llvm-objdump -D -r --symbol-description %t.o | FileCheck --check-prefix=DIS %s
 
 @GInit = global double 1.000000e+00, align 8
+; @TIInit is local-dynamic indeed
 @TIInit = internal thread_local global i64 1, align 8
 @TWInit = weak thread_local global double 1.000000e+00, align 8
 
@@ -32,7 +33,7 @@ entry:
 ; RELOC-NEXT:   Section (index: 1) .text {
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x16
-; RELOC-NEXT:     Symbol: .TIInit (17)
+; RELOC-NEXT:     Symbol: TIInit (19)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -44,19 +45,19 @@ entry:
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
-; RELOC-NEXT:     Type: R_TOCU (0x30)
+; RELOC-NEXT:     Type: R_TOCL (0x31)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x1E
-; RELOC-NEXT:     Symbol: .TIInit (17)
+; RELOC-NEXT:     Symbol: _$TLSML (21)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
-; RELOC-NEXT:     Type: R_TOCL (0x31)
+; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x22
-; RELOC-NEXT:     Symbol: TIInit (19)
+; RELOC-NEXT:     Symbol: _$TLSML (21)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -64,63 +65,63 @@ entry:
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x24
-; RELOC-NEXT:     Symbol: .__tls_get_addr (1)
+; RELOC-NEXT:     Symbol: .__tls_get_mod (1)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 26
 ; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x4E
-; RELOC-NEXT:     Symbol: .TWInit (21)
+; RELOC-NEXT:     Virtual Address: 0x5E
+; RELOC-NEXT:     Symbol: .TWInit (23)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x52
-; RELOC-NEXT:     Symbol: TWInit (23)
+; RELOC-NEXT:     Virtual Address: 0x62
+; RELOC-NEXT:     Symbol: TWInit (25)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x56
-; RELOC-NEXT:     Symbol: .TWInit (21)
+; RELOC-NEXT:     Virtual Address: 0x66
+; RELOC-NEXT:     Symbol: .TWInit (23)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOCL (0x31)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x5A
-; RELOC-NEXT:     Symbol: TWInit (23)
+; RELOC-NEXT:     Virtual Address: 0x6A
+; RELOC-NEXT:     Symbol: TWInit (25)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOCL (0x31)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x5C
-; RELOC-NEXT:     Symbol: .__tls_get_addr (1)
+; RELOC-NEXT:     Virtual Address: 0x6C
+; RELOC-NEXT:     Symbol: .__tls_get_addr (3)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 26
 ; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x66
-; RELOC-NEXT:     Symbol: GInit (25)
+; RELOC-NEXT:     Virtual Address: 0x76
+; RELOC-NEXT:     Symbol: GInit (27)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x6A
-; RELOC-NEXT:     Symbol: GInit (25)
+; RELOC-NEXT:     Virtual Address: 0x7A
+; RELOC-NEXT:     Symbol: GInit (27)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -129,72 +130,72 @@ entry:
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Section (index: 2) .data {
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x90
-; RELOC-NEXT:   Symbol: .storesTIInit (5)
+; RELOC-NEXT:   Virtual Address: 0xA0
+; RELOC-NEXT:   Symbol: .storesTIInit (7)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x94
-; RELOC-NEXT:   Symbol: TOC (15)
+; RELOC-NEXT:   Virtual Address: 0xA4
+; RELOC-NEXT:   Symbol: TOC (17)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x9C
-; RELOC-NEXT:   Symbol: .loadsTWInit (7)
+; RELOC-NEXT:   Virtual Address: 0xAC
+; RELOC-NEXT:   Symbol: .loadsTWInit (9)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xA0
-; RELOC-NEXT:   Symbol: TOC (15)
+; RELOC-NEXT:   Virtual Address: 0xB0
+; RELOC-NEXT:   Symbol: TOC (17)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xA8
-; RELOC-NEXT:   Symbol: TIInit (27)
+; RELOC-NEXT:   Virtual Address: 0xB8
+; RELOC-NEXT:   Symbol: TIInit (29)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLSM (0x24)
+; RELOC-NEXT:   Type: R_TLS_LD (0x22)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xAC
-; RELOC-NEXT:   Symbol: TIInit (27)
+; RELOC-NEXT:   Virtual Address: 0xBC
+; RELOC-NEXT:   Symbol: _$TLSML (21)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLS (0x20)
+; RELOC-NEXT:   Type: R_TLSML (0x25)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xB0
-; RELOC-NEXT:   Symbol: TWInit (29)
+; RELOC-NEXT:   Virtual Address: 0xC0
+; RELOC-NEXT:   Symbol: TWInit (31)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_TLSM (0x24)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xB4
-; RELOC-NEXT:   Symbol: TWInit (29)
+; RELOC-NEXT:   Virtual Address: 0xC4
+; RELOC-NEXT:   Symbol: TWInit (31)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_TLS (0x20)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xB8
-; RELOC-NEXT:   Symbol: GInit (9)
+; RELOC-NEXT:   Virtual Address: 0xC8
+; RELOC-NEXT:   Symbol: GInit (11)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
@@ -220,7 +221,7 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 1
-; SYM-NEXT:     Name: .__tls_get_addr
+; SYM-NEXT:     Name: .__tls_get_mod
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: N_UNDEF
 ; SYM-NEXT:     Type: 0x0
@@ -240,15 +241,35 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 3
-; SYM-NEXT:     Name:
+; SYM-NEXT:     Name: .__tls_get_addr
+; SYM-NEXT:     Value (RelocatableAddress): 0x0
+; SYM-NEXT:     Section: N_UNDEF
+; SYM-NEXT:     Type: 0x0
+; SYM-NEXT:     StorageClass: C_EXT (0x2)
+; SYM-NEXT:     NumberOfAuxEntries: 1
+; SYM-NEXT:     CSECT Auxiliary Entry {
+; SYM-NEXT:       Index: 4
+; SYM-NEXT:       SectionLen: 0
+; SYM-NEXT:       ParameterHashIndex: 0x0
+; SYM-NEXT:       TypeChkSectNum: 0x0
+; SYM-NEXT:       SymbolAlignmentLog2: 0
+; SYM-NEXT:       SymbolType: XTY_ER (0x0)
+; SYM-NEXT:       StorageMappingClass: XMC_PR (0x0)
+; SYM-NEXT:       StabInfoIndex: 0x0
+; SYM-NEXT:       StabSectNum: 0x0
+; SYM-NEXT:     }
+; SYM-NEXT:   }
+; SYM-NEXT:   Symbol {
+; SYM-NEXT:     Index: 5
+; SYM-NEXT:     Name: 
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 4
-; SYM-NEXT:       SectionLen: 132
+; SYM-NEXT:       Index: 6
+; SYM-NEXT:       SectionLen: 148
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 5
@@ -259,7 +280,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 5
+; SYM-NEXT:     Index: 7
 ; SYM-NEXT:     Name: .storesTIInit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .text
@@ -267,8 +288,8 @@ entry:
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 6
-; SYM-NEXT:       ContainingCsectSymbolIndex: 3
+; SYM-NEXT:       Index: 8
+; SYM-NEXT:       ContainingCsectSymbolIndex: 5
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -279,16 +300,16 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 7
+; SYM-NEXT:     Index: 9
 ; SYM-NEXT:     Name: .loadsTWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x40
+; SYM-NEXT:     Value (RelocatableAddress): 0x50
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 8
-; SYM-NEXT:       ContainingCsectSymbolIndex: 3
+; SYM-NEXT:       Index: 10
+; SYM-NEXT:       ContainingCsectSymbolIndex: 5
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -299,15 +320,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 9
+; SYM-NEXT:     Index: 11
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x88
+; SYM-NEXT:     Value (RelocatableAddress): 0x98
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 10
+; SYM-NEXT:       Index: 12
 ; SYM-NEXT:       SectionLen: 8
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -319,15 +340,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 11
+; SYM-NEXT:     Index: 13
 ; SYM-NEXT:     Name: storesTIInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x90
+; SYM-NEXT:     Value (RelocatableAddress): 0xA0
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 12
+; SYM-NEXT:       Index: 14
 ; SYM-NEXT:       SectionLen: 12
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -339,15 +360,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 13
+; SYM-NEXT:     Index: 15
 ; SYM-NEXT:     Name: loadsTWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x9C
+; SYM-NEXT:     Value (RelocatableAddress): 0xAC
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 14
+; SYM-NEXT:       Index: 16
 ; SYM-NEXT:       SectionLen: 12
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -359,15 +380,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 15
+; SYM-NEXT:     Index: 17
 ; SYM-NEXT:     Name: TOC
-; SYM-NEXT:     Value (RelocatableAddress): 0xA8
+; SYM-NEXT:     Value (RelocatableAddress): 0xB8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 16
+; SYM-NEXT:       Index: 18
 ; SYM-NEXT:       SectionLen: 0
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -379,15 +400,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 17
-; SYM-NEXT:     Name: .TIInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xA8
+; SYM-NEXT:     Index: 19
+; SYM-NEXT:     Name: TIInit
+; SYM-NEXT:     Value (RelocatableAddress): 0xB8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 18
+; SYM-NEXT:       Index: 20
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -399,35 +420,35 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 19
-; SYM-NEXT:     Name: TIInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xAC
+; SYM-NEXT:     Index: 21
+; SYM-NEXT:     Name: _$TLSML
+; SYM-NEXT:     Value (RelocatableAddress): 0xBC
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 20
+; SYM-NEXT:       Index: 22
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 2
 ; SYM-NEXT:       SymbolType: XTY_SD (0x1)
-; SYM-NEXT:       StorageMappingClass: XMC_TE (0x16)
+; SYM-NEXT:       StorageMappingClass: XMC_TC (0x3)
 ; SYM-NEXT:       StabInfoIndex: 0x0
 ; SYM-NEXT:       StabSectNum: 0x0
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 21
+; SYM-NEXT:     Index: 23
 ; SYM-NEXT:     Name: .TWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xB0
+; SYM-NEXT:     Value (RelocatableAddress): 0xC0
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 22
+; SYM-NEXT:       Index: 24
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -439,15 +460,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 23
+; SYM-NEXT:     Index: 25
 ; SYM-NEXT:     Name: TWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xB4
+; SYM-NEXT:     Value (RelocatableAddress): 0xC4
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 24
+; SYM-NEXT:       Index: 26
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -459,15 +480,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 25
+; SYM-NEXT:     Index: 27
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xB8
+; SYM-NEXT:     Value (RelocatableAddress): 0xC8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 26
+; SYM-NEXT:       Index: 28
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -479,7 +500,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 27
+; SYM-NEXT:     Index: 29
 ; SYM-NEXT:     Name: TIInit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .tdata
@@ -487,7 +508,7 @@ entry:
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 28
+; SYM-NEXT:       Index: 30
 ; SYM-NEXT:       SectionLen: 8
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -499,7 +520,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 29
+; SYM-NEXT:     Index: 31
 ; SYM-NEXT:     Name: TWInit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x8
 ; SYM-NEXT:     Section: .tdata
@@ -507,7 +528,7 @@ entry:
 ; SYM-NEXT:     StorageClass: C_WEAKEXT (0x6F)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 30
+; SYM-NEXT:       Index: 32
 ; SYM-NEXT:       SectionLen: 8
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -522,47 +543,48 @@ entry:
 
 ; DIS:      {{.*}}aix-tls-xcoff-reloc-large.ll.tmp.o:	file format aixcoff-rs6000
 ; DIS:      Disassembly of section .text:
-; DIS:      00000000 (idx: 5) .storesTIInit:
+; DIS:      00000000 (idx: 7) .storesTIInit:
 ; DIS-NEXT:                                       mflr 0
 ; DIS-NEXT:                                       stwu 1, -32(1)
 ; DIS-NEXT:                                       stw 0, 40(1)
-; DIS-NEXT:                                       mr 6, 4
 ; DIS-NEXT:                                       mr 7, 3
+; DIS-NEXT:                                       mr 6, 4
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 17) .TIInit[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 4, 2, 0
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 19) TIInit[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 0(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 17) .TIInit[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 4, 4(4)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 8, 0(3)
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 19) TIInit[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 21) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 4(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 21) _$TLSML[TC]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0
-; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA  (idx: 1)      .__tls_get_addr[PR]
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA  (idx: 1)      .__tls_get_mod[PR]
+; DIS-NEXT:                                       add 3, 3, 8
 ; DIS-NEXT:                                       stw 6, 4(3)
 ; DIS-NEXT:                                       stw 7, 0(3)
 ; DIS-NEXT:                                       addi 1, 1, 32
 ; DIS-NEXT:                                       lwz 0, 8(1)
 ; DIS-NEXT:                                       mtlr 0
 ; DIS-NEXT:                                       blr
-; DIS:      00000040 (idx: 7) .loadsTWInit:
+; DIS:      00000050 (idx: 9) .loadsTWInit:
 ; DIS-NEXT:                                       mflr 0
 ; DIS-NEXT:                                       stwu 1, -32(1)
 ; DIS-NEXT:                                       stw 0, 40(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 21) .TWInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 23) .TWInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 4, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 23) TWInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 25) TWInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 8(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 21) .TWInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 23) .TWInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 4, 12(4)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 23) TWInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 25) TWInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0
-; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA  (idx: 1)      .__tls_get_addr[PR]
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA  (idx: 3)      .__tls_get_addr[PR]
 ; DIS-NEXT:                                       lfd 0, 0(3)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 25) GInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 27) GInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 16(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 25) GInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 27) GInit[TE]
 ; DIS-NEXT:                                       lfd 1, 0(3)
 ; DIS-NEXT:                                       fadd 1, 0, 1
 ; DIS-NEXT:                                       addi 1, 1, 32
@@ -571,42 +593,42 @@ entry:
 ; DIS-NEXT:                                       blr
 
 ; DIS:      Disassembly of section .data:
-; DIS:      00000088  (idx: 9) GInit[RW]:
-; DIS-NEXT:       88: 3f f0 00 00
-; DIS-NEXT:       8c: 00 00 00 00
-; DIS:      00000090  (idx: 11) storesTIInit[DS]:
-; DIS-NEXT:       90: 00 00 00 00
-; DIS-NEXT: 00000090: R_POS (idx: 5) .storesTIInit
-; DIS-NEXT:       94: 00 00 00 a8
-; DIS-NEXT: 00000094: R_POS (idx: 15) TOC[TC0]
-; DIS-NEXT:       98: 00 00 00 00
-; DIS:      0000009c  (idx: 13) loadsTWInit[DS]:
-; DIS-NEXT:       9c: 00 00 00 40
-; DIS-NEXT: 0000009c: R_POS (idx: 7) .loadsTWInit
-; DIS-NEXT:       a0: 00 00 00 a8
-; DIS-NEXT: 000000a0: R_POS (idx: 15) TOC[TC0]
-; DIS-NEXT:       a4: 00 00 00 00
-; DIS:      000000a8  (idx: 17) .TIInit[TE]:
+; DIS:      00000098  (idx: 11) GInit[RW]:
+; DIS-NEXT:       98: 3f f0 00 00
+; DIS-NEXT:       9c: 00 00 00 00
+; DIS:      000000a0  (idx: 13) storesTIInit[DS]:
+; DIS-NEXT:       a0: 00 00 00 00
+; DIS-NEXT: 000000a0: R_POS (idx: 7) .storesTIInit
+; DIS-NEXT:       a4: 00 00 00 b8
+; DIS-NEXT: 000000a4: R_POS (idx: 17) TOC[TC0]
 ; DIS-NEXT:       a8: 00 00 00 00
-; DIS-NEXT: 000000a8: R_TLSM (idx: 27) TIInit[TL]
-; DIS:      000000ac  (idx: 19) TIInit[TE]:
-; DIS-NEXT:       ac: 00 00 00 00
-; DIS-NEXT: 000000ac: R_TLS (idx: 27) TIInit[TL]
-; DIS:      000000b0  (idx: 21) .TWInit[TE]:
-; DIS-NEXT:       b0: 00 00 00 00
-; DIS-NEXT: 000000b0: R_TLSM (idx: 29) TWInit[TL]
-; DIS:      000000b4  (idx: 23) TWInit[TE]:
-; DIS-NEXT:       b4: 00 00 00 08
-; DIS-NEXT: 000000b4: R_TLS (idx: 29) TWInit[TL]
-; DIS:      000000b8  (idx: 25) GInit[TE]:
-; DIS-NEXT:       b8: 00 00 00 88
-; DIS-NEXT: 000000b8: R_POS (idx: 9) GInit[RW]
+; DIS:      000000ac  (idx: 15) loadsTWInit[DS]:
+; DIS-NEXT:       ac: 00 00 00 50
+; DIS-NEXT: 000000ac: R_POS (idx: 9) .loadsTWInit
+; DIS-NEXT:       b0: 00 00 00 b8
+; DIS-NEXT: 000000b0: R_POS (idx: 17) TOC[TC0]
+; DIS-NEXT:       b4: 00 00 00 00
+; DIS:      000000b8  (idx: 19) TIInit[TE]:
+; DIS-NEXT:       b8: 00 00 00 00
+; DIS-NEXT: 000000b8: R_TLS_LD (idx: 29) TIInit[TL]
+; DIS:      000000bc  (idx: 21) _$TLSML[TC]:
+; DIS-NEXT:       bc: 00 00 00 00
+; DIS-NEXT: 000000bc: R_TLSML (idx: 21) _$TLSML[TC]
+; DIS:      000000c0  (idx: 23) .TWInit[TE]:
+; DIS-NEXT:       c0: 00 00 00 00
+; DIS-NEXT: 000000c0: R_TLSM (idx: 31) TWInit[TL]
+; DIS:      000000c4  (idx: 25) TWInit[TE]:
+; DIS-NEXT:       c4: 00 00 00 08
+; DIS-NEXT: 000000c4: R_TLS (idx: 31) TWInit[TL]
+; DIS:      000000c8  (idx: 27) GInit[TE]:
+; DIS-NEXT:       c8: 00 00 00 98
+; DIS-NEXT: 000000c8: R_POS (idx: 11) GInit[RW]
 
 ; DIS:      Disassembly of section .tdata:
-; DIS:      00000000  (idx: 27) TIInit[TL]:
+; DIS:      00000000  (idx: 29) TIInit[TL]:
 ; DIS-NEXT:        0: 00 00 00 00
 ; DIS-NEXT:        4: 00 00 00 01
-; DIS:      00000008  (idx: 29) TWInit[TL]:
+; DIS:      00000008  (idx: 31) TWInit[TL]:
 ; DIS-NEXT:        8: 3f f0 00 00
 ; DIS-NEXT:        c: 00 00 00 00
 
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
index 0779686b54f3ac..ae1dae7955914b 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
@@ -7,6 +7,7 @@
 @const_ivar = constant i32 6, align 4
 @GInit = global i32 1, align 4
 @TGInit = thread_local global i32 1, align 4
+; @TIUninit is local-dynamic indeed
 @TIUninit = internal thread_local global i32 0, align 4
 
 ; Function Attrs: nofree norecurse nounwind willreturn writeonly
@@ -32,16 +33,16 @@ entry:
 ; RELOC-NEXT: Relocations [
 ; RELOC-NEXT:   Section (index: 1) .text {
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0xE
-; RELOC-NEXT:     Symbol: .TIUninit (23)
+; RELOC-NEXT:     Virtual Address: 0x12
+; RELOC-NEXT:     Symbol: TIUninit (25)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x12
-; RELOC-NEXT:     Symbol: TIUninit (25)
+; RELOC-NEXT:     Virtual Address: 0x16
+; RELOC-NEXT:     Symbol: _$TLSML (27)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -49,39 +50,39 @@ entry:
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x18
-; RELOC-NEXT:     Symbol: .__tls_get_addr (1)
+; RELOC-NEXT:     Symbol: .__tls_get_mod (1)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 26
 ; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x3A
-; RELOC-NEXT:     Symbol: .TGInit (27)
+; RELOC-NEXT:     Virtual Address: 0x4A
+; RELOC-NEXT:     Symbol: .TGInit (29)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x3E
-; RELOC-NEXT:     Symbol: TGInit (29)
+; RELOC-NEXT:     Virtual Address: 0x4E
+; RELOC-NEXT:     Symbol: TGInit (31)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x44
-; RELOC-NEXT:     Symbol: .__tls_get_addr (1)
+; RELOC-NEXT:     Virtual Address: 0x54
+; RELOC-NEXT:     Symbol: .__tls_get_addr (3)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 26
 ; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x4A
-; RELOC-NEXT:     Symbol: GInit (31)
+; RELOC-NEXT:     Virtual Address: 0x5A
+; RELOC-NEXT:     Symbol: GInit (33)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -90,72 +91,72 @@ entry:
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Section (index: 2) .data {
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x70
-; RELOC-NEXT:   Symbol: .storesTIUninit (5)
+; RELOC-NEXT:   Virtual Address: 0x80
+; RELOC-NEXT:   Symbol: .storesTIUninit (7)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x74
-; RELOC-NEXT:   Symbol: TOC (21)
+; RELOC-NEXT:   Virtual Address: 0x84
+; RELOC-NEXT:   Symbol: TOC (23)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x7C
-; RELOC-NEXT:   Symbol: .loadsTGInit (7)
+; RELOC-NEXT:   Virtual Address: 0x8C
+; RELOC-NEXT:   Symbol: .loadsTGInit (9)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x80
-; RELOC-NEXT:   Symbol: TOC (21)
+; RELOC-NEXT:   Virtual Address: 0x90
+; RELOC-NEXT:   Symbol: TOC (23)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x88
-; RELOC-NEXT:   Symbol: TIUninit (37)
+; RELOC-NEXT:   Virtual Address: 0x98
+; RELOC-NEXT:   Symbol: TIUninit (39)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLSM (0x24)
+; RELOC-NEXT:   Type: R_TLS_LD (0x22)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x8C
-; RELOC-NEXT:   Symbol: TIUninit (37)
+; RELOC-NEXT:   Virtual Address: 0x9C
+; RELOC-NEXT:   Symbol: _$TLSML (27)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLS (0x20)
+; RELOC-NEXT:   Type: R_TLSML (0x25)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x90
-; RELOC-NEXT:   Symbol: TGInit (35)
+; RELOC-NEXT:   Virtual Address: 0xA0
+; RELOC-NEXT:   Symbol: TGInit (37)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_TLSM (0x24)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x94
-; RELOC-NEXT:   Symbol: TGInit (35)
+; RELOC-NEXT:   Virtual Address: 0xA4
+; RELOC-NEXT:   Symbol: TGInit (37)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_TLS (0x20)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x98
-; RELOC-NEXT:   Symbol: GInit (15)
+; RELOC-NEXT:   Virtual Address: 0xA8
+; RELOC-NEXT:   Symbol: GInit (17)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
@@ -181,7 +182,7 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 1
-; SYM-NEXT:     Name: .__tls_get_addr
+; SYM-NEXT:     Name: .__tls_get_mod
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: N_UNDEF
 ; SYM-NEXT:     Type: 0x0
@@ -201,15 +202,35 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 3
-; SYM-NEXT:     Name:
+; SYM-NEXT:     Name: .__tls_get_addr
+; SYM-NEXT:     Value (RelocatableAddress): 0x0
+; SYM-NEXT:     Section: N_UNDEF
+; SYM-NEXT:     Type: 0x0
+; SYM-NEXT:     StorageClass: C_EXT (0x2)
+; SYM-NEXT:     NumberOfAuxEntries: 1
+; SYM-NEXT:     CSECT Auxiliary Entry {
+; SYM-NEXT:       Index: 4
+; SYM-NEXT:       SectionLen: 0
+; SYM-NEXT:       ParameterHashIndex: 0x0
+; SYM-NEXT:       TypeChkSectNum: 0x0
+; SYM-NEXT:       SymbolAlignmentLog2: 0
+; SYM-NEXT:       SymbolType: XTY_ER (0x0)
+; SYM-NEXT:       StorageMappingClass: XMC_PR (0x0)
+; SYM-NEXT:       StabInfoIndex: 0x0
+; SYM-NEXT:       StabSectNum: 0x0
+; SYM-NEXT:     }
+; SYM-NEXT:   }
+; SYM-NEXT:   Symbol {
+; SYM-NEXT:     Index: 5
+; SYM-NEXT:     Name: 
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 4
-; SYM-NEXT:       SectionLen: 104
+; SYM-NEXT:       Index: 6
+; SYM-NEXT:       SectionLen: 120
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 5
@@ -220,7 +241,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 5
+; SYM-NEXT:     Index: 7
 ; SYM-NEXT:     Name: .storesTIUninit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .text
@@ -228,8 +249,8 @@ entry:
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 6
-; SYM-NEXT:       ContainingCsectSymbolIndex: 3
+; SYM-NEXT:       Index: 8
+; SYM-NEXT:       ContainingCsectSymbolIndex: 5
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -240,16 +261,16 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 7
+; SYM-NEXT:     Index: 9
 ; SYM-NEXT:     Name: .loadsTGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x30
+; SYM-NEXT:     Value (RelocatableAddress): 0x40
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 8
-; SYM-NEXT:       ContainingCsectSymbolIndex: 3
+; SYM-NEXT:       Index: 10
+; SYM-NEXT:       ContainingCsectSymbolIndex: 5
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -260,15 +281,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 9
+; SYM-NEXT:     Index: 11
 ; SYM-NEXT:     Name: .rodata
-; SYM-NEXT:     Value (RelocatableAddress): 0x68
+; SYM-NEXT:     Value (RelocatableAddress): 0x78
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 10
+; SYM-NEXT:       Index: 12
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -280,16 +301,16 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 11
+; SYM-NEXT:     Index: 13
 ; SYM-NEXT:     Name: const_ivar
-; SYM-NEXT:     Value (RelocatableAddress): 0x68
+; SYM-NEXT:     Value (RelocatableAddress): 0x78
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 12
-; SYM-NEXT:       ContainingCsectSymbolIndex: 9
+; SYM-NEXT:       Index: 14
+; SYM-NEXT:       ContainingCsectSymbolIndex: 11
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -300,15 +321,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 13
+; SYM-NEXT:     Index: 15
 ; SYM-NEXT:     Name: .data
-; SYM-NEXT:     Value (RelocatableAddress): 0x6C
+; SYM-NEXT:     Value (RelocatableAddress): 0x7C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 14
+; SYM-NEXT:       Index: 16
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -320,16 +341,16 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 15
+; SYM-NEXT:     Index: 17
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x6C
+; SYM-NEXT:     Value (RelocatableAddress): 0x7C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 16
-; SYM-NEXT:       ContainingCsectSymbolIndex: 13
+; SYM-NEXT:       Index: 18
+; SYM-NEXT:       ContainingCsectSymbolIndex: 15
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -340,15 +361,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 17
+; SYM-NEXT:     Index: 19
 ; SYM-NEXT:     Name: storesTIUninit
-; SYM-NEXT:     Value (RelocatableAddress): 0x70
+; SYM-NEXT:     Value (RelocatableAddress): 0x80
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 18
+; SYM-NEXT:       Index: 20
 ; SYM-NEXT:       SectionLen: 12
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -360,15 +381,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 19
+; SYM-NEXT:     Index: 21
 ; SYM-NEXT:     Name: loadsTGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x7C
+; SYM-NEXT:     Value (RelocatableAddress): 0x8C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 20
+; SYM-NEXT:       Index: 22
 ; SYM-NEXT:       SectionLen: 12
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -380,15 +401,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 21
+; SYM-NEXT:     Index: 23
 ; SYM-NEXT:     Name: TOC
-; SYM-NEXT:     Value (RelocatableAddress): 0x88
+; SYM-NEXT:     Value (RelocatableAddress): 0x98
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 22
+; SYM-NEXT:       Index: 24
 ; SYM-NEXT:       SectionLen: 0
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -400,15 +421,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 23
-; SYM-NEXT:     Name: .TIUninit
-; SYM-NEXT:     Value (RelocatableAddress): 0x88
+; SYM-NEXT:     Index: 25
+; SYM-NEXT:     Name: TIUninit
+; SYM-NEXT:     Value (RelocatableAddress): 0x98
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 24
+; SYM-NEXT:       Index: 26
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -420,15 +441,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 25
-; SYM-NEXT:     Name: TIUninit
-; SYM-NEXT:     Value (RelocatableAddress): 0x8C
+; SYM-NEXT:     Index: 27
+; SYM-NEXT:     Name: _$TLSML
+; SYM-NEXT:     Value (RelocatableAddress): 0x9C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 26
+; SYM-NEXT:       Index: 28
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -440,15 +461,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 27
+; SYM-NEXT:     Index: 29
 ; SYM-NEXT:     Name: .TGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x90
+; SYM-NEXT:     Value (RelocatableAddress): 0xA0
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 28
+; SYM-NEXT:       Index: 30
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -460,15 +481,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 29
+; SYM-NEXT:     Index: 31
 ; SYM-NEXT:     Name: TGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x94
+; SYM-NEXT:     Value (RelocatableAddress): 0xA4
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 30
+; SYM-NEXT:       Index: 32
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -480,15 +501,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 31
+; SYM-NEXT:     Index: 33
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x98
+; SYM-NEXT:     Value (RelocatableAddress): 0xA8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 32
+; SYM-NEXT:       Index: 34
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -500,7 +521,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 33
+; SYM-NEXT:     Index: 35
 ; SYM-NEXT:     Name: .tdata
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .tdata
@@ -508,7 +529,7 @@ entry:
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 34
+; SYM-NEXT:       Index: 36
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -520,7 +541,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 35
+; SYM-NEXT:     Index: 37
 ; SYM-NEXT:     Name: TGInit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .tdata
@@ -528,8 +549,8 @@ entry:
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 36
-; SYM-NEXT:       ContainingCsectSymbolIndex: 33
+; SYM-NEXT:       Index: 38
+; SYM-NEXT:       ContainingCsectSymbolIndex: 35
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -540,7 +561,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: 37
+; SYM-NEXT:     Index: 39
 ; SYM-NEXT:     Name: TIUninit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x4
 ; SYM-NEXT:     Section: .tbss
@@ -548,7 +569,7 @@ entry:
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: 38
+; SYM-NEXT:       Index: 40
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -563,34 +584,35 @@ entry:
 
 ; DIS:      {{.*}}aix-tls-xcoff-reloc.ll.tmp.o:	file format aixcoff-rs6000
 ; DIS:      Disassembly of section .text:
-; DIS:      00000000 (idx: 5) .storesTIUninit:
+; DIS:      00000000 (idx: 7) .storesTIUninit:
 ; DIS-NEXT:                                      mflr 0
 ; DIS-NEXT:                                      stwu 1, -32(1)
+; DIS-NEXT:                                      stw 0, 40(1)
 ; DIS-NEXT:                                      mr 6, 3
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 0(2)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 23) .TIUninit[TC]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 4, 4(2)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 7, 0(2)
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 25) TIUninit[TC]
-; DIS-NEXT:                                      stw 0, 40(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 4(2)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 27) _$TLSML[TC]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               bla 0
-; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1)      .__tls_get_addr[PR]
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1)      .__tls_get_mod[PR]
+; DIS-NEXT:                                      add 3, 3, 7
 ; DIS-NEXT:                                      stw 6, 0(3)
 ; DIS-NEXT:                                      addi 1, 1, 32
 ; DIS-NEXT:                                      lwz 0, 8(1)
 ; DIS-NEXT:                                      mtlr 0
 ; DIS-NEXT:                                      blr
-; DIS:      00000030 (idx: 7) .loadsTGInit:
+; DIS:      00000040 (idx: 9) .loadsTGInit:
 ; DIS-NEXT:                                      mflr 0
 ; DIS-NEXT:                                      stwu 1, -32(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 8(2)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 27) .TGInit[TC]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 29) .TGInit[TC]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 4, 12(2)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 29) TGInit[TC]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 31) TGInit[TC]
 ; DIS-NEXT:                                      stw 0, 40(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               bla 0
-; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1)      .__tls_get_addr[PR]
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 3)      .__tls_get_addr[PR]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 4, 16(2)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 31) GInit[TC]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 33) GInit[TC]
 ; DIS-NEXT:                                      lwz 3, 0(3)
 ; DIS-NEXT:                                      lwz 4, 0(4)
 ; DIS-NEXT:                                      add 3, 4, 3
@@ -598,46 +620,46 @@ entry:
 ; DIS-NEXT:                                      lwz 0, 8(1)
 ; DIS-NEXT:                                      mtlr 0
 ; DIS-NEXT:                                      blr
-; DIS:      00000068 (idx: 11) const_ivar:
-; DIS-NEXT:       68: 00 00 00 06
+; DIS:      00000078 (idx: 13) const_ivar:
+; DIS-NEXT:       78: 00 00 00 06
 
 ; DIS:      Disassembly of section .data:
-; DIS:      0000006c  (idx: 15) GInit:
-; DIS-NEXT:       6c: 00 00 00 01
-; DIS:      00000070  (idx: 17) storesTIUninit[DS]:
-; DIS-NEXT:       70: 00 00 00 00
-; DIS-NEXT: 00000070: R_POS (idx: 5) .storesTIUninit
-; DIS-NEXT:       74: 00 00 00 88
-; DIS-NEXT: 00000074: R_POS (idx: 21) TOC[TC0]
-; DIS-NEXT:       78: 00 00 00 00
-; DIS:      0000007c  (idx: 19) loadsTGInit[DS]:
-; DIS-NEXT:       7c: 00 00 00 30
-; DIS-NEXT: 0000007c: R_POS (idx: 7) .loadsTGInit
-; DIS-NEXT:       80: 00 00 00 88
-; DIS-NEXT: 00000080: R_POS (idx: 21) TOC[TC0]
-; DIS-NEXT:       84: 00 00 00 00
-; DIS:      00000088  (idx: 23) .TIUninit[TC]:
+; DIS:      0000007c  (idx: 17) GInit:
+; DIS-NEXT:       7c: 00 00 00 01
+; DIS:      00000080  (idx: 19) storesTIUninit[DS]:
+; DIS-NEXT:       80: 00 00 00 00
+; DIS-NEXT: 00000080: R_POS (idx: 7) .storesTIUninit
+; DIS-NEXT:       84: 00 00 00 98
+; DIS-NEXT: 00000084: R_POS (idx: 23) TOC[TC0]
 ; DIS-NEXT:       88: 00 00 00 00
-; DIS-NEXT: 00000088: R_TLSM (idx: 37) TIUninit[UL]
-; DIS:      0000008c  (idx: 25) TIUninit[TC]:
-; DIS-NEXT:       8c: 00 00 00 04
-; DIS-NEXT: 0000008c: R_TLS (idx: 37) TIUninit[UL]
-; DIS:      00000090  (idx: 27) .TGInit[TC]:
-; DIS-NEXT:       90: 00 00 00 00
-; DIS-NEXT: 00000090: R_TLSM (idx: 35) TGInit
-; DIS:      00000094  (idx: 29) TGInit[TC]:
+; DIS:      0000008c  (idx: 21) loadsTGInit[DS]:
+; DIS-NEXT:       8c: 00 00 00 40
+; DIS-NEXT: 0000008c: R_POS (idx: 9) .loadsTGInit
+; DIS-NEXT:       90: 00 00 00 98
+; DIS-NEXT: 00000090: R_POS (idx: 23) TOC[TC0]
 ; DIS-NEXT:       94: 00 00 00 00
-; DIS-NEXT: 00000094: R_TLS (idx: 35) TGInit
-; DIS:      00000098  (idx: 31) GInit[TC]:
-; DIS-NEXT:       98: 00 00 00 6c
-; DIS-NEXT: 00000098: R_POS (idx: 15) GInit
+; DIS:      00000098  (idx: 25) TIUninit[TC]:
+; DIS-NEXT:       98: 00 00 00 00
+; DIS-NEXT: 00000098: R_TLS_LD (idx: 39) TIUninit[UL]
+; DIS:      0000009c  (idx: 27) _$TLSML[TC]:
+; DIS-NEXT:       9c: 00 00 00 00
+; DIS-NEXT: 0000009c: R_TLSML (idx: 27) _$TLSML[TC]
+; DIS:      000000a0  (idx: 29) .TGInit[TC]:
+; DIS-NEXT:       a0: 00 00 00 00
+; DIS-NEXT: 000000a0: R_TLSM (idx: 37) TGInit
+; DIS:      000000a4  (idx: 31) TGInit[TC]:
+; DIS-NEXT:       a4: 00 00 00 00
+; DIS-NEXT: 000000a4: R_TLS (idx: 37) TGInit
+; DIS:      000000a8  (idx: 33) GInit[TC]:
+; DIS-NEXT:       a8: 00 00 00 7c
+; DIS-NEXT: 000000a8: R_POS (idx: 17) GInit
 
 ; DIS:      Disassembly of section .tdata:
-; DIS:      00000000 (idx: 35) TGInit:
+; DIS:      00000000 (idx: 37) TGInit:
 ; DIS-NEXT:        0: 00 00 00 01
 
 ; DIS:      Disassembly of section .tbss:
-; DIS:      00000004 (idx: 37) TIUninit[UL]:
+; DIS:      00000004 (idx: 39) TIUninit[UL]:
 ; DIS-NEXT: ...
 
 attributes #0 = { nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" }

>From 6ed2b1e3ce3d7ce8b8f5d0ab5bda11b6ebec0f3b Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Thu, 14 Sep 2023 03:46:38 -0400
Subject: [PATCH 02/27] address comments

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  4 ++--
 llvm/lib/Target/PowerPC/PPCISelLowering.h     | 12 ++++++------
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       |  2 +-
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 12 +++++++-----
 4 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 9d0430ea2e515a..87be6cc1f1be18 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1772,8 +1772,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
-  case PPCISD::GET_TLS_MOD:
-    return "PPCISD::GET_TLS_MOD";
+  case PPCISD::GET_TLS_MOD_AIX:
+    return "PPCISD::GET_TLS_MOD_AIX";
   case PPCISD::GET_TPOINTER:    return "PPCISD::GET_TPOINTER";
   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
   case PPCISD::TLSGD_AIX:       return "PPCISD::TLSGD_AIX";
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 7f80f70d699a6b..3aaedb3efc0bd1 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -375,16 +375,16 @@ namespace llvm {
     /// G8RC and inputs are X3/X4).
     TLSGD_AIX,
 
-    /// %x3 = GET_TLS_MOD _$TLSML - For the AIX local-dynamic TLS model,
+    /// %x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model,
     /// produces a call to __tls_get_mod(_$TLSML\@ml).
-    GET_TLS_MOD,
+    GET_TLS_MOD_AIX,
 
     /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(variable offset)
     /// Op that internally creates TOC entry for the "_$TLSML" symbol, generates
-    /// GET_TLS_MOD node which will be expanded into a call to __tls_get_mod,
-    /// and then add the variable offset with the result from the call.
-    /// This node is used in both 32-bit and 64-bit modes. The only difference
-    /// is register class.
+    /// GET_TLS_MOD_AIX node which will be expanded into a call to
+    /// __tls_get_mod, and then add the variable offset with the result from the
+    /// call. This node is used in both 32-bit and 64-bit modes. The only
+    /// difference is register class.
     TLSLD_AIX,
 
     /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 3d7e64d29fe83a..d396d57e8fe95e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -213,7 +213,7 @@ def PPCaddTls     : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
 def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
 def PPCaddiTlsgdL   : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
 def PPCgetTlsAddr   : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
-def PPCgetTlsMod   : SDNode<"PPCISD::GET_TLS_MOD", SDTIntUnaryOp>;
+def PPCgetTlsMod   : SDNode<"PPCISD::GET_TLS_MOD_AIX", SDTIntUnaryOp>;
 def PPCgetTpointer : SDNode<"PPCISD::GET_TPOINTER", SDTIntLeaf, []>;
 def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR",
                                SDTypeProfile<1, 3, [
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 32a75a32b9d87f..339c8d4ad383e7 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -160,8 +160,10 @@ namespace {
         if (IsAIX) {
           if (MI.getOpcode() == PPC::TLSLDAIX8 ||
               MI.getOpcode() == PPC::TLSLDAIX) {
-            // For Local-Dynamic
-            auto &Subtarget = MBB.getParent()->getSubtarget<PPCSubtarget>();
+            // For Local-Dynamic, the module handle is copied in r3. The copy is
+            // followed by GETtlsMOD32AIX/GETtlsMOD64AIX.
+            const PPCSubtarget &Subtarget =
+                MBB.getParent()->getSubtarget<PPCSubtarget>();
             bool IsLargeModel =
                 Subtarget.getTargetMachine().getCodeModel() == CodeModel::Large;
             Register ModuleHandleHReg;
@@ -190,9 +192,9 @@ namespace {
                 .addReg(GPR3)
                 .addReg(MI.getOperand(1).getReg());
           } else {
-            // For Global-Dynamic
-            // The variable offset and region handle are copied in r4 and r3.
-            // The copies are followed by GETtlsADDR32AIX/GETtlsADDR64AIX.
+            // For Global-Dynamic, the variable offset and region handle are
+            // copied in r4 and r3. The copies are followed by
+            // GETtlsADDR32AIX/GETtlsADDR64AIX.
             if (!IsTLSTPRelMI) {
               BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
                   .addReg(MI.getOperand(1).getReg());

>From 603893b4ef89928c2a7e6a3da0f3e6621bf8ffd1 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Mon, 18 Sep 2023 04:03:32 -0400
Subject: [PATCH 03/27] [NFC] address comments.

---
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     | 9 +++++----
 llvm/lib/Target/PowerPC/PPCISelLowering.h     | 8 ++++----
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 2 +-
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 1c197521f409a1..6211abd5ad8c47 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -677,10 +677,11 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
     // in R3, need to generate branch to .__tls_get_mod.
     Register VarOffsetReg = Subtarget->isPPC64() ? PPC::X4 : PPC::R4;
     (void)VarOffsetReg;
-    assert((MI->getNumExplicitOperands() < 3 ||
-            (MI->getOperand(2).isReg() &&
-             MI->getOperand(2).getReg() == VarOffsetReg)) &&
-           "GETtls[ld]ADDR[32] must read GPR4");
+    assert(MI->getOpcode() == PPC::GETtlsMOD32AIX ||
+           MI->getOpcode() == PPC::GETtlsMOD64AIX ||
+           (MI->getOperand(2).isReg() &&
+            MI->getOperand(2).getReg() == VarOffsetReg) &&
+               "GETtls[ld]ADDR[32] must read GPR4");
     EmitAIXTlsCallHelper(MI);
     return;
   }
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 3aaedb3efc0bd1..fed03835fd99ab 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -370,20 +370,20 @@ namespace llvm {
     /// G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY
     /// Op that combines two register copies of TOC entries
     /// (region handle into R3 and variable offset into R4) followed by a
-    /// GET_TLS_ADDR node which will be expanded to a call to __tls_get_addr.
+    /// GET_TLS_ADDR node which will be expanded to a call to .__tls_get_addr.
     /// This node is used in 64-bit mode as well (in which case the result is
     /// G8RC and inputs are X3/X4).
     TLSGD_AIX,
 
     /// %x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model,
-    /// produces a call to __tls_get_mod(_$TLSML\@ml).
+    /// produces a call to .__tls_get_mod(_$TLSML\@ml).
     GET_TLS_MOD_AIX,
 
     /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(variable offset)
     /// Op that internally creates TOC entry for the "_$TLSML" symbol, generates
     /// GET_TLS_MOD_AIX node which will be expanded into a call to
-    /// __tls_get_mod, and then add the variable offset with the result from the
-    /// call. This node is used in both 32-bit and 64-bit modes. The only
+    /// .__tls_get_mod, and then add the variable offset with the result from
+    /// the call. This node is used in both 32-bit and 64-bit modes. The only
     /// difference is register class.
     TLSLD_AIX,
 
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 339c8d4ad383e7..3fd57f4159d3cd 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -186,7 +186,7 @@ namespace {
                             : Register(Subtarget.getTOCPointerRegister()));
             BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
                 .addReg(MHReg);
-            // The call to __tls_get_mod.
+            // The call to .__tls_get_mod.
             BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3);
             BuildMI(MBB, I, DL, TII->get(Is64Bit ? PPC::ADD8 : PPC::ADD4), GPR3)
                 .addReg(GPR3)

>From 3ae2695731101a044731cb5e84de02559413ec1f Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Wed, 20 Sep 2023 03:44:10 -0400
Subject: [PATCH 04/27] [NFC] address comments

---
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp   | 18 +++++++++---------
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 14 ++++++--------
 2 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 6211abd5ad8c47..dc2adc352b01df 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -671,17 +671,17 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
          "GETtls[ld]ADDR[32] must read GPR3");
 
   if (Subtarget->isAIXABI()) {
-    // On AIX, for TLSGD the variable offset should already be in R4 and the
-    // region handle should already be in R3, need to generate an absolute
-    // branch to .__tls_get_addr. For TLSLD the module handle should already be
-    // in R3, need to generate branch to .__tls_get_mod.
+    // For TLSGD, the variable offset should already be in R4 and the region
+    // handle should already be in R3, generate absolute branch to
+    // .__tls_get_addr. For TLSLD, the module handle should already be in R3,
+    // generate branch to .__tls_get_mod.
     Register VarOffsetReg = Subtarget->isPPC64() ? PPC::X4 : PPC::R4;
     (void)VarOffsetReg;
-    assert(MI->getOpcode() == PPC::GETtlsMOD32AIX ||
-           MI->getOpcode() == PPC::GETtlsMOD64AIX ||
-           (MI->getOperand(2).isReg() &&
-            MI->getOperand(2).getReg() == VarOffsetReg) &&
-               "GETtls[ld]ADDR[32] must read GPR4");
+    assert((MI->getOpcode() == PPC::GETtlsMOD32AIX ||
+            MI->getOpcode() == PPC::GETtlsMOD64AIX ||
+            (MI->getOperand(2).isReg() &&
+             MI->getOperand(2).getReg() == VarOffsetReg)) &&
+           "GETtls[ld]ADDR[32] must read GPR4");
     EmitAIXTlsCallHelper(MI);
     return;
   }
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 87be6cc1f1be18..8f6c57ae746c24 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3428,14 +3428,12 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
     return DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, VariableOffset);
   }
 
-  // The Local-Exec, Initial-Exec, Local-Dynamic, and General-Dynamic TLS models
-  // are currently supported access models. If Local- or Initial-exec or
-  // local-dynamic is not possible or specified, all GlobalTLSAddress nodes are
-  // lowered using the general-dynamic model. We need to generate two TOC
-  // entries, one for the variable offset, one for the region handle. The global
-  // address for the TOC entry of the region handle is created with the
-  // MO_TLSGDM_FLAG flag and the global address for the TOC entry of the
-  // variable offset is created with MO_TLSGD_FLAG.
+  // If Local- or Initial-exec or Local-dynamic is not possible or specified,
+  // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
+  // need to generate two TOC entries, one for the variable offset, one for the
+  // region handle. The global address for the TOC entry of the region handle is
+  // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
+  // entry of the variable offset is created with MO_TLSGD_FLAG.
   SDValue VariableOffsetTGA =
       DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
   SDValue RegionHandleTGA =

>From 778607a522a03bb0d0076362f733b67dc2671a1b Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Wed, 20 Sep 2023 23:09:46 -0400
Subject: [PATCH 05/27] [NFC] address comment

---
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index e4fee9c2fff23e..a3590580777f3a 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1563,7 +1563,7 @@ def GETtlsADDR64AIX :
                     "GETtlsADDR64AIX",
                     [(set i64:$rD,
                       (PPCgetTlsAddr i64:$offset, i64:$handle))]>, isPPC64;
-// On AIX, the call to __tls_get_mod need one input in X3 for the module handle.
+// On AIX, the call to .__tls_get_mod need one input in X3 for the module handle.
 def GETtlsMOD64AIX :
   PPCEmitTimePseudo<(outs g8rc:$rD),(ins g8rc:$handle),
                     "GETtlsMOD64AIX",

>From 07b639997811a4145932ca73ccb377c8452edae3 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Thu, 21 Sep 2023 23:03:58 -0400
Subject: [PATCH 06/27] Attempt to address comment: use r4 for LoadOffsetToc

However looks like machine-scheduler is interfering, and still schedule
LoadOffsetToc ahead of the .__tls_get_mod call.
---
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp |  40 ++++-
 .../test/CodeGen/PowerPC/aix-tls-gd-double.ll |  78 +++++----
 llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll   |  78 +++++----
 .../CodeGen/PowerPC/aix-tls-gd-longlong.ll    | 162 +++++++++---------
 .../CodeGen/PowerPC/aix-tls-local-dynamic.ll  | 128 +++++++-------
 .../PowerPC/aix-tls-xcoff-reloc-large.ll      |  49 +++---
 .../CodeGen/PowerPC/aix-tls-xcoff-reloc.ll    |  48 +++---
 7 files changed, 308 insertions(+), 275 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 3fd57f4159d3cd..bb03f8f911c491 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -160,37 +160,61 @@ namespace {
         if (IsAIX) {
           if (MI.getOpcode() == PPC::TLSLDAIX8 ||
               MI.getOpcode() == PPC::TLSLDAIX) {
-            // For Local-Dynamic, the module handle is copied in r3. The copy is
-            // followed by GETtlsMOD32AIX/GETtlsMOD64AIX.
+            // For Local-Dynamic, need to swap the position of VarOffsetInst and
+            // MI, so that VarOffsetInst can use R/X4 to reduce register
+            // pressure.
             const PPCSubtarget &Subtarget =
                 MBB.getParent()->getSubtarget<PPCSubtarget>();
             bool IsLargeModel =
                 Subtarget.getTargetMachine().getCodeModel() == CodeModel::Large;
-            Register ModuleHandleHReg;
             unsigned LDTocOp =
                 Is64Bit ? (IsLargeModel ? PPC::LDtocL : PPC::LDtoc)
                         : (IsLargeModel ? PPC::LWZtocL : PPC::LWZtoc);
+            assert(RegInfo.hasOneDef(MI.getOperand(1).getReg()) &&
+                   "TLSLDAIX expects single def of its operand.");
+            MachineInstr *VarOffsetInst =
+                RegInfo.getOneDef(MI.getOperand(1).getReg())->getParent();
+            assert(VarOffsetInst->getOpcode() == LDTocOp &&
+                   "Unexpected LDTocOp.");
+            if (IsLargeModel) {
+              // Get the ADDIS instruction when using large model.
+              assert(RegInfo.hasOneDef(VarOffsetInst->getOperand(2).getReg()) &&
+                     "LDTocOp expects single def of its operand.");
+              VarOffsetInst =
+                  RegInfo.getOneDef(VarOffsetInst->getOperand(2).getReg())
+                      ->getParent();
+              assert(VarOffsetInst->getOpcode() ==
+                         (Is64Bit ? PPC::ADDIStocHA8 : PPC::ADDIStocHA) &&
+                     "Unexpected ADDIStocHA.");
+              // FIXME: machine-scheduler could schedule ADDIStocHA ahead of
+              // GETtlsMODAIX, and still has to use extra register.
+            }
+            Register ModuleHandleHReg;
             if (IsLargeModel) {
               ModuleHandleHReg = RegInfo.createVirtualRegister(GPRNoZero);
-              BuildMI(MBB, I, DL,
+              BuildMI(MBB, *VarOffsetInst, DL,
                       TII->get(Is64Bit ? PPC::ADDIStocHA8 : PPC::ADDIStocHA),
                       ModuleHandleHReg)
                   .addReg(Subtarget.getTOCPointerRegister())
                   .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG);
             }
             Register MHReg = RegInfo.createVirtualRegister(GPRNoZero);
-            BuildMI(MBB, I, DL, TII->get(LDTocOp), MHReg)
+            BuildMI(MBB, *VarOffsetInst, DL, TII->get(LDTocOp), MHReg)
                 .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG)
                 .addReg(IsLargeModel
                             ? ModuleHandleHReg
                             : Register(Subtarget.getTOCPointerRegister()));
-            BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
+            // The module handle is copied in r3.
+            BuildMI(MBB, *VarOffsetInst, DL, TII->get(TargetOpcode::COPY), GPR3)
                 .addReg(MHReg);
             // The call to .__tls_get_mod.
-            BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3);
+            BuildMI(MBB, *VarOffsetInst, DL, TII->get(Opc2), GPR3).addReg(GPR3);
+            // Copy VarOffset to R/X4
+            BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
+                .addReg(MI.getOperand(1).getReg());
             BuildMI(MBB, I, DL, TII->get(Is64Bit ? PPC::ADD8 : PPC::ADD4), GPR3)
                 .addReg(GPR3)
-                .addReg(MI.getOperand(1).getReg());
+                .addReg(GPR4);
           } else {
             // For Global-Dynamic, the variable offset and region handle are
             // copied in r4 and r3. The copies are followed by
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
index 13bef83cf50fd7..ea1a2b8fe9ade7 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
@@ -157,10 +157,10 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    lwz 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL32-NEXT:    lwz 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
-; SMALL32-NEXT:    add 3, 3, 6
+; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL32-NEXT:    add 3, 3, 4
 ; SMALL32-NEXT:    stfd 1, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
@@ -173,11 +173,12 @@ define void @storesTIInit(double %Val) #0 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 6, L..C4 at l(3)
-; LARGE32-NEXT:    addis 3, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
+; LARGE32-NEXT:    addis 4, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 6, L..C5 at l(4)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    add 3, 3, 6
+; LARGE32-NEXT:    mr 4, 6
+; LARGE32-NEXT:    add 3, 3, 4
 ; LARGE32-NEXT:    stfd 1, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
@@ -189,10 +190,10 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    ld 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL64-NEXT:    ld 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL64-NEXT:    add 3, 3, 6
+; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL64-NEXT:    add 3, 3, 4
 ; SMALL64-NEXT:    stfd 1, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -203,13 +204,13 @@ define void @storesTIInit(double %Val) #0 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    addis 3, L..C4 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 6, L..C4 at l(3)
-; LARGE64-NEXT:    addis 3, L..C5 at u(2)
-; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    addis 3, L..C4 at u(2)
+; LARGE64-NEXT:    addis 6, L..C5 at u(2)
+; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
-; LARGE64-NEXT:    add 3, 3, 6
+; LARGE64-NEXT:    ld 4, L..C5 at l(6)
+; LARGE64-NEXT:    add 3, 3, 4
 ; LARGE64-NEXT:    stfd 1, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -457,11 +458,11 @@ define double @loadsTIInit() #1 {
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    lwz 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL32-NEXT:    lwz 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL32-NEXT:    add 3, 3, 4
 ; SMALL32-NEXT:    lwz 4, L..C8(2) # @GInit
-; SMALL32-NEXT:    add 3, 3, 6
 ; SMALL32-NEXT:    lfd 0, 0(3)
 ; SMALL32-NEXT:    lfd 1, 0(4)
 ; SMALL32-NEXT:    fadd 1, 0, 1
@@ -476,11 +477,12 @@ define double @loadsTIInit() #1 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 6, L..C4 at l(3)
-; LARGE32-NEXT:    addis 3, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
+; LARGE32-NEXT:    addis 4, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 6, L..C5 at l(4)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    add 3, 3, 6
+; LARGE32-NEXT:    mr 4, 6
+; LARGE32-NEXT:    add 3, 3, 4
 ; LARGE32-NEXT:    lfd 0, 0(3)
 ; LARGE32-NEXT:    addis 3, L..C8 at u(2)
 ; LARGE32-NEXT:    lwz 3, L..C8 at l(3)
@@ -496,11 +498,11 @@ define double @loadsTIInit() #1 {
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    ld 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL64-NEXT:    ld 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL64-NEXT:    add 3, 3, 4
 ; SMALL64-NEXT:    ld 4, L..C8(2) # @GInit
-; SMALL64-NEXT:    add 3, 3, 6
 ; SMALL64-NEXT:    lfd 0, 0(3)
 ; SMALL64-NEXT:    lfd 1, 0(4)
 ; SMALL64-NEXT:    fadd 1, 0, 1
@@ -513,13 +515,13 @@ define double @loadsTIInit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    addis 3, L..C4 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 6, L..C4 at l(3)
-; LARGE64-NEXT:    addis 3, L..C5 at u(2)
-; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    addis 3, L..C4 at u(2)
+; LARGE64-NEXT:    addis 6, L..C5 at u(2)
+; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
-; LARGE64-NEXT:    add 3, 3, 6
+; LARGE64-NEXT:    ld 4, L..C5 at l(6)
+; LARGE64-NEXT:    add 3, 3, 4
 ; LARGE64-NEXT:    addis 4, L..C8 at u(2)
 ; LARGE64-NEXT:    lfd 0, 0(3)
 ; LARGE64-NEXT:    ld 3, L..C8 at l(4)
@@ -641,10 +643,10 @@ entry:
 ; SMALL32-LABEL:  L..C3:
 ; SMALL32-NEXT:   .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL32-LABEL:  L..C4:
-; SMALL32-NEXT:   .tc TIInit[TC],TIInit[TL]@ld
-; SMALL32-LABEL:  L..C5:
 ; SMALL32-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; SMALL32-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL32-LABEL:  L..C5:
+; SMALL32-NEXT:   .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL32-LABEL:  L..C6:
 ; SMALL32-NEXT:   .tc .TWInit[TC],TWInit[TL]@m
 ; SMALL32-LABEL:  L..C7:
@@ -662,10 +664,10 @@ entry:
 ; LARGE32-LABEL:  L..C3:
 ; LARGE32-NEXT:   .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL:  L..C4:
-; LARGE32-NEXT:   .tc TIInit[TE],TIInit[TL]@ld
-; LARGE32-LABEL:  L..C5:
 ; LARGE32-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE32-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE32-LABEL:  L..C5:
+; LARGE32-NEXT:   .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE32-LABEL:  L..C6:
 ; LARGE32-NEXT:   .tc .TWInit[TE],TWInit[TL]@m
 ; LARGE32-LABEL:  L..C7:
@@ -683,10 +685,10 @@ entry:
 ; SMALL64-LABEL:  L..C3:
 ; SMALL64-NEXT:  .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL64-LABEL:  L..C4:
-; SMALL64-NEXT:  .tc TIInit[TC],TIInit[TL]@ld
-; SMALL64-LABEL:  L..C5:
 ; SMALL64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; SMALL64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL64-LABEL:  L..C5:
+; SMALL64-NEXT:  .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL64-LABEL:  L..C6:
 ; SMALL64-NEXT:  .tc .TWInit[TC],TWInit[TL]@m
 ; SMALL64-LABEL:  L..C7:
@@ -704,10 +706,10 @@ entry:
 ; LARGE64-LABEL:  L..C3:
 ; LARGE64-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE64-LABEL:  L..C4:
-; LARGE64-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
-; LARGE64-LABEL:  L..C5:
 ; LARGE64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE64-LABEL:  L..C5:
+; LARGE64-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE64-LABEL:  L..C6:
 ; LARGE64-NEXT:  .tc .TWInit[TE],TWInit[TL]@m
 ; LARGE64-LABEL:  L..C7:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
index dc75db43d38c70..e1cfb6fd767cbf 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
@@ -165,10 +165,10 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    mr 6, 3
-; SMALL32-NEXT:    lwz 7, L..C4(2)
-; SMALL32-NEXT:    lwz 3, L..C5(2)
+; SMALL32-NEXT:    lwz 3, L..C4(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
-; SMALL32-NEXT:    add 3, 3, 7
+; SMALL32-NEXT:    lwz 4, L..C5(2)
+; SMALL32-NEXT:    add 3, 3, 4
 ; SMALL32-NEXT:    stw 6, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
@@ -182,11 +182,12 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 6, 3
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 7, L..C4 at l(3)
-; LARGE32-NEXT:    addis 3, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
+; LARGE32-NEXT:    addis 4, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 7, L..C5 at l(4)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    add 3, 3, 7
+; LARGE32-NEXT:    mr 4, 7
+; LARGE32-NEXT:    add 3, 3, 4
 ; LARGE32-NEXT:    stw 6, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
@@ -199,10 +200,10 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    mr 6, 3
-; SMALL64-NEXT:    ld 7, L..C4(2)
-; SMALL64-NEXT:    ld 3, L..C5(2)
+; SMALL64-NEXT:    ld 3, L..C4(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL64-NEXT:    add 3, 3, 7
+; SMALL64-NEXT:    ld 4, L..C5(2)
+; SMALL64-NEXT:    add 3, 3, 4
 ; SMALL64-NEXT:    stw 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -213,14 +214,14 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
-; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 7, L..C4 at l(3)
-; LARGE64-NEXT:    addis 3, L..C5 at u(2)
-; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    addis 7, L..C5 at u(2)
+; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
-; LARGE64-NEXT:    add 3, 3, 7
+; LARGE64-NEXT:    ld 4, L..C5 at l(7)
+; LARGE64-NEXT:    add 3, 3, 4
 ; LARGE64-NEXT:    stw 6, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -472,11 +473,11 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    lwz 6, L..C4(2)
-; SMALL32-NEXT:    lwz 3, L..C5(2)
+; SMALL32-NEXT:    lwz 3, L..C4(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    lwz 4, L..C5(2)
+; SMALL32-NEXT:    add 3, 3, 4
 ; SMALL32-NEXT:    lwz 4, L..C8(2)
-; SMALL32-NEXT:    add 3, 3, 6
 ; SMALL32-NEXT:    lwz 3, 0(3)
 ; SMALL32-NEXT:    lwz 4, 0(4)
 ; SMALL32-NEXT:    add 3, 4, 3
@@ -491,11 +492,12 @@ define i32 @loadsTIUninit() #1 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 6, L..C4 at l(3)
-; LARGE32-NEXT:    addis 3, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
+; LARGE32-NEXT:    addis 4, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 6, L..C5 at l(4)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    add 3, 3, 6
+; LARGE32-NEXT:    mr 4, 6
+; LARGE32-NEXT:    add 3, 3, 4
 ; LARGE32-NEXT:    lwz 3, 0(3)
 ; LARGE32-NEXT:    addis 4, L..C8 at u(2)
 ; LARGE32-NEXT:    lwz 4, L..C8 at l(4)
@@ -511,11 +513,11 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    ld 6, L..C4(2)
-; SMALL64-NEXT:    ld 3, L..C5(2)
+; SMALL64-NEXT:    ld 3, L..C4(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    ld 4, L..C5(2)
+; SMALL64-NEXT:    add 3, 3, 4
 ; SMALL64-NEXT:    ld 4, L..C8(2)
-; SMALL64-NEXT:    add 3, 3, 6
 ; SMALL64-NEXT:    lwz 3, 0(3)
 ; SMALL64-NEXT:    lwz 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -528,14 +530,14 @@ define i32 @loadsTIUninit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    addis 3, L..C4 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 6, L..C4 at l(3)
-; LARGE64-NEXT:    addis 3, L..C5 at u(2)
-; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    addis 3, L..C4 at u(2)
+; LARGE64-NEXT:    addis 6, L..C5 at u(2)
+; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    ld 4, L..C5 at l(6)
+; LARGE64-NEXT:    add 3, 3, 4
 ; LARGE64-NEXT:    addis 4, L..C8 at u(2)
-; LARGE64-NEXT:    add 3, 3, 6
 ; LARGE64-NEXT:    ld 4, L..C8 at l(4)
 ; LARGE64-NEXT:    lwz 3, 0(3)
 ; LARGE64-NEXT:    lwz 4, 0(4)
@@ -656,10 +658,10 @@ entry:
 ; SMALL32-LABEL: L..C3:
 ; SMALL32-NEXT:	 .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL32-LABEL: L..C4:
-; SMALL32-NEXT:	 .tc TIUninit[TC],TIUninit[UL]@ld
-; SMALL32-LABEL: L..C5:
 ; SMALL32-NEXT:	 .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; SMALL32-NEXT:	 .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL32-LABEL: L..C5:
+; SMALL32-NEXT:	 .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL32-LABEL: L..C6:
 ; SMALL32-NEXT:	 .tc .TWUninit[TC],TWUninit[TL]@m
 ; SMALL32-LABEL: L..C7:
@@ -677,10 +679,10 @@ entry:
 ; LARGE32-LABEL: L..C3:
 ; LARGE32-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL: L..C4:
-; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
-; LARGE32-LABEL: L..C5:
 ; LARGE32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE32-LABEL: L..C5:
+; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE32-LABEL: L..C6:
 ; LARGE32-NEXT:  .tc .TWUninit[TE],TWUninit[TL]@m
 ; LARGE32-LABEL: L..C7:
@@ -698,10 +700,10 @@ entry:
 ; SMALL64-LABEL:  L..C3:
 ; SMALL64-NEXT:   .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL64-LABEL:  L..C4:
-; SMALL64-NEXT:   .tc TIUninit[TC],TIUninit[UL]@ld
-; SMALL64-LABEL:  L..C5:
 ; SMALL64-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; SMALL64-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL64-LABEL:  L..C5:
+; SMALL64-NEXT:   .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL64-LABEL:  L..C6:
 ; SMALL64-NEXT:   .tc .TWUninit[TC],TWUninit[TL]@m
 ; SMALL64-LABEL:  L..C7:
@@ -719,10 +721,10 @@ entry:
 ; LARGE64-LABEL:  L..C3:
 ; LARGE64-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE64-LABEL:  L..C4:
-; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
-; LARGE64-LABEL:  L..C5:
 ; LARGE64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE64-LABEL:  L..C5:
+; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE64-LABEL:  L..C6:
 ; LARGE64-NEXT:  .tc .TWUninit[TE],TWUninit[TL]@m
 ; LARGE64-LABEL:  L..C7:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
index d19b5ad9b5e50a..2e18d6c9bfd7e5 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
@@ -99,11 +99,11 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    mr 7, 3
-; SMALL32-NEXT:    lwz 8, L..C2(2)
 ; SMALL32-NEXT:    mr 6, 4
-; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
-; SMALL32-NEXT:    add 3, 3, 8
+; SMALL32-NEXT:    lwz 4, L..C3(2)
+; SMALL32-NEXT:    add 3, 3, 4
 ; SMALL32-NEXT:    stw 6, 4(3)
 ; SMALL32-NEXT:    stw 7, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
@@ -119,11 +119,12 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; LARGE32-NEXT:    mr 7, 3
 ; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    addis 3, L..C2 at u(2)
-; LARGE32-NEXT:    lwz 8, L..C2 at l(3)
-; LARGE32-NEXT:    addis 3, L..C3 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
+; LARGE32-NEXT:    lwz 3, L..C2 at l(3)
+; LARGE32-NEXT:    addis 4, L..C3 at u(2)
+; LARGE32-NEXT:    lwz 8, L..C3 at l(4)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    add 3, 3, 8
+; LARGE32-NEXT:    mr 4, 8
+; LARGE32-NEXT:    add 3, 3, 4
 ; LARGE32-NEXT:    stw 6, 4(3)
 ; LARGE32-NEXT:    stw 7, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
@@ -137,10 +138,10 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    mr 6, 3
-; SMALL64-NEXT:    ld 7, L..C2(2)
-; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL64-NEXT:    add 3, 3, 7
+; SMALL64-NEXT:    ld 4, L..C3(2)
+; SMALL64-NEXT:    add 3, 3, 4
 ; SMALL64-NEXT:    std 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -151,14 +152,14 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C2 at u(2)
-; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 7, L..C2 at l(3)
-; LARGE64-NEXT:    addis 3, L..C3 at u(2)
-; LARGE64-NEXT:    ld 3, L..C3 at l(3)
+; LARGE64-NEXT:    addis 7, L..C3 at u(2)
+; LARGE64-NEXT:    ld 3, L..C2 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
-; LARGE64-NEXT:    add 3, 3, 7
+; LARGE64-NEXT:    ld 4, L..C3 at l(7)
+; LARGE64-NEXT:    add 3, 3, 4
 ; LARGE64-NEXT:    std 6, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -177,11 +178,11 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    mr 7, 3
-; SMALL32-NEXT:    lwz 8, L..C4(2)
 ; SMALL32-NEXT:    mr 6, 4
-; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
-; SMALL32-NEXT:    add 3, 3, 8
+; SMALL32-NEXT:    lwz 4, L..C4(2)
+; SMALL32-NEXT:    add 3, 3, 4
 ; SMALL32-NEXT:    stw 6, 4(3)
 ; SMALL32-NEXT:    stw 7, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
@@ -196,12 +197,13 @@ define void @storesTIInit(i64 %Val) #0 {
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 7, 3
 ; LARGE32-NEXT:    mr 6, 4
-; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 8, L..C4 at l(3)
-; LARGE32-NEXT:    addis 3, L..C3 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
+; LARGE32-NEXT:    addis 3, L..C2 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C2 at l(3)
+; LARGE32-NEXT:    addis 4, L..C4 at u(2)
+; LARGE32-NEXT:    lwz 8, L..C4 at l(4)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    add 3, 3, 8
+; LARGE32-NEXT:    mr 4, 8
+; LARGE32-NEXT:    add 3, 3, 4
 ; LARGE32-NEXT:    stw 6, 4(3)
 ; LARGE32-NEXT:    stw 7, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
@@ -215,10 +217,10 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    mr 6, 3
-; SMALL64-NEXT:    ld 7, L..C4(2)
-; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL64-NEXT:    add 3, 3, 7
+; SMALL64-NEXT:    ld 4, L..C4(2)
+; SMALL64-NEXT:    add 3, 3, 4
 ; SMALL64-NEXT:    std 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -229,14 +231,14 @@ define void @storesTIInit(i64 %Val) #0 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    mr 6, 3
-; LARGE64-NEXT:    addis 3, L..C4 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 7, L..C4 at l(3)
-; LARGE64-NEXT:    addis 3, L..C3 at u(2)
-; LARGE64-NEXT:    ld 3, L..C3 at l(3)
+; LARGE64-NEXT:    mr 6, 3
+; LARGE64-NEXT:    addis 3, L..C2 at u(2)
+; LARGE64-NEXT:    addis 7, L..C4 at u(2)
+; LARGE64-NEXT:    ld 3, L..C2 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
-; LARGE64-NEXT:    add 3, 3, 7
+; LARGE64-NEXT:    ld 4, L..C4 at l(7)
+; LARGE64-NEXT:    add 3, 3, 4
 ; LARGE64-NEXT:    std 6, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -331,7 +333,7 @@ define i64 @loadsTGInit() #1 {
 ; SMALL32-NEXT:    lwz 4, L..C1(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    lwz 4, L..C7(2) # @GInit
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -374,7 +376,7 @@ define i64 @loadsTGInit() #1 {
 ; SMALL64-NEXT:    ld 4, L..C1(2)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    ld 4, L..C7(2) # @GInit
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -416,11 +418,11 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    lwz 6, L..C2(2)
-; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
-; SMALL32-NEXT:    lwz 4, L..C7(2)
-; SMALL32-NEXT:    add 3, 3, 6
+; SMALL32-NEXT:    lwz 4, L..C3(2)
+; SMALL32-NEXT:    add 3, 3, 4
+; SMALL32-NEXT:    lwz 4, L..C7(2) # @GInit
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -438,11 +440,12 @@ define i64 @loadsTIUninit() #1 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C2 at u(2)
-; LARGE32-NEXT:    lwz 6, L..C2 at l(3)
-; LARGE32-NEXT:    addis 3, L..C3 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
+; LARGE32-NEXT:    lwz 3, L..C2 at l(3)
+; LARGE32-NEXT:    addis 4, L..C3 at u(2)
+; LARGE32-NEXT:    lwz 6, L..C3 at l(4)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    add 3, 3, 6
+; LARGE32-NEXT:    mr 4, 6
+; LARGE32-NEXT:    add 3, 3, 4
 ; LARGE32-NEXT:    lwz 4, 4(3)
 ; LARGE32-NEXT:    lwz 3, 0(3)
 ; LARGE32-NEXT:    addis 5, L..C7 at u(2)
@@ -461,11 +464,11 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    ld 6, L..C2(2)
-; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL64-NEXT:    ld 4, L..C7(2)
-; SMALL64-NEXT:    add 3, 3, 6
+; SMALL64-NEXT:    ld 4, L..C3(2)
+; SMALL64-NEXT:    add 3, 3, 4
+; SMALL64-NEXT:    ld 4, L..C7(2) # @GInit
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -478,14 +481,14 @@ define i64 @loadsTIUninit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    addis 3, L..C2 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 6, L..C2 at l(3)
-; LARGE64-NEXT:    addis 3, L..C3 at u(2)
-; LARGE64-NEXT:    ld 3, L..C3 at l(3)
+; LARGE64-NEXT:    addis 3, L..C2 at u(2)
+; LARGE64-NEXT:    addis 6, L..C3 at u(2)
+; LARGE64-NEXT:    ld 3, L..C2 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    ld 4, L..C3 at l(6)
+; LARGE64-NEXT:    add 3, 3, 4
 ; LARGE64-NEXT:    addis 4, L..C7 at u(2)
-; LARGE64-NEXT:    add 3, 3, 6
 ; LARGE64-NEXT:    ld 4, L..C7 at l(4)
 ; LARGE64-NEXT:    ld 3, 0(3)
 ; LARGE64-NEXT:    ld 4, 0(4)
@@ -508,11 +511,11 @@ define i64 @loadsTIInit() #1 {
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    lwz 6, L..C4(2)
-; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
-; SMALL32-NEXT:    lwz 4, L..C7(2)
-; SMALL32-NEXT:    add 3, 3, 6
+; SMALL32-NEXT:    lwz 4, L..C4(2)
+; SMALL32-NEXT:    add 3, 3, 4
+; SMALL32-NEXT:    lwz 4, L..C7(2) # @GInit
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -529,12 +532,13 @@ define i64 @loadsTIInit() #1 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 6, L..C4 at l(3)
-; LARGE32-NEXT:    addis 3, L..C3 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
+; LARGE32-NEXT:    addis 3, L..C2 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C2 at l(3)
+; LARGE32-NEXT:    addis 4, L..C4 at u(2)
+; LARGE32-NEXT:    lwz 6, L..C4 at l(4)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    add 3, 3, 6
+; LARGE32-NEXT:    mr 4, 6
+; LARGE32-NEXT:    add 3, 3, 4
 ; LARGE32-NEXT:    lwz 4, 4(3)
 ; LARGE32-NEXT:    lwz 3, 0(3)
 ; LARGE32-NEXT:    addis 5, L..C7 at u(2)
@@ -553,11 +557,11 @@ define i64 @loadsTIInit() #1 {
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    ld 6, L..C4(2)
-; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL64-NEXT:    ld 4, L..C7(2)
-; SMALL64-NEXT:    add 3, 3, 6
+; SMALL64-NEXT:    ld 4, L..C4(2)
+; SMALL64-NEXT:    add 3, 3, 4
+; SMALL64-NEXT:    ld 4, L..C7(2) # @GInit
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -570,14 +574,14 @@ define i64 @loadsTIInit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    addis 3, L..C4 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 6, L..C4 at l(3)
-; LARGE64-NEXT:    addis 3, L..C3 at u(2)
-; LARGE64-NEXT:    ld 3, L..C3 at l(3)
+; LARGE64-NEXT:    addis 3, L..C2 at u(2)
+; LARGE64-NEXT:    addis 6, L..C4 at u(2)
+; LARGE64-NEXT:    ld 3, L..C2 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    ld 4, L..C4 at l(6)
+; LARGE64-NEXT:    add 3, 3, 4
 ; LARGE64-NEXT:    addis 4, L..C7 at u(2)
-; LARGE64-NEXT:    add 3, 3, 6
 ; LARGE64-NEXT:    ld 4, L..C7 at l(4)
 ; LARGE64-NEXT:    ld 3, 0(3)
 ; LARGE64-NEXT:    ld 4, 0(4)
@@ -603,7 +607,7 @@ define i64 @loadsTWInit() #1 {
 ; SMALL32-NEXT:    lwz 4, L..C6(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    lwz 4, L..C7(2) # @GInit
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -646,7 +650,7 @@ define i64 @loadsTWInit() #1 {
 ; SMALL64-NEXT:    ld 4, L..C6(2)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    ld 4, L..C7(2) # @GInit
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -700,10 +704,10 @@ entry:
 ; SMALL32-LABEL:  L..C1:
 ; SMALL32-NEXT:  .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL32-LABEL:  L..C2:
-; SMALL32-NEXT:  .tc TIUninit[TC],TIUninit[UL]@ld
-; SMALL32-LABEL:  L..C3:
 ; SMALL32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; SMALL32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL32-LABEL:  L..C3:
+; SMALL32-NEXT:  .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL32-LABEL:  L..C4:
 ; SMALL32-NEXT:  .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL32-LABEL:  L..C5:
@@ -719,10 +723,10 @@ entry:
 ; LARGE32-LABEL:  L..C1:
 ; LARGE32-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL:  L..C2:
-; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
-; LARGE32-LABEL:  L..C3:
 ; LARGE32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE32-LABEL:  L..C3:
+; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE32-LABEL:  L..C4:
 ; LARGE32-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE32-LABEL:  L..C5:
@@ -738,10 +742,10 @@ entry:
 ; SMALL64-LABEL:  L..C1:
 ; SMALL64-NEXT:  .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL64-LABEL:  L..C2:
-; SMALL64-NEXT:  .tc TIUninit[TC],TIUninit[UL]@ld
-; SMALL64-LABEL:  L..C3:
 ; SMALL64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; SMALL64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL64-LABEL:  L..C3:
+; SMALL64-NEXT:  .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL64-LABEL:  L..C4:
 ; SMALL64-NEXT:  .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL64-LABEL:  L..C5:
@@ -757,10 +761,10 @@ entry:
 ; LARGE64-LABEL:  L..C1:
 ; LARGE64-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE64-LABEL:  L..C2:
-; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
-; LARGE64-LABEL:  L..C3:
 ; LARGE64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE64-LABEL:  L..C3:
+; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE64-LABEL:  L..C4:
 ; LARGE64-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE64-LABEL:  L..C5:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
index 9faa99b6eb2ba7..0513f8de616201 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
@@ -16,22 +16,22 @@
 
 define i32 @loadTGInit() {
 ; SMALL-LABEL:  loadTGInit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        lwz [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
 ;
 ; LARGE-LABEL:  loadTGInit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        lwz [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
 entry:
@@ -42,22 +42,22 @@ entry:
 
 define void @storeTGInit(i32 noundef signext %i) {
 ; SMALL-LABEL:  storeTGInit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        stw [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
 ;
 ; LARGE-LABEL:  storeTGInit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        stw [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
 entry:
@@ -68,22 +68,22 @@ entry:
 
 define i32 @loadTGUninit() {
 ; SMALL-LABEL:  loadTGUninit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        lwz [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
 ;
 ; LARGE-LABEL:  loadTGUninit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        lwz [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
 entry:
@@ -94,22 +94,22 @@ entry:
 
 define void @storeTGUninit(i32 noundef signext %i) {
 ; SMALL-LABEL:  storeTGUninit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        stw [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
 ;
 ; LARGE-LABEL:  storeTGUninit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        stw [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
 entry:
@@ -120,22 +120,22 @@ entry:
 
 define i32 @loadTIInit() {
 ; SMALL-LABEL:  loadTIInit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        lwz [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
 ;
 ; LARGE-LABEL:  loadTIInit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        lwz [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
 entry:
@@ -146,22 +146,22 @@ entry:
 
 define void @storeTIInit(i32 noundef signext %i) {
 ; SMALL-LABEL:  storeTIInit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        stw [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
 ;
 ; LARGE-LABEL:  storeTIInit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        stw [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
 entry:
@@ -172,22 +172,22 @@ entry:
 
 define i32 @loadTIUninit() {
 ; SMALL-LABEL:  loadTIUninit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        lwz [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
 ;
 ; LARGE-LABEL:  loadTIUninit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        lwz [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
 entry:
@@ -198,22 +198,22 @@ entry:
 
 define void @storeTIUninit(i32 noundef signext %i) {
 ; SMALL-LABEL:  storeTIUninit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        stw [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
 ;
 ; LARGE-LABEL:  storeTIUninit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        stw [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
 entry:
@@ -224,22 +224,22 @@ entry:
 
 define i32 @loadTWInit() {
 ; SMALL-LABEL:  loadTWInit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        lwz [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
 ;
 ; LARGE-LABEL:  loadTWInit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        lwz [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
 entry:
@@ -250,22 +250,22 @@ entry:
 
 define void @storeTWInit(i32 noundef signext %i) {
 ; SMALL-LABEL:  storeTWInit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        stw [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
 ;
 ; LARGE-LABEL:  storeTWInit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        stw [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
 entry:
@@ -276,22 +276,22 @@ entry:
 
 define i32 @loadTWUninit() {
 ; SMALL-LABEL:  loadTWUninit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        lwz [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
 ;
 ; LARGE-LABEL:  loadTWUninit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        lwz [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
 entry:
@@ -302,22 +302,22 @@ entry:
 
 define void @storeTWUninit(i32 noundef signext %i) {
 ; SMALL-LABEL:  storeTWUninit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        stw [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
 ;
 ; LARGE-LABEL:  storeTWUninit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        stw [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
 entry:
@@ -329,11 +329,11 @@ entry:
 ; SMALL:          .extern .__tls_get_mod[PR]
 ; LARGE:          .extern .__tls_get_mod[PR]
 
-; SMALL:        [[TGInitL]]:
-; SMALL-NEXT:   .tc TGInit[TC],TGInit[TL]@ld
 ; SMALL:        [[ModuleHandleL]]:
 ; SMALL-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; SMALL-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL:        [[TGInitL]]:
+; SMALL-NEXT:   .tc TGInit[TC],TGInit[TL]@ld
 ; SMALL:        [[TGUninitL]]:
 ; SMALL-NEXT:   .tc TGUninit[TC],TGUninit[TL]@ld
 ; SMALL:        [[TIInitL]]:
@@ -345,11 +345,11 @@ entry:
 ; SMALL:        [[TWUninitL]]:
 ; SMALL-NEXT:   .tc TWUninit[TC],TWUninit[TL]@ld
 
-; LARGE:        [[TGInitL]]:
-; LARGE-NEXT:   .tc TGInit[TE],TGInit[TL]@ld
 ; LARGE:        [[ModuleHandleL]]:
 ; LARGE-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE:        [[TGInitL]]:
+; LARGE-NEXT:   .tc TGInit[TE],TGInit[TL]@ld
 ; LARGE:        [[TGUninitL]]:
 ; LARGE-NEXT:   .tc TGUninit[TE],TGUninit[TL]@ld
 ; LARGE:        [[TIInitL]]:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
index 3e9f28965bbe3d..6eb1b325e9806d 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
@@ -33,7 +33,7 @@ entry:
 ; RELOC-NEXT:   Section (index: 1) .text {
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x16
-; RELOC-NEXT:     Symbol: TIInit (19)
+; RELOC-NEXT:     Symbol: _$TLSML (19)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -41,7 +41,7 @@ entry:
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x1A
-; RELOC-NEXT:     Symbol: TIInit (19)
+; RELOC-NEXT:     Symbol: _$TLSML (19)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -49,7 +49,7 @@ entry:
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x1E
-; RELOC-NEXT:     Symbol: _$TLSML (21)
+; RELOC-NEXT:     Symbol: TIInit (21)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -57,7 +57,7 @@ entry:
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x22
-; RELOC-NEXT:     Symbol: _$TLSML (21)
+; RELOC-NEXT:     Symbol: TIInit (21)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -163,19 +163,19 @@ entry:
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
 ; RELOC-NEXT:   Virtual Address: 0xB8
-; RELOC-NEXT:   Symbol: TIInit (29)
+; RELOC-NEXT:   Symbol: _$TLSML (19)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLS_LD (0x22)
+; RELOC-NEXT:   Type: R_TLSML (0x25)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
 ; RELOC-NEXT:   Virtual Address: 0xBC
-; RELOC-NEXT:   Symbol: _$TLSML (21)
+; RELOC-NEXT:   Symbol: TIInit (29)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLSML (0x25)
+; RELOC-NEXT:   Type: R_TLS_LD (0x22)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
 ; RELOC-NEXT:   Virtual Address: 0xC0
@@ -401,7 +401,7 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 19
-; SYM-NEXT:     Name: TIInit
+; SYM-NEXT:     Name: _$TLSML
 ; SYM-NEXT:     Value (RelocatableAddress): 0xB8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
@@ -414,14 +414,14 @@ entry:
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 2
 ; SYM-NEXT:       SymbolType: XTY_SD (0x1)
-; SYM-NEXT:       StorageMappingClass: XMC_TE (0x16)
+; SYM-NEXT:       StorageMappingClass: XMC_TC (0x3)
 ; SYM-NEXT:       StabInfoIndex: 0x0
 ; SYM-NEXT:       StabSectNum: 0x0
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 21
-; SYM-NEXT:     Name: _$TLSML
+; SYM-NEXT:     Name: TIInit
 ; SYM-NEXT:     Value (RelocatableAddress): 0xBC
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
@@ -434,7 +434,7 @@ entry:
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 2
 ; SYM-NEXT:       SymbolType: XTY_SD (0x1)
-; SYM-NEXT:       StorageMappingClass: XMC_TC (0x3)
+; SYM-NEXT:       StorageMappingClass: XMC_TE (0x16)
 ; SYM-NEXT:       StabInfoIndex: 0x0
 ; SYM-NEXT:       StabSectNum: 0x0
 ; SYM-NEXT:     }
@@ -550,16 +550,17 @@ entry:
 ; DIS-NEXT:                                       mr 7, 3
 ; DIS-NEXT:                                       mr 6, 4
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 19) TIInit[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 8, 0(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 19) TIInit[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 21) _$TLSML[TC]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 4(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 21) _$TLSML[TC]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 19) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 0(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 19) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 4, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 21) TIInit[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 8, 4(4)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 21) TIInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0
 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA  (idx: 1)      .__tls_get_mod[PR]
-; DIS-NEXT:                                       add 3, 3, 8
+; DIS-NEXT:                                       mr 4, 8
+; DIS-NEXT:                                       add 3, 3, 4
 ; DIS-NEXT:                                       stw 6, 4(3)
 ; DIS-NEXT:                                       stw 7, 0(3)
 ; DIS-NEXT:                                       addi 1, 1, 32
@@ -608,12 +609,12 @@ entry:
 ; DIS-NEXT:       b0: 00 00 00 b8
 ; DIS-NEXT: 000000b0: R_POS (idx: 17) TOC[TC0]
 ; DIS-NEXT:       b4: 00 00 00 00
-; DIS:      000000b8  (idx: 19) TIInit[TE]:
+; DIS:      000000b8  (idx: 19) _$TLSML[TC]:
 ; DIS-NEXT:       b8: 00 00 00 00
-; DIS-NEXT: 000000b8: R_TLS_LD (idx: 29) TIInit[TL]
-; DIS:      000000bc  (idx: 21) _$TLSML[TC]:
+; DIS-NEXT: 000000b8: R_TLSML (idx: 19) _$TLSML[TC]
+; DIS:      000000bc  (idx: 21) TIInit[TE]:
 ; DIS-NEXT:       bc: 00 00 00 00
-; DIS-NEXT: 000000bc: R_TLSML (idx: 21) _$TLSML[TC]
+; DIS-NEXT: 000000bc: R_TLS_LD (idx: 29) TIInit[TL]
 ; DIS:      000000c0  (idx: 23) .TWInit[TE]:
 ; DIS-NEXT:       c0: 00 00 00 00
 ; DIS-NEXT: 000000c0: R_TLSM (idx: 31) TWInit[TL]
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
index ae1dae7955914b..dedd36d8ffc51c 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
@@ -34,27 +34,27 @@ entry:
 ; RELOC-NEXT:   Section (index: 1) .text {
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x12
-; RELOC-NEXT:     Symbol: TIUninit (25)
+; RELOC-NEXT:     Symbol: _$TLSML (25)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x16
-; RELOC-NEXT:     Symbol: _$TLSML (27)
+; RELOC-NEXT:     Virtual Address: 0x14
+; RELOC-NEXT:     Symbol: .__tls_get_mod (1)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
-; RELOC-NEXT:     Length: 16
-; RELOC-NEXT:     Type: R_TOC (0x3)
+; RELOC-NEXT:     Length: 26
+; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x18
-; RELOC-NEXT:     Symbol: .__tls_get_mod (1)
+; RELOC-NEXT:     Virtual Address: 0x1A
+; RELOC-NEXT:     Symbol: TIUninit (27)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
-; RELOC-NEXT:     Length: 26
-; RELOC-NEXT:     Type: R_RBA (0x18)
+; RELOC-NEXT:     Length: 16
+; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x4A
@@ -124,19 +124,19 @@ entry:
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
 ; RELOC-NEXT:   Virtual Address: 0x98
-; RELOC-NEXT:   Symbol: TIUninit (39)
+; RELOC-NEXT:   Symbol: _$TLSML (25)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLS_LD (0x22)
+; RELOC-NEXT:   Type: R_TLSML (0x25)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
 ; RELOC-NEXT:   Virtual Address: 0x9C
-; RELOC-NEXT:   Symbol: _$TLSML (27)
+; RELOC-NEXT:   Symbol: TIUninit (39)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLSML (0x25)
+; RELOC-NEXT:   Type: R_TLS_LD (0x22)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
 ; RELOC-NEXT:   Virtual Address: 0xA0
@@ -422,7 +422,7 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 25
-; SYM-NEXT:     Name: TIUninit
+; SYM-NEXT:     Name: _$TLSML
 ; SYM-NEXT:     Value (RelocatableAddress): 0x98
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
@@ -442,7 +442,7 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 27
-; SYM-NEXT:     Name: _$TLSML
+; SYM-NEXT:     Name: TIUninit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x9C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
@@ -589,13 +589,13 @@ entry:
 ; DIS-NEXT:                                      stwu 1, -32(1)
 ; DIS-NEXT:                                      stw 0, 40(1)
 ; DIS-NEXT:                                      mr 6, 3
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 7, 0(2)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 25) TIUninit[TC]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 4(2)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 27) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 0(2)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 25) _$TLSML[TC]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               bla 0
 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1)      .__tls_get_mod[PR]
-; DIS-NEXT:                                      add 3, 3, 7
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 4, 4(2)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 27) TIUninit[TC]
+; DIS-NEXT:                                      add 3, 3, 4
 ; DIS-NEXT:                                      stw 6, 0(3)
 ; DIS-NEXT:                                      addi 1, 1, 32
 ; DIS-NEXT:                                      lwz 0, 8(1)
@@ -638,12 +638,12 @@ entry:
 ; DIS-NEXT:       90: 00 00 00 98
 ; DIS-NEXT: 00000090: R_POS (idx: 23) TOC[TC0]
 ; DIS-NEXT:       94: 00 00 00 00
-; DIS:      00000098  (idx: 25) TIUninit[TC]:
+; DIS:      00000098  (idx: 25) _$TLSML[TC]:
 ; DIS-NEXT:       98: 00 00 00 00
-; DIS-NEXT: 00000098: R_TLS_LD (idx: 39) TIUninit[UL]
-; DIS:      0000009c  (idx: 27) _$TLSML[TC]:
+; DIS-NEXT: 00000098: R_TLSML (idx: 25) _$TLSML[TC]
+; DIS:      0000009c  (idx: 27) TIUninit[TC]:
 ; DIS-NEXT:       9c: 00 00 00 00
-; DIS-NEXT: 0000009c: R_TLSML (idx: 27) _$TLSML[TC]
+; DIS-NEXT: 0000009c: R_TLS_LD (idx: 39) TIUninit[UL]
 ; DIS:      000000a0  (idx: 29) .TGInit[TC]:
 ; DIS-NEXT:       a0: 00 00 00 00
 ; DIS-NEXT: 000000a0: R_TLSM (idx: 37) TGInit

>From 0b88a17acf8c54a9ce83f05381643f8f291c7496 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Thu, 21 Sep 2023 23:40:30 -0400
Subject: [PATCH 07/27] Remove TLS local-dynamic mode guards for AIX.

---
 clang/include/clang/Basic/DiagnosticDriverKinds.td | 1 -
 clang/lib/Frontend/CompilerInvocation.cpp          | 8 --------
 clang/lib/Sema/SemaDeclAttr.cpp                    | 6 ------
 clang/test/CodeGen/PowerPC/aix-tls-model.cpp       | 9 ++++++---
 clang/test/Sema/aix-attr-tls_model.c               | 2 +-
 5 files changed, 7 insertions(+), 19 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 676f1a62b49dd0..cc173d13d6180c 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -678,7 +678,6 @@ def err_drv_cannot_mix_options : Error<"cannot specify '%1' along with '%0'">;
 def err_drv_invalid_object_mode : Error<
   "OBJECT_MODE setting %0 is not recognized and is not a valid setting">;
 
-def err_aix_unsupported_tls_model : Error<"TLS model '%0' is not yet supported on AIX">;
 def err_roptr_requires_data_sections: Error<"-mxcoff-roptr is supported only with -fdata-sections">;
 def err_roptr_cannot_build_shared: Error<"-mxcoff-roptr is not supported with -shared">;
 
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index b33bdad2ad81ba..87871d40e3ec05 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1989,14 +1989,6 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
     Opts.LinkBitcodeFiles.push_back(F);
   }
 
-  if (Arg *A = Args.getLastArg(OPT_ftlsmodel_EQ)) {
-    if (T.isOSAIX()) {
-      StringRef Name = A->getValue();
-      if (Name == "local-dynamic")
-        Diags.Report(diag::err_aix_unsupported_tls_model) << Name;
-    }
-  }
-
   if (Arg *A = Args.getLastArg(OPT_fdenormal_fp_math_EQ)) {
     StringRef Val = A->getValue();
     Opts.FPDenormalMode = llvm::parseDenormalFPAttribute(Val);
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 59e456fd9f7298..8b3be4c3ac54b8 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -2053,12 +2053,6 @@ static void handleTLSModelAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
     return;
   }
 
-  if (S.Context.getTargetInfo().getTriple().isOSAIX() &&
-      Model == "local-dynamic") {
-    S.Diag(LiteralLoc, diag::err_aix_attr_unsupported_tls_model) << Model;
-    return;
-  }
-
   D->addAttr(::new (S.Context) TLSModelAttr(S.Context, AL, Model));
 }
 
diff --git a/clang/test/CodeGen/PowerPC/aix-tls-model.cpp b/clang/test/CodeGen/PowerPC/aix-tls-model.cpp
index 9fdd6855a89ee9..cd0a08aa9a3b77 100644
--- a/clang/test/CodeGen/PowerPC/aix-tls-model.cpp
+++ b/clang/test/CodeGen/PowerPC/aix-tls-model.cpp
@@ -1,11 +1,11 @@
 // RUN: %clang_cc1 %s -triple powerpc-unknown-aix -target-cpu pwr8 -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-GD
 // RUN: %clang_cc1 %s -triple powerpc-unknown-aix -target-cpu pwr8 -ftls-model=global-dynamic -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-GD
-// RUN: not %clang_cc1 %s -triple powerpc-unknown-aix -target-cpu pwr8 -ftls-model=local-dynamic -emit-llvm 2>&1 | FileCheck %s -check-prefix=CHECK-LD-ERROR
+// RUN: %clang_cc1 %s -triple powerpc-unknown-aix -target-cpu pwr8 -ftls-model=local-dynamic -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-LD
 // RUN: %clang_cc1 %s -triple powerpc-unknown-aix -target-cpu pwr8 -ftls-model=initial-exec -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-IE
 // RUN: %clang_cc1 %s -triple powerpc-unknown-aix -target-cpu pwr8 -ftls-model=local-exec -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-LE
 // RUN: %clang_cc1 %s -triple powerpc64-unknown-aix -target-cpu pwr8 -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-GD
 // RUN: %clang_cc1 %s -triple powerpc64-unknown-aix -target-cpu pwr8 -ftls-model=global-dynamic -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-GD
-// RUN: not %clang_cc1 %s -triple powerpc64-unknown-aix -target-cpu pwr8 -ftls-model=local-dynamic -emit-llvm 2>&1 | FileCheck %s -check-prefix=CHECK-LD-ERROR
+// RUN: %clang_cc1 %s -triple powerpc64-unknown-aix -target-cpu pwr8 -ftls-model=local-dynamic -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-LD
 // RUN: %clang_cc1 %s -triple powerpc64-unknown-aix -target-cpu pwr8 -ftls-model=initial-exec -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-IE
 // RUN: %clang_cc1 %s -triple powerpc64-unknown-aix -target-cpu pwr8 -ftls-model=local-exec -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-LE
 
@@ -21,7 +21,10 @@ int f() {
 // CHECK-GD: @z2 ={{.*}} global i32 0
 // CHECK-GD: @x ={{.*}} thread_local global i32 0
 // CHECK-GD: @_ZZ1fvE1y = internal thread_local global i32 0
-// CHECK-LD-ERROR:  error: TLS model 'local-dynamic' is not yet supported on AIX
+// CHECK-LD: @z1 ={{.*}} global i32 0
+// CHECK-LD: @z2 ={{.*}} global i32 0
+// CHECK-LD: @x ={{.*}} thread_local(localdynamic) global i32 0
+// CHECK-LD: @_ZZ1fvE1y = internal thread_local(localdynamic) global i32 0
 // CHECK-IE: @z1 ={{.*}} global i32 0
 // CHECK-IE: @z2 ={{.*}} global i32 0
 // CHECK-IE: @x ={{.*}} thread_local(initialexec) global i32 0
diff --git a/clang/test/Sema/aix-attr-tls_model.c b/clang/test/Sema/aix-attr-tls_model.c
index 9c22d6cceed81c..7c2047bced9391 100644
--- a/clang/test/Sema/aix-attr-tls_model.c
+++ b/clang/test/Sema/aix-attr-tls_model.c
@@ -6,6 +6,6 @@
 #endif
 
 static __thread int y __attribute((tls_model("global-dynamic"))); // no-warning
-static __thread int y __attribute((tls_model("local-dynamic"))); // expected-error {{TLS model 'local-dynamic' is not yet supported on AIX}}
+static __thread int y __attribute((tls_model("local-dynamic"))); // expected-no-diagnostics
 static __thread int y __attribute((tls_model("initial-exec"))); // no-warning
 static __thread int y __attribute((tls_model("local-exec"))); // no-warning

>From 4c53a0eb36884bca7743c6b721ab68f6df7c296a Mon Sep 17 00:00:00 2001
From: tingwang <tingwang at tingwangs-MBP.lan>
Date: Sun, 24 Sep 2023 13:11:06 +0800
Subject: [PATCH 08/27] Fixed issues raised by comments and incorporated
 suggested changes. (1) Removed TLSLDAIX argument so that duplicated nodes can
 be eliminated. (2) Try to reuse clobber r4 by moving TLSLDAIX ahead of
 LoadOffsetToc node. (3) Add FIXME comments. (4) Add test case to show
 duplicated .__tls_get_mod can be eliminated.

Below two cases are not updated yet due to environment issue. I will fix
those in next update.
Failed Tests:
  LLVM :: CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
  LLVM :: CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
---
 .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp  |   9 +-
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     |  28 ++-
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  21 +-
 llvm/lib/Target/PowerPC/PPCISelLowering.h     |   4 +-
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td      |  10 +-
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       |   8 +-
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 123 +++++++-----
 .../test/CodeGen/PowerPC/aix-tls-gd-double.ll |  58 +++---
 llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll   |  58 +++---
 .../CodeGen/PowerPC/aix-tls-gd-longlong.ll    | 164 +++++++---------
 .../CodeGen/PowerPC/aix-tls-local-dynamic.ll  | 184 ++++++++++--------
 11 files changed, 348 insertions(+), 319 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 22cd2fc03ef7c3..00dda306a584c7 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -231,9 +231,12 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer {
       MCSymbolXCOFF *TCSym =
           cast<MCSectionXCOFF>(Streamer.getCurrentSectionOnly())
               ->getQualNameSymbol();
-      // On AIX, we have a region handle (symbol at m), module handle
-      // (__TLSML[TC]@ml) and the variable offset (symbol@{gd|ie|le|ld}) for TLS
-      // variables, depending on the TLS model.
+      // On AIX, we have TLS variable offsets (symbol@({gd|ie|le|ld}) depending
+      // on the TLS access method (or model). For the general-dynamic access
+      // method, we also have region handle (symbol at m) for each variable. For
+      // local-dynamic, a module handle (__TLSML[TC]@ml) for all variables. For
+      // local-exec and initial-exec, we have a thread pointer, in r13 for
+      // 64-bit mode and returned by .__get_tpointer for 32-bit mode.
       if (Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSIE ||
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index dc2adc352b01df..099299bff618f7 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -672,9 +672,9 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
 
   if (Subtarget->isAIXABI()) {
     // For TLSGD, the variable offset should already be in R4 and the region
-    // handle should already be in R3, generate absolute branch to
-    // .__tls_get_addr. For TLSLD, the module handle should already be in R3,
-    // generate branch to .__tls_get_mod.
+    // handle should already be in R3. We generate an absolute branch to
+    // .__tls_get_addr. For TLSLD, the module handle should already be in R3.
+    // We generate an absolute branch to .__tls_get_mod.
     Register VarOffsetReg = Subtarget->isPPC64() ? PPC::X4 : PPC::R4;
     (void)VarOffsetReg;
     assert((MI->getOpcode() == PPC::GETtlsMOD32AIX ||
@@ -759,9 +759,10 @@ getTOCEntryTypeForMO(const MachineOperand &MO) {
   }
 }
 
-// On AIX, TLS-local-dynamic requires that symbol for the module handle must
+// FIXME: find alternative approach to get rid of this hack.
+// On AIX, TLS-local-dynamic requires that the symbol for the module handle must
 // have the name "_$TLSML". This symbol is used as one TOC symbol reference
-// itself with ML relocation type, thus it has "[TC]" attached to its name.
+// itself with an ML relocation type, thus it has "[TC]" attached to its name.
 static inline bool isSpecialAIXSymbolTLSML(const MachineOperand &MO,
                                            const bool IsAIX) {
   return IsAIX && MO.isSymbol() &&
@@ -862,9 +863,11 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD;
     if (MO.getTargetFlags() & PPCII::MO_TLSLD_FLAG) {
       if (isSpecialAIXSymbolTLSML(MO, IsAIX))
-        // FIXME: On AIX the ML relocation type is only valid for a reference to
-        // a TOC symbol from the symbol itself, and right now its only user is
-        // symbol "_$TLSML". Use symbol name to decide that R_TLSML is expected.
+        // FIXME: Due to the size limit of MachineOperand::SubReg_TargetFlags,
+        // hacked this flag which should have been named MO_TLSLDM_FLAG: on AIX
+        // the ML relocation type is only valid for a reference to a TOC symbol
+        // from the symbol itself, and right now its only user is the symbol
+        // "_$TLSML". Use symbol name to decide that R_TLSML is expected.
         return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML;
       if (IsAIX)
         return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD;
@@ -1385,14 +1388,17 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
                    .addExpr(SymGotTlsGD));
     return;
   }
+  case PPC::GETtlsMOD32AIX:
+  case PPC::GETtlsMOD64AIX:
+    // Transform: %r3 = GETtlsMODNNAIX %r3 (for NN == 32/64).
+    // Into: BLA .__tls_get_mod()
+    // Input parameter is a module handle (__TLSML[TC]@ml) for all variables.
   case PPC::GETtlsADDR:
     // Transform: %x3 = GETtlsADDR %x3, @sym
     // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsgd)
   case PPC::GETtlsADDRPCREL:
   case PPC::GETtlsADDR32AIX:
   case PPC::GETtlsADDR64AIX:
-  case PPC::GETtlsMOD32AIX:
-  case PPC::GETtlsMOD64AIX:
     // Transform: %r3 = GETtlsADDRNNAIX %r3, %r4 (for NN == 32/64).
     // Into: BLA .__tls_get_addr()
     // Unlike on Linux, there is no symbol or relocation needed for this call.
@@ -2888,6 +2894,8 @@ void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) {
   }
   case PPC::GETtlsMOD32AIX:
   case PPC::GETtlsMOD64AIX:
+    // A reference to .__tls_get_mod is unknown to the assembler so we need to
+    // emit an external symbol reference.
   case PPC::GETtlsTpointer32AIX:
   case PPC::GETtlsADDR64AIX:
   case PPC::GETtlsADDR32AIX: {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 8f6c57ae746c24..86c88748d6be61 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1772,13 +1772,11 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
-  case PPCISD::GET_TLS_MOD_AIX:
-    return "PPCISD::GET_TLS_MOD_AIX";
+  case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
   case PPCISD::GET_TPOINTER:    return "PPCISD::GET_TPOINTER";
   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
   case PPCISD::TLSGD_AIX:       return "PPCISD::TLSGD_AIX";
-  case PPCISD::TLSLD_AIX:
-    return "PPCISD::TLSLD_AIX";
+  case PPCISD::TLSLD_AIX:       return "PPCISD::TLSLD_AIX";
   case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
   case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
   case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
@@ -3418,14 +3416,19 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
   }
 
   if (Model == TLSModel::LocalDynamic) {
-    // For local-dynamic on AIX, we need to generate two TOC entries, one for
-    // the variable offset, the other for the module handle. The module handle
-    // is encapsulated inside the TLSLD_AIX pseudo node, and will be expanded by
-    // PPCTLSDynamicCall.
+    // For local-dynamic on AIX, we need to generate one TOC entry for each
+    // variable offset, and single module-handle TOC entry for the entire file.
+
+    // We are not (1) create GV node (2) call getTOCEntry for the module-handle
+    // due to the reason that the module-handle should not be materialized (i.e.
+    // there should be no symbol-table entry referring to the module-handle).
+    // Instead we will create reference to __TLSML[TC]@ml in PPCTLSDynamicCall
+    // when processing the TLSLD_AIX pseudo node.
+    SDValue ModuleHandle = DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT);
     SDValue VariableOffsetTGA =
         DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
     SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
-    return DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, VariableOffset);
+    return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
   }
 
   // If Local- or Initial-exec or Local-dynamic is not possible or specified,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index fed03835fd99ab..f09a624fe7235c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -381,8 +381,8 @@ namespace llvm {
 
     /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(variable offset)
     /// Op that internally creates TOC entry for the "_$TLSML" symbol, generates
-    /// GET_TLS_MOD_AIX node which will be expanded into a call to
-    /// .__tls_get_mod, and then add the variable offset with the result from
+    /// a GET_TLS_MOD_AIX node which will be expanded into a call to
+    /// .__tls_get_mod, and then adds the variable offset with the result from
     /// the call. This node is used in both 32-bit and 64-bit modes. The only
     /// difference is register class.
     TLSLD_AIX,
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index a3590580777f3a..e4a6e19dabd348 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1563,7 +1563,7 @@ def GETtlsADDR64AIX :
                     "GETtlsADDR64AIX",
                     [(set i64:$rD,
                       (PPCgetTlsAddr i64:$offset, i64:$handle))]>, isPPC64;
-// On AIX, the call to .__tls_get_mod need one input in X3 for the module handle.
+// On AIX, the call to .__tls_get_mod needs one input in X3 for the module handle.
 def GETtlsMOD64AIX :
   PPCEmitTimePseudo<(outs g8rc:$rD),(ins g8rc:$handle),
                     "GETtlsMOD64AIX",
@@ -1602,10 +1602,10 @@ def TLSGDAIX8 :
                      "#TLSGDAIX8",
                      [(set i64:$rD,
                        (PPCTlsgdAIX i64:$offset, i64:$handle))]>;
-// This pseudo is expanded to one copy to put the module handle in R3, then call
-// GETtlsMOD64AIX, and then add variable offset to the output from the call.
-def TLSLDAIX8 : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$handle),
-                     "#TLSLDAIX8", [(set i64:$rD, (PPCTlsldAIX i64:$handle))]>;
+// This pseudo is expanded to one copy to put the module handle in R3, the call
+// to GETtlsMOD64AIX, and then adds the variable offset to the output from the call.
+def TLSLDAIX8 : PPCEmitTimePseudo<(outs g8rc:$rD), (ins),
+                     "#TLSLDAIX8", [(set i64:$rD, (PPCTlsldAIX))]>;
 // Combined op for ADDItlsldL and GETtlsADDR, late expanded.  X3 and LR8
 // are true defines, while the rest of the Defs are clobbers.
 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index d396d57e8fe95e..7ef61ecb4a8766 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -220,7 +220,7 @@ def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR",
                                  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                                  SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>;
 def PPCTlsgdAIX     : SDNode<"PPCISD::TLSGD_AIX", SDTIntBinOp>;
-def PPCTlsldAIX     : SDNode<"PPCISD::TLSLD_AIX", SDTIntUnaryOp>;
+def PPCTlsldAIX     : SDNode<"PPCISD::TLSLD_AIX", SDTIntLeaf>;
 def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
 def PPCaddiTlsldL   : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
 def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
@@ -3298,10 +3298,8 @@ def TLSGDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$offset, gprc:$handl
                             (PPCTlsgdAIX i32:$offset, i32:$handle))]>;
 // This pseudo is expanded to one copy to put the module handle in R3, then call
 // GETtlsMOD32AIX, and then add variable offset to the output from the call.
-def TLSLDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$handle),
-                          "#TLSLDAIX",
-                          [(set i32:$rD,
-                            (PPCTlsldAIX i32:$handle))]>;
+def TLSLDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins),
+                          "#TLSLDAIX", [(set i32:$rD, (PPCTlsldAIX))]>;
 // LR is a true define, while the rest of the Defs are clobbers.  R3 is
 // explicitly defined when this op is created, so not mentioned here.
 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index bb03f8f911c491..ba3f9f04697d79 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -92,7 +92,9 @@ namespace {
         Register InReg = PPC::NoRegister;
         Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
         Register GPR4 = Is64Bit ? PPC::X4 : PPC::R4;
-        if (!IsPCREL && !IsTLSTPRelMI)
+        if (!IsPCREL && !IsTLSTPRelMI &&
+            !(MI.getOpcode() == PPC::TLSLDAIX8 ||
+              MI.getOpcode() == PPC::TLSLDAIX))
           InReg = MI.getOperand(1).getReg();
         DebugLoc DL = MI.getDebugLoc();
 
@@ -160,9 +162,9 @@ namespace {
         if (IsAIX) {
           if (MI.getOpcode() == PPC::TLSLDAIX8 ||
               MI.getOpcode() == PPC::TLSLDAIX) {
-            // For Local-Dynamic, need to swap the position of VarOffsetInst and
-            // MI, so that VarOffsetInst can use R/X4 to reduce register
-            // pressure.
+            // It is better to put TLSLDAIX node before LoadOffsetToc node,
+            // because LoadOffsetToc node can use clobbers r4/r5. Search for the
+            // first paired LoadOffsetToc node within the same BB.
             const PPCSubtarget &Subtarget =
                 MBB.getParent()->getSubtarget<PPCSubtarget>();
             bool IsLargeModel =
@@ -170,69 +172,94 @@ namespace {
             unsigned LDTocOp =
                 Is64Bit ? (IsLargeModel ? PPC::LDtocL : PPC::LDtoc)
                         : (IsLargeModel ? PPC::LWZtocL : PPC::LWZtoc);
-            assert(RegInfo.hasOneDef(MI.getOperand(1).getReg()) &&
-                   "TLSLDAIX expects single def of its operand.");
-            MachineInstr *VarOffsetInst =
-                RegInfo.getOneDef(MI.getOperand(1).getReg())->getParent();
-            assert(VarOffsetInst->getOpcode() == LDTocOp &&
-                   "Unexpected LDTocOp.");
-            if (IsLargeModel) {
-              // Get the ADDIS instruction when using large model.
-              assert(RegInfo.hasOneDef(VarOffsetInst->getOperand(2).getReg()) &&
-                     "LDTocOp expects single def of its operand.");
-              VarOffsetInst =
-                  RegInfo.getOneDef(VarOffsetInst->getOperand(2).getReg())
-                      ->getParent();
-              assert(VarOffsetInst->getOpcode() ==
-                         (Is64Bit ? PPC::ADDIStocHA8 : PPC::ADDIStocHA) &&
-                     "Unexpected ADDIStocHA.");
-              // FIXME: machine-scheduler could schedule ADDIStocHA ahead of
-              // GETtlsMODAIX, and still has to use extra register.
+            MachineBasicBlock::iterator Anchor = I;
+            if (!RegInfo.use_empty(OutReg)) {
+              std::set<MachineInstr *> Uses;
+              // Collect all instructions that use OutReg
+              for (MachineOperand &MO : RegInfo.use_operands(OutReg)) {
+                if (Uses.count(MO.getParent()))
+                  continue;
+                Uses.insert(MO.getParent());
+              }
+              // Find the first Add within current BB.
+              MachineBasicBlock::iterator UseIter = MBB.begin();
+              for (MachineBasicBlock::iterator AE = MBB.end(); UseIter != AE;
+                   ++UseIter)
+                if (Uses.count(&*UseIter))
+                  break;
+
+              if (UseIter != MBB.end()) {
+                // Get the instruction that defines the other used register
+                // operand of UseIter. The match pattern is that: UseIter has
+                // exactly one used-operand defined by LDTocOp
+                // (LDtocL/LDtoc/LWZtocL/LWZtoc).
+                MachineInstr *LoadOffsetToc = nullptr;
+                int MatchCount = 0;
+                for (MachineOperand &MO : UseIter->operands()) {
+                  if (MO.isReg() && MO.isUse()) {
+                    if (RegInfo.hasOneDef(MO.getReg())) {
+                      if (RegInfo.getOneDef(MO.getReg())
+                              ->getParent()
+                              ->getOpcode() == LDTocOp) {
+                        LoadOffsetToc =
+                            RegInfo.getOneDef(MO.getReg())->getParent();
+                        ++MatchCount;
+                      }
+                    } else {
+                      // FIXME: analyze this scenario if there is one.
+                      MatchCount = 0;
+                      break;
+                    }
+                  }
+                }
+                // Get the iterator.
+                if (MatchCount == 1 && LoadOffsetToc) {
+                  Anchor = MBB.begin();
+                  for (MachineBasicBlock::iterator AE = MBB.end(); Anchor != AE;
+                       ++Anchor)
+                    if (&*Anchor == LoadOffsetToc)
+                      break;
+
+                  if (Anchor == MBB.end())
+                    Anchor = I;
+                }
+              }
             }
+
+            // Generate instructions refer to the "_$TLSML" symbol
             Register ModuleHandleHReg;
             if (IsLargeModel) {
               ModuleHandleHReg = RegInfo.createVirtualRegister(GPRNoZero);
-              BuildMI(MBB, *VarOffsetInst, DL,
+              BuildMI(MBB, Anchor, DL,
                       TII->get(Is64Bit ? PPC::ADDIStocHA8 : PPC::ADDIStocHA),
                       ModuleHandleHReg)
                   .addReg(Subtarget.getTOCPointerRegister())
                   .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG);
             }
             Register MHReg = RegInfo.createVirtualRegister(GPRNoZero);
-            BuildMI(MBB, *VarOffsetInst, DL, TII->get(LDTocOp), MHReg)
+            BuildMI(MBB, Anchor, DL, TII->get(LDTocOp), MHReg)
                 .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG)
                 .addReg(IsLargeModel
                             ? ModuleHandleHReg
                             : Register(Subtarget.getTOCPointerRegister()));
             // The module handle is copied in r3.
-            BuildMI(MBB, *VarOffsetInst, DL, TII->get(TargetOpcode::COPY), GPR3)
+            BuildMI(MBB, Anchor, DL, TII->get(TargetOpcode::COPY), GPR3)
                 .addReg(MHReg);
             // The call to .__tls_get_mod.
-            BuildMI(MBB, *VarOffsetInst, DL, TII->get(Opc2), GPR3).addReg(GPR3);
-            // Copy VarOffset to R/X4
+            BuildMI(MBB, Anchor, DL, TII->get(Opc2), GPR3).addReg(GPR3);
+          } else if (!IsTLSTPRelMI) {
+            // The variable offset and region handle are copied in r4 and r3.
+            // The copies are followed by GETtlsADDR32AIX/GETtlsADDR64AIX.
             BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
                 .addReg(MI.getOperand(1).getReg());
-            BuildMI(MBB, I, DL, TII->get(Is64Bit ? PPC::ADD8 : PPC::ADD4), GPR3)
-                .addReg(GPR3)
-                .addReg(GPR4);
-          } else {
-            // For Global-Dynamic, the variable offset and region handle are
-            // copied in r4 and r3. The copies are followed by
-            // GETtlsADDR32AIX/GETtlsADDR64AIX.
-            if (!IsTLSTPRelMI) {
-              BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
-                  .addReg(MI.getOperand(1).getReg());
-              BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
-                  .addReg(MI.getOperand(2).getReg());
-              BuildMI(MBB, I, DL, TII->get(Opc2), GPR3)
-                  .addReg(GPR3)
-                  .addReg(GPR4);
-            } else
-              // The opcode of GETtlsTpointer32AIX does not change, because
-              // later this instruction will be expanded into a call to
-              // .__get_tpointer, which will return the thread pointer into r3.
-              BuildMI(MBB, I, DL, TII->get(Opc2), GPR3);
-          }
+            BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
+                .addReg(MI.getOperand(2).getReg());
+            BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3).addReg(GPR4);
+          } else
+            // The opcode of GETtlsTpointer32AIX does not change, because later
+            // this instruction will be expanded into a call to .__get_tpointer,
+            // which will return the thread pointer into r3.
+            BuildMI(MBB, I, DL, TII->get(Opc2), GPR3);
         } else {
           MachineInstr *Addi;
           if (IsPCREL) {
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
index ea1a2b8fe9ade7..84ddb83bef457e 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
@@ -160,8 +160,7 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL32-NEXT:    add 3, 3, 4
-; SMALL32-NEXT:    stfd 1, 0(3)
+; SMALL32-NEXT:    stfdx 1, 3, 4
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
 ; SMALL32-NEXT:    mtlr 0
@@ -172,14 +171,12 @@ define void @storesTIInit(double %Val) #0 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 6, L..C5 at l(4)
+; LARGE32-NEXT:    addis 6, L..C4 at u(2)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    mr 4, 6
-; LARGE32-NEXT:    add 3, 3, 4
-; LARGE32-NEXT:    stfd 1, 0(3)
+; LARGE32-NEXT:    lwz 4, L..C4 at l(6)
+; LARGE32-NEXT:    stfdx 1, 3, 4
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
 ; LARGE32-NEXT:    mtlr 0
@@ -193,8 +190,7 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL64-NEXT:    add 3, 3, 4
-; SMALL64-NEXT:    stfd 1, 0(3)
+; SMALL64-NEXT:    stfdx 1, 3, 4
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
 ; SMALL64-NEXT:    mtlr 0
@@ -210,8 +206,7 @@ define void @storesTIInit(double %Val) #0 {
 ; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    ld 4, L..C5 at l(6)
-; LARGE64-NEXT:    add 3, 3, 4
-; LARGE64-NEXT:    stfd 1, 0(3)
+; LARGE64-NEXT:    stfdx 1, 3, 4
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
 ; LARGE64-NEXT:    mtlr 0
@@ -461,10 +456,9 @@ define double @loadsTIInit() #1 {
 ; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL32-NEXT:    add 3, 3, 4
-; SMALL32-NEXT:    lwz 4, L..C8(2) # @GInit
-; SMALL32-NEXT:    lfd 0, 0(3)
-; SMALL32-NEXT:    lfd 1, 0(4)
+; SMALL32-NEXT:    lwz 5, L..C8(2) # @GInit
+; SMALL32-NEXT:    lfdx 0, 3, 4
+; SMALL32-NEXT:    lfd 1, 0(5)
 ; SMALL32-NEXT:    fadd 1, 0, 1
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
@@ -476,14 +470,12 @@ define double @loadsTIInit() #1 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 6, L..C5 at l(4)
+; LARGE32-NEXT:    addis 6, L..C4 at u(2)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    mr 4, 6
-; LARGE32-NEXT:    add 3, 3, 4
-; LARGE32-NEXT:    lfd 0, 0(3)
+; LARGE32-NEXT:    lwz 4, L..C4 at l(6)
+; LARGE32-NEXT:    lfdx 0, 3, 4
 ; LARGE32-NEXT:    addis 3, L..C8 at u(2)
 ; LARGE32-NEXT:    lwz 3, L..C8 at l(3)
 ; LARGE32-NEXT:    lfd 1, 0(3)
@@ -501,10 +493,9 @@ define double @loadsTIInit() #1 {
 ; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL64-NEXT:    add 3, 3, 4
-; SMALL64-NEXT:    ld 4, L..C8(2) # @GInit
-; SMALL64-NEXT:    lfd 0, 0(3)
-; SMALL64-NEXT:    lfd 1, 0(4)
+; SMALL64-NEXT:    ld 5, L..C8(2) # @GInit
+; SMALL64-NEXT:    lfdx 0, 3, 4
+; SMALL64-NEXT:    lfd 1, 0(5)
 ; SMALL64-NEXT:    fadd 1, 0, 1
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -521,10 +512,9 @@ define double @loadsTIInit() #1 {
 ; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    ld 4, L..C5 at l(6)
-; LARGE64-NEXT:    add 3, 3, 4
-; LARGE64-NEXT:    addis 4, L..C8 at u(2)
-; LARGE64-NEXT:    lfd 0, 0(3)
-; LARGE64-NEXT:    ld 3, L..C8 at l(4)
+; LARGE64-NEXT:    addis 5, L..C8 at u(2)
+; LARGE64-NEXT:    lfdx 0, 3, 4
+; LARGE64-NEXT:    ld 3, L..C8 at l(5)
 ; LARGE64-NEXT:    lfd 1, 0(3)
 ; LARGE64-NEXT:    fadd 1, 0, 1
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -664,10 +654,10 @@ entry:
 ; LARGE32-LABEL:  L..C3:
 ; LARGE32-NEXT:   .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL:  L..C4:
+; LARGE32-NEXT:   .tc TIInit[TE],TIInit[TL]@ld
+; LARGE32-LABEL:  L..C5:
 ; LARGE32-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE32-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
-; LARGE32-LABEL:  L..C5:
-; LARGE32-NEXT:   .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE32-LABEL:  L..C6:
 ; LARGE32-NEXT:   .tc .TWInit[TE],TWInit[TL]@m
 ; LARGE32-LABEL:  L..C7:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
index e1cfb6fd767cbf..3001a915da4851 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
@@ -168,8 +168,7 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL32-NEXT:    lwz 3, L..C4(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2)
-; SMALL32-NEXT:    add 3, 3, 4
-; SMALL32-NEXT:    stw 6, 0(3)
+; SMALL32-NEXT:    stwx 6, 3, 4
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
 ; SMALL32-NEXT:    mtlr 0
@@ -181,14 +180,12 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 6, 3
-; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 7, L..C5 at l(4)
+; LARGE32-NEXT:    addis 7, L..C4 at u(2)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    mr 4, 7
-; LARGE32-NEXT:    add 3, 3, 4
-; LARGE32-NEXT:    stw 6, 0(3)
+; LARGE32-NEXT:    lwz 4, L..C4 at l(7)
+; LARGE32-NEXT:    stwx 6, 3, 4
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
 ; LARGE32-NEXT:    mtlr 0
@@ -203,8 +200,7 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL64-NEXT:    ld 3, L..C4(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2)
-; SMALL64-NEXT:    add 3, 3, 4
-; SMALL64-NEXT:    stw 6, 0(3)
+; SMALL64-NEXT:    stwx 6, 3, 4
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
 ; SMALL64-NEXT:    mtlr 0
@@ -221,8 +217,7 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    ld 4, L..C5 at l(7)
-; LARGE64-NEXT:    add 3, 3, 4
-; LARGE64-NEXT:    stw 6, 0(3)
+; LARGE64-NEXT:    stwx 6, 3, 4
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
 ; LARGE64-NEXT:    mtlr 0
@@ -476,10 +471,9 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL32-NEXT:    lwz 3, L..C4(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2)
-; SMALL32-NEXT:    add 3, 3, 4
-; SMALL32-NEXT:    lwz 4, L..C8(2)
-; SMALL32-NEXT:    lwz 3, 0(3)
-; SMALL32-NEXT:    lwz 4, 0(4)
+; SMALL32-NEXT:    lwz 5, L..C8(2)
+; SMALL32-NEXT:    lwzx 3, 3, 4
+; SMALL32-NEXT:    lwz 4, 0(5)
 ; SMALL32-NEXT:    add 3, 4, 3
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
@@ -491,14 +485,12 @@ define i32 @loadsTIUninit() #1 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 6, L..C5 at l(4)
+; LARGE32-NEXT:    addis 6, L..C4 at u(2)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    mr 4, 6
-; LARGE32-NEXT:    add 3, 3, 4
-; LARGE32-NEXT:    lwz 3, 0(3)
+; LARGE32-NEXT:    lwz 4, L..C4 at l(6)
+; LARGE32-NEXT:    lwzx 3, 3, 4
 ; LARGE32-NEXT:    addis 4, L..C8 at u(2)
 ; LARGE32-NEXT:    lwz 4, L..C8 at l(4)
 ; LARGE32-NEXT:    lwz 4, 0(4)
@@ -516,10 +508,9 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL64-NEXT:    ld 3, L..C4(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2)
-; SMALL64-NEXT:    add 3, 3, 4
-; SMALL64-NEXT:    ld 4, L..C8(2)
-; SMALL64-NEXT:    lwz 3, 0(3)
-; SMALL64-NEXT:    lwz 4, 0(4)
+; SMALL64-NEXT:    ld 5, L..C8(2)
+; SMALL64-NEXT:    lwzx 3, 3, 4
+; SMALL64-NEXT:    lwz 4, 0(5)
 ; SMALL64-NEXT:    add 3, 4, 3
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -536,10 +527,9 @@ define i32 @loadsTIUninit() #1 {
 ; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    ld 4, L..C5 at l(6)
-; LARGE64-NEXT:    add 3, 3, 4
-; LARGE64-NEXT:    addis 4, L..C8 at u(2)
-; LARGE64-NEXT:    ld 4, L..C8 at l(4)
-; LARGE64-NEXT:    lwz 3, 0(3)
+; LARGE64-NEXT:    addis 5, L..C8 at u(2)
+; LARGE64-NEXT:    lwzx 3, 3, 4
+; LARGE64-NEXT:    ld 4, L..C8 at l(5)
 ; LARGE64-NEXT:    lwz 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -679,10 +669,10 @@ entry:
 ; LARGE32-LABEL: L..C3:
 ; LARGE32-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL: L..C4:
+; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
+; LARGE32-LABEL: L..C5:
 ; LARGE32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
-; LARGE32-LABEL: L..C5:
-; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE32-LABEL: L..C6:
 ; LARGE32-NEXT:  .tc .TWUninit[TE],TWUninit[TL]@m
 ; LARGE32-LABEL: L..C7:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
index 2e18d6c9bfd7e5..136c62ff509f9c 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
@@ -103,9 +103,8 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C3(2)
-; SMALL32-NEXT:    add 3, 3, 4
+; SMALL32-NEXT:    stwux 7, 3, 4
 ; SMALL32-NEXT:    stw 6, 4(3)
-; SMALL32-NEXT:    stw 7, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
 ; SMALL32-NEXT:    mtlr 0
@@ -118,15 +117,13 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 7, 3
 ; LARGE32-NEXT:    mr 6, 4
-; LARGE32-NEXT:    addis 3, L..C2 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C2 at l(3)
-; LARGE32-NEXT:    addis 4, L..C3 at u(2)
-; LARGE32-NEXT:    lwz 8, L..C3 at l(4)
+; LARGE32-NEXT:    addis 8, L..C2 at u(2)
+; LARGE32-NEXT:    addis 3, L..C3 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    mr 4, 8
-; LARGE32-NEXT:    add 3, 3, 4
+; LARGE32-NEXT:    lwz 4, L..C2 at l(8)
+; LARGE32-NEXT:    stwux 7, 3, 4
 ; LARGE32-NEXT:    stw 6, 4(3)
-; LARGE32-NEXT:    stw 7, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
 ; LARGE32-NEXT:    mtlr 0
@@ -141,8 +138,7 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C3(2)
-; SMALL64-NEXT:    add 3, 3, 4
-; SMALL64-NEXT:    std 6, 0(3)
+; SMALL64-NEXT:    stdx 6, 3, 4
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
 ; SMALL64-NEXT:    mtlr 0
@@ -159,8 +155,7 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; LARGE64-NEXT:    ld 3, L..C2 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    ld 4, L..C3 at l(7)
-; LARGE64-NEXT:    add 3, 3, 4
-; LARGE64-NEXT:    std 6, 0(3)
+; LARGE64-NEXT:    stdx 6, 3, 4
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
 ; LARGE64-NEXT:    mtlr 0
@@ -182,9 +177,8 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C4(2)
-; SMALL32-NEXT:    add 3, 3, 4
+; SMALL32-NEXT:    stwux 7, 3, 4
 ; SMALL32-NEXT:    stw 6, 4(3)
-; SMALL32-NEXT:    stw 7, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
 ; SMALL32-NEXT:    mtlr 0
@@ -197,15 +191,13 @@ define void @storesTIInit(i64 %Val) #0 {
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 7, 3
 ; LARGE32-NEXT:    mr 6, 4
-; LARGE32-NEXT:    addis 3, L..C2 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C2 at l(3)
-; LARGE32-NEXT:    addis 4, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 8, L..C4 at l(4)
+; LARGE32-NEXT:    addis 8, L..C4 at u(2)
+; LARGE32-NEXT:    addis 3, L..C3 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    mr 4, 8
-; LARGE32-NEXT:    add 3, 3, 4
+; LARGE32-NEXT:    lwz 4, L..C4 at l(8)
+; LARGE32-NEXT:    stwux 7, 3, 4
 ; LARGE32-NEXT:    stw 6, 4(3)
-; LARGE32-NEXT:    stw 7, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
 ; LARGE32-NEXT:    mtlr 0
@@ -220,8 +212,7 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C4(2)
-; SMALL64-NEXT:    add 3, 3, 4
-; SMALL64-NEXT:    std 6, 0(3)
+; SMALL64-NEXT:    stdx 6, 3, 4
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
 ; SMALL64-NEXT:    mtlr 0
@@ -238,8 +229,7 @@ define void @storesTIInit(i64 %Val) #0 {
 ; LARGE64-NEXT:    ld 3, L..C2 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    ld 4, L..C4 at l(7)
-; LARGE64-NEXT:    add 3, 3, 4
-; LARGE64-NEXT:    std 6, 0(3)
+; LARGE64-NEXT:    stdx 6, 3, 4
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
 ; LARGE64-NEXT:    mtlr 0
@@ -333,7 +323,7 @@ define i64 @loadsTGInit() #1 {
 ; SMALL32-NEXT:    lwz 4, L..C1(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C7(2) # @GInit
+; SMALL32-NEXT:    lwz 4, L..C7(2)
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -376,7 +366,7 @@ define i64 @loadsTGInit() #1 {
 ; SMALL64-NEXT:    ld 4, L..C1(2)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C7(2) # @GInit
+; SMALL64-NEXT:    ld 4, L..C7(2)
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -421,14 +411,13 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C3(2)
-; SMALL32-NEXT:    add 3, 3, 4
-; SMALL32-NEXT:    lwz 4, L..C7(2) # @GInit
-; SMALL32-NEXT:    lwz 5, 4(3)
-; SMALL32-NEXT:    lwz 6, 4(4)
-; SMALL32-NEXT:    lwz 3, 0(3)
-; SMALL32-NEXT:    lwz 7, 0(4)
-; SMALL32-NEXT:    addc 4, 6, 5
-; SMALL32-NEXT:    adde 3, 7, 3
+; SMALL32-NEXT:    lwz 5, L..C7(2)
+; SMALL32-NEXT:    lwzux 6, 3, 4
+; SMALL32-NEXT:    lwz 4, 4(5)
+; SMALL32-NEXT:    lwz 3, 4(3)
+; SMALL32-NEXT:    lwz 5, 0(5)
+; SMALL32-NEXT:    addc 4, 4, 3
+; SMALL32-NEXT:    adde 3, 5, 6
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
 ; SMALL32-NEXT:    mtlr 0
@@ -439,21 +428,19 @@ define i64 @loadsTIUninit() #1 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    addis 3, L..C2 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C2 at l(3)
-; LARGE32-NEXT:    addis 4, L..C3 at u(2)
-; LARGE32-NEXT:    lwz 6, L..C3 at l(4)
+; LARGE32-NEXT:    addis 6, L..C2 at u(2)
+; LARGE32-NEXT:    addis 3, L..C3 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    mr 4, 6
-; LARGE32-NEXT:    add 3, 3, 4
-; LARGE32-NEXT:    lwz 4, 4(3)
-; LARGE32-NEXT:    lwz 3, 0(3)
-; LARGE32-NEXT:    addis 5, L..C7 at u(2)
-; LARGE32-NEXT:    lwz 5, L..C7 at l(5)
-; LARGE32-NEXT:    lwz 6, 4(5)
-; LARGE32-NEXT:    lwz 5, 0(5)
-; LARGE32-NEXT:    addc 4, 6, 4
-; LARGE32-NEXT:    adde 3, 5, 3
+; LARGE32-NEXT:    lwz 4, L..C2 at l(6)
+; LARGE32-NEXT:    lwzux 5, 3, 4
+; LARGE32-NEXT:    lwz 3, 4(3)
+; LARGE32-NEXT:    addis 4, L..C7 at u(2)
+; LARGE32-NEXT:    lwz 4, L..C7 at l(4)
+; LARGE32-NEXT:    lwz 6, 4(4)
+; LARGE32-NEXT:    lwz 7, 0(4)
+; LARGE32-NEXT:    addc 4, 6, 3
+; LARGE32-NEXT:    adde 3, 7, 5
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
 ; LARGE32-NEXT:    mtlr 0
@@ -467,10 +454,9 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C3(2)
-; SMALL64-NEXT:    add 3, 3, 4
-; SMALL64-NEXT:    ld 4, L..C7(2) # @GInit
-; SMALL64-NEXT:    ld 3, 0(3)
-; SMALL64-NEXT:    ld 4, 0(4)
+; SMALL64-NEXT:    ld 5, L..C7(2)
+; SMALL64-NEXT:    ldx 3, 3, 4
+; SMALL64-NEXT:    ld 4, 0(5)
 ; SMALL64-NEXT:    add 3, 4, 3
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -487,10 +473,9 @@ define i64 @loadsTIUninit() #1 {
 ; LARGE64-NEXT:    ld 3, L..C2 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    ld 4, L..C3 at l(6)
-; LARGE64-NEXT:    add 3, 3, 4
-; LARGE64-NEXT:    addis 4, L..C7 at u(2)
-; LARGE64-NEXT:    ld 4, L..C7 at l(4)
-; LARGE64-NEXT:    ld 3, 0(3)
+; LARGE64-NEXT:    addis 5, L..C7 at u(2)
+; LARGE64-NEXT:    ldx 3, 3, 4
+; LARGE64-NEXT:    ld 4, L..C7 at l(5)
 ; LARGE64-NEXT:    ld 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -514,14 +499,13 @@ define i64 @loadsTIInit() #1 {
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C4(2)
-; SMALL32-NEXT:    add 3, 3, 4
-; SMALL32-NEXT:    lwz 4, L..C7(2) # @GInit
-; SMALL32-NEXT:    lwz 5, 4(3)
-; SMALL32-NEXT:    lwz 6, 4(4)
-; SMALL32-NEXT:    lwz 3, 0(3)
-; SMALL32-NEXT:    lwz 7, 0(4)
-; SMALL32-NEXT:    addc 4, 6, 5
-; SMALL32-NEXT:    adde 3, 7, 3
+; SMALL32-NEXT:    lwz 5, L..C7(2)
+; SMALL32-NEXT:    lwzux 6, 3, 4
+; SMALL32-NEXT:    lwz 4, 4(5)
+; SMALL32-NEXT:    lwz 3, 4(3)
+; SMALL32-NEXT:    lwz 5, 0(5)
+; SMALL32-NEXT:    addc 4, 4, 3
+; SMALL32-NEXT:    adde 3, 5, 6
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
 ; SMALL32-NEXT:    mtlr 0
@@ -532,21 +516,19 @@ define i64 @loadsTIInit() #1 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    addis 3, L..C2 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C2 at l(3)
-; LARGE32-NEXT:    addis 4, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 6, L..C4 at l(4)
+; LARGE32-NEXT:    addis 6, L..C4 at u(2)
+; LARGE32-NEXT:    addis 3, L..C3 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    mr 4, 6
-; LARGE32-NEXT:    add 3, 3, 4
-; LARGE32-NEXT:    lwz 4, 4(3)
-; LARGE32-NEXT:    lwz 3, 0(3)
-; LARGE32-NEXT:    addis 5, L..C7 at u(2)
-; LARGE32-NEXT:    lwz 5, L..C7 at l(5)
-; LARGE32-NEXT:    lwz 6, 4(5)
-; LARGE32-NEXT:    lwz 5, 0(5)
-; LARGE32-NEXT:    addc 4, 6, 4
-; LARGE32-NEXT:    adde 3, 5, 3
+; LARGE32-NEXT:    lwz 4, L..C4 at l(6)
+; LARGE32-NEXT:    lwzux 5, 3, 4
+; LARGE32-NEXT:    lwz 3, 4(3)
+; LARGE32-NEXT:    addis 4, L..C7 at u(2)
+; LARGE32-NEXT:    lwz 4, L..C7 at l(4)
+; LARGE32-NEXT:    lwz 6, 4(4)
+; LARGE32-NEXT:    lwz 7, 0(4)
+; LARGE32-NEXT:    addc 4, 6, 3
+; LARGE32-NEXT:    adde 3, 7, 5
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
 ; LARGE32-NEXT:    mtlr 0
@@ -560,10 +542,9 @@ define i64 @loadsTIInit() #1 {
 ; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C4(2)
-; SMALL64-NEXT:    add 3, 3, 4
-; SMALL64-NEXT:    ld 4, L..C7(2) # @GInit
-; SMALL64-NEXT:    ld 3, 0(3)
-; SMALL64-NEXT:    ld 4, 0(4)
+; SMALL64-NEXT:    ld 5, L..C7(2)
+; SMALL64-NEXT:    ldx 3, 3, 4
+; SMALL64-NEXT:    ld 4, 0(5)
 ; SMALL64-NEXT:    add 3, 4, 3
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -580,10 +561,9 @@ define i64 @loadsTIInit() #1 {
 ; LARGE64-NEXT:    ld 3, L..C2 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    ld 4, L..C4 at l(6)
-; LARGE64-NEXT:    add 3, 3, 4
-; LARGE64-NEXT:    addis 4, L..C7 at u(2)
-; LARGE64-NEXT:    ld 4, L..C7 at l(4)
-; LARGE64-NEXT:    ld 3, 0(3)
+; LARGE64-NEXT:    addis 5, L..C7 at u(2)
+; LARGE64-NEXT:    ldx 3, 3, 4
+; LARGE64-NEXT:    ld 4, L..C7 at l(5)
 ; LARGE64-NEXT:    ld 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -607,7 +587,7 @@ define i64 @loadsTWInit() #1 {
 ; SMALL32-NEXT:    lwz 4, L..C6(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C7(2) # @GInit
+; SMALL32-NEXT:    lwz 4, L..C7(2)
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -650,7 +630,7 @@ define i64 @loadsTWInit() #1 {
 ; SMALL64-NEXT:    ld 4, L..C6(2)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C7(2) # @GInit
+; SMALL64-NEXT:    ld 4, L..C7(2)
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -723,10 +703,10 @@ entry:
 ; LARGE32-LABEL:  L..C1:
 ; LARGE32-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL:  L..C2:
+; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
+; LARGE32-LABEL:  L..C3:
 ; LARGE32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
-; LARGE32-LABEL:  L..C3:
-; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE32-LABEL:  L..C4:
 ; LARGE32-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE32-LABEL:  L..C5:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
index 0513f8de616201..f7cd73ae609b35 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
@@ -6,6 +6,10 @@
 ; RUN:     --code-model=small < %s | FileCheck %s --check-prefixes=SMALL32,SMALL
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \
 ; RUN:     --code-model=large < %s | FileCheck %s --check-prefixes=LARGE32,LARGE
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
+; RUN:     --code-model=small -O0 < %s | FileCheck %s --check-prefixes=WITHDUP
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
+; RUN:     --code-model=small -O1 < %s | FileCheck %s --check-prefixes=NODUP
 
 @TGInit = thread_local(localdynamic) global i32 42, align 4
 @TGUninit = thread_local(localdynamic) global i32 0, align 4
@@ -13,6 +17,8 @@
 @TIUninit = internal thread_local(localdynamic) global i32 0, align 4
 @TWInit = weak thread_local(localdynamic) global i32 42, align 4
 @TWUninit = weak thread_local(localdynamic) global i32 0, align 4
+ at x = thread_local(localdynamic) global i32 42, align 4
+ at y = thread_local(localdynamic) global i32 42, align 4
 
 define i32 @loadTGInit() {
 ; SMALL-LABEL:  loadTGInit:
@@ -21,19 +27,18 @@ define i32 @loadTGInit() {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
-; SMALL:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        lwz [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+; SMALL:        lwzx [[TGInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  loadTGInit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        lwz [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+; LARGE:        lwzx [[TGInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGInit)
   %1 = load i32, ptr %0, align 4
@@ -47,19 +52,18 @@ define void @storeTGInit(i32 noundef signext %i) {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
-; SMALL:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        stw [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+; SMALL:        stwx [[TGInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  storeTGInit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        stw [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+; LARGE:        stwx [[TGInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGInit)
   store i32 %i, ptr %0, align 4
@@ -73,19 +77,18 @@ define i32 @loadTGUninit() {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
-; SMALL:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        lwz [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+; SMALL:        lwzx [[TGInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  loadTGUninit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        lwz [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+; LARGE:        lwzx [[TGUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGUninit)
   %1 = load i32, ptr %0, align 4
@@ -99,19 +102,18 @@ define void @storeTGUninit(i32 noundef signext %i) {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
-; SMALL:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        stw [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+; SMALL:        stwx [[TGUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  storeTGUninit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        stw [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+; LARGE:        stwx [[TGUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGUninit)
   store i32 %i, ptr %0, align 4
@@ -125,19 +127,18 @@ define i32 @loadTIInit() {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
-; SMALL:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        lwz [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+; SMALL:        lwzx [[TIInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  loadTIInit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        lwz [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+; LARGE:        lwzx [[TIInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIInit)
   %1 = load i32, ptr %0, align 4
@@ -151,19 +152,18 @@ define void @storeTIInit(i32 noundef signext %i) {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
-; SMALL:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        stw [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+; SMALL:        stwx [[TIInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  storeTIInit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        stw [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+; LARGE:        stwx [[TIInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIInit)
   store i32 %i, ptr %0, align 4
@@ -177,19 +177,18 @@ define i32 @loadTIUninit() {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
-; SMALL:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        lwz [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+; SMALL:        lwzx [[TIUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  loadTIUninit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        lwz [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+; LARGE:        lwzx [[TIUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIUninit)
   %1 = load i32, ptr %0, align 4
@@ -203,19 +202,18 @@ define void @storeTIUninit(i32 noundef signext %i) {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
-; SMALL:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        stw [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+; SMALL:        stwx [[TIUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  storeTIUninit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        stw [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+; LARGE:        stwx [[TIUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIUninit)
   store i32 %i, ptr %0, align 4
@@ -229,19 +227,18 @@ define i32 @loadTWInit() {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
-; SMALL:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        lwz [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+; SMALL:        lwzx [[TWInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  loadTWInit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        lwz [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+; LARGE:        lwzx [[TWInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWInit)
   %1 = load i32, ptr %0, align 4
@@ -255,19 +252,18 @@ define void @storeTWInit(i32 noundef signext %i) {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
-; SMALL:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        stw [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+; SMALL:        stwx [[TWInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  storeTWInit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        stw [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+; LARGE:        stwx [[TWInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWInit)
   store i32 %i, ptr %0, align 4
@@ -281,19 +277,18 @@ define i32 @loadTWUninit() {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
-; SMALL:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        lwz [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+; SMALL:        lwzx [[TWUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  loadTWUninit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        lwz [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+; LARGE:        lwzx [[TWUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWUninit)
   %1 = load i32, ptr %0, align 4
@@ -307,25 +302,53 @@ define void @storeTWUninit(i32 noundef signext %i) {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
-; SMALL:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        stw [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+; SMALL:        stwx [[TWUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  storeTWUninit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        stw [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+; LARGE:        stwx [[TWUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWUninit)
   store i32 %i, ptr %0, align 4
   ret void
 }
 
+define i32 @DedupTlsGetMod() #0 {
+; WITHDUP-LABEL:  DedupTlsGetMod:
+; WITHDUP:        ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; WITHDUP-NEXT:   bla .__tls_get_mod[PR]
+; WITHDUP-NEXT:   ld [[OffsetXR:[0-9]+]], [[X:L..C[0-9]+]](2)
+; WITHDUP:        ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; WITHDUP-NEXT:   bla .__tls_get_mod[PR]
+; WITHDUP-NEXT:   ld [[OffsetYR:[0-9]+]], [[Y:L..C[0-9]+]](2)
+; WITHDUP-LABEL:  L..DedupTlsGetMod0:
+;
+; NODUP-LABEL:  DedupTlsGetMod:
+; NODUP:        ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; NODUP-NEXT:   bla .__tls_get_mod[PR]
+; NODUP-NEXT:   ld [[OffsetXR:[0-9]+]], [[X:L..C[0-9]+]](2)
+; NODUP-NEXT:   ld [[OffsetYR:[0-9]+]], [[Y:L..C[0-9]+]](2)
+; NODUP-NEXT:   lwzx [[XValR:[0-9]+]], [[ModuleHandleR]], [[OffsetXR]]
+; NODUP-NEXT:   lwzx [[YValR:[0-9]+]], [[ModuleHandleR]], [[OffsetYR]]
+; NODUP-LABEL:  L..DedupTlsGetMod0:
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, ptr %retval, align 4
+  %0 = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x)
+  %1 = load i32, ptr %0, align 4
+  %2 = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @y)
+  %3 = load i32, ptr %2, align 4
+  %add = add nsw i32 %1, %3
+  ret i32 %add
+}
+
 ; SMALL:          .extern .__tls_get_mod[PR]
 ; LARGE:          .extern .__tls_get_mod[PR]
 
@@ -345,11 +368,18 @@ entry:
 ; SMALL:        [[TWUninitL]]:
 ; SMALL-NEXT:   .tc TWUninit[TC],TWUninit[TL]@ld
 
-; LARGE:        [[ModuleHandleL]]:
-; LARGE-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
-; LARGE-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
-; LARGE:        [[TGInitL]]:
-; LARGE-NEXT:   .tc TGInit[TE],TGInit[TL]@ld
+; LARGE64:        [[ModuleHandleL]]:
+; LARGE64-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE64-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE64:        [[TGInitL]]:
+; LARGE64-NEXT:   .tc TGInit[TE],TGInit[TL]@ld
+;
+; LARGE32:        [[TGInitL]]:
+; LARGE32-NEXT:   .tc TGInit[TE],TGInit[TL]@ld
+; LARGE32:        [[ModuleHandleL]]:
+; LARGE32-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE32-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+;
 ; LARGE:        [[TGUninitL]]:
 ; LARGE-NEXT:   .tc TGUninit[TE],TGUninit[TL]@ld
 ; LARGE:        [[TIInitL]]:

>From 9e70b058551e6a6e374e961dd14c03af6879ab0d Mon Sep 17 00:00:00 2001
From: tingwang <tingwang at tingwangs-MacBook-Pro.local>
Date: Mon, 25 Sep 2023 08:52:45 +0800
Subject: [PATCH 09/27] (1) Use GPR3 directly in LoadModuleHandle. (2) Update
 comments.

---
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td      | 3 +--
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       | 3 +--
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 8 ++------
 3 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index e4a6e19dabd348..b51af57b796f81 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1602,8 +1602,7 @@ def TLSGDAIX8 :
                      "#TLSGDAIX8",
                      [(set i64:$rD,
                        (PPCTlsgdAIX i64:$offset, i64:$handle))]>;
-// This pseudo is expanded to one copy to put the module handle in R3, the call
-// to GETtlsMOD64AIX, and then adds the variable offset to the output from the call.
+// This pseudo is expanded to load module-handle in X3, and the call to GETtlsMOD64AIX.
 def TLSLDAIX8 : PPCEmitTimePseudo<(outs g8rc:$rD), (ins),
                      "#TLSLDAIX8", [(set i64:$rD, (PPCTlsldAIX))]>;
 // Combined op for ADDItlsldL and GETtlsADDR, late expanded.  X3 and LR8
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 7ef61ecb4a8766..8af426623d5496 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -3296,8 +3296,7 @@ def TLSGDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$offset, gprc:$handl
                           "#TLSGDAIX",
                           [(set i32:$rD,
                             (PPCTlsgdAIX i32:$offset, i32:$handle))]>;
-// This pseudo is expanded to one copy to put the module handle in R3, then call
-// GETtlsMOD32AIX, and then add variable offset to the output from the call.
+// This pseudo is expanded to load module-handle in R3, and the call to GETtlsMOD32AIX.
 def TLSLDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins),
                           "#TLSLDAIX", [(set i32:$rD, (PPCTlsldAIX))]>;
 // LR is a true define, while the rest of the Defs are clobbers.  R3 is
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index ba3f9f04697d79..71825dca77bcf0 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -226,7 +226,7 @@ namespace {
               }
             }
 
-            // Generate instructions refer to the "_$TLSML" symbol
+            // Generate instructions to load module-handle.
             Register ModuleHandleHReg;
             if (IsLargeModel) {
               ModuleHandleHReg = RegInfo.createVirtualRegister(GPRNoZero);
@@ -236,15 +236,11 @@ namespace {
                   .addReg(Subtarget.getTOCPointerRegister())
                   .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG);
             }
-            Register MHReg = RegInfo.createVirtualRegister(GPRNoZero);
-            BuildMI(MBB, Anchor, DL, TII->get(LDTocOp), MHReg)
+            BuildMI(MBB, Anchor, DL, TII->get(LDTocOp), GPR3)
                 .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG)
                 .addReg(IsLargeModel
                             ? ModuleHandleHReg
                             : Register(Subtarget.getTOCPointerRegister()));
-            // The module handle is copied in r3.
-            BuildMI(MBB, Anchor, DL, TII->get(TargetOpcode::COPY), GPR3)
-                .addReg(MHReg);
             // The call to .__tls_get_mod.
             BuildMI(MBB, Anchor, DL, TII->get(Opc2), GPR3).addReg(GPR3);
           } else if (!IsTLSTPRelMI) {

>From df98a7f6097e6f6cc425c47dd89f74458e88fc6b Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Sun, 24 Sep 2023 23:31:34 -0400
Subject: [PATCH 10/27] update lit case

---
 .../PowerPC/aix-tls-xcoff-reloc-large.ll      | 174 +++++++++---------
 .../CodeGen/PowerPC/aix-tls-xcoff-reloc.ll    | 117 ++++++------
 2 files changed, 144 insertions(+), 147 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
index 6eb1b325e9806d..70d1e8a592e5d4 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
@@ -33,7 +33,7 @@ entry:
 ; RELOC-NEXT:   Section (index: 1) .text {
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x16
-; RELOC-NEXT:     Symbol: _$TLSML (19)
+; RELOC-NEXT:     Symbol: TIInit (19)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -41,30 +41,22 @@ entry:
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x1A
-; RELOC-NEXT:     Symbol: _$TLSML (19)
-; RELOC-NEXT:     IsSigned: No
-; RELOC-NEXT:     FixupBitValue: 0
-; RELOC-NEXT:     Length: 16
-; RELOC-NEXT:     Type: R_TOCL (0x31)
-; RELOC-NEXT:   }
-; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x1E
-; RELOC-NEXT:     Symbol: TIInit (21)
+; RELOC-NEXT:     Symbol: _$TLSML (21)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x22
-; RELOC-NEXT:     Symbol: TIInit (21)
+; RELOC-NEXT:     Virtual Address: 0x1E
+; RELOC-NEXT:     Symbol: _$TLSML (21)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOCL (0x31)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x24
+; RELOC-NEXT:     Virtual Address: 0x20
 ; RELOC-NEXT:     Symbol: .__tls_get_mod (1)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -72,7 +64,15 @@ entry:
 ; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x5E
+; RELOC-NEXT:     Virtual Address: 0x26
+; RELOC-NEXT:     Symbol: TIInit (19)
+; RELOC-NEXT:     IsSigned: No
+; RELOC-NEXT:     FixupBitValue: 0
+; RELOC-NEXT:     Length: 16
+; RELOC-NEXT:     Type: R_TOCL (0x31)
+; RELOC-NEXT:   }
+; RELOC-NEXT:   Relocation {
+; RELOC-NEXT:     Virtual Address: 0x4E
 ; RELOC-NEXT:     Symbol: .TWInit (23)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -80,7 +80,7 @@ entry:
 ; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x62
+; RELOC-NEXT:     Virtual Address: 0x52
 ; RELOC-NEXT:     Symbol: TWInit (25)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -88,7 +88,7 @@ entry:
 ; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x66
+; RELOC-NEXT:     Virtual Address: 0x56
 ; RELOC-NEXT:     Symbol: .TWInit (23)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -96,7 +96,7 @@ entry:
 ; RELOC-NEXT:     Type: R_TOCL (0x31)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x6A
+; RELOC-NEXT:     Virtual Address: 0x5A
 ; RELOC-NEXT:     Symbol: TWInit (25)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -104,7 +104,7 @@ entry:
 ; RELOC-NEXT:     Type: R_TOCL (0x31)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x6C
+; RELOC-NEXT:     Virtual Address: 0x5C
 ; RELOC-NEXT:     Symbol: .__tls_get_addr (3)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -112,7 +112,7 @@ entry:
 ; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x76
+; RELOC-NEXT:     Virtual Address: 0x66
 ; RELOC-NEXT:     Symbol: GInit (27)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -120,7 +120,7 @@ entry:
 ; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x7A
+; RELOC-NEXT:     Virtual Address: 0x6A
 ; RELOC-NEXT:     Symbol: GInit (27)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -130,7 +130,7 @@ entry:
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Section (index: 2) .data {
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xA0
+; RELOC-NEXT:   Virtual Address: 0x90
 ; RELOC-NEXT:   Symbol: .storesTIInit (7)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -138,7 +138,7 @@ entry:
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xA4
+; RELOC-NEXT:   Virtual Address: 0x94
 ; RELOC-NEXT:   Symbol: TOC (17)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -146,7 +146,7 @@ entry:
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xAC
+; RELOC-NEXT:   Virtual Address: 0x9C
 ; RELOC-NEXT:   Symbol: .loadsTWInit (9)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -154,7 +154,7 @@ entry:
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xB0
+; RELOC-NEXT:   Virtual Address: 0xA0
 ; RELOC-NEXT:   Symbol: TOC (17)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -162,23 +162,23 @@ entry:
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xB8
-; RELOC-NEXT:   Symbol: _$TLSML (19)
+; RELOC-NEXT:   Virtual Address: 0xA8
+; RELOC-NEXT:   Symbol: TIInit (29)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLSML (0x25)
+; RELOC-NEXT:   Type: R_TLS_LD (0x22)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xBC
-; RELOC-NEXT:   Symbol: TIInit (29)
+; RELOC-NEXT:   Virtual Address: 0xAC
+; RELOC-NEXT:   Symbol: _$TLSML (21)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLS_LD (0x22)
+; RELOC-NEXT:   Type: R_TLSML (0x25)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xC0
+; RELOC-NEXT:   Virtual Address: 0xB0
 ; RELOC-NEXT:   Symbol: TWInit (31)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -186,7 +186,7 @@ entry:
 ; RELOC-NEXT:   Type: R_TLSM (0x24)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xC4
+; RELOC-NEXT:   Virtual Address: 0xB4
 ; RELOC-NEXT:   Symbol: TWInit (31)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -194,7 +194,7 @@ entry:
 ; RELOC-NEXT:   Type: R_TLS (0x20)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xC8
+; RELOC-NEXT:   Virtual Address: 0xB8
 ; RELOC-NEXT:   Symbol: GInit (11)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -269,7 +269,7 @@ entry:
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
 ; SYM-NEXT:       Index: 6
-; SYM-NEXT:       SectionLen: 148
+; SYM-NEXT:       SectionLen: 132
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 5
@@ -302,7 +302,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 9
 ; SYM-NEXT:     Name: .loadsTWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x50
+; SYM-NEXT:     Value (RelocatableAddress): 0x40
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -322,7 +322,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 11
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x98
+; SYM-NEXT:     Value (RelocatableAddress): 0x88
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -342,7 +342,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 13
 ; SYM-NEXT:     Name: storesTIInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xA0
+; SYM-NEXT:     Value (RelocatableAddress): 0x90
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -362,7 +362,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 15
 ; SYM-NEXT:     Name: loadsTWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xAC
+; SYM-NEXT:     Value (RelocatableAddress): 0x9C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -382,7 +382,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 17
 ; SYM-NEXT:     Name: TOC
-; SYM-NEXT:     Value (RelocatableAddress): 0xB8
+; SYM-NEXT:     Value (RelocatableAddress): 0xA8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -401,8 +401,8 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 19
-; SYM-NEXT:     Name: _$TLSML
-; SYM-NEXT:     Value (RelocatableAddress): 0xB8
+; SYM-NEXT:     Name: TIInit
+; SYM-NEXT:     Value (RelocatableAddress): 0xA8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -414,15 +414,15 @@ entry:
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 2
 ; SYM-NEXT:       SymbolType: XTY_SD (0x1)
-; SYM-NEXT:       StorageMappingClass: XMC_TC (0x3)
+; SYM-NEXT:       StorageMappingClass: XMC_TE (0x16)
 ; SYM-NEXT:       StabInfoIndex: 0x0
 ; SYM-NEXT:       StabSectNum: 0x0
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 21
-; SYM-NEXT:     Name: TIInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xBC
+; SYM-NEXT:     Name: _$TLSML
+; SYM-NEXT:     Value (RelocatableAddress): 0xAC
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -434,7 +434,7 @@ entry:
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 2
 ; SYM-NEXT:       SymbolType: XTY_SD (0x1)
-; SYM-NEXT:       StorageMappingClass: XMC_TE (0x16)
+; SYM-NEXT:       StorageMappingClass: XMC_TC (0x3)
 ; SYM-NEXT:       StabInfoIndex: 0x0
 ; SYM-NEXT:       StabSectNum: 0x0
 ; SYM-NEXT:     }
@@ -442,7 +442,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 23
 ; SYM-NEXT:     Name: .TWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xC0
+; SYM-NEXT:     Value (RelocatableAddress): 0xB0
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -462,7 +462,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 25
 ; SYM-NEXT:     Name: TWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xC4
+; SYM-NEXT:     Value (RelocatableAddress): 0xB4
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -482,7 +482,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 27
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xC8
+; SYM-NEXT:     Value (RelocatableAddress): 0xB8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -549,25 +549,23 @@ entry:
 ; DIS-NEXT:                                       stw 0, 40(1)
 ; DIS-NEXT:                                       mr 7, 3
 ; DIS-NEXT:                                       mr 6, 4
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 8, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 19) TIInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 19) _$TLSML[TC]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 0(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 19) _$TLSML[TC]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 4, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 21) TIInit[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 8, 4(4)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 21) TIInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 21) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 4(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 21) _$TLSML[TC]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0
 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA  (idx: 1)      .__tls_get_mod[PR]
-; DIS-NEXT:                                       mr 4, 8
-; DIS-NEXT:                                       add 3, 3, 4
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 4, 0(8)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 19) TIInit[TE]
+; DIS-NEXT:                                       stwux 7, 3, 4
 ; DIS-NEXT:                                       stw 6, 4(3)
-; DIS-NEXT:                                       stw 7, 0(3)
 ; DIS-NEXT:                                       addi 1, 1, 32
 ; DIS-NEXT:                                       lwz 0, 8(1)
 ; DIS-NEXT:                                       mtlr 0
 ; DIS-NEXT:                                       blr
-; DIS:      00000050 (idx: 9) .loadsTWInit:
+; DIS:      00000040 (idx: 9) .loadsTWInit:
 ; DIS-NEXT:                                       mflr 0
 ; DIS-NEXT:                                       stwu 1, -32(1)
 ; DIS-NEXT:                                       stw 0, 40(1)
@@ -594,36 +592,36 @@ entry:
 ; DIS-NEXT:                                       blr
 
 ; DIS:      Disassembly of section .data:
-; DIS:      00000098  (idx: 11) GInit[RW]:
-; DIS-NEXT:       98: 3f f0 00 00
-; DIS-NEXT:       9c: 00 00 00 00
-; DIS:      000000a0  (idx: 13) storesTIInit[DS]:
-; DIS-NEXT:       a0: 00 00 00 00
-; DIS-NEXT: 000000a0: R_POS (idx: 7) .storesTIInit
-; DIS-NEXT:       a4: 00 00 00 b8
-; DIS-NEXT: 000000a4: R_POS (idx: 17) TOC[TC0]
+; DIS:      00000088  (idx: 11) GInit[RW]:
+; DIS-NEXT:       88: 3f f0 00 00
+; DIS-NEXT:       8c: 00 00 00 00
+; DIS:      00000090  (idx: 13) storesTIInit[DS]:
+; DIS-NEXT:       90: 00 00 00 00
+; DIS-NEXT: 00000090: R_POS (idx: 7) .storesTIInit
+; DIS-NEXT:       94: 00 00 00 a8
+; DIS-NEXT: 00000094: R_POS (idx: 17) TOC[TC0]
+; DIS-NEXT:       98: 00 00 00 00
+; DIS:      0000009c  (idx: 15) loadsTWInit[DS]:
+; DIS-NEXT:       9c: 00 00 00 40
+; DIS-NEXT: 0000009c: R_POS (idx: 9) .loadsTWInit
+; DIS-NEXT:       a0: 00 00 00 a8
+; DIS-NEXT: 000000a0: R_POS (idx: 17) TOC[TC0]
+; DIS-NEXT:       a4: 00 00 00 00
+; DIS:      000000a8  (idx: 19) TIInit[TE]:
 ; DIS-NEXT:       a8: 00 00 00 00
-; DIS:      000000ac  (idx: 15) loadsTWInit[DS]:
-; DIS-NEXT:       ac: 00 00 00 50
-; DIS-NEXT: 000000ac: R_POS (idx: 9) .loadsTWInit
-; DIS-NEXT:       b0: 00 00 00 b8
-; DIS-NEXT: 000000b0: R_POS (idx: 17) TOC[TC0]
-; DIS-NEXT:       b4: 00 00 00 00
-; DIS:      000000b8  (idx: 19) _$TLSML[TC]:
-; DIS-NEXT:       b8: 00 00 00 00
-; DIS-NEXT: 000000b8: R_TLSML (idx: 19) _$TLSML[TC]
-; DIS:      000000bc  (idx: 21) TIInit[TE]:
-; DIS-NEXT:       bc: 00 00 00 00
-; DIS-NEXT: 000000bc: R_TLS_LD (idx: 29) TIInit[TL]
-; DIS:      000000c0  (idx: 23) .TWInit[TE]:
-; DIS-NEXT:       c0: 00 00 00 00
-; DIS-NEXT: 000000c0: R_TLSM (idx: 31) TWInit[TL]
-; DIS:      000000c4  (idx: 25) TWInit[TE]:
-; DIS-NEXT:       c4: 00 00 00 08
-; DIS-NEXT: 000000c4: R_TLS (idx: 31) TWInit[TL]
-; DIS:      000000c8  (idx: 27) GInit[TE]:
-; DIS-NEXT:       c8: 00 00 00 98
-; DIS-NEXT: 000000c8: R_POS (idx: 11) GInit[RW]
+; DIS-NEXT: 000000a8: R_TLS_LD (idx: 29) TIInit[TL]
+; DIS:      000000ac  (idx: 21) _$TLSML[TC]:
+; DIS-NEXT:       ac: 00 00 00 00
+; DIS-NEXT: 000000ac: R_TLSML (idx: 21) _$TLSML[TC]
+; DIS:      000000b0  (idx: 23) .TWInit[TE]:
+; DIS-NEXT:       b0: 00 00 00 00
+; DIS-NEXT: 000000b0: R_TLSM (idx: 31) TWInit[TL]
+; DIS:      000000b4  (idx: 25) TWInit[TE]:
+; DIS-NEXT:       b4: 00 00 00 08
+; DIS-NEXT: 000000b4: R_TLS (idx: 31) TWInit[TL]
+; DIS:      000000b8  (idx: 27) GInit[TE]:
+; DIS-NEXT:       b8: 00 00 00 88
+; DIS-NEXT: 000000b8: R_POS (idx: 11) GInit[RW]
 
 ; DIS:      Disassembly of section .tdata:
 ; DIS:      00000000  (idx: 29) TIInit[TL]:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
index dedd36d8ffc51c..c8a737281840a0 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
@@ -57,7 +57,7 @@ entry:
 ; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x4A
+; RELOC-NEXT:     Virtual Address: 0x3A
 ; RELOC-NEXT:     Symbol: .TGInit (29)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -65,7 +65,7 @@ entry:
 ; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x4E
+; RELOC-NEXT:     Virtual Address: 0x3E
 ; RELOC-NEXT:     Symbol: TGInit (31)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -73,7 +73,7 @@ entry:
 ; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x54
+; RELOC-NEXT:     Virtual Address: 0x44
 ; RELOC-NEXT:     Symbol: .__tls_get_addr (3)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -81,7 +81,7 @@ entry:
 ; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x5A
+; RELOC-NEXT:     Virtual Address: 0x4A
 ; RELOC-NEXT:     Symbol: GInit (33)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -91,7 +91,7 @@ entry:
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Section (index: 2) .data {
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x80
+; RELOC-NEXT:   Virtual Address: 0x70
 ; RELOC-NEXT:   Symbol: .storesTIUninit (7)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -99,7 +99,7 @@ entry:
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x84
+; RELOC-NEXT:   Virtual Address: 0x74
 ; RELOC-NEXT:   Symbol: TOC (23)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -107,7 +107,7 @@ entry:
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x8C
+; RELOC-NEXT:   Virtual Address: 0x7C
 ; RELOC-NEXT:   Symbol: .loadsTGInit (9)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -115,7 +115,7 @@ entry:
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x90
+; RELOC-NEXT:   Virtual Address: 0x80
 ; RELOC-NEXT:   Symbol: TOC (23)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -123,7 +123,7 @@ entry:
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x98
+; RELOC-NEXT:   Virtual Address: 0x88
 ; RELOC-NEXT:   Symbol: _$TLSML (25)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -131,7 +131,7 @@ entry:
 ; RELOC-NEXT:   Type: R_TLSML (0x25)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x9C
+; RELOC-NEXT:   Virtual Address: 0x8C
 ; RELOC-NEXT:   Symbol: TIUninit (39)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -139,7 +139,7 @@ entry:
 ; RELOC-NEXT:   Type: R_TLS_LD (0x22)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xA0
+; RELOC-NEXT:   Virtual Address: 0x90
 ; RELOC-NEXT:   Symbol: TGInit (37)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -147,7 +147,7 @@ entry:
 ; RELOC-NEXT:   Type: R_TLSM (0x24)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xA4
+; RELOC-NEXT:   Virtual Address: 0x94
 ; RELOC-NEXT:   Symbol: TGInit (37)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -155,7 +155,7 @@ entry:
 ; RELOC-NEXT:   Type: R_TLS (0x20)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xA8
+; RELOC-NEXT:   Virtual Address: 0x98
 ; RELOC-NEXT:   Symbol: GInit (17)
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -230,7 +230,7 @@ entry:
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
 ; SYM-NEXT:       Index: 6
-; SYM-NEXT:       SectionLen: 120
+; SYM-NEXT:       SectionLen: 104
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 5
@@ -263,7 +263,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 9
 ; SYM-NEXT:     Name: .loadsTGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x40
+; SYM-NEXT:     Value (RelocatableAddress): 0x30
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -283,7 +283,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 11
 ; SYM-NEXT:     Name: .rodata
-; SYM-NEXT:     Value (RelocatableAddress): 0x78
+; SYM-NEXT:     Value (RelocatableAddress): 0x68
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -303,7 +303,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 13
 ; SYM-NEXT:     Name: const_ivar
-; SYM-NEXT:     Value (RelocatableAddress): 0x78
+; SYM-NEXT:     Value (RelocatableAddress): 0x68
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -323,7 +323,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 15
 ; SYM-NEXT:     Name: .data
-; SYM-NEXT:     Value (RelocatableAddress): 0x7C
+; SYM-NEXT:     Value (RelocatableAddress): 0x6C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -343,7 +343,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 17
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x7C
+; SYM-NEXT:     Value (RelocatableAddress): 0x6C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -363,7 +363,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 19
 ; SYM-NEXT:     Name: storesTIUninit
-; SYM-NEXT:     Value (RelocatableAddress): 0x80
+; SYM-NEXT:     Value (RelocatableAddress): 0x70
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -383,7 +383,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 21
 ; SYM-NEXT:     Name: loadsTGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x8C
+; SYM-NEXT:     Value (RelocatableAddress): 0x7C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -403,7 +403,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 23
 ; SYM-NEXT:     Name: TOC
-; SYM-NEXT:     Value (RelocatableAddress): 0x98
+; SYM-NEXT:     Value (RelocatableAddress): 0x88
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -423,7 +423,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 25
 ; SYM-NEXT:     Name: _$TLSML
-; SYM-NEXT:     Value (RelocatableAddress): 0x98
+; SYM-NEXT:     Value (RelocatableAddress): 0x88
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -443,7 +443,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 27
 ; SYM-NEXT:     Name: TIUninit
-; SYM-NEXT:     Value (RelocatableAddress): 0x9C
+; SYM-NEXT:     Value (RelocatableAddress): 0x8C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -463,7 +463,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 29
 ; SYM-NEXT:     Name: .TGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xA0
+; SYM-NEXT:     Value (RelocatableAddress): 0x90
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -483,7 +483,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 31
 ; SYM-NEXT:     Name: TGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xA4
+; SYM-NEXT:     Value (RelocatableAddress): 0x94
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -503,7 +503,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: 33
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xA8
+; SYM-NEXT:     Value (RelocatableAddress): 0x98
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -595,13 +595,12 @@ entry:
 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1)      .__tls_get_mod[PR]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 4, 4(2)
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 27) TIUninit[TC]
-; DIS-NEXT:                                      add 3, 3, 4
-; DIS-NEXT:                                      stw 6, 0(3)
+; DIS-NEXT:                                      stwx 6, 3, 4
 ; DIS-NEXT:                                      addi 1, 1, 32
 ; DIS-NEXT:                                      lwz 0, 8(1)
 ; DIS-NEXT:                                      mtlr 0
 ; DIS-NEXT:                                      blr
-; DIS:      00000040 (idx: 9) .loadsTGInit:
+; DIS:      00000030 (idx: 9) .loadsTGInit:
 ; DIS-NEXT:                                      mflr 0
 ; DIS-NEXT:                                      stwu 1, -32(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 8(2)
@@ -620,39 +619,39 @@ entry:
 ; DIS-NEXT:                                      lwz 0, 8(1)
 ; DIS-NEXT:                                      mtlr 0
 ; DIS-NEXT:                                      blr
-; DIS:      00000078 (idx: 13) const_ivar:
-; DIS-NEXT:       78: 00 00 00 06
+; DIS:      00000068 (idx: 13) const_ivar:
+; DIS-NEXT:       68: 00 00 00 06
 
 ; DIS:      Disassembly of section .data:
-; DIS:      0000007c  (idx: 17) GInit:
-; DIS-NEXT:       7c: 00 00 00 01
-; DIS:      00000080  (idx: 19) storesTIUninit[DS]:
-; DIS-NEXT:       80: 00 00 00 00
-; DIS-NEXT: 00000080: R_POS (idx: 7) .storesTIUninit
-; DIS-NEXT:       84: 00 00 00 98
-; DIS-NEXT: 00000084: R_POS (idx: 23) TOC[TC0]
+; DIS:      0000006c  (idx: 17) GInit:
+; DIS-NEXT:       6c: 00 00 00 01
+; DIS:      00000070  (idx: 19) storesTIUninit[DS]:
+; DIS-NEXT:       70: 00 00 00 00
+; DIS-NEXT: 00000070: R_POS (idx: 7) .storesTIUninit
+; DIS-NEXT:       74: 00 00 00 88
+; DIS-NEXT: 00000074: R_POS (idx: 23) TOC[TC0]
+; DIS-NEXT:       78: 00 00 00 00
+; DIS:      0000007c  (idx: 21) loadsTGInit[DS]:
+; DIS-NEXT:       7c: 00 00 00 30
+; DIS-NEXT: 0000007c: R_POS (idx: 9) .loadsTGInit
+; DIS-NEXT:       80: 00 00 00 88
+; DIS-NEXT: 00000080: R_POS (idx: 23) TOC[TC0]
+; DIS-NEXT:       84: 00 00 00 00
+; DIS:      00000088  (idx: 25) _$TLSML[TC]:
 ; DIS-NEXT:       88: 00 00 00 00
-; DIS:      0000008c  (idx: 21) loadsTGInit[DS]:
-; DIS-NEXT:       8c: 00 00 00 40
-; DIS-NEXT: 0000008c: R_POS (idx: 9) .loadsTGInit
-; DIS-NEXT:       90: 00 00 00 98
-; DIS-NEXT: 00000090: R_POS (idx: 23) TOC[TC0]
+; DIS-NEXT: 00000088: R_TLSML (idx: 25) _$TLSML[TC]
+; DIS:      0000008c  (idx: 27) TIUninit[TC]:
+; DIS-NEXT:       8c: 00 00 00 00
+; DIS-NEXT: 0000008c: R_TLS_LD (idx: 39) TIUninit[UL]
+; DIS:      00000090  (idx: 29) .TGInit[TC]:
+; DIS-NEXT:       90: 00 00 00 00
+; DIS-NEXT: 00000090: R_TLSM (idx: 37) TGInit
+; DIS:      00000094  (idx: 31) TGInit[TC]:
 ; DIS-NEXT:       94: 00 00 00 00
-; DIS:      00000098  (idx: 25) _$TLSML[TC]:
-; DIS-NEXT:       98: 00 00 00 00
-; DIS-NEXT: 00000098: R_TLSML (idx: 25) _$TLSML[TC]
-; DIS:      0000009c  (idx: 27) TIUninit[TC]:
-; DIS-NEXT:       9c: 00 00 00 00
-; DIS-NEXT: 0000009c: R_TLS_LD (idx: 39) TIUninit[UL]
-; DIS:      000000a0  (idx: 29) .TGInit[TC]:
-; DIS-NEXT:       a0: 00 00 00 00
-; DIS-NEXT: 000000a0: R_TLSM (idx: 37) TGInit
-; DIS:      000000a4  (idx: 31) TGInit[TC]:
-; DIS-NEXT:       a4: 00 00 00 00
-; DIS-NEXT: 000000a4: R_TLS (idx: 37) TGInit
-; DIS:      000000a8  (idx: 33) GInit[TC]:
-; DIS-NEXT:       a8: 00 00 00 7c
-; DIS-NEXT: 000000a8: R_POS (idx: 17) GInit
+; DIS-NEXT: 00000094: R_TLS (idx: 37) TGInit
+; DIS:      00000098  (idx: 33) GInit[TC]:
+; DIS-NEXT:       98: 00 00 00 6c
+; DIS-NEXT: 00000098: R_POS (idx: 17) GInit
 
 ; DIS:      Disassembly of section .tdata:
 ; DIS:      00000000 (idx: 37) TGInit:

>From d182badff3337e987741a6982f012a55a187df39 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Mon, 25 Sep 2023 09:43:41 -0400
Subject: [PATCH 11/27] Fix obj mode var access.

---
 llvm/lib/MC/XCOFFObjectWriter.cpp             |   3 +-
 .../PowerPC/aix-tls-ld-xcoff-reloc-large.ll   | 328 ++++++++++++++++++
 .../CodeGen/PowerPC/aix-tls-xcoff-reloc.ll    |   2 +-
 3 files changed, 331 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-tls-ld-xcoff-reloc-large.ll

diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index 343e2fc877bc3b..a898a35a057e61 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -698,7 +698,8 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
   if (Type == XCOFF::RelocationType::R_POS ||
       Type == XCOFF::RelocationType::R_TLS ||
       Type == XCOFF::RelocationType::R_TLS_LE ||
-      Type == XCOFF::RelocationType::R_TLS_IE)
+      Type == XCOFF::RelocationType::R_TLS_IE ||
+      Type == XCOFF::RelocationType::R_TLS_LD)
     // The FixedValue should be symbol's virtual address in this object file
     // plus any constant value that we might get.
     FixedValue = getVirtualAddress(SymA, SymASec) + Target.getConstant();
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-ld-xcoff-reloc-large.ll b/llvm/test/CodeGen/PowerPC/aix-tls-ld-xcoff-reloc-large.ll
new file mode 100644
index 00000000000000..657f6187bcd7f6
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-ld-xcoff-reloc-large.ll
@@ -0,0 +1,328 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
+; RUN:     -xcoff-traceback-table=false --code-model=large -filetype=obj -o %t.o < %s
+; RUN: llvm-readobj --relocs --expand-relocs %t.o | FileCheck --check-prefix=RELOC %s
+; RUN: llvm-readobj --syms %t.o | FileCheck --check-prefix=SYM %s
+; RUN: llvm-objdump -D -r --symbol-description %t.o | FileCheck --check-prefix=DIS %s
+
+ at ThreadLocalVarInit = thread_local(localdynamic) global i64 1, align 8
+ at IThreadLocalVarUninit = internal thread_local(localdynamic) global i64 0, align 8
+ at IThreadLocalVarUninit2 = internal thread_local(localdynamic) global i64 0, align 8
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
+
+define void @storeITLUninit(i64 noundef %x) {
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit)
+  store i64 %x, ptr %0, align 8
+  ret void
+}
+
+define i64 @loadTLInit() {
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
+  %1 = load i64, ptr %0, align 8
+  ret i64 %1
+}
+
+define signext i64 @loadTLUninit() {
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit)
+  store i64 1, ptr %0, align 8
+  %1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit2)
+  %2 = load i64, ptr %1, align 8
+  %add = add nsw i64 %2, 1
+  ret i64 %add
+}
+
+; RELOC:      File: {{.*}}aix-tls-ld-xcoff-reloc-large.ll.tmp.o
+; RELOC-NEXT: Format: aix5coff64-rs6000
+; RELOC-NEXT: Arch: powerpc64
+; RELOC-NEXT: AddressSize: 64bit
+; RELOC-NEXT: Relocations [
+; RELOC:      Virtual Address: 0x12
+; RELOC-NEXT:      Symbol: _$TLSML (19)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCU (0x30)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x16
+; RELOC-NEXT:      Symbol: IThreadLocalVarUninit (21)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCU (0x30)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x1A
+; RELOC-NEXT:      Symbol: _$TLSML (19)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCL (0x31)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x22
+; RELOC-NEXT:      Symbol: IThreadLocalVarUninit (21)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCL (0x31)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x4E
+; RELOC-NEXT:      Symbol: _$TLSML (19)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCU (0x30)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x52
+; RELOC-NEXT:      Symbol: ThreadLocalVarInit (23)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCU (0x30)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x56
+; RELOC-NEXT:      Symbol: _$TLSML (19)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCL (0x31)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x5E
+; RELOC-NEXT:      Symbol: ThreadLocalVarInit (23)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCL (0x31)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x8E
+; RELOC-NEXT:      Symbol: _$TLSML (19)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCU (0x30)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x92
+; RELOC-NEXT:      Symbol: IThreadLocalVarUninit (21)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCU (0x30)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x96
+; RELOC-NEXT:      Symbol: _$TLSML (19)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCL (0x31)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x9E
+; RELOC-NEXT:      Symbol: IThreadLocalVarUninit (21)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCL (0x31)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0xAA
+; RELOC-NEXT:      Symbol: IThreadLocalVarUninit2 (25)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCU (0x30)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0xAE
+; RELOC-NEXT:      Symbol: IThreadLocalVarUninit2 (25)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCL (0x31)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x110
+; RELOC-NEXT:      Symbol: _$TLSML (19)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 64
+; RELOC-NEXT:      Type: R_TLSML (0x25)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x118
+; RELOC-NEXT:      Symbol: IThreadLocalVarUninit (29)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 64
+; RELOC-NEXT:      Type: R_TLS_LD (0x22)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x120
+; RELOC-NEXT:      Symbol: ThreadLocalVarInit (27)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 64
+; RELOC-NEXT:      Type: R_TLS_LD (0x22)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x128
+; RELOC-NEXT:      Symbol: IThreadLocalVarUninit2 (31)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 64
+; RELOC-NEXT:      Type: R_TLS_LD (0x22)
+; RELOC-NEXT:    }
+
+; SYM:      File: {{.*}}aix-tls-ld-xcoff-reloc-large.ll.tmp.o
+; SYM-NEXT: Format: aix5coff64-rs6000
+; SYM-NEXT: Arch: powerpc64
+; SYM-NEXT: AddressSize: 64bit
+; SYM-NEXT: Symbols [
+; SYM:    Index: 19
+; SYM-NEXT:    Name: _$TLSML
+; SYM-NEXT:    Value (RelocatableAddress): 0x110
+; SYM-NEXT:    Section: .data
+; SYM-NEXT:    Type: 0x0
+; SYM-NEXT:    StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT:    NumberOfAuxEntries: 1
+; SYM-NEXT:    CSECT Auxiliary Entry {
+; SYM-NEXT:      Index: 20
+; SYM-NEXT:      SectionLen: 8
+; SYM-NEXT:      ParameterHashIndex: 0x0
+; SYM-NEXT:      TypeChkSectNum: 0x0
+; SYM-NEXT:      SymbolAlignmentLog2: 3
+; SYM-NEXT:      SymbolType: XTY_SD (0x1)
+; SYM-NEXT:      StorageMappingClass: XMC_TC (0x3)
+; SYM-NEXT:      Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:    }
+; SYM-NEXT:  }
+; SYM:    Index: 21
+; SYM-NEXT:    Name: IThreadLocalVarUninit
+; SYM-NEXT:    Value (RelocatableAddress): 0x118
+; SYM-NEXT:    Section: .data
+; SYM-NEXT:    Type: 0x0
+; SYM-NEXT:    StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT:    NumberOfAuxEntries: 1
+; SYM-NEXT:    CSECT Auxiliary Entry {
+; SYM-NEXT:      Index: 22
+; SYM-NEXT:      SectionLen: 8
+; SYM-NEXT:      ParameterHashIndex: 0x0
+; SYM-NEXT:      TypeChkSectNum: 0x0
+; SYM-NEXT:      SymbolAlignmentLog2: 3
+; SYM-NEXT:      SymbolType: XTY_SD (0x1)
+; SYM-NEXT:      StorageMappingClass: XMC_TE (0x16)
+; SYM-NEXT:      Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:    }
+; SYM-NEXT:  }
+; SYM:    Index: 23
+; SYM-NEXT:    Name: ThreadLocalVarInit
+; SYM-NEXT:    Value (RelocatableAddress): 0x120
+; SYM-NEXT:    Section: .data
+; SYM-NEXT:    Type: 0x0
+; SYM-NEXT:    StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT:    NumberOfAuxEntries: 1
+; SYM-NEXT:    CSECT Auxiliary Entry {
+; SYM-NEXT:      Index: 24
+; SYM-NEXT:      SectionLen: 8
+; SYM-NEXT:      ParameterHashIndex: 0x0
+; SYM-NEXT:      TypeChkSectNum: 0x0
+; SYM-NEXT:      SymbolAlignmentLog2: 3
+; SYM-NEXT:      SymbolType: XTY_SD (0x1)
+; SYM-NEXT:      StorageMappingClass: XMC_TE (0x16)
+; SYM-NEXT:      Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:    }
+; SYM-NEXT:  }
+; SYM:    Index: 25
+; SYM-NEXT:    Name: IThreadLocalVarUninit2
+; SYM-NEXT:    Value (RelocatableAddress): 0x128
+; SYM-NEXT:    Section: .data
+; SYM-NEXT:    Type: 0x0
+; SYM-NEXT:    StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT:    NumberOfAuxEntries: 1
+; SYM-NEXT:    CSECT Auxiliary Entry {
+; SYM-NEXT:      Index: 26
+; SYM-NEXT:      SectionLen: 8
+; SYM-NEXT:      ParameterHashIndex: 0x0
+; SYM-NEXT:      TypeChkSectNum: 0x0
+; SYM-NEXT:      SymbolAlignmentLog2: 3
+; SYM-NEXT:      SymbolType: XTY_SD (0x1)
+; SYM-NEXT:      StorageMappingClass: XMC_TE (0x16)
+; SYM-NEXT:      Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:    }
+; SYM-NEXT:  }
+; SYM:    Index: 27
+; SYM-NEXT:    Name: ThreadLocalVarInit
+; SYM-NEXT:    Value (RelocatableAddress): 0x0
+; SYM-NEXT:    Section: .tdata
+; SYM-NEXT:    Type: 0x0
+; SYM-NEXT:    StorageClass: C_EXT (0x2)
+; SYM-NEXT:    NumberOfAuxEntries: 1
+; SYM-NEXT:    CSECT Auxiliary Entry {
+; SYM-NEXT:      Index: 28
+; SYM-NEXT:      SectionLen: 8
+; SYM-NEXT:      ParameterHashIndex: 0x0
+; SYM-NEXT:      TypeChkSectNum: 0x0
+; SYM-NEXT:      SymbolAlignmentLog2: 3
+; SYM-NEXT:      SymbolType: XTY_SD (0x1)
+; SYM-NEXT:      StorageMappingClass: XMC_TL (0x14)
+; SYM-NEXT:      Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:    }
+; SYM-NEXT:  }
+; SYM:    Index: 29
+; SYM-NEXT:    Name: IThreadLocalVarUninit
+; SYM-NEXT:    Value (RelocatableAddress): 0x8
+; SYM-NEXT:    Section: .tbss
+; SYM-NEXT:    Type: 0x0
+; SYM-NEXT:    StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT:    NumberOfAuxEntries: 1
+; SYM-NEXT:    CSECT Auxiliary Entry {
+; SYM-NEXT:      Index: 30
+; SYM-NEXT:      SectionLen: 8
+; SYM-NEXT:      ParameterHashIndex: 0x0
+; SYM-NEXT:      TypeChkSectNum: 0x0
+; SYM-NEXT:      SymbolAlignmentLog2: 3
+; SYM-NEXT:      SymbolType: XTY_CM (0x3)
+; SYM-NEXT:      StorageMappingClass: XMC_UL (0x15)
+; SYM-NEXT:      Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:    }
+; SYM-NEXT:  }
+; SYM:    Index: 31
+; SYM-NEXT:    Name: IThreadLocalVarUninit2
+; SYM-NEXT:    Value (RelocatableAddress): 0x10
+; SYM-NEXT:    Section: .tbss
+; SYM-NEXT:    Type: 0x0
+; SYM-NEXT:    StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT:    NumberOfAuxEntries: 1
+; SYM-NEXT:    CSECT Auxiliary Entry {
+; SYM-NEXT:      Index: 32
+; SYM-NEXT:      SectionLen: 8
+; SYM-NEXT:      ParameterHashIndex: 0x0
+; SYM-NEXT:      TypeChkSectNum: 0x0
+; SYM-NEXT:      SymbolAlignmentLog2: 3
+; SYM-NEXT:      SymbolType: XTY_CM (0x3)
+; SYM-NEXT:      StorageMappingClass: XMC_UL (0x15)
+; SYM-NEXT:      Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:    }
+; SYM-NEXT:  }
+
+; DIS:      {{.*}}aix-tls-ld-xcoff-reloc-large.ll.tmp.o:	file format aix5coff64-rs6000
+; DIS:      Disassembly of section .data:
+; DIS:      0000000000000110 (idx: 19) _$TLSML[TC]:
+; DIS-NEXT:     110: 00 00 00 00
+; DIS-NEXT:     0000000000000110:  R_TLSML	(idx: 19) _$TLSML[TC]
+; DIS-NEXT:     114: 00 00 00 00
+; DIS:      0000000000000118 (idx: 21) IThreadLocalVarUninit[TE]:
+; DIS-NEXT:     118: 00 00 00 00
+; DIS-NEXT:     0000000000000118:  R_TLS_LD	(idx: 29) IThreadLocalVarUninit[UL]
+; DIS-NEXT:     11c: 00 00 00 08
+; DIS:      0000000000000120 (idx: 23) ThreadLocalVarInit[TE]:
+; DIS-NEXT:     120: 00 00 00 00
+; DIS-NEXT:     0000000000000120:  R_TLS_LD	(idx: 27) ThreadLocalVarInit[TL]
+; DIS-NEXT:     124: 00 00 00 00
+; DIS:      0000000000000128 (idx: 25) IThreadLocalVarUninit2[TE]:
+; DIS-NEXT:     128: 00 00 00 00
+; DIS-NEXT:     0000000000000128:  R_TLS_LD	(idx: 31) IThreadLocalVarUninit2[UL]
+; DIS-NEXT:     12c: 00 00 00 10
+
+; DIS:      Disassembly of section .tdata:
+; DIS:      0000000000000000 (idx: 27) ThreadLocalVarInit[TL]:
+; DIS-NEXT:        0: 00 00 00 00
+; DIS-NEXT:        4: 00 00 00 01
+
+; DIS:      Disassembly of section .tbss:
+; DIS:      0000000000000008 (idx: 29) IThreadLocalVarUninit[UL]:
+; DIS-NEXT: ...
+; DIS:      0000000000000010 (idx: 31) IThreadLocalVarUninit2[UL]:
+; DIS-NEXT: ...
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
index c8a737281840a0..20110aff8ce654 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
@@ -641,7 +641,7 @@ entry:
 ; DIS-NEXT:       88: 00 00 00 00
 ; DIS-NEXT: 00000088: R_TLSML (idx: 25) _$TLSML[TC]
 ; DIS:      0000008c  (idx: 27) TIUninit[TC]:
-; DIS-NEXT:       8c: 00 00 00 00
+; DIS-NEXT:       8c: 00 00 00 04
 ; DIS-NEXT: 0000008c: R_TLS_LD (idx: 39) TIUninit[UL]
 ; DIS:      00000090  (idx: 29) .TGInit[TC]:
 ; DIS-NEXT:       90: 00 00 00 00

>From 28e29f659ba31ab2237f07ba99e8a5a88a6718db Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Wed, 27 Sep 2023 02:57:07 -0400
Subject: [PATCH 12/27] [NFC] Incorporate comments.

---
 .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp  |  7 ++--
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     |  5 +--
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   | 15 +++++----
 llvm/lib/Target/PowerPC/PPCISelLowering.h     |  4 +--
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       |  2 +-
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 33 +++++++++----------
 6 files changed, 34 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 00dda306a584c7..5b8f7f06619047 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -234,9 +234,10 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer {
       // On AIX, we have TLS variable offsets (symbol@({gd|ie|le|ld}) depending
       // on the TLS access method (or model). For the general-dynamic access
       // method, we also have region handle (symbol at m) for each variable. For
-      // local-dynamic, a module handle (__TLSML[TC]@ml) for all variables. For
-      // local-exec and initial-exec, we have a thread pointer, in r13 for
-      // 64-bit mode and returned by .__get_tpointer for 32-bit mode.
+      // local-dynamic, there is a module handle (__TLSML[TC]@ml) for all
+      // variables. Finally for local-exec and initial-exec, we have a thread
+      // pointer, in r13 for 64-bit mode and returned by .__get_tpointer for
+      // 32-bit mode.
       if (Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSIE ||
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 099299bff618f7..34231802ab7c47 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -867,7 +867,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
         // hacked this flag which should have been named MO_TLSLDM_FLAG: on AIX
         // the ML relocation type is only valid for a reference to a TOC symbol
         // from the symbol itself, and right now its only user is the symbol
-        // "_$TLSML". Use symbol name to decide that R_TLSML is expected.
+        // "_$TLSML". The symbol name is used to decide that R_TLSML is
+        // expected.
         return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML;
       if (IsAIX)
         return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD;
@@ -2769,7 +2770,7 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
           getObjFileLowering().getSectionForTOCEntry(S, TM));
     } else if (I.first.second ==
                MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML) {
-      // AIX assembler expects TC storage-mapping class for the "_$TLSML"
+      // AIX assembler expects the TC storage-mapping class for the "_$TLSML"
       // symbol.
       MCSection *MCSect = getObjFileLowering().getContext().getXCOFFSection(
           cast<MCSymbolXCOFF>(I.first.first)->getSymbolTableName(),
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 86c88748d6be61..10116c83565a45 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3417,13 +3417,14 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
 
   if (Model == TLSModel::LocalDynamic) {
     // For local-dynamic on AIX, we need to generate one TOC entry for each
-    // variable offset, and single module-handle TOC entry for the entire file.
-
-    // We are not (1) create GV node (2) call getTOCEntry for the module-handle
-    // due to the reason that the module-handle should not be materialized (i.e.
-    // there should be no symbol-table entry referring to the module-handle).
-    // Instead we will create reference to __TLSML[TC]@ml in PPCTLSDynamicCall
-    // when processing the TLSLD_AIX pseudo node.
+    // variable offset, and a single module-handle TOC entry for the entire
+    // file.
+
+    // We are not able to (1) create a GV node, and (2) call getTOCEntry for the
+    // module-handle due to the reason that the module-handle should not be
+    // materialized (i.e. there should be no symbol-table entry referring to the
+    // module-handle). Instead we will create reference to __TLSML[TC]@ml in
+    // PPCTLSDynamicCall when processing the TLSLD_AIX pseudo node.
     SDValue ModuleHandle = DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT);
     SDValue VariableOffsetTGA =
         DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index f09a624fe7235c..7a4200fab81e11 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -380,8 +380,8 @@ namespace llvm {
     GET_TLS_MOD_AIX,
 
     /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(variable offset)
-    /// Op that internally creates TOC entry for the "_$TLSML" symbol, generates
-    /// a GET_TLS_MOD_AIX node which will be expanded into a call to
+    /// Op that internally creates a TOC entry for the "_$TLSML" symbol, and
+    /// generates a GET_TLS_MOD_AIX node which will be expanded into a call to
     /// .__tls_get_mod, and then adds the variable offset with the result from
     /// the call. This node is used in both 32-bit and 64-bit modes. The only
     /// difference is register class.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 8af426623d5496..9a76a727ecde6f 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -3296,7 +3296,7 @@ def TLSGDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$offset, gprc:$handl
                           "#TLSGDAIX",
                           [(set i32:$rD,
                             (PPCTlsgdAIX i32:$offset, i32:$handle))]>;
-// This pseudo is expanded to load module-handle in R3, and the call to GETtlsMOD32AIX.
+// This pseudo is expanded to load the module-handle in R3, and the call to GETtlsMOD32AIX.
 def TLSLDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins),
                           "#TLSLDAIX", [(set i32:$rD, (PPCTlsldAIX))]>;
 // LR is a true define, while the rest of the Defs are clobbers.  R3 is
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 71825dca77bcf0..8a839af1f2eb71 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -48,8 +48,12 @@ namespace {
     bool processBlock(MachineBasicBlock &MBB) {
       bool Changed = false;
       bool NeedFence = true;
-      bool Is64Bit = MBB.getParent()->getSubtarget<PPCSubtarget>().isPPC64();
-      bool IsAIX = MBB.getParent()->getSubtarget<PPCSubtarget>().isAIXABI();
+      const PPCSubtarget &Subtarget =
+          MBB.getParent()->getSubtarget<PPCSubtarget>();
+      bool Is64Bit = Subtarget.isPPC64();
+      bool IsAIX = Subtarget.isAIXABI();
+      bool IsLargeModel =
+          Subtarget.getTargetMachine().getCodeModel() == CodeModel::Large;
       bool IsPCREL = false;
       MachineFunction *MF = MBB.getParent();
       MachineRegisterInfo &RegInfo = MF->getRegInfo();
@@ -64,15 +68,16 @@ namespace {
         // There are a number of slight differences in code generation
         // when we call .__get_tpointer (32-bit AIX TLS).
         bool IsTLSTPRelMI = MI.getOpcode() == PPC::GETtlsTpointer32AIX;
+        bool IsTLSLDAIXMI = (MI.getOpcode() == PPC::TLSLDAIX8 ||
+                             MI.getOpcode() == PPC::TLSLDAIX);
 
         if (MI.getOpcode() != PPC::ADDItlsgdLADDR &&
             MI.getOpcode() != PPC::ADDItlsldLADDR &&
             MI.getOpcode() != PPC::ADDItlsgdLADDR32 &&
             MI.getOpcode() != PPC::ADDItlsldLADDR32 &&
-            MI.getOpcode() != PPC::TLSLDAIX &&
-            MI.getOpcode() != PPC::TLSLDAIX8 &&
             MI.getOpcode() != PPC::TLSGDAIX &&
-            MI.getOpcode() != PPC::TLSGDAIX8 && !IsTLSTPRelMI && !IsPCREL) {
+            MI.getOpcode() != PPC::TLSGDAIX8 && !IsTLSTPRelMI && !IsPCREL &&
+            !IsTLSLDAIXMI) {
           // Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP
           // as scheduling fences, we skip creating fences if we already
           // have existing ADJCALLSTACKDOWN/UP to avoid nesting,
@@ -92,9 +97,7 @@ namespace {
         Register InReg = PPC::NoRegister;
         Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
         Register GPR4 = Is64Bit ? PPC::X4 : PPC::R4;
-        if (!IsPCREL && !IsTLSTPRelMI &&
-            !(MI.getOpcode() == PPC::TLSLDAIX8 ||
-              MI.getOpcode() == PPC::TLSLDAIX))
+        if (!IsPCREL && !IsTLSTPRelMI && !IsTLSLDAIXMI)
           InReg = MI.getOperand(1).getReg();
         DebugLoc DL = MI.getDebugLoc();
 
@@ -160,15 +163,10 @@ namespace {
                                                               .addImm(0);
 
         if (IsAIX) {
-          if (MI.getOpcode() == PPC::TLSLDAIX8 ||
-              MI.getOpcode() == PPC::TLSLDAIX) {
+          if (IsTLSLDAIXMI) {
             // It is better to put TLSLDAIX node before LoadOffsetToc node,
             // because LoadOffsetToc node can use clobbers r4/r5. Search for the
             // first paired LoadOffsetToc node within the same BB.
-            const PPCSubtarget &Subtarget =
-                MBB.getParent()->getSubtarget<PPCSubtarget>();
-            bool IsLargeModel =
-                Subtarget.getTargetMachine().getCodeModel() == CodeModel::Large;
             unsigned LDTocOp =
                 Is64Bit ? (IsLargeModel ? PPC::LDtocL : PPC::LDtoc)
                         : (IsLargeModel ? PPC::LWZtocL : PPC::LWZtoc);
@@ -197,12 +195,13 @@ namespace {
                 int MatchCount = 0;
                 for (MachineOperand &MO : UseIter->operands()) {
                   if (MO.isReg() && MO.isUse()) {
-                    if (RegInfo.hasOneDef(MO.getReg())) {
-                      if (RegInfo.getOneDef(MO.getReg())
+                    Register OffsetReg = MO.getReg();
+                    if (RegInfo.hasOneDef(OffsetReg)) {
+                      if (RegInfo.getOneDef(OffsetReg)
                               ->getParent()
                               ->getOpcode() == LDTocOp) {
                         LoadOffsetToc =
-                            RegInfo.getOneDef(MO.getReg())->getParent();
+                            RegInfo.getOneDef(OffsetReg)->getParent();
                         ++MatchCount;
                       }
                     } else {

>From 92751124e9f3c708a860001b4d4c7ff2a289b1ef Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Wed, 27 Sep 2023 21:11:49 -0400
Subject: [PATCH 13/27] [NFC] Add FIXME to highlight existing issue: The
 "_$TLSML" symbol did not lower through getTOCEntry().

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 10116c83565a45..74f433da4ab073 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3420,10 +3420,10 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
     // variable offset, and a single module-handle TOC entry for the entire
     // file.
 
-    // We are not able to (1) create a GV node, and (2) call getTOCEntry for the
-    // module-handle due to the reason that the module-handle should not be
-    // materialized (i.e. there should be no symbol-table entry referring to the
-    // module-handle). Instead we will create reference to __TLSML[TC]@ml in
+    // FIXME: We are not able to (1) create a GV node, and (2) call getTOCEntry
+    // for the module-handle due to the reason that the module-handle should not
+    // be materialized (i.e. there should be no symbol-table entry referring to
+    // the module-handle). Instead we will create reference to __TLSML[TC]@ml in
     // PPCTLSDynamicCall when processing the TLSLD_AIX pseudo node.
     SDValue ModuleHandle = DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT);
     SDValue VariableOffsetTGA =

>From 66aa3aaa12dbb5bf6b7c585df23fc14a66906c97 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Wed, 25 Oct 2023 21:07:42 -0400
Subject: [PATCH 14/27] Address the following FIXME: The "_$TLSML" symbol did
 not lower through getTOCEntry().

The pseudo node "TLSLDAIX" now takes the _$TLSML GV node, and the
ppc-tls-dynamic-call pass is updated to fine tune the relative order
between the LoadOffset at toc node and the .__tls_get_mod node.
---
 llvm/lib/MC/XCOFFObjectWriter.cpp             |  14 +++
 .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp  |   6 +-
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     |  43 +++----
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  17 ++-
 llvm/lib/Target/PowerPC/PPCISelLowering.h     |   2 +-
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td      |   6 +-
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       |   8 +-
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 105 ++++++++----------
 .../test/CodeGen/PowerPC/aix-tls-gd-double.ll |  20 ++--
 llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll   |  20 ++--
 .../CodeGen/PowerPC/aix-tls-gd-longlong.ll    |  36 +++---
 .../PowerPC/aix-tls-ld-xcoff-reloc-large.ll   |  12 +-
 .../CodeGen/PowerPC/aix-tls-local-dynamic.ll  |   2 +-
 .../CodeGen/PowerPC/aix-tls-xcoff-reloc.ll    |   4 +-
 14 files changed, 145 insertions(+), 150 deletions(-)

diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index a898a35a057e61..cfb04b8e6b7af6 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -649,6 +649,14 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
                                          uint64_t &FixedValue) {
   auto getIndex = [this](const MCSymbol *Sym,
                          const MCSectionXCOFF *ContainingCsect) {
+    // Fixup relocation flag for AIX TLS local-dynamic mode.
+    if (Sym->getName().equals("_Renamed..5f24__TLSML[UA]")) {
+      for (auto Iter : SymbolIndexMap)
+        if (Iter.first->getName().equals("_Renamed..5f24__TLSML[TC]"))
+          return Iter.second;
+      llvm_unreachable("For AIX TLS local-dynamic mode: "
+                       "_Renamed..5f24__TLSML[TC] not found.");
+    }
     // If we could not find the symbol directly in SymbolIndexMap, this symbol
     // could either be a temporary symbol or an undefined symbol. In this case,
     // we would need to have the relocation reference its csect instead.
@@ -1146,6 +1154,9 @@ void XCOFFObjectWriter::writeSymbolTable(const MCAsmLayout &Layout) {
                      /*NumberOfAuxEntries=*/0);
 
   for (const auto &Csect : UndefinedCsects) {
+    // AIX does not need to emit for the _$TLSML symbol.
+    if (Csect.getSymbolTableName().equals(StringRef("_$TLSML")))
+      continue;
     writeSymbolEntryForControlSection(Csect, XCOFF::ReservedSectionNum::N_UNDEF,
                                       Csect.MCSec->getStorageClass());
   }
@@ -1361,6 +1372,9 @@ void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) {
 
   // Calculate indices for undefined symbols.
   for (auto &Csect : UndefinedCsects) {
+    // AIX does not need to emit for the _$TLSML symbol.
+    if (Csect.getSymbolTableName().equals(StringRef("_$TLSML")))
+      continue;
     Csect.Size = 0;
     Csect.Address = 0;
     Csect.SymbolTableIndex = SymbolTableIndex;
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 5b8f7f06619047..cc50a29d67a723 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -242,10 +242,12 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer {
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSIE ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLE ||
-          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD ||
-          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML)
+          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD)
         OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << "@"
            << MCSymbolRefExpr::getVariantKindName(Kind) << '\n';
+      else if (Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML)
+        OS << "\t.tc " << TCSym->getName() << "," << TCSym->getName() << "@"
+           << MCSymbolRefExpr::getVariantKindName(Kind) << '\n';
       else
         OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << '\n';
 
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 34231802ab7c47..032bab255665b4 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -723,8 +723,6 @@ static MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO,
     return AP.GetJTISymbol(MO.getIndex());
   case MachineOperand::MO_BlockAddress:
     return AP.GetBlockAddressSymbol(MO.getBlockAddress());
-  case MachineOperand::MO_ExternalSymbol:
-    return AP.OutContext.getOrCreateSymbol(MO.getSymbolName());
   default:
     llvm_unreachable("Unexpected operand type to get symbol.");
   }
@@ -758,17 +756,6 @@ getTOCEntryTypeForMO(const MachineOperand &MO) {
     llvm_unreachable("Unexpected operand type to get TOC type.");
   }
 }
-
-// FIXME: find alternative approach to get rid of this hack.
-// On AIX, TLS-local-dynamic requires that the symbol for the module handle must
-// have the name "_$TLSML". This symbol is used as one TOC symbol reference
-// itself with an ML relocation type, thus it has "[TC]" attached to its name.
-static inline bool isSpecialAIXSymbolTLSML(const MachineOperand &MO,
-                                           const bool IsAIX) {
-  return IsAIX && MO.isSymbol() &&
-         (std::strcmp(MO.getSymbolName(), "_$TLSML[TC]") == 0);
-}
-
 /// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
 /// the current output stream.
 ///
@@ -862,13 +849,15 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     if (Flag == PPCII::MO_TLSGD_FLAG || Flag == PPCII::MO_GOT_TLSGD_PCREL_FLAG)
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD;
     if (MO.getTargetFlags() & PPCII::MO_TLSLD_FLAG) {
-      if (isSpecialAIXSymbolTLSML(MO, IsAIX))
+      if (IsAIX && MO.isGlobal() &&
+          (MO.getGlobal()->getName().equals("_$TLSML")))
         // FIXME: Due to the size limit of MachineOperand::SubReg_TargetFlags,
         // hacked this flag which should have been named MO_TLSLDM_FLAG: on AIX
         // the ML relocation type is only valid for a reference to a TOC symbol
         // from the symbol itself, and right now its only user is the symbol
         // "_$TLSML". The symbol name is used to decide that R_TLSML is
         // expected.
+        // FIX this once #69695 committed.
         return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML;
       if (IsAIX)
         return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD;
@@ -991,8 +980,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LWZ);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
-            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
            "Invalid operand for LWZtoc.");
 
     // Map the operand to its corresponding MCSymbol.
@@ -1081,8 +1069,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LD);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
-            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
            "Invalid operand!");
 
     // Map the operand to its corresponding MCSymbol.
@@ -1120,8 +1107,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::ADDIS);
 
     const MachineOperand &MO = MI->getOperand(2);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
-            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
            "Invalid operand for ADDIStocHA.");
 
     // Map the machine operand to its corresponding MCSymbol.
@@ -1154,8 +1140,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LWZ);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
-            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
            "Invalid operand for LWZtocL.");
 
     // Map the machine operand to its corresponding MCSymbol.
@@ -1187,8 +1172,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::ADDIS8);
 
     const MachineOperand &MO = MI->getOperand(2);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
-            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
            "Invalid operand for ADDIStocHA8!");
 
     const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
@@ -1198,8 +1182,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     const bool GlobalToc =
         MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal());
     if (GlobalToc || MO.isJTI() || MO.isBlockAddress() ||
-        (MO.isCPI() && TM.getCodeModel() == CodeModel::Large) ||
-        isSpecialAIXSymbolTLSML(MO, IsAIX))
+        (MO.isCPI() && TM.getCodeModel() == CodeModel::Large))
       MOSymbol = lookUpOrCreateTOCEntry(MOSymbol, getTOCEntryTypeForMO(MO), VK);
 
     VK = IsAIX ? MCSymbolRefExpr::VK_PPC_U : MCSymbolRefExpr::VK_PPC_TOC_HA;
@@ -1228,8 +1211,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LD);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
-            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() ||
+            MO.isBlockAddress()) &&
            "Invalid operand for LDtocL!");
 
     LLVM_DEBUG(assert(
@@ -2098,6 +2081,10 @@ void PPCAIXAsmPrinter::emitLinkage(const GlobalValue *GV,
     }
   }
 
+  // Do not emit _$TLSML symbol.
+  if (GVSym->getName().equals(StringRef("_Renamed..5f24__TLSML[UA]")))
+    return;
+
   OutStreamer->emitXCOFFSymbolLinkageWithVisibility(GVSym, LinkageAttr,
                                                     VisibilityAttr);
 }
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 74f433da4ab073..9209b95dde08d5 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3420,14 +3420,19 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
     // variable offset, and a single module-handle TOC entry for the entire
     // file.
 
-    // FIXME: We are not able to (1) create a GV node, and (2) call getTOCEntry
-    // for the module-handle due to the reason that the module-handle should not
-    // be materialized (i.e. there should be no symbol-table entry referring to
-    // the module-handle). Instead we will create reference to __TLSML[TC]@ml in
-    // PPCTLSDynamicCall when processing the TLSLD_AIX pseudo node.
-    SDValue ModuleHandle = DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT);
     SDValue VariableOffsetTGA =
         DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
+    Module *M = DAG.getMachineFunction().getFunction().getParent();
+    GlobalVariable *TLSGV =
+        dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
+            StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
+    assert(TLSGV && "Not able to create GV for _$TLSML.");
+    // FIXME: create MO_TLSML_FLAG once #69695 committed.
+    SDValue ModuleHandleTGA =
+        DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
+    SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
+    SDValue ModuleHandle =
+        DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
     SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
     return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
   }
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 7a4200fab81e11..2b6fcbc93f57e6 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -384,7 +384,7 @@ namespace llvm {
     /// generates a GET_TLS_MOD_AIX node which will be expanded into a call to
     /// .__tls_get_mod, and then adds the variable offset with the result from
     /// the call. This node is used in both 32-bit and 64-bit modes. The only
-    /// difference is register class.
+    /// difference is the register class.
     TLSLD_AIX,
 
     /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index b51af57b796f81..2949d58ab66479 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1602,9 +1602,9 @@ def TLSGDAIX8 :
                      "#TLSGDAIX8",
                      [(set i64:$rD,
                        (PPCTlsgdAIX i64:$offset, i64:$handle))]>;
-// This pseudo is expanded to load module-handle in X3, and the call to GETtlsMOD64AIX.
-def TLSLDAIX8 : PPCEmitTimePseudo<(outs g8rc:$rD), (ins),
-                     "#TLSLDAIX8", [(set i64:$rD, (PPCTlsldAIX))]>;
+// This pseudo is expanded to the call to GETtlsMOD64AIX.
+def TLSLDAIX8 : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$handle),
+                     "#TLSLDAIX8", [(set i64:$rD, (PPCTlsldAIX i64:$handle))]>;
 // Combined op for ADDItlsldL and GETtlsADDR, late expanded.  X3 and LR8
 // are true defines, while the rest of the Defs are clobbers.
 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 9a76a727ecde6f..5fe4f9320ae561 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -220,7 +220,7 @@ def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR",
                                  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                                  SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>;
 def PPCTlsgdAIX     : SDNode<"PPCISD::TLSGD_AIX", SDTIntBinOp>;
-def PPCTlsldAIX     : SDNode<"PPCISD::TLSLD_AIX", SDTIntLeaf>;
+def PPCTlsldAIX     : SDNode<"PPCISD::TLSLD_AIX", SDTIntUnaryOp>;
 def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
 def PPCaddiTlsldL   : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
 def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
@@ -3296,9 +3296,9 @@ def TLSGDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$offset, gprc:$handl
                           "#TLSGDAIX",
                           [(set i32:$rD,
                             (PPCTlsgdAIX i32:$offset, i32:$handle))]>;
-// This pseudo is expanded to load the module-handle in R3, and the call to GETtlsMOD32AIX.
-def TLSLDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins),
-                          "#TLSLDAIX", [(set i32:$rD, (PPCTlsldAIX))]>;
+// This pseudo is expanded to the call to GETtlsMOD32AIX.
+def TLSLDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$handle),
+                          "#TLSLDAIX", [(set i32:$rD, (PPCTlsldAIX i32:$handle))]>;
 // LR is a true define, while the rest of the Defs are clobbers.  R3 is
 // explicitly defined when this op is created, so not mentioned here.
 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 8a839af1f2eb71..a2843ca0a350d8 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -57,9 +57,6 @@ namespace {
       bool IsPCREL = false;
       MachineFunction *MF = MBB.getParent();
       MachineRegisterInfo &RegInfo = MF->getRegInfo();
-      const TargetRegisterClass *GPRNoZero =
-          Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
-                  : &PPC::GPRC_and_GPRC_NOR0RegClass;
 
       for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
            I != IE;) {
@@ -97,7 +94,7 @@ namespace {
         Register InReg = PPC::NoRegister;
         Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
         Register GPR4 = Is64Bit ? PPC::X4 : PPC::R4;
-        if (!IsPCREL && !IsTLSTPRelMI && !IsTLSLDAIXMI)
+        if (!IsPCREL && !IsTLSTPRelMI)
           InReg = MI.getOperand(1).getReg();
         DebugLoc DL = MI.getDebugLoc();
 
@@ -164,84 +161,74 @@ namespace {
 
         if (IsAIX) {
           if (IsTLSLDAIXMI) {
-            // It is better to put TLSLDAIX node before LoadOffsetToc node,
-            // because LoadOffsetToc node can use clobbers r4/r5. Search for the
-            // first paired LoadOffsetToc node within the same BB.
+            // The relative order between the LoadOffset at toc node, and the
+            // ._tls_get_mod node is being tuned here. It is better to put the
+            // LoadOffset at toc node after the call, since the LoadOffset at toc node
+            // can use clobbers r4/r5. Search for the pattern of two Load at toc
+            // nodes, and then move the LoadOffset at toc node right before the
+            // node that uses the OutReg of the ._tls_get_mod node.
             unsigned LDTocOp =
                 Is64Bit ? (IsLargeModel ? PPC::LDtocL : PPC::LDtoc)
                         : (IsLargeModel ? PPC::LWZtocL : PPC::LWZtoc);
-            MachineBasicBlock::iterator Anchor = I;
             if (!RegInfo.use_empty(OutReg)) {
               std::set<MachineInstr *> Uses;
-              // Collect all instructions that use OutReg
-              for (MachineOperand &MO : RegInfo.use_operands(OutReg)) {
-                if (Uses.count(MO.getParent()))
-                  continue;
+              // Collect all instructions that use the OutReg.
+              for (MachineOperand &MO : RegInfo.use_operands(OutReg))
                 Uses.insert(MO.getParent());
-              }
-              // Find the first Add within current BB.
+              // Find the first user (e.g.: lwax/stfdx) within the current BB.
               MachineBasicBlock::iterator UseIter = MBB.begin();
-              for (MachineBasicBlock::iterator AE = MBB.end(); UseIter != AE;
+              for (MachineBasicBlock::iterator IE = MBB.end(); UseIter != IE;
                    ++UseIter)
                 if (Uses.count(&*UseIter))
                   break;
 
               if (UseIter != MBB.end()) {
-                // Get the instruction that defines the other used register
-                // operand of UseIter. The match pattern is that: UseIter has
-                // exactly one used-operand defined by LDTocOp
-                // (LDtocL/LDtoc/LWZtocL/LWZtoc).
-                MachineInstr *LoadOffsetToc = nullptr;
-                int MatchCount = 0;
-                for (MachineOperand &MO : UseIter->operands()) {
+                // Collect associated Load at toc nodes.
+                std::set<MachineInstr *> LoadFromTocs;
+                for (MachineOperand &MO : UseIter->operands())
                   if (MO.isReg() && MO.isUse()) {
-                    Register OffsetReg = MO.getReg();
-                    if (RegInfo.hasOneDef(OffsetReg)) {
-                      if (RegInfo.getOneDef(OffsetReg)
-                              ->getParent()
-                              ->getOpcode() == LDTocOp) {
-                        LoadOffsetToc =
-                            RegInfo.getOneDef(OffsetReg)->getParent();
-                        ++MatchCount;
-                      }
+                    if (RegInfo.hasOneDef(MO.getReg())) {
+                      MachineInstr *Temp =
+                          RegInfo.getOneDef(MO.getReg())->getParent();
+                      if (Temp == &MI && RegInfo.hasOneDef(InReg))
+                        Temp = RegInfo.getOneDef(InReg)->getParent();
+                      if (Temp->getOpcode() == LDTocOp)
+                        LoadFromTocs.insert(Temp);
                     } else {
                       // FIXME: analyze this scenario if there is one.
-                      MatchCount = 0;
+                      LoadFromTocs.clear();
                       break;
                     }
                   }
-                }
-                // Get the iterator.
-                if (MatchCount == 1 && LoadOffsetToc) {
-                  Anchor = MBB.begin();
-                  for (MachineBasicBlock::iterator AE = MBB.end(); Anchor != AE;
-                       ++Anchor)
-                    if (&*Anchor == LoadOffsetToc)
-                      break;
 
-                  if (Anchor == MBB.end())
-                    Anchor = I;
+                // Check the two Load at toc: one should be _$TLSML, and the other
+                // will be moved before the node that uses the OutReg of the
+                // ._tls_get_mod node.
+                if (LoadFromTocs.size() == 2) {
+                  MachineBasicBlock::iterator TLSMLIter = MBB.end();
+                  MachineBasicBlock::iterator OffsetIter = MBB.end();
+                  for (MachineBasicBlock::iterator I = MBB.begin(),
+                                                   IE = MBB.end();
+                       I != IE; ++I)
+                    if (LoadFromTocs.count(&*I)) {
+                      if (I->getOperand(1).isGlobal() &&
+                          I->getOperand(1).getGlobal()->getName().equals(
+                              "_$TLSML"))
+                        TLSMLIter = I;
+                      else
+                        OffsetIter = I;
+                    }
+                  if (TLSMLIter != MBB.end() && OffsetIter != MBB.end())
+                    OffsetIter->moveBefore(&*UseIter);
                 }
               }
             }
-
-            // Generate instructions to load module-handle.
-            Register ModuleHandleHReg;
-            if (IsLargeModel) {
-              ModuleHandleHReg = RegInfo.createVirtualRegister(GPRNoZero);
-              BuildMI(MBB, Anchor, DL,
-                      TII->get(Is64Bit ? PPC::ADDIStocHA8 : PPC::ADDIStocHA),
-                      ModuleHandleHReg)
-                  .addReg(Subtarget.getTOCPointerRegister())
-                  .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG);
-            }
-            BuildMI(MBB, Anchor, DL, TII->get(LDTocOp), GPR3)
-                .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG)
-                .addReg(IsLargeModel
-                            ? ModuleHandleHReg
-                            : Register(Subtarget.getTOCPointerRegister()));
+            // The module-handle is copied in r3. The copy is followed by
+            // GETtlsMOD32AIX/GETtlsMOD64AIX.
+            BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
+                .addReg(InReg);
             // The call to .__tls_get_mod.
-            BuildMI(MBB, Anchor, DL, TII->get(Opc2), GPR3).addReg(GPR3);
+            BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3);
           } else if (!IsTLSTPRelMI) {
             // The variable offset and region handle are copied in r4 and r3.
             // The copies are followed by GETtlsADDR32AIX/GETtlsADDR64AIX.
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
index 84ddb83bef457e..194273738c6499 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
@@ -156,8 +156,8 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
+; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) @"_$TLSML"
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
 ; SMALL32-NEXT:    stfdx 1, 3, 4
@@ -186,8 +186,8 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
+; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) @"_$TLSML"
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
 ; SMALL64-NEXT:    stfdx 1, 3, 4
@@ -200,8 +200,8 @@ define void @storesTIInit(double %Val) #0 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 6, L..C5 at u(2)
 ; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
@@ -452,13 +452,13 @@ define double @loadsTIInit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
+; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) @"_$TLSML"
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL32-NEXT:    lwz 5, L..C8(2) # @GInit
 ; SMALL32-NEXT:    lfdx 0, 3, 4
-; SMALL32-NEXT:    lfd 1, 0(5)
+; SMALL32-NEXT:    lwz 3, L..C8(2) # @GInit
+; SMALL32-NEXT:    lfd 1, 0(3)
 ; SMALL32-NEXT:    fadd 1, 0, 1
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
@@ -489,13 +489,13 @@ define double @loadsTIInit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
+; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) @"_$TLSML"
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL64-NEXT:    ld 5, L..C8(2) # @GInit
 ; SMALL64-NEXT:    lfdx 0, 3, 4
-; SMALL64-NEXT:    lfd 1, 0(5)
+; SMALL64-NEXT:    ld 3, L..C8(2) # @GInit
+; SMALL64-NEXT:    lfd 1, 0(3)
 ; SMALL64-NEXT:    fadd 1, 0, 1
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -506,8 +506,8 @@ define double @loadsTIInit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 6, L..C5 at u(2)
 ; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
index 3001a915da4851..bbb8e04b67b95e 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
@@ -163,9 +163,9 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    mr 6, 3
 ; SMALL32-NEXT:    lwz 3, L..C4(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2)
 ; SMALL32-NEXT:    stwx 6, 3, 4
@@ -195,9 +195,9 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    mr 6, 3
 ; SMALL64-NEXT:    ld 3, L..C4(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2)
 ; SMALL64-NEXT:    stwx 6, 3, 4
@@ -210,9 +210,9 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 7, L..C5 at u(2)
 ; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
@@ -467,13 +467,13 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    lwz 3, L..C4(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2)
-; SMALL32-NEXT:    lwz 5, L..C8(2)
 ; SMALL32-NEXT:    lwzx 3, 3, 4
-; SMALL32-NEXT:    lwz 4, 0(5)
+; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    lwz 4, 0(4)
 ; SMALL32-NEXT:    add 3, 4, 3
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
@@ -504,13 +504,13 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    ld 3, L..C4(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2)
-; SMALL64-NEXT:    ld 5, L..C8(2)
 ; SMALL64-NEXT:    lwzx 3, 3, 4
-; SMALL64-NEXT:    lwz 4, 0(5)
+; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    lwz 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -521,8 +521,8 @@ define i32 @loadsTIUninit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 6, L..C5 at u(2)
 ; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
index 136c62ff509f9c..ff087a2144488c 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
@@ -97,10 +97,10 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    mr 7, 3
-; SMALL32-NEXT:    mr 6, 4
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
+; SMALL32-NEXT:    stw 0, 40(1)
+; SMALL32-NEXT:    mr 6, 4
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C3(2)
 ; SMALL32-NEXT:    stwux 7, 3, 4
@@ -133,9 +133,9 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    mr 6, 3
 ; SMALL64-NEXT:    ld 3, L..C2(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C3(2)
 ; SMALL64-NEXT:    stdx 6, 3, 4
@@ -148,9 +148,9 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C2 at u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 7, L..C3 at u(2)
 ; LARGE64-NEXT:    ld 3, L..C2 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
@@ -171,10 +171,10 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    mr 7, 3
-; SMALL32-NEXT:    mr 6, 4
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
+; SMALL32-NEXT:    stw 0, 40(1)
+; SMALL32-NEXT:    mr 6, 4
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C4(2)
 ; SMALL32-NEXT:    stwux 7, 3, 4
@@ -207,9 +207,9 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    mr 6, 3
 ; SMALL64-NEXT:    ld 3, L..C2(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C4(2)
 ; SMALL64-NEXT:    stdx 6, 3, 4
@@ -222,9 +222,9 @@ define void @storesTIInit(i64 %Val) #0 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C2 at u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 7, L..C4 at u(2)
 ; LARGE64-NEXT:    ld 3, L..C2 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
@@ -407,8 +407,8 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C3(2)
 ; SMALL32-NEXT:    lwz 5, L..C7(2)
@@ -450,13 +450,13 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    ld 3, L..C2(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C3(2)
-; SMALL64-NEXT:    ld 5, L..C7(2)
 ; SMALL64-NEXT:    ldx 3, 3, 4
-; SMALL64-NEXT:    ld 4, 0(5)
+; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -467,8 +467,8 @@ define i64 @loadsTIUninit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 3, L..C2 at u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 6, L..C3 at u(2)
 ; LARGE64-NEXT:    ld 3, L..C2 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
@@ -495,8 +495,8 @@ define i64 @loadsTIInit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C4(2)
 ; SMALL32-NEXT:    lwz 5, L..C7(2)
@@ -538,13 +538,13 @@ define i64 @loadsTIInit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    ld 3, L..C2(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C4(2)
-; SMALL64-NEXT:    ld 5, L..C7(2)
 ; SMALL64-NEXT:    ldx 3, 3, 4
-; SMALL64-NEXT:    ld 4, 0(5)
+; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -555,8 +555,8 @@ define i64 @loadsTIInit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 3, L..C2 at u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 6, L..C4 at u(2)
 ; LARGE64-NEXT:    ld 3, L..C2 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-ld-xcoff-reloc-large.ll b/llvm/test/CodeGen/PowerPC/aix-tls-ld-xcoff-reloc-large.ll
index 657f6187bcd7f6..d1ca04a7ace838 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-ld-xcoff-reloc-large.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-ld-xcoff-reloc-large.ll
@@ -38,14 +38,14 @@ entry:
 ; RELOC-NEXT: Arch: powerpc64
 ; RELOC-NEXT: AddressSize: 64bit
 ; RELOC-NEXT: Relocations [
-; RELOC:      Virtual Address: 0x12
+; RELOC:      Virtual Address: 0xE
 ; RELOC-NEXT:      Symbol: _$TLSML (19)
 ; RELOC-NEXT:      IsSigned: No
 ; RELOC-NEXT:      FixupBitValue: 0
 ; RELOC-NEXT:      Length: 16
 ; RELOC-NEXT:      Type: R_TOCU (0x30)
 ; RELOC-NEXT:    }
-; RELOC:      Virtual Address: 0x16
+; RELOC:      Virtual Address: 0x12
 ; RELOC-NEXT:      Symbol: IThreadLocalVarUninit (21)
 ; RELOC-NEXT:      IsSigned: No
 ; RELOC-NEXT:      FixupBitValue: 0
@@ -66,14 +66,14 @@ entry:
 ; RELOC-NEXT:      Length: 16
 ; RELOC-NEXT:      Type: R_TOCL (0x31)
 ; RELOC-NEXT:    }
-; RELOC:      Virtual Address: 0x4E
+; RELOC:      Virtual Address: 0x4A
 ; RELOC-NEXT:      Symbol: _$TLSML (19)
 ; RELOC-NEXT:      IsSigned: No
 ; RELOC-NEXT:      FixupBitValue: 0
 ; RELOC-NEXT:      Length: 16
 ; RELOC-NEXT:      Type: R_TOCU (0x30)
 ; RELOC-NEXT:    }
-; RELOC:      Virtual Address: 0x52
+; RELOC:      Virtual Address: 0x4E
 ; RELOC-NEXT:      Symbol: ThreadLocalVarInit (23)
 ; RELOC-NEXT:      IsSigned: No
 ; RELOC-NEXT:      FixupBitValue: 0
@@ -94,14 +94,14 @@ entry:
 ; RELOC-NEXT:      Length: 16
 ; RELOC-NEXT:      Type: R_TOCL (0x31)
 ; RELOC-NEXT:    }
-; RELOC:      Virtual Address: 0x8E
+; RELOC:      Virtual Address: 0x8A
 ; RELOC-NEXT:      Symbol: _$TLSML (19)
 ; RELOC-NEXT:      IsSigned: No
 ; RELOC-NEXT:      FixupBitValue: 0
 ; RELOC-NEXT:      Length: 16
 ; RELOC-NEXT:      Type: R_TOCU (0x30)
 ; RELOC-NEXT:    }
-; RELOC:      Virtual Address: 0x92
+; RELOC:      Virtual Address: 0x8E
 ; RELOC-NEXT:      Symbol: IThreadLocalVarUninit (21)
 ; RELOC-NEXT:      IsSigned: No
 ; RELOC-NEXT:      FixupBitValue: 0
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
index f7cd73ae609b35..c745e17636da11 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
@@ -327,7 +327,7 @@ define i32 @DedupTlsGetMod() #0 {
 ; WITHDUP-NEXT:   ld [[OffsetXR:[0-9]+]], [[X:L..C[0-9]+]](2)
 ; WITHDUP:        ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; WITHDUP-NEXT:   bla .__tls_get_mod[PR]
-; WITHDUP-NEXT:   ld [[OffsetYR:[0-9]+]], [[Y:L..C[0-9]+]](2)
+; WITHDUP:        ld [[OffsetYR:[0-9]+]], [[Y:L..C[0-9]+]](2)
 ; WITHDUP-LABEL:  L..DedupTlsGetMod0:
 ;
 ; NODUP-LABEL:  DedupTlsGetMod:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
index 20110aff8ce654..d578920aa858cd 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
@@ -33,7 +33,7 @@ entry:
 ; RELOC-NEXT: Relocations [
 ; RELOC-NEXT:   Section (index: 1) .text {
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x12
+; RELOC-NEXT:     Virtual Address: 0xE
 ; RELOC-NEXT:     Symbol: _$TLSML (25)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -587,10 +587,10 @@ entry:
 ; DIS:      00000000 (idx: 7) .storesTIUninit:
 ; DIS-NEXT:                                      mflr 0
 ; DIS-NEXT:                                      stwu 1, -32(1)
-; DIS-NEXT:                                      stw 0, 40(1)
 ; DIS-NEXT:                                      mr 6, 3
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 0(2)
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 25) _$TLSML[TC]
+; DIS-NEXT:                                      stw 0, 40(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               bla 0
 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1)      .__tls_get_mod[PR]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 4, 4(2)

>From fda16ebd1c7649a18087cdda4162c6f25ae444b2 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Thu, 26 Oct 2023 03:42:38 -0400
Subject: [PATCH 15/27] Address comment: use == to compare StringRef names

---
 llvm/lib/MC/XCOFFObjectWriter.cpp             | 8 ++++----
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     | 5 ++---
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 3 +--
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index cfb04b8e6b7af6..5efc8ac841ef4a 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -650,9 +650,9 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
   auto getIndex = [this](const MCSymbol *Sym,
                          const MCSectionXCOFF *ContainingCsect) {
     // Fixup relocation flag for AIX TLS local-dynamic mode.
-    if (Sym->getName().equals("_Renamed..5f24__TLSML[UA]")) {
+    if (Sym->getName() == "_Renamed..5f24__TLSML[UA]") {
       for (auto Iter : SymbolIndexMap)
-        if (Iter.first->getName().equals("_Renamed..5f24__TLSML[TC]"))
+        if (Iter.first->getName() == "_Renamed..5f24__TLSML[TC]")
           return Iter.second;
       llvm_unreachable("For AIX TLS local-dynamic mode: "
                        "_Renamed..5f24__TLSML[TC] not found.");
@@ -1155,7 +1155,7 @@ void XCOFFObjectWriter::writeSymbolTable(const MCAsmLayout &Layout) {
 
   for (const auto &Csect : UndefinedCsects) {
     // AIX does not need to emit for the _$TLSML symbol.
-    if (Csect.getSymbolTableName().equals(StringRef("_$TLSML")))
+    if (Csect.getSymbolTableName() == "_$TLSML")
       continue;
     writeSymbolEntryForControlSection(Csect, XCOFF::ReservedSectionNum::N_UNDEF,
                                       Csect.MCSec->getStorageClass());
@@ -1373,7 +1373,7 @@ void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) {
   // Calculate indices for undefined symbols.
   for (auto &Csect : UndefinedCsects) {
     // AIX does not need to emit for the _$TLSML symbol.
-    if (Csect.getSymbolTableName().equals(StringRef("_$TLSML")))
+    if (Csect.getSymbolTableName() == "_$TLSML")
       continue;
     Csect.Size = 0;
     Csect.Address = 0;
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 032bab255665b4..8e46af5d68c884 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -849,8 +849,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     if (Flag == PPCII::MO_TLSGD_FLAG || Flag == PPCII::MO_GOT_TLSGD_PCREL_FLAG)
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD;
     if (MO.getTargetFlags() & PPCII::MO_TLSLD_FLAG) {
-      if (IsAIX && MO.isGlobal() &&
-          (MO.getGlobal()->getName().equals("_$TLSML")))
+      if (IsAIX && MO.isGlobal() && MO.getGlobal()->getName() == "_$TLSML")
         // FIXME: Due to the size limit of MachineOperand::SubReg_TargetFlags,
         // hacked this flag which should have been named MO_TLSLDM_FLAG: on AIX
         // the ML relocation type is only valid for a reference to a TOC symbol
@@ -2082,7 +2081,7 @@ void PPCAIXAsmPrinter::emitLinkage(const GlobalValue *GV,
   }
 
   // Do not emit _$TLSML symbol.
-  if (GVSym->getName().equals(StringRef("_Renamed..5f24__TLSML[UA]")))
+  if (GVSym->getName() == "_Renamed..5f24__TLSML[UA]")
     return;
 
   OutStreamer->emitXCOFFSymbolLinkageWithVisibility(GVSym, LinkageAttr,
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index a2843ca0a350d8..452a7a2dcf4168 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -212,8 +212,7 @@ namespace {
                        I != IE; ++I)
                     if (LoadFromTocs.count(&*I)) {
                       if (I->getOperand(1).isGlobal() &&
-                          I->getOperand(1).getGlobal()->getName().equals(
-                              "_$TLSML"))
+                          I->getOperand(1).getGlobal()->getName() == "_$TLSML")
                         TLSMLIter = I;
                       else
                         OffsetIter = I;

>From f0e352e9ecabd3ea252024181d8f79e206d17d2c Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Fri, 27 Oct 2023 10:34:55 -0400
Subject: [PATCH 16/27] Correct SMC setting for the _$TLSML symbol

---
 llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 7 +++++++
 llvm/lib/MC/XCOFFObjectWriter.cpp                 | 8 --------
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp         | 2 +-
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 9827bd3ff4f1ba..d71f9b9e0bcba8 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -2394,6 +2394,13 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForExternalReference(
   SmallString<128> Name;
   getNameWithPrefix(Name, GO, TM);
 
+  // AIX TLS local-dynamic requires the setting for the specific symbol name.
+  if (GO->hasName() && GO->getName() == "_$TLSML") {
+    return getContext().getXCOFFSection(
+        Name, SectionKind::getData(),
+        XCOFF::CsectProperties(XCOFF::XMC_TC, XCOFF::XTY_SD));
+  }
+
   XCOFF::StorageMappingClass SMC =
       isa<Function>(GO) ? XCOFF::XMC_DS : XCOFF::XMC_UA;
   if (GO->isThreadLocal())
diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index 5efc8ac841ef4a..98a4d8481dc9d6 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -649,14 +649,6 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
                                          uint64_t &FixedValue) {
   auto getIndex = [this](const MCSymbol *Sym,
                          const MCSectionXCOFF *ContainingCsect) {
-    // Fixup relocation flag for AIX TLS local-dynamic mode.
-    if (Sym->getName() == "_Renamed..5f24__TLSML[UA]") {
-      for (auto Iter : SymbolIndexMap)
-        if (Iter.first->getName() == "_Renamed..5f24__TLSML[TC]")
-          return Iter.second;
-      llvm_unreachable("For AIX TLS local-dynamic mode: "
-                       "_Renamed..5f24__TLSML[TC] not found.");
-    }
     // If we could not find the symbol directly in SymbolIndexMap, this symbol
     // could either be a temporary symbol or an undefined symbol. In this case,
     // we would need to have the relocation reference its csect instead.
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 8e46af5d68c884..0fbe20c553c3f2 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -2081,7 +2081,7 @@ void PPCAIXAsmPrinter::emitLinkage(const GlobalValue *GV,
   }
 
   // Do not emit _$TLSML symbol.
-  if (GVSym->getName() == "_Renamed..5f24__TLSML[UA]")
+  if (GVSym->getName() == "_Renamed..5f24__TLSML[TC]")
     return;
 
   OutStreamer->emitXCOFFSymbolLinkageWithVisibility(GVSym, LinkageAttr,

>From 0f0d2c8a7a759ef810021220fd7f176d398c64c3 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Fri, 27 Oct 2023 10:52:51 -0400
Subject: [PATCH 17/27] Simplify and add check

---
 llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 6 ++----
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp                | 3 ++-
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index cc50a29d67a723..5b8f7f06619047 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -242,12 +242,10 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer {
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSIE ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLE ||
-          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD)
+          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD ||
+          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML)
         OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << "@"
            << MCSymbolRefExpr::getVariantKindName(Kind) << '\n';
-      else if (Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML)
-        OS << "\t.tc " << TCSym->getName() << "," << TCSym->getName() << "@"
-           << MCSymbolRefExpr::getVariantKindName(Kind) << '\n';
       else
         OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << '\n';
 
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 0fbe20c553c3f2..51f9f5d3b14ea9 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -849,7 +849,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     if (Flag == PPCII::MO_TLSGD_FLAG || Flag == PPCII::MO_GOT_TLSGD_PCREL_FLAG)
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD;
     if (MO.getTargetFlags() & PPCII::MO_TLSLD_FLAG) {
-      if (IsAIX && MO.isGlobal() && MO.getGlobal()->getName() == "_$TLSML")
+      if (IsAIX && MO.isGlobal() && MO.getGlobal()->hasName() &&
+          MO.getGlobal()->getName() == "_$TLSML")
         // FIXME: Due to the size limit of MachineOperand::SubReg_TargetFlags,
         // hacked this flag which should have been named MO_TLSLDM_FLAG: on AIX
         // the ML relocation type is only valid for a reference to a TOC symbol

>From e9479d0c99dce013821466670019f52537f5de7a Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Fri, 27 Oct 2023 11:01:32 -0400
Subject: [PATCH 18/27] misc

---
 llvm/lib/MC/XCOFFObjectWriter.cpp         | 4 ++--
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index 98a4d8481dc9d6..4c89ddefe74f7d 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -1146,7 +1146,7 @@ void XCOFFObjectWriter::writeSymbolTable(const MCAsmLayout &Layout) {
                      /*NumberOfAuxEntries=*/0);
 
   for (const auto &Csect : UndefinedCsects) {
-    // AIX does not need to emit for the _$TLSML symbol.
+    // AIX does not need to emit the _$TLSML symbol.
     if (Csect.getSymbolTableName() == "_$TLSML")
       continue;
     writeSymbolEntryForControlSection(Csect, XCOFF::ReservedSectionNum::N_UNDEF,
@@ -1364,7 +1364,7 @@ void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) {
 
   // Calculate indices for undefined symbols.
   for (auto &Csect : UndefinedCsects) {
-    // AIX does not need to emit for the _$TLSML symbol.
+    // AIX does not need to emit the _$TLSML symbol.
     if (Csect.getSymbolTableName() == "_$TLSML")
       continue;
     Csect.Size = 0;
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 51f9f5d3b14ea9..ea7ff78434d71d 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -2081,7 +2081,7 @@ void PPCAIXAsmPrinter::emitLinkage(const GlobalValue *GV,
     }
   }
 
-  // Do not emit _$TLSML symbol.
+  // Do not emit the _$TLSML symbol.
   if (GVSym->getName() == "_Renamed..5f24__TLSML[TC]")
     return;
 

>From 452f0b82af41c3c770ddd09a39f8b0443da62cd1 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Mon, 30 Oct 2023 04:44:54 -0400
Subject: [PATCH 19/27] Simplify logic by move the XMC_TC setting for the
 _$TLSML symbol into getSectionForTOCEntry()

---
 llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 13 ++++++++-----
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp         |  9 ---------
 2 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index d71f9b9e0bcba8..df41406cf5d625 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -2656,13 +2656,16 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry(
   // the chance of needing -bbigtoc is decreased. Also, the toc-entry for
   // EH info is never referenced directly using instructions so it can be
   // allocated with TE storage-mapping class.
+  // The "_$TLSML" symbol for TLS local-dynamic mode requires XMC_TC.
   return getContext().getXCOFFSection(
       cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(), SectionKind::getData(),
-      XCOFF::CsectProperties((TM.getCodeModel() == CodeModel::Large ||
-                              cast<MCSymbolXCOFF>(Sym)->isEHInfo())
-                                 ? XCOFF::XMC_TE
-                                 : XCOFF::XMC_TC,
-                             XCOFF::XTY_SD));
+      XCOFF::CsectProperties(
+          ((TM.getCodeModel() == CodeModel::Large &&
+            cast<MCSymbolXCOFF>(Sym)->getSymbolTableName() != "_$TLSML") ||
+           cast<MCSymbolXCOFF>(Sym)->isEHInfo())
+              ? XCOFF::XMC_TE
+              : XCOFF::XMC_TC,
+          XCOFF::XTY_SD));
 }
 
 MCSection *TargetLoweringObjectFileXCOFF::getSectionForLSDA(
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index ea7ff78434d71d..b792a91fa8b2e6 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -2755,15 +2755,6 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
       MCSymbol *S = OutContext.getOrCreateSymbol(Name);
       TCEntry = cast<MCSectionXCOFF>(
           getObjFileLowering().getSectionForTOCEntry(S, TM));
-    } else if (I.first.second ==
-               MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML) {
-      // AIX assembler expects the TC storage-mapping class for the "_$TLSML"
-      // symbol.
-      MCSection *MCSect = getObjFileLowering().getContext().getXCOFFSection(
-          cast<MCSymbolXCOFF>(I.first.first)->getSymbolTableName(),
-          SectionKind::getData(),
-          XCOFF::CsectProperties(XCOFF::XMC_TC, XCOFF::XTY_SD));
-      TCEntry = cast<MCSectionXCOFF>(MCSect);
     } else {
       TCEntry = cast<MCSectionXCOFF>(
           getObjFileLowering().getSectionForTOCEntry(I.first.first, TM));

>From 94af3fd4596d078bd9e9bdff45389478be278279 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Mon, 30 Oct 2023 04:50:47 -0400
Subject: [PATCH 20/27] Add check to make sure GV's name is defined before
 access

---
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 452a7a2dcf4168..93701d340f8001 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -212,6 +212,7 @@ namespace {
                        I != IE; ++I)
                     if (LoadFromTocs.count(&*I)) {
                       if (I->getOperand(1).isGlobal() &&
+                          I->getOperand(1).getGlobal()->hasName() &&
                           I->getOperand(1).getGlobal()->getName() == "_$TLSML")
                         TLSMLIter = I;
                       else

>From 7aa2bb624d773e6b1bc6924c2febfc84c77796ea Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Mon, 30 Oct 2023 20:40:27 -0400
Subject: [PATCH 21/27] update

---
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp   | 1 -
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 1 -
 2 files changed, 2 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index b792a91fa8b2e6..c56b8ae8f4dbae 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -857,7 +857,6 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
         // from the symbol itself, and right now its only user is the symbol
         // "_$TLSML". The symbol name is used to decide that R_TLSML is
         // expected.
-        // FIX this once #69695 committed.
         return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML;
       if (IsAIX)
         return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 9209b95dde08d5..571b6312a89b94 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3427,7 +3427,6 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
         dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
             StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
     assert(TLSGV && "Not able to create GV for _$TLSML.");
-    // FIXME: create MO_TLSML_FLAG once #69695 committed.
     SDValue ModuleHandleTGA =
         DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
     SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);

>From 07e900d6e83d1204336817b12ce84f4ba6fa9651 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Thu, 7 Dec 2023 08:59:06 -0500
Subject: [PATCH 22/27] Apply target flag change && update according to
 comments

---
 .../CodeGen/TargetLoweringObjectFileImpl.cpp  |  3 ++-
 .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp  |  2 +-
 llvm/lib/Target/PowerPC/PPC.h                 |  6 ++++++
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     | 19 +++++--------------
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  2 +-
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp      |  1 +
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 12 ++++++------
 .../test/CodeGen/PowerPC/aix-tls-gd-double.ll |  8 ++++----
 8 files changed, 26 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index df41406cf5d625..ed8bac396dd12a 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -2394,7 +2394,8 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForExternalReference(
   SmallString<128> Name;
   getNameWithPrefix(Name, GO, TM);
 
-  // AIX TLS local-dynamic requires the setting for the specific symbol name.
+  // AIX TLS local-dynamic does not need the external reference for the
+  // "_$TLSML" symbol.
   if (GO->hasName() && GO->getName() == "_$TLSML") {
     return getContext().getXCOFFSection(
         Name, SectionKind::getData(),
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 5b8f7f06619047..b849b7be7b7be8 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -234,7 +234,7 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer {
       // On AIX, we have TLS variable offsets (symbol@({gd|ie|le|ld}) depending
       // on the TLS access method (or model). For the general-dynamic access
       // method, we also have region handle (symbol at m) for each variable. For
-      // local-dynamic, there is a module handle (__TLSML[TC]@ml) for all
+      // local-dynamic, there is a module handle (_$TLSML[TC]@ml) for all
       // variables. Finally for local-exec and initial-exec, we have a thread
       // pointer, in r13 for 64-bit mode and returned by .__get_tpointer for
       // 32-bit mode.
diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
index 3d9ea560819385..b625af775ec0a1 100644
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -139,6 +139,12 @@ class ModulePass;
     /// and Local Exec models.
     MO_TPREL_FLAG,
 
+    /// MO_TLSLDM_FLAG - on AIX the ML relocation type is only valid for a
+    /// reference to a TOC symbol from the symbol itself, and right now its only
+    /// user is the symbol "_$TLSML". The symbol name is used to decide that
+    /// R_TLSML is expected.
+    MO_TLSLDM_FLAG,
+
     /// MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to
     /// TLS Local Dynamic model.
     MO_TLSLD_FLAG,
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index c56b8ae8f4dbae..a8e2719b5892f5 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -848,19 +848,10 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM;
     if (Flag == PPCII::MO_TLSGD_FLAG || Flag == PPCII::MO_GOT_TLSGD_PCREL_FLAG)
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD;
-    if (MO.getTargetFlags() & PPCII::MO_TLSLD_FLAG) {
-      if (IsAIX && MO.isGlobal() && MO.getGlobal()->hasName() &&
-          MO.getGlobal()->getName() == "_$TLSML")
-        // FIXME: Due to the size limit of MachineOperand::SubReg_TargetFlags,
-        // hacked this flag which should have been named MO_TLSLDM_FLAG: on AIX
-        // the ML relocation type is only valid for a reference to a TOC symbol
-        // from the symbol itself, and right now its only user is the symbol
-        // "_$TLSML". The symbol name is used to decide that R_TLSML is
-        // expected.
-        return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML;
-      if (IsAIX)
-        return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD;
-    }
+    if (Flag == PPCII::MO_TLSLD_FLAG && IsAIX)
+      return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD;
+    if (Flag == PPCII::MO_TLSLDM_FLAG && IsAIX)
+      return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML;
     return MCSymbolRefExpr::VariantKind::VK_None;
   };
 
@@ -1375,7 +1366,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
   case PPC::GETtlsMOD64AIX:
     // Transform: %r3 = GETtlsMODNNAIX %r3 (for NN == 32/64).
     // Into: BLA .__tls_get_mod()
-    // Input parameter is a module handle (__TLSML[TC]@ml) for all variables.
+    // Input parameter is a module handle (_$TLSML[TC]@ml) for all variables.
   case PPC::GETtlsADDR:
     // Transform: %x3 = GETtlsADDR %x3, @sym
     // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsgd)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 571b6312a89b94..d1079e0723cc62 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3428,7 +3428,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
             StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
     assert(TLSGV && "Not able to create GV for _$TLSML.");
     SDValue ModuleHandleTGA =
-        DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
+        DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
     SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
     SDValue ModuleHandle =
         DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index d0a6cced1b1974..38128abb9e140a 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2971,6 +2971,7 @@ PPCInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
       {MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"},
       {MO_TLSGD_FLAG, "ppc-tlsgd"},
       {MO_TPREL_FLAG, "ppc-tprel"},
+      {MO_TLSLDM_FLAG, "ppc-tlsldm"},
       {MO_TLSLD_FLAG, "ppc-tlsld"},
       {MO_TLSGDM_FLAG, "ppc-tlsgdm"},
       {MO_GOT_TLSGD_PCREL_FLAG, "ppc-got-tlsgd-pcrel"},
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 93701d340f8001..dbe8676e62a1ac 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -162,11 +162,11 @@ namespace {
         if (IsAIX) {
           if (IsTLSLDAIXMI) {
             // The relative order between the LoadOffset at toc node, and the
-            // ._tls_get_mod node is being tuned here. It is better to put the
+            // .__tls_get_mod node is being tuned here. It is better to put the
             // LoadOffset at toc node after the call, since the LoadOffset at toc node
             // can use clobbers r4/r5. Search for the pattern of two Load at toc
             // nodes, and then move the LoadOffset at toc node right before the
-            // node that uses the OutReg of the ._tls_get_mod node.
+            // node that uses the OutReg of the .__tls_get_mod node.
             unsigned LDTocOp =
                 Is64Bit ? (IsLargeModel ? PPC::LDtocL : PPC::LDtoc)
                         : (IsLargeModel ? PPC::LWZtocL : PPC::LWZtoc);
@@ -203,7 +203,7 @@ namespace {
 
                 // Check the two Load at toc: one should be _$TLSML, and the other
                 // will be moved before the node that uses the OutReg of the
-                // ._tls_get_mod node.
+                // .__tls_get_mod node.
                 if (LoadFromTocs.size() == 2) {
                   MachineBasicBlock::iterator TLSMLIter = MBB.end();
                   MachineBasicBlock::iterator OffsetIter = MBB.end();
@@ -211,9 +211,9 @@ namespace {
                                                    IE = MBB.end();
                        I != IE; ++I)
                     if (LoadFromTocs.count(&*I)) {
-                      if (I->getOperand(1).isGlobal() &&
-                          I->getOperand(1).getGlobal()->hasName() &&
-                          I->getOperand(1).getGlobal()->getName() == "_$TLSML")
+                      MachineOperand MO = I->getOperand(1);
+                      if (MO.isGlobal() && MO.getGlobal()->hasName() &&
+                          MO.getGlobal()->getName() == "_$TLSML")
                         TLSMLIter = I;
                       else
                         OffsetIter = I;
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
index 194273738c6499..ae41b6b1301064 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
@@ -156,7 +156,7 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) @"_$TLSML"
+; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
@@ -186,7 +186,7 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) @"_$TLSML"
+; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
@@ -452,7 +452,7 @@ define double @loadsTIInit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) @"_$TLSML"
+; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
@@ -489,7 +489,7 @@ define double @loadsTIInit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) @"_$TLSML"
+; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit

>From edc4e8875651bfd438dd3bd15ca78c3f413d649a Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Mon, 11 Dec 2023 00:59:54 -0500
Subject: [PATCH 23/27] Address comment: Add flag check before string compare.

---
 llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 3 ++-
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp         | 3 ++-
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp       | 1 +
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index ed8bac396dd12a..87efdec0eb10cb 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -2396,7 +2396,8 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForExternalReference(
 
   // AIX TLS local-dynamic does not need the external reference for the
   // "_$TLSML" symbol.
-  if (GO->hasName() && GO->getName() == "_$TLSML") {
+  if (GO->getThreadLocalMode() == GlobalVariable::LocalDynamicTLSModel &&
+      GO->hasName() && GO->getName() == "_$TLSML") {
     return getContext().getXCOFFSection(
         Name, SectionKind::getData(),
         XCOFF::CsectProperties(XCOFF::XMC_TC, XCOFF::XTY_SD));
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index a8e2719b5892f5..67f94c21403706 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -2072,7 +2072,8 @@ void PPCAIXAsmPrinter::emitLinkage(const GlobalValue *GV,
   }
 
   // Do not emit the _$TLSML symbol.
-  if (GVSym->getName() == "_Renamed..5f24__TLSML[TC]")
+  if (GV->getThreadLocalMode() == GlobalVariable::LocalDynamicTLSModel &&
+      GV->hasName() && GV->getName() == "_$TLSML")
     return;
 
   OutStreamer->emitXCOFFSymbolLinkageWithVisibility(GVSym, LinkageAttr,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index d1079e0723cc62..b9d15ea3ec0597 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3426,6 +3426,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
     GlobalVariable *TLSGV =
         dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
             StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
+    TLSGV->setThreadLocalMode(GlobalVariable::LocalDynamicTLSModel);
     assert(TLSGV && "Not able to create GV for _$TLSML.");
     SDValue ModuleHandleTGA =
         DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);

>From d6bd1dc55a18c4122590e096e00efa6f827348cd Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Tue, 12 Dec 2023 01:54:55 -0500
Subject: [PATCH 24/27] Update comment.

---
 llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 87efdec0eb10cb..9e5378b4e656a3 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -2658,7 +2658,8 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry(
   // the chance of needing -bbigtoc is decreased. Also, the toc-entry for
   // EH info is never referenced directly using instructions so it can be
   // allocated with TE storage-mapping class.
-  // The "_$TLSML" symbol for TLS local-dynamic mode requires XMC_TC.
+  // The "_$TLSML" symbol for TLS local-dynamic mode requires XMC_TC, otherwise
+  // the AIX assembler will complain.
   return getContext().getXCOFFSection(
       cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(), SectionKind::getData(),
       XCOFF::CsectProperties(

>From ad6ef7bd97cfdf708dbeb7cd49aaa9f0475f1e2b Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Mon, 1 Jan 2024 21:55:30 -0500
Subject: [PATCH 25/27] [NFC] Add comments

---
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index dbe8676e62a1ac..7fc5ea2cacbfab 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -119,9 +119,13 @@ namespace {
           Opc2 = PPC::GETtlsldADDR32;
           break;
         case PPC::TLSLDAIX:
+          // TLSLDAIX is expanded to one copy and GET_TLS_MOD, so we only set
+          // Opc2 here.
           Opc2 = PPC::GETtlsMOD32AIX;
           break;
         case PPC::TLSLDAIX8:
+          // TLSLDAIX8 is expanded to one copy and GET_TLS_MOD, so we only set
+          // Opc2 here.
           Opc2 = PPC::GETtlsMOD64AIX;
           break;
         case PPC::TLSGDAIX8:
@@ -182,14 +186,20 @@ namespace {
                 if (Uses.count(&*UseIter))
                   break;
 
+              // Got some work to do when UseIter pointing to valid node. Check
+              // the pattern and do the movement if match.
               if (UseIter != MBB.end()) {
-                // Collect associated Load at toc nodes.
+                // Collect associated Load at toc nodes. Use hasOneDef to guard
+                // against unexpected scenarios.
                 std::set<MachineInstr *> LoadFromTocs;
                 for (MachineOperand &MO : UseIter->operands())
                   if (MO.isReg() && MO.isUse()) {
                     if (RegInfo.hasOneDef(MO.getReg())) {
                       MachineInstr *Temp =
                           RegInfo.getOneDef(MO.getReg())->getParent();
+                      // For current TLSLDAIX node, get the Load at toc node for
+                      // the InReg. Otherwise, Temp probably pointed to the
+                      // LoadOffset at toc node that we would like to move.
                       if (Temp == &MI && RegInfo.hasOneDef(InReg))
                         Temp = RegInfo.getOneDef(InReg)->getParent();
                       if (Temp->getOpcode() == LDTocOp)
@@ -207,6 +217,9 @@ namespace {
                 if (LoadFromTocs.size() == 2) {
                   MachineBasicBlock::iterator TLSMLIter = MBB.end();
                   MachineBasicBlock::iterator OffsetIter = MBB.end();
+                  // Make sure the two LoadFromTocs are within current BB, and
+                  // one of them from the "_$TLSML" pseudo symbol, while the
+                  // other from the variable.
                   for (MachineBasicBlock::iterator I = MBB.begin(),
                                                    IE = MBB.end();
                        I != IE; ++I)
@@ -218,6 +231,8 @@ namespace {
                       else
                         OffsetIter = I;
                     }
+                  // If both two iterators are valid, we should have identified
+                  // the scenario, and do the movement.
                   if (TLSMLIter != MBB.end() && OffsetIter != MBB.end())
                     OffsetIter->moveBefore(&*UseIter);
                 }

>From 1247137a186d1af08f7868888066adde0042e1e4 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Wed, 17 Jan 2024 03:40:40 -0500
Subject: [PATCH 26/27] Remove dummy logic: Since
 `getSectionForExternalReference` explicitly set XCOFF::XTY_SD for the
 `_$TLSML` symbol reference, and that only CSType `XCOFF::XTY_ER` symbol
 references are added into `UndefinedCsects`, the checks inside
 `executePostLayoutBinding` are redundant.

Update test case to make sure external reference to the `_$TLSML` symbol
is not observed.
---
 llvm/lib/MC/XCOFFObjectWriter.cpp                  | 6 ------
 llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll | 2 ++
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index 4c89ddefe74f7d..a898a35a057e61 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -1146,9 +1146,6 @@ void XCOFFObjectWriter::writeSymbolTable(const MCAsmLayout &Layout) {
                      /*NumberOfAuxEntries=*/0);
 
   for (const auto &Csect : UndefinedCsects) {
-    // AIX does not need to emit the _$TLSML symbol.
-    if (Csect.getSymbolTableName() == "_$TLSML")
-      continue;
     writeSymbolEntryForControlSection(Csect, XCOFF::ReservedSectionNum::N_UNDEF,
                                       Csect.MCSec->getStorageClass());
   }
@@ -1364,9 +1361,6 @@ void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) {
 
   // Calculate indices for undefined symbols.
   for (auto &Csect : UndefinedCsects) {
-    // AIX does not need to emit the _$TLSML symbol.
-    if (Csect.getSymbolTableName() == "_$TLSML")
-      continue;
     Csect.Size = 0;
     Csect.Address = 0;
     Csect.SymbolTableIndex = SymbolTableIndex;
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
index c745e17636da11..22349337f18908 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
@@ -351,6 +351,8 @@ entry:
 
 ; SMALL:          .extern .__tls_get_mod[PR]
 ; LARGE:          .extern .__tls_get_mod[PR]
+; SMALL-NOT:      .extern _Renamed..5f24__TLSML[TC]
+; LARGE-NOT:      .extern _Renamed..5f24__TLSML[TC]
 
 ; SMALL:        [[ModuleHandleL]]:
 ; SMALL-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml

>From 153bff3ae7bc946763f2b2d7517bc7882aa9e987 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Wed, 7 Feb 2024 08:40:58 -0500
Subject: [PATCH 27/27] [NFC] Update comments.

---
 llvm/lib/Target/PowerPC/PPC.h                 |  2 +-
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     |  9 ++---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  4 ++-
 llvm/lib/Target/PowerPC/PPCISelLowering.h     |  9 +++--
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 35 +++++++++++--------
 5 files changed, 33 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
index b625af775ec0a1..eb8886dcc9075c 100644
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -142,7 +142,7 @@ class ModulePass;
     /// MO_TLSLDM_FLAG - on AIX the ML relocation type is only valid for a
     /// reference to a TOC symbol from the symbol itself, and right now its only
     /// user is the symbol "_$TLSML". The symbol name is used to decide that
-    /// R_TLSML is expected.
+    /// the R_TLSML relocation is expected.
     MO_TLSLDM_FLAG,
 
     /// MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 67f94c21403706..56898ba66caa1c 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -848,6 +848,9 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM;
     if (Flag == PPCII::MO_TLSGD_FLAG || Flag == PPCII::MO_GOT_TLSGD_PCREL_FLAG)
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD;
+    // For local-dynamic TLS access on AIX, we have one TOC entry for the symbol
+    // (the variable offset) and one shared TOC entry for the module handle.
+    // They are differentiated by MO_TLSLD_FLAG and MO_TLSLDM_FLAG.
     if (Flag == PPCII::MO_TLSLD_FLAG && IsAIX)
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD;
     if (Flag == PPCII::MO_TLSLDM_FLAG && IsAIX)
@@ -2864,13 +2867,11 @@ void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) {
   }
   case PPC::GETtlsMOD32AIX:
   case PPC::GETtlsMOD64AIX:
-    // A reference to .__tls_get_mod is unknown to the assembler so we need to
-    // emit an external symbol reference.
   case PPC::GETtlsTpointer32AIX:
   case PPC::GETtlsADDR64AIX:
   case PPC::GETtlsADDR32AIX: {
-    // A reference to .__tls_get_addr/.__get_tpointer is unknown to the
-    // assembler so we need to emit an external symbol reference.
+    // A reference to .__tls_get_mod/.__tls_get_addr/.__get_tpointer is unknown
+    // to the assembler so we need to emit an external symbol reference.
     MCSymbol *TlsGetAddr =
         createMCSymbolForTlsGetAddr(OutContext, MI->getOpcode());
     ExtSymSDNodeSymbols.insert(TlsGetAddr);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index b9d15ea3ec0597..6b209bc5135e5a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3422,6 +3422,8 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
 
     SDValue VariableOffsetTGA =
         DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
+    SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
+
     Module *M = DAG.getMachineFunction().getFunction().getParent();
     GlobalVariable *TLSGV =
         dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
@@ -3433,7 +3435,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
     SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
     SDValue ModuleHandle =
         DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
-    SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
+
     return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
   }
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 2b6fcbc93f57e6..04e2e4518b8b5c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -379,12 +379,11 @@ namespace llvm {
     /// produces a call to .__tls_get_mod(_$TLSML\@ml).
     GET_TLS_MOD_AIX,
 
-    /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(variable offset)
-    /// Op that internally creates a TOC entry for the "_$TLSML" symbol, and
+    /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle)
+    /// Op that with single input of module handle TOC entry in R3, and
     /// generates a GET_TLS_MOD_AIX node which will be expanded into a call to
-    /// .__tls_get_mod, and then adds the variable offset with the result from
-    /// the call. This node is used in both 32-bit and 64-bit modes. The only
-    /// difference is the register class.
+    /// .__tls_get_mod. This node is used in both 32-bit and 64-bit modes. The
+    /// only difference is the register class.
     TLSLD_AIX,
 
     /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 7fc5ea2cacbfab..5f318edf23655a 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -165,12 +165,14 @@ namespace {
 
         if (IsAIX) {
           if (IsTLSLDAIXMI) {
-            // The relative order between the LoadOffset at toc node, and the
-            // .__tls_get_mod node is being tuned here. It is better to put the
-            // LoadOffset at toc node after the call, since the LoadOffset at toc node
-            // can use clobbers r4/r5. Search for the pattern of two Load at toc
-            // nodes, and then move the LoadOffset at toc node right before the
-            // node that uses the OutReg of the .__tls_get_mod node.
+            // The relative order between the LoadOffset at toc node (for the
+            // variable offset), and the .__tls_get_mod node is being tuned
+            // here. It is better to put the LoadOffset at toc node after the call,
+            // since the LoadOffset at toc node can use clobbers r4/r5. Search for
+            // the pattern of two Load at toc nodes (either for the variable offset
+            // or for the module handle), and then move the LoadOffset at toc node
+            // right before the node that uses the OutReg of the .__tls_get_mod
+            // node.
             unsigned LDTocOp =
                 Is64Bit ? (IsLargeModel ? PPC::LDtocL : PPC::LDtoc)
                         : (IsLargeModel ? PPC::LWZtocL : PPC::LWZtoc);
@@ -179,26 +181,29 @@ namespace {
               // Collect all instructions that use the OutReg.
               for (MachineOperand &MO : RegInfo.use_operands(OutReg))
                 Uses.insert(MO.getParent());
-              // Find the first user (e.g.: lwax/stfdx) within the current BB.
+              // Find the first user (e.g.: lwax/stfdx) of the OutReg within the
+              // current BB.
               MachineBasicBlock::iterator UseIter = MBB.begin();
               for (MachineBasicBlock::iterator IE = MBB.end(); UseIter != IE;
                    ++UseIter)
                 if (Uses.count(&*UseIter))
                   break;
 
-              // Got some work to do when UseIter pointing to valid node. Check
-              // the pattern and do the movement if match.
+              // Additional handling is required when UserIter (the first user
+              // of OutReg) is pointing to a valid node. Check the pattern and
+              // do the movement if the pattern matches.
               if (UseIter != MBB.end()) {
-                // Collect associated Load at toc nodes. Use hasOneDef to guard
+                // Collect associated Load at toc nodes. Use hasOneDef() to guard
                 // against unexpected scenarios.
                 std::set<MachineInstr *> LoadFromTocs;
                 for (MachineOperand &MO : UseIter->operands())
                   if (MO.isReg() && MO.isUse()) {
-                    if (RegInfo.hasOneDef(MO.getReg())) {
+                    Register MOReg = MO.getReg();
+                    if (RegInfo.hasOneDef(MOReg)) {
                       MachineInstr *Temp =
-                          RegInfo.getOneDef(MO.getReg())->getParent();
-                      // For current TLSLDAIX node, get the Load at toc node for
-                      // the InReg. Otherwise, Temp probably pointed to the
+                          RegInfo.getOneDef(MOReg)->getParent();
+                      // For the current TLSLDAIX node, get the Load at toc node
+                      // for the InReg. Otherwise, Temp probably pointed to the
                       // LoadOffset at toc node that we would like to move.
                       if (Temp == &MI && RegInfo.hasOneDef(InReg))
                         Temp = RegInfo.getOneDef(InReg)->getParent();
@@ -238,7 +243,7 @@ namespace {
                 }
               }
             }
-            // The module-handle is copied in r3. The copy is followed by
+            // The module-handle is copied into r3. The copy is followed by
             // GETtlsMOD32AIX/GETtlsMOD64AIX.
             BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
                 .addReg(InReg);



More information about the cfe-commits mailing list