[clang] [llvm] [PowerPC] Support local-dynamic TLS relocation on AIX (PR #66316)

Felix via cfe-commits cfe-commits at lists.llvm.org
Wed Feb 28 22:34:55 PST 2024


https://github.com/orcguru updated https://github.com/llvm/llvm-project/pull/66316

>From 4f8863f3838313785abeb996039d64e2d873be43 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Wed, 13 Sep 2023 22:21:11 -0400
Subject: [PATCH 01/31] [PowerPC] Support local-dynamic TLS relocation on AIX

---
 llvm/include/llvm/MC/MCExpr.h                 |   2 +
 llvm/lib/MC/MCExpr.cpp                        |   4 +
 .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp  |   9 +-
 .../MCTargetDesc/PPCXCOFFObjectWriter.cpp     |   4 +
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     |  90 ++++-
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  30 +-
 llvm/lib/Target/PowerPC/PPCISelLowering.h     |  14 +-
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td      |  13 +-
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       |  15 +-
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp |  73 +++-
 .../test/CodeGen/PowerPC/aix-tls-gd-double.ll |  90 +++--
 llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll   |  96 +++--
 .../CodeGen/PowerPC/aix-tls-gd-longlong.ll    | 298 +++++++-------
 .../CodeGen/PowerPC/aix-tls-local-dynamic.ll  | 364 ++++++++++++++++++
 .../PowerPC/aix-tls-xcoff-reloc-large.ll      | 284 +++++++-------
 .../CodeGen/PowerPC/aix-tls-xcoff-reloc.ll    | 290 +++++++-------
 16 files changed, 1149 insertions(+), 527 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll

diff --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h
index 67836292874f5f..f3bc0491fd2f19 100644
--- a/llvm/include/llvm/MC/MCExpr.h
+++ b/llvm/include/llvm/MC/MCExpr.h
@@ -301,6 +301,8 @@ class MCSymbolRefExpr : public MCExpr {
     VK_PPC_AIX_TLSGDM,      // symbol at m
     VK_PPC_AIX_TLSIE,       // symbol at ie
     VK_PPC_AIX_TLSLE,       // symbol at le
+    VK_PPC_AIX_TLSLD,       // symbol at ld
+    VK_PPC_AIX_TLSML,       // symbol at ml
     VK_PPC_GOT_TLSLD,       // symbol at got@tlsld
     VK_PPC_GOT_TLSLD_LO,    // symbol at got@tlsld at l
     VK_PPC_GOT_TLSLD_HI,    // symbol at got@tlsld at h
diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index 80def6dfc24b1a..0e563fb926c267 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -331,6 +331,10 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
     return "ie";
   case VK_PPC_AIX_TLSLE:
     return "le";
+  case VK_PPC_AIX_TLSLD:
+    return "ld";
+  case VK_PPC_AIX_TLSML:
+    return "ml";
   case VK_PPC_GOT_TLSLD: return "got at tlsld";
   case VK_PPC_GOT_TLSLD_LO: return "got at tlsld@l";
   case VK_PPC_GOT_TLSLD_HI: return "got at tlsld@h";
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index a804dd823daa4d..22cd2fc03ef7c3 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -231,12 +231,15 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer {
       MCSymbolXCOFF *TCSym =
           cast<MCSectionXCOFF>(Streamer.getCurrentSectionOnly())
               ->getQualNameSymbol();
-      // On AIX, we have a region handle (symbol at m) and the variable offset
-      // (symbol@{gd|ie|le}) for TLS variables, depending on the TLS model.
+      // On AIX, we have a region handle (symbol at m), module handle
+      // (__TLSML[TC]@ml) and the variable offset (symbol@{gd|ie|le|ld}) for TLS
+      // variables, depending on the TLS model.
       if (Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSIE ||
-          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLE)
+          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLE ||
+          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD ||
+          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML)
         OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << "@"
            << MCSymbolRefExpr::getVariantKindName(Kind) << '\n';
       else
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
index 065daf42fe6eb0..f4998e9b9dcba8 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
@@ -116,6 +116,10 @@ std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize(
       return {XCOFF::RelocationType::R_TLS_IE, SignAndSizeForFKData};
     case MCSymbolRefExpr::VK_PPC_AIX_TLSLE:
       return {XCOFF::RelocationType::R_TLS_LE, SignAndSizeForFKData};
+    case MCSymbolRefExpr::VK_PPC_AIX_TLSLD:
+      return {XCOFF::RelocationType::R_TLS_LD, SignAndSizeForFKData};
+    case MCSymbolRefExpr::VK_PPC_AIX_TLSML:
+      return {XCOFF::RelocationType::R_TLSML, SignAndSizeForFKData};
     case MCSymbolRefExpr::VK_None:
       return {XCOFF::RelocationType::R_POS, SignAndSizeForFKData};
     }
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 483cd788ebfe0c..5f40c0b46368d5 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -621,12 +621,23 @@ void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) {
     EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP));
 }
 
-/// This helper function creates the TlsGetAddr MCSymbol for AIX. We will
-/// create the csect and use the qual-name symbol instead of creating just the
-/// external symbol.
+/// This helper function creates the TlsGetAddr/TlsGetMod MCSymbol for AIX. We
+/// will create the csect and use the qual-name symbol instead of creating just
+/// the external symbol.
 static MCSymbol *createMCSymbolForTlsGetAddr(MCContext &Ctx, unsigned MIOpc) {
-  StringRef SymName =
-      MIOpc == PPC::GETtlsTpointer32AIX ? ".__get_tpointer" : ".__tls_get_addr";
+  StringRef SymName;
+  switch (MIOpc) {
+  default:
+    SymName = ".__tls_get_addr";
+    break;
+  case PPC::GETtlsTpointer32AIX:
+    SymName = ".__get_tpointer";
+    break;
+  case PPC::GETtlsMOD32AIX:
+  case PPC::GETtlsMOD64AIX:
+    SymName = ".__tls_get_mod";
+    break;
+  }
   return Ctx
       .getXCOFFSection(SymName, SectionKind::getText(),
                        XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER))
@@ -668,14 +679,15 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
          "GETtls[ld]ADDR[32] must read GPR3");
 
   if (Subtarget->isAIXABI()) {
-    // On AIX, the variable offset should already be in R4 and the region handle
-    // should already be in R3.
-    // For TLSGD, which currently is the only supported access model, we only
-    // need to generate an absolute branch to .__tls_get_addr.
+    // On AIX, for TLSGD the variable offset should already be in R4 and the
+    // region handle should already be in R3, need to generate an absolute
+    // branch to .__tls_get_addr. For TLSLD the module handle should already be
+    // in R3, need to generate branch to .__tls_get_mod.
     Register VarOffsetReg = Subtarget->isPPC64() ? PPC::X4 : PPC::R4;
     (void)VarOffsetReg;
-    assert(MI->getOperand(2).isReg() &&
-           MI->getOperand(2).getReg() == VarOffsetReg &&
+    assert((MI->getNumExplicitOperands() < 3 ||
+            (MI->getOperand(2).isReg() &&
+             MI->getOperand(2).getReg() == VarOffsetReg)) &&
            "GETtls[ld]ADDR[32] must read GPR4");
     EmitAIXTlsCallHelper(MI);
     return;
@@ -718,6 +730,8 @@ static MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO,
     return AP.GetJTISymbol(MO.getIndex());
   case MachineOperand::MO_BlockAddress:
     return AP.GetBlockAddressSymbol(MO.getBlockAddress());
+  case MachineOperand::MO_ExternalSymbol:
+    return AP.OutContext.getOrCreateSymbol(MO.getSymbolName());
   default:
     llvm_unreachable("Unexpected operand type to get symbol.");
   }
@@ -751,6 +765,16 @@ getTOCEntryTypeForMO(const MachineOperand &MO) {
     llvm_unreachable("Unexpected operand type to get TOC type.");
   }
 }
+
+// On AIX, TLS-local-dynamic requires that symbol for the module handle must
+// have the name "_$TLSML". This symbol is used as one TOC symbol reference
+// itself with ML relocation type, thus it has "[TC]" attached to its name.
+static inline bool isSpecialAIXSymbolTLSML(const MachineOperand &MO,
+                                           const bool IsAIX) {
+  return IsAIX && MO.isSymbol() &&
+         (std::strcmp(MO.getSymbolName(), "_$TLSML[TC]") == 0);
+}
+
 /// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
 /// the current output stream.
 ///
@@ -844,6 +868,15 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM;
     if (Flag == PPCII::MO_TLSGD_FLAG || Flag == PPCII::MO_GOT_TLSGD_PCREL_FLAG)
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD;
+    if (MO.getTargetFlags() & PPCII::MO_TLSLD_FLAG) {
+      if (isSpecialAIXSymbolTLSML(MO, IsAIX))
+        // FIXME: On AIX the ML relocation type is only valid for a reference to
+        // a TOC symbol from the symbol itself, and right now its only user is
+        // symbol "_$TLSML". Use symbol name to decide that R_TLSML is expected.
+        return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML;
+      if (IsAIX)
+        return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD;
+    }
     return MCSymbolRefExpr::VariantKind::VK_None;
   };
 
@@ -962,7 +995,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LWZ);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
+            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
            "Invalid operand for LWZtoc.");
 
     // Map the operand to its corresponding MCSymbol.
@@ -1051,7 +1085,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LD);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
+            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
            "Invalid operand!");
 
     // Map the operand to its corresponding MCSymbol.
@@ -1089,7 +1124,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::ADDIS);
 
     const MachineOperand &MO = MI->getOperand(2);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
+            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
            "Invalid operand for ADDIStocHA.");
 
     // Map the machine operand to its corresponding MCSymbol.
@@ -1122,7 +1158,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LWZ);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
+            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
            "Invalid operand for LWZtocL.");
 
     // Map the machine operand to its corresponding MCSymbol.
@@ -1154,7 +1191,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::ADDIS8);
 
     const MachineOperand &MO = MI->getOperand(2);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
+            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
            "Invalid operand for ADDIStocHA8!");
 
     const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
@@ -1164,7 +1202,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     const bool GlobalToc =
         MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal());
     if (GlobalToc || MO.isJTI() || MO.isBlockAddress() ||
-        (MO.isCPI() && TM.getCodeModel() == CodeModel::Large))
+        (MO.isCPI() && TM.getCodeModel() == CodeModel::Large) ||
+        isSpecialAIXSymbolTLSML(MO, IsAIX))
       MOSymbol = lookUpOrCreateTOCEntry(MOSymbol, getTOCEntryTypeForMO(MO), VK);
 
     VK = IsAIX ? MCSymbolRefExpr::VK_PPC_U : MCSymbolRefExpr::VK_PPC_TOC_HA;
@@ -1193,8 +1232,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LD);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() ||
-            MO.isBlockAddress()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
+            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
            "Invalid operand for LDtocL!");
 
     LLVM_DEBUG(assert(
@@ -1360,6 +1399,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
   case PPC::GETtlsADDRPCREL:
   case PPC::GETtlsADDR32AIX:
   case PPC::GETtlsADDR64AIX:
+  case PPC::GETtlsMOD32AIX:
+  case PPC::GETtlsMOD64AIX:
     // Transform: %r3 = GETtlsADDRNNAIX %r3, %r4 (for NN == 32/64).
     // Into: BLA .__tls_get_addr()
     // Unlike on Linux, there is no symbol or relocation needed for this call.
@@ -2843,6 +2884,15 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
       MCSymbol *S = OutContext.getOrCreateSymbol(Name);
       TCEntry = cast<MCSectionXCOFF>(
           getObjFileLowering().getSectionForTOCEntry(S, TM));
+    } else if (I.first.second ==
+               MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML) {
+      // AIX assembler expects TC storage-mapping class for the "_$TLSML"
+      // symbol.
+      MCSection *MCSect = getObjFileLowering().getContext().getXCOFFSection(
+          cast<MCSymbolXCOFF>(I.first.first)->getSymbolTableName(),
+          SectionKind::getData(),
+          XCOFF::CsectProperties(XCOFF::XMC_TC, XCOFF::XTY_SD));
+      TCEntry = cast<MCSectionXCOFF>(MCSect);
     } else {
       TCEntry = cast<MCSectionXCOFF>(
           getObjFileLowering().getSectionForTOCEntry(I.first.first, TM));
@@ -2981,6 +3031,8 @@ void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) {
 		 MMI->hasDebugInfo());
     break;
   }
+  case PPC::GETtlsMOD32AIX:
+  case PPC::GETtlsMOD64AIX:
   case PPC::GETtlsTpointer32AIX:
   case PPC::GETtlsADDR64AIX:
   case PPC::GETtlsADDR32AIX: {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 51becf1d5b8584..2d2a4b008ade06 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1774,9 +1774,13 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
+  case PPCISD::GET_TLS_MOD:
+    return "PPCISD::GET_TLS_MOD";
   case PPCISD::GET_TPOINTER:    return "PPCISD::GET_TPOINTER";
   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
   case PPCISD::TLSGD_AIX:       return "PPCISD::TLSGD_AIX";
+  case PPCISD::TLSLD_AIX:
+    return "PPCISD::TLSLD_AIX";
   case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
   case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
   case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
@@ -3415,13 +3419,25 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
   }
 
-  // Only Local-Exec, Initial-Exec and General-Dynamic TLS models are currently
-  // supported models. If Local- or Initial-exec are not possible or specified,
-  // all GlobalTLSAddress nodes are lowered using the general-dynamic model.
-  // We need to generate two TOC entries, one for the variable offset, one for
-  // the region handle. The global address for the TOC entry of the region
-  // handle is created with the MO_TLSGDM_FLAG flag and the global address
-  // for the TOC entry of the variable offset is created with MO_TLSGD_FLAG.
+  if (Model == TLSModel::LocalDynamic) {
+    // For local-dynamic on AIX, we need to generate two TOC entries, one for
+    // the variable offset, the other for the module handle. The module handle
+    // is encapsulated inside the TLSLD_AIX pseudo node, and will be expanded by
+    // PPCTLSDynamicCall.
+    SDValue VariableOffsetTGA =
+        DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
+    SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
+    return DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, VariableOffset);
+  }
+
+  // The Local-Exec, Initial-Exec, Local-Dynamic, and General-Dynamic TLS models
+  // are currently supported access models. If Local- or Initial-exec or
+  // local-dynamic is not possible or specified, all GlobalTLSAddress nodes are
+  // lowered using the general-dynamic model. We need to generate two TOC
+  // entries, one for the variable offset, one for the region handle. The global
+  // address for the TOC entry of the region handle is created with the
+  // MO_TLSGDM_FLAG flag and the global address for the TOC entry of the
+  // variable offset is created with MO_TLSGD_FLAG.
   SDValue VariableOffsetTGA =
       DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
   SDValue RegionHandleTGA =
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 509a22b0bbf413..73a4e1ad3a190d 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -370,11 +370,23 @@ namespace llvm {
     /// G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY
     /// Op that combines two register copies of TOC entries
     /// (region handle into R3 and variable offset into R4) followed by a
-    /// GET_TLS_ADDR node which will be expanded to a call to __get_tls_addr.
+    /// GET_TLS_ADDR node which will be expanded to a call to __tls_get_addr.
     /// This node is used in 64-bit mode as well (in which case the result is
     /// G8RC and inputs are X3/X4).
     TLSGD_AIX,
 
+    /// %x3 = GET_TLS_MOD _$TLSML - For the AIX local-dynamic TLS model,
+    /// produces a call to __tls_get_mod(_$TLSML\@ml).
+    GET_TLS_MOD,
+
+    /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(variable offset)
+    /// Op that internally creates TOC entry for the "_$TLSML" symbol, generates
+    /// GET_TLS_MOD node which will be expanded into a call to __tls_get_mod,
+    /// and then add the variable offset with the result from the call.
+    /// This node is used in both 32-bit and 64-bit modes. The only difference
+    /// is register class.
+    TLSLD_AIX,
+
     /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
     /// model, produces an ADDIS8 instruction that adds the GOT base
     /// register to sym\@got\@tlsld\@ha.
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 0322bb37b1fdf8..e4fee9c2fff23e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1557,12 +1557,19 @@ def GETtlsldADDRPCREL : GETtlsldADDRPseudo <"#GETtlsldADDRPCREL">;
 // so we don't need to mark it with a size of 8 bytes. Finally, the assembly
 // manual mentions this exact set of registers as the clobbered set, others
 // are guaranteed not to be clobbered.
-let Defs = [X0,X4,X5,X11,LR8,CR0] in
+let Defs = [X0,X4,X5,X11,LR8,CR0] in {
 def GETtlsADDR64AIX :
   PPCEmitTimePseudo<(outs g8rc:$rD),(ins g8rc:$offset, g8rc:$handle),
                     "GETtlsADDR64AIX",
                     [(set i64:$rD,
                       (PPCgetTlsAddr i64:$offset, i64:$handle))]>, isPPC64;
+// On AIX, the call to __tls_get_mod need one input in X3 for the module handle.
+def GETtlsMOD64AIX :
+  PPCEmitTimePseudo<(outs g8rc:$rD),(ins g8rc:$handle),
+                    "GETtlsMOD64AIX",
+                    [(set i64:$rD,
+                      (PPCgetTlsMod i64:$handle))]>, isPPC64;
+}
 }
 
 // Combined op for ADDItlsgdL and GETtlsADDR, late expanded.  X3 and LR8
@@ -1595,6 +1602,10 @@ def TLSGDAIX8 :
                      "#TLSGDAIX8",
                      [(set i64:$rD,
                        (PPCTlsgdAIX i64:$offset, i64:$handle))]>;
+// This pseudo is expanded to one copy to put the module handle in R3, then call
+// GETtlsMOD64AIX, and then add variable offset to the output from the call.
+def TLSLDAIX8 : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$handle),
+                     "#TLSLDAIX8", [(set i64:$rD, (PPCTlsldAIX i64:$handle))]>;
 // Combined op for ADDItlsldL and GETtlsADDR, late expanded.  X3 and LR8
 // are true defines, while the rest of the Defs are clobbers.
 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 3abd97f2c38c09..98d34a3000d473 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -213,12 +213,14 @@ def PPCaddTls     : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
 def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
 def PPCaddiTlsgdL   : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
 def PPCgetTlsAddr   : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
+def PPCgetTlsMod   : SDNode<"PPCISD::GET_TLS_MOD", SDTIntUnaryOp>;
 def PPCgetTpointer : SDNode<"PPCISD::GET_TPOINTER", SDTIntLeaf, []>;
 def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR",
                                SDTypeProfile<1, 3, [
                                  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                                  SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>;
 def PPCTlsgdAIX     : SDNode<"PPCISD::TLSGD_AIX", SDTIntBinOp>;
+def PPCTlsldAIX     : SDNode<"PPCISD::TLSLD_AIX", SDTIntUnaryOp>;
 def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
 def PPCaddiTlsldL   : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
 def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
@@ -3249,11 +3251,16 @@ def GETtlsADDR32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$s
 // The rest of the Defs are the exact set of registers that will be clobbered by
 // the call.
 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
-    Defs = [R0,R4,R5,R11,LR,CR0] in
+    Defs = [R0,R4,R5,R11,LR,CR0] in {
 def GETtlsADDR32AIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$offset, gprc:$handle),
                           "GETtlsADDR32AIX",
                           [(set i32:$rD,
                             (PPCgetTlsAddr i32:$offset, i32:$handle))]>;
+def GETtlsMOD32AIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$handle),
+                          "GETtlsMOD32AIX",
+                          [(set i32:$rD,
+                            (PPCgetTlsMod i32:$handle))]>;
+}
 
 // For local-exec accesses on 32-bit AIX, a call to .__get_tpointer is
 // generated to retrieve the thread pointer. GETtlsTpointer32AIX clobbers both
@@ -3293,6 +3300,12 @@ def TLSGDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$offset, gprc:$handl
                           "#TLSGDAIX",
                           [(set i32:$rD,
                             (PPCTlsgdAIX i32:$offset, i32:$handle))]>;
+// This pseudo is expanded to one copy to put the module handle in R3, then call
+// GETtlsMOD32AIX, and then add variable offset to the output from the call.
+def TLSLDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$handle),
+                          "#TLSLDAIX",
+                          [(set i32:$rD,
+                            (PPCTlsldAIX i32:$handle))]>;
 // LR is a true define, while the rest of the Defs are clobbers.  R3 is
 // explicitly defined when this op is created, so not mentioned here.
 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 9518d5347065cc..32a75a32b9d87f 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -51,6 +51,11 @@ namespace {
       bool Is64Bit = MBB.getParent()->getSubtarget<PPCSubtarget>().isPPC64();
       bool IsAIX = MBB.getParent()->getSubtarget<PPCSubtarget>().isAIXABI();
       bool IsPCREL = false;
+      MachineFunction *MF = MBB.getParent();
+      MachineRegisterInfo &RegInfo = MF->getRegInfo();
+      const TargetRegisterClass *GPRNoZero =
+          Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
+                  : &PPC::GPRC_and_GPRC_NOR0RegClass;
 
       for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
            I != IE;) {
@@ -64,6 +69,8 @@ namespace {
             MI.getOpcode() != PPC::ADDItlsldLADDR &&
             MI.getOpcode() != PPC::ADDItlsgdLADDR32 &&
             MI.getOpcode() != PPC::ADDItlsldLADDR32 &&
+            MI.getOpcode() != PPC::TLSLDAIX &&
+            MI.getOpcode() != PPC::TLSLDAIX8 &&
             MI.getOpcode() != PPC::TLSGDAIX &&
             MI.getOpcode() != PPC::TLSGDAIX8 && !IsTLSTPRelMI && !IsPCREL) {
           // Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP
@@ -109,6 +116,12 @@ namespace {
           Opc1 = PPC::ADDItlsldL32;
           Opc2 = PPC::GETtlsldADDR32;
           break;
+        case PPC::TLSLDAIX:
+          Opc2 = PPC::GETtlsMOD32AIX;
+          break;
+        case PPC::TLSLDAIX8:
+          Opc2 = PPC::GETtlsMOD64AIX;
+          break;
         case PPC::TLSGDAIX8:
           // TLSGDAIX8 is expanded to two copies and GET_TLS_ADDR, so we only
           // set Opc2 here.
@@ -145,19 +158,55 @@ namespace {
                                                               .addImm(0);
 
         if (IsAIX) {
-          // The variable offset and region handle are copied in r4 and r3. The
-          // copies are followed by GETtlsADDR32AIX/GETtlsADDR64AIX.
-          if (!IsTLSTPRelMI) {
-            BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
-                .addReg(MI.getOperand(1).getReg());
+          if (MI.getOpcode() == PPC::TLSLDAIX8 ||
+              MI.getOpcode() == PPC::TLSLDAIX) {
+            // For Local-Dynamic
+            auto &Subtarget = MBB.getParent()->getSubtarget<PPCSubtarget>();
+            bool IsLargeModel =
+                Subtarget.getTargetMachine().getCodeModel() == CodeModel::Large;
+            Register ModuleHandleHReg;
+            unsigned LDTocOp =
+                Is64Bit ? (IsLargeModel ? PPC::LDtocL : PPC::LDtoc)
+                        : (IsLargeModel ? PPC::LWZtocL : PPC::LWZtoc);
+            if (IsLargeModel) {
+              ModuleHandleHReg = RegInfo.createVirtualRegister(GPRNoZero);
+              BuildMI(MBB, I, DL,
+                      TII->get(Is64Bit ? PPC::ADDIStocHA8 : PPC::ADDIStocHA),
+                      ModuleHandleHReg)
+                  .addReg(Subtarget.getTOCPointerRegister())
+                  .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG);
+            }
+            Register MHReg = RegInfo.createVirtualRegister(GPRNoZero);
+            BuildMI(MBB, I, DL, TII->get(LDTocOp), MHReg)
+                .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG)
+                .addReg(IsLargeModel
+                            ? ModuleHandleHReg
+                            : Register(Subtarget.getTOCPointerRegister()));
             BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
-                .addReg(MI.getOperand(2).getReg());
-            BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3).addReg(GPR4);
-          } else
-            // The opcode of GETtlsTpointer32AIX does not change, because later
-            // this instruction will be expanded into a call to .__get_tpointer,
-            // which will return the thread pointer into r3.
-            BuildMI(MBB, I, DL, TII->get(Opc2), GPR3);
+                .addReg(MHReg);
+            // The call to __tls_get_mod.
+            BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3);
+            BuildMI(MBB, I, DL, TII->get(Is64Bit ? PPC::ADD8 : PPC::ADD4), GPR3)
+                .addReg(GPR3)
+                .addReg(MI.getOperand(1).getReg());
+          } else {
+            // For Global-Dynamic
+            // The variable offset and region handle are copied in r4 and r3.
+            // The copies are followed by GETtlsADDR32AIX/GETtlsADDR64AIX.
+            if (!IsTLSTPRelMI) {
+              BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
+                  .addReg(MI.getOperand(1).getReg());
+              BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
+                  .addReg(MI.getOperand(2).getReg());
+              BuildMI(MBB, I, DL, TII->get(Opc2), GPR3)
+                  .addReg(GPR3)
+                  .addReg(GPR4);
+            } else
+              // The opcode of GETtlsTpointer32AIX does not change, because
+              // later this instruction will be expanded into a call to
+              // .__get_tpointer, which will return the thread pointer into r3.
+              BuildMI(MBB, I, DL, TII->get(Opc2), GPR3);
+          }
         } else {
           MachineInstr *Addi;
           if (IsPCREL) {
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
index c0ffb8154c6917..13bef83cf50fd7 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
@@ -156,10 +156,11 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsgdm) @TIInit
-; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsgd) @TIInit
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
+; SMALL32-NEXT:    lwz 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL32-NEXT:    lwz 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    add 3, 3, 6
 ; SMALL32-NEXT:    stfd 1, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
@@ -172,10 +173,11 @@ define void @storesTIInit(double %Val) #0 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C5 at l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 6, L..C4 at l(3)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 6
 ; LARGE32-NEXT:    stfd 1, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
@@ -186,10 +188,11 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsgdm) @TIInit
-; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsgd) @TIInit
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL64-NEXT:    ld 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL64-NEXT:    ld 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    add 3, 3, 6
 ; SMALL64-NEXT:    stfd 1, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -201,11 +204,12 @@ define void @storesTIInit(double %Val) #0 {
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
-; LARGE64-NEXT:    addis 4, L..C5 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C4 at l(3)
-; LARGE64-NEXT:    ld 4, L..C5 at l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
+; LARGE64-NEXT:    ld 6, L..C4 at l(3)
+; LARGE64-NEXT:    addis 3, L..C5 at u(2)
+; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    add 3, 3, 6
 ; LARGE64-NEXT:    stfd 1, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -452,11 +456,12 @@ define double @loadsTIInit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsgdm) @TIInit
-; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsgd) @TIInit
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
+; SMALL32-NEXT:    lwz 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL32-NEXT:    lwz 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C8(2) # @GInit
+; SMALL32-NEXT:    add 3, 3, 6
 ; SMALL32-NEXT:    lfd 0, 0(3)
 ; SMALL32-NEXT:    lfd 1, 0(4)
 ; SMALL32-NEXT:    fadd 1, 0, 1
@@ -471,10 +476,11 @@ define double @loadsTIInit() #1 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C5 at l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 6, L..C4 at l(3)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 6
 ; LARGE32-NEXT:    lfd 0, 0(3)
 ; LARGE32-NEXT:    addis 3, L..C8 at u(2)
 ; LARGE32-NEXT:    lwz 3, L..C8 at l(3)
@@ -489,11 +495,12 @@ define double @loadsTIInit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsgdm) @TIInit
-; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsgd) @TIInit
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL64-NEXT:    ld 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL64-NEXT:    ld 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C8(2) # @GInit
+; SMALL64-NEXT:    add 3, 3, 6
 ; SMALL64-NEXT:    lfd 0, 0(3)
 ; SMALL64-NEXT:    lfd 1, 0(4)
 ; SMALL64-NEXT:    fadd 1, 0, 1
@@ -507,11 +514,12 @@ define double @loadsTIInit() #1 {
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
-; LARGE64-NEXT:    addis 4, L..C5 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C4 at l(3)
-; LARGE64-NEXT:    ld 4, L..C5 at l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
+; LARGE64-NEXT:    ld 6, L..C4 at l(3)
+; LARGE64-NEXT:    addis 3, L..C5 at u(2)
+; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    add 3, 3, 6
 ; LARGE64-NEXT:    addis 4, L..C8 at u(2)
 ; LARGE64-NEXT:    lfd 0, 0(3)
 ; LARGE64-NEXT:    ld 3, L..C8 at l(4)
@@ -610,12 +618,16 @@ entry:
   ret double %add
 }
 
-; External symbol reference checks for .__tls_get_addr
+; External symbol reference checks for .__tls_get_addr/.__tls_get_mod
 
 ; SMALL32: .extern .__tls_get_addr[PR]
+; SMALL32: .extern .__tls_get_mod[PR]
 ; SMALL64: .extern .__tls_get_addr[PR]
+; SMALL64: .extern .__tls_get_mod[PR]
 ; LARGE32: .extern .__tls_get_addr[PR]
+; LARGE32: .extern .__tls_get_mod[PR]
 ; LARGE64: .extern .__tls_get_addr[PR]
+; LARGE64: .extern .__tls_get_mod[PR]
 
 ; TOC entry checks
 
@@ -629,9 +641,10 @@ entry:
 ; SMALL32-LABEL:  L..C3:
 ; SMALL32-NEXT:   .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL32-LABEL:  L..C4:
-; SMALL32-NEXT:   .tc .TIInit[TC],TIInit[TL]@m
+; SMALL32-NEXT:   .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL32-LABEL:  L..C5:
-; SMALL32-NEXT:   .tc TIInit[TC],TIInit[TL]@gd
+; SMALL32-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL32-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; SMALL32-LABEL:  L..C6:
 ; SMALL32-NEXT:   .tc .TWInit[TC],TWInit[TL]@m
 ; SMALL32-LABEL:  L..C7:
@@ -649,9 +662,10 @@ entry:
 ; LARGE32-LABEL:  L..C3:
 ; LARGE32-NEXT:   .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL:  L..C4:
-; LARGE32-NEXT:   .tc .TIInit[TE],TIInit[TL]@m
+; LARGE32-NEXT:   .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE32-LABEL:  L..C5:
-; LARGE32-NEXT:   .tc TIInit[TE],TIInit[TL]@gd
+; LARGE32-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE32-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; LARGE32-LABEL:  L..C6:
 ; LARGE32-NEXT:   .tc .TWInit[TE],TWInit[TL]@m
 ; LARGE32-LABEL:  L..C7:
@@ -669,9 +683,10 @@ entry:
 ; SMALL64-LABEL:  L..C3:
 ; SMALL64-NEXT:  .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL64-LABEL:  L..C4:
-; SMALL64-NEXT:  .tc .TIInit[TC],TIInit[TL]@m
+; SMALL64-NEXT:  .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL64-LABEL:  L..C5:
-; SMALL64-NEXT:  .tc TIInit[TC],TIInit[TL]@gd
+; SMALL64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; SMALL64-LABEL:  L..C6:
 ; SMALL64-NEXT:  .tc .TWInit[TC],TWInit[TL]@m
 ; SMALL64-LABEL:  L..C7:
@@ -689,9 +704,10 @@ entry:
 ; LARGE64-LABEL:  L..C3:
 ; LARGE64-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE64-LABEL:  L..C4:
-; LARGE64-NEXT:  .tc .TIInit[TE],TIInit[TL]@m
+; LARGE64-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE64-LABEL:  L..C5:
-; LARGE64-NEXT:  .tc TIInit[TE],TIInit[TL]@gd
+; LARGE64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; LARGE64-LABEL:  L..C6:
 ; LARGE64-NEXT:  .tc .TWInit[TE],TWInit[TL]@m
 ; LARGE64-LABEL:  L..C7:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
index 887c4521a4c90a..dc75db43d38c70 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
@@ -163,11 +163,12 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    mr 6, 3
-; SMALL32-NEXT:    lwz 3, L..C4(2)
-; SMALL32-NEXT:    lwz 4, L..C5(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
+; SMALL32-NEXT:    mr 6, 3
+; SMALL32-NEXT:    lwz 7, L..C4(2)
+; SMALL32-NEXT:    lwz 3, L..C5(2)
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    add 3, 3, 7
 ; SMALL32-NEXT:    stw 6, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
@@ -181,10 +182,11 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 6, 3
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C5 at l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 7, L..C4 at l(3)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 7
 ; LARGE32-NEXT:    stw 6, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
@@ -195,11 +197,12 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    mr 6, 3
-; SMALL64-NEXT:    ld 3, L..C4(2)
-; SMALL64-NEXT:    ld 4, L..C5(2)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL64-NEXT:    mr 6, 3
+; SMALL64-NEXT:    ld 7, L..C4(2)
+; SMALL64-NEXT:    ld 3, L..C5(2)
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    add 3, 3, 7
 ; SMALL64-NEXT:    stw 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -212,11 +215,12 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
-; LARGE64-NEXT:    addis 4, L..C5 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C4 at l(3)
-; LARGE64-NEXT:    ld 4, L..C5 at l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
+; LARGE64-NEXT:    ld 7, L..C4 at l(3)
+; LARGE64-NEXT:    addis 3, L..C5 at u(2)
+; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    add 3, 3, 7
 ; LARGE64-NEXT:    stw 6, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -467,11 +471,12 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2)
-; SMALL32-NEXT:    lwz 4, L..C5(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
+; SMALL32-NEXT:    lwz 6, L..C4(2)
+; SMALL32-NEXT:    lwz 3, L..C5(2)
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    add 3, 3, 6
 ; SMALL32-NEXT:    lwz 3, 0(3)
 ; SMALL32-NEXT:    lwz 4, 0(4)
 ; SMALL32-NEXT:    add 3, 4, 3
@@ -486,10 +491,11 @@ define i32 @loadsTIUninit() #1 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C5 at l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 6, L..C4 at l(3)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 6
 ; LARGE32-NEXT:    lwz 3, 0(3)
 ; LARGE32-NEXT:    addis 4, L..C8 at u(2)
 ; LARGE32-NEXT:    lwz 4, L..C8 at l(4)
@@ -504,11 +510,12 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C4(2)
-; SMALL64-NEXT:    ld 4, L..C5(2)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL64-NEXT:    ld 6, L..C4(2)
+; SMALL64-NEXT:    ld 3, L..C5(2)
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    add 3, 3, 6
 ; SMALL64-NEXT:    lwz 3, 0(3)
 ; SMALL64-NEXT:    lwz 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -522,14 +529,15 @@ define i32 @loadsTIUninit() #1 {
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
-; LARGE64-NEXT:    addis 4, L..C5 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C4 at l(3)
-; LARGE64-NEXT:    ld 4, L..C5 at l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
+; LARGE64-NEXT:    ld 6, L..C4 at l(3)
+; LARGE64-NEXT:    addis 3, L..C5 at u(2)
+; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    addis 4, L..C8 at u(2)
-; LARGE64-NEXT:    lwz 3, 0(3)
+; LARGE64-NEXT:    add 3, 3, 6
 ; LARGE64-NEXT:    ld 4, L..C8 at l(4)
+; LARGE64-NEXT:    lwz 3, 0(3)
 ; LARGE64-NEXT:    lwz 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -625,12 +633,16 @@ entry:
   ret i32 %add
 }
 
-; External symbol reference checks for .__tls_get_addr
+; External symbol reference checks for .__tls_get_addr/.__tls_get_mod
 
 ; SMALL32: .extern .__tls_get_addr[PR]
+; SMALL32: .extern .__tls_get_mod[PR]
 ; SMALL64: .extern .__tls_get_addr[PR]
+; SMALL64: .extern .__tls_get_mod[PR]
 ; LARGE32: .extern .__tls_get_addr[PR]
+; LARGE32: .extern .__tls_get_mod[PR]
 ; LARGE64: .extern .__tls_get_addr[PR]
+; LARGE64: .extern .__tls_get_mod[PR]
 
 ; TOC entry checks
 
@@ -644,9 +656,10 @@ entry:
 ; SMALL32-LABEL: L..C3:
 ; SMALL32-NEXT:	 .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL32-LABEL: L..C4:
-; SMALL32-NEXT:	 .tc .TIUninit[TC],TIUninit[UL]@m
+; SMALL32-NEXT:	 .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL32-LABEL: L..C5:
-; SMALL32-NEXT:	 .tc TIUninit[TC],TIUninit[UL]@gd
+; SMALL32-NEXT:	 .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL32-NEXT:	 .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; SMALL32-LABEL: L..C6:
 ; SMALL32-NEXT:	 .tc .TWUninit[TC],TWUninit[TL]@m
 ; SMALL32-LABEL: L..C7:
@@ -664,9 +677,10 @@ entry:
 ; LARGE32-LABEL: L..C3:
 ; LARGE32-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL: L..C4:
-; LARGE32-NEXT:  .tc .TIUninit[TE],TIUninit[UL]@m
+; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE32-LABEL: L..C5:
-; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@gd
+; LARGE32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; LARGE32-LABEL: L..C6:
 ; LARGE32-NEXT:  .tc .TWUninit[TE],TWUninit[TL]@m
 ; LARGE32-LABEL: L..C7:
@@ -684,9 +698,10 @@ entry:
 ; SMALL64-LABEL:  L..C3:
 ; SMALL64-NEXT:   .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL64-LABEL:  L..C4:
-; SMALL64-NEXT:   .tc .TIUninit[TC],TIUninit[UL]@m
+; SMALL64-NEXT:   .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL64-LABEL:  L..C5:
-; SMALL64-NEXT:   .tc TIUninit[TC],TIUninit[UL]@gd
+; SMALL64-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL64-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; SMALL64-LABEL:  L..C6:
 ; SMALL64-NEXT:   .tc .TWUninit[TC],TWUninit[TL]@m
 ; SMALL64-LABEL:  L..C7:
@@ -704,9 +719,10 @@ entry:
 ; LARGE64-LABEL:  L..C3:
 ; LARGE64-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE64-LABEL:  L..C4:
-; LARGE64-NEXT:  .tc .TIUninit[TE],TIUninit[UL]@m
+; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE64-LABEL:  L..C5:
-; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@gd
+; LARGE64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; LARGE64-LABEL:  L..C6:
 ; LARGE64-NEXT:  .tc .TWUninit[TE],TWUninit[TL]@m
 ; LARGE64-LABEL:  L..C7:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
index 47813b59ba804c..d19b5ad9b5e50a 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
@@ -97,12 +97,13 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    mr 6, 4
-; SMALL32-NEXT:    mr 7, 3
-; SMALL32-NEXT:    lwz 3, L..C2(2)
-; SMALL32-NEXT:    lwz 4, L..C3(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
+; SMALL32-NEXT:    mr 7, 3
+; SMALL32-NEXT:    lwz 8, L..C2(2)
+; SMALL32-NEXT:    mr 6, 4
+; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    add 3, 3, 8
 ; SMALL32-NEXT:    stw 6, 4(3)
 ; SMALL32-NEXT:    stw 7, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
@@ -115,13 +116,14 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    mr 7, 3
+; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    addis 3, L..C2 at u(2)
-; LARGE32-NEXT:    addis 4, L..C3 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C2 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C3 at l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 8, L..C2 at l(3)
+; LARGE32-NEXT:    addis 3, L..C3 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 8
 ; LARGE32-NEXT:    stw 6, 4(3)
 ; LARGE32-NEXT:    stw 7, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
@@ -133,11 +135,12 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    mr 6, 3
-; SMALL64-NEXT:    ld 3, L..C2(2)
-; SMALL64-NEXT:    ld 4, L..C3(2)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL64-NEXT:    mr 6, 3
+; SMALL64-NEXT:    ld 7, L..C2(2)
+; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    add 3, 3, 7
 ; SMALL64-NEXT:    std 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -150,11 +153,12 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C2 at u(2)
-; LARGE64-NEXT:    addis 4, L..C3 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C2 at l(3)
-; LARGE64-NEXT:    ld 4, L..C3 at l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
+; LARGE64-NEXT:    ld 7, L..C2 at l(3)
+; LARGE64-NEXT:    addis 3, L..C3 at u(2)
+; LARGE64-NEXT:    ld 3, L..C3 at l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    add 3, 3, 7
 ; LARGE64-NEXT:    std 6, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -171,12 +175,13 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    mr 6, 4
-; SMALL32-NEXT:    mr 7, 3
-; SMALL32-NEXT:    lwz 3, L..C4(2)
-; SMALL32-NEXT:    lwz 4, L..C5(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
+; SMALL32-NEXT:    mr 7, 3
+; SMALL32-NEXT:    lwz 8, L..C4(2)
+; SMALL32-NEXT:    mr 6, 4
+; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    add 3, 3, 8
 ; SMALL32-NEXT:    stw 6, 4(3)
 ; SMALL32-NEXT:    stw 7, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
@@ -189,13 +194,14 @@ define void @storesTIInit(i64 %Val) #0 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    mr 7, 3
+; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C5 at l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 8, L..C4 at l(3)
+; LARGE32-NEXT:    addis 3, L..C3 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 8
 ; LARGE32-NEXT:    stw 6, 4(3)
 ; LARGE32-NEXT:    stw 7, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
@@ -207,11 +213,12 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    mr 6, 3
-; SMALL64-NEXT:    ld 3, L..C4(2)
-; SMALL64-NEXT:    ld 4, L..C5(2)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
+; SMALL64-NEXT:    mr 6, 3
+; SMALL64-NEXT:    ld 7, L..C4(2)
+; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    add 3, 3, 7
 ; SMALL64-NEXT:    std 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -224,11 +231,12 @@ define void @storesTIInit(i64 %Val) #0 {
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
-; LARGE64-NEXT:    addis 4, L..C5 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C4 at l(3)
-; LARGE64-NEXT:    ld 4, L..C5 at l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
+; LARGE64-NEXT:    ld 7, L..C4 at l(3)
+; LARGE64-NEXT:    addis 3, L..C3 at u(2)
+; LARGE64-NEXT:    ld 3, L..C3 at l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    add 3, 3, 7
 ; LARGE64-NEXT:    std 6, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -247,8 +255,8 @@ define void @storesTWInit(i64 %Val) #0 {
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    mr 6, 4
 ; SMALL32-NEXT:    mr 7, 3
-; SMALL32-NEXT:    lwz 3, L..C6(2)
-; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    lwz 3, L..C5(2)
+; SMALL32-NEXT:    lwz 4, L..C6(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL32-NEXT:    stw 6, 4(3)
@@ -265,10 +273,10 @@ define void @storesTWInit(i64 %Val) #0 {
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    mr 7, 3
-; LARGE32-NEXT:    addis 3, L..C6 at u(2)
-; LARGE32-NEXT:    addis 4, L..C7 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C6 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C7 at l(4)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    addis 4, L..C6 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    lwz 4, L..C6 at l(4)
 ; LARGE32-NEXT:    bla .__tls_get_addr[PR]
 ; LARGE32-NEXT:    stw 6, 4(3)
 ; LARGE32-NEXT:    stw 7, 0(3)
@@ -282,8 +290,8 @@ define void @storesTWInit(i64 %Val) #0 {
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    mr 6, 3
-; SMALL64-NEXT:    ld 3, L..C6(2)
-; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    ld 3, L..C5(2)
+; SMALL64-NEXT:    ld 4, L..C6(2)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL64-NEXT:    std 6, 0(3)
@@ -297,11 +305,11 @@ define void @storesTWInit(i64 %Val) #0 {
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    mr 6, 3
-; LARGE64-NEXT:    addis 3, L..C6 at u(2)
-; LARGE64-NEXT:    addis 4, L..C7 at u(2)
+; LARGE64-NEXT:    addis 3, L..C5 at u(2)
+; LARGE64-NEXT:    addis 4, L..C6 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C6 at l(3)
-; LARGE64-NEXT:    ld 4, L..C7 at l(4)
+; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    ld 4, L..C6 at l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
 ; LARGE64-NEXT:    std 6, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -323,7 +331,7 @@ define i64 @loadsTGInit() #1 {
 ; SMALL32-NEXT:    lwz 4, L..C1(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    lwz 4, L..C7(2)
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -347,8 +355,8 @@ define i64 @loadsTGInit() #1 {
 ; LARGE32-NEXT:    bla .__tls_get_addr[PR]
 ; LARGE32-NEXT:    lwz 4, 4(3)
 ; LARGE32-NEXT:    lwz 3, 0(3)
-; LARGE32-NEXT:    addis 5, L..C8 at u(2)
-; LARGE32-NEXT:    lwz 5, L..C8 at l(5)
+; LARGE32-NEXT:    addis 5, L..C7 at u(2)
+; LARGE32-NEXT:    lwz 5, L..C7 at l(5)
 ; LARGE32-NEXT:    lwz 6, 4(5)
 ; LARGE32-NEXT:    lwz 5, 0(5)
 ; LARGE32-NEXT:    addc 4, 6, 4
@@ -366,7 +374,7 @@ define i64 @loadsTGInit() #1 {
 ; SMALL64-NEXT:    ld 4, L..C1(2)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    ld 4, L..C7(2)
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -385,9 +393,9 @@ define i64 @loadsTGInit() #1 {
 ; LARGE64-NEXT:    ld 3, L..C0 at l(3)
 ; LARGE64-NEXT:    ld 4, L..C1 at l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
-; LARGE64-NEXT:    addis 4, L..C8 at u(2)
+; LARGE64-NEXT:    addis 4, L..C7 at u(2)
 ; LARGE64-NEXT:    ld 3, 0(3)
-; LARGE64-NEXT:    ld 4, L..C8 at l(4)
+; LARGE64-NEXT:    ld 4, L..C7 at l(4)
 ; LARGE64-NEXT:    ld 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -407,11 +415,12 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C2(2)
-; SMALL32-NEXT:    lwz 4, L..C3(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    lwz 6, L..C2(2)
+; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    add 3, 3, 6
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -429,14 +438,15 @@ define i64 @loadsTIUninit() #1 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C2 at u(2)
-; LARGE32-NEXT:    addis 4, L..C3 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C2 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C3 at l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 6, L..C2 at l(3)
+; LARGE32-NEXT:    addis 3, L..C3 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 6
 ; LARGE32-NEXT:    lwz 4, 4(3)
 ; LARGE32-NEXT:    lwz 3, 0(3)
-; LARGE32-NEXT:    addis 5, L..C8 at u(2)
-; LARGE32-NEXT:    lwz 5, L..C8 at l(5)
+; LARGE32-NEXT:    addis 5, L..C7 at u(2)
+; LARGE32-NEXT:    lwz 5, L..C7 at l(5)
 ; LARGE32-NEXT:    lwz 6, 4(5)
 ; LARGE32-NEXT:    lwz 5, 0(5)
 ; LARGE32-NEXT:    addc 4, 6, 4
@@ -450,11 +460,12 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C2(2)
-; SMALL64-NEXT:    ld 4, L..C3(2)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    ld 6, L..C2(2)
+; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    add 3, 3, 6
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -468,14 +479,15 @@ define i64 @loadsTIUninit() #1 {
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C2 at u(2)
-; LARGE64-NEXT:    addis 4, L..C3 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C2 at l(3)
-; LARGE64-NEXT:    ld 4, L..C3 at l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
-; LARGE64-NEXT:    addis 4, L..C8 at u(2)
+; LARGE64-NEXT:    ld 6, L..C2 at l(3)
+; LARGE64-NEXT:    addis 3, L..C3 at u(2)
+; LARGE64-NEXT:    ld 3, L..C3 at l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    addis 4, L..C7 at u(2)
+; LARGE64-NEXT:    add 3, 3, 6
+; LARGE64-NEXT:    ld 4, L..C7 at l(4)
 ; LARGE64-NEXT:    ld 3, 0(3)
-; LARGE64-NEXT:    ld 4, L..C8 at l(4)
 ; LARGE64-NEXT:    ld 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -495,11 +507,12 @@ define i64 @loadsTIInit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2)
-; SMALL32-NEXT:    lwz 4, L..C5(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    lwz 6, L..C4(2)
+; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    add 3, 3, 6
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -517,14 +530,15 @@ define i64 @loadsTIInit() #1 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C5 at l(4)
-; LARGE32-NEXT:    bla .__tls_get_addr[PR]
+; LARGE32-NEXT:    lwz 6, L..C4 at l(3)
+; LARGE32-NEXT:    addis 3, L..C3 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
+; LARGE32-NEXT:    bla .__tls_get_mod[PR]
+; LARGE32-NEXT:    add 3, 3, 6
 ; LARGE32-NEXT:    lwz 4, 4(3)
 ; LARGE32-NEXT:    lwz 3, 0(3)
-; LARGE32-NEXT:    addis 5, L..C8 at u(2)
-; LARGE32-NEXT:    lwz 5, L..C8 at l(5)
+; LARGE32-NEXT:    addis 5, L..C7 at u(2)
+; LARGE32-NEXT:    lwz 5, L..C7 at l(5)
 ; LARGE32-NEXT:    lwz 6, 4(5)
 ; LARGE32-NEXT:    lwz 5, 0(5)
 ; LARGE32-NEXT:    addc 4, 6, 4
@@ -538,11 +552,12 @@ define i64 @loadsTIInit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C4(2)
-; SMALL64-NEXT:    ld 4, L..C5(2)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    ld 6, L..C4(2)
+; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    add 3, 3, 6
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -556,14 +571,15 @@ define i64 @loadsTIInit() #1 {
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
-; LARGE64-NEXT:    addis 4, L..C5 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C4 at l(3)
-; LARGE64-NEXT:    ld 4, L..C5 at l(4)
-; LARGE64-NEXT:    bla .__tls_get_addr[PR]
-; LARGE64-NEXT:    addis 4, L..C8 at u(2)
+; LARGE64-NEXT:    ld 6, L..C4 at l(3)
+; LARGE64-NEXT:    addis 3, L..C3 at u(2)
+; LARGE64-NEXT:    ld 3, L..C3 at l(3)
+; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    addis 4, L..C7 at u(2)
+; LARGE64-NEXT:    add 3, 3, 6
+; LARGE64-NEXT:    ld 4, L..C7 at l(4)
 ; LARGE64-NEXT:    ld 3, 0(3)
-; LARGE64-NEXT:    ld 4, L..C8 at l(4)
 ; LARGE64-NEXT:    ld 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -583,11 +599,11 @@ define i64 @loadsTWInit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C6(2)
-; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    lwz 3, L..C5(2)
+; SMALL32-NEXT:    lwz 4, L..C6(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    lwz 4, L..C7(2)
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -604,15 +620,15 @@ define i64 @loadsTWInit() #1 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    addis 3, L..C6 at u(2)
-; LARGE32-NEXT:    addis 4, L..C7 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C6 at l(3)
-; LARGE32-NEXT:    lwz 4, L..C7 at l(4)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    addis 4, L..C6 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    lwz 4, L..C6 at l(4)
 ; LARGE32-NEXT:    bla .__tls_get_addr[PR]
 ; LARGE32-NEXT:    lwz 4, 4(3)
 ; LARGE32-NEXT:    lwz 3, 0(3)
-; LARGE32-NEXT:    addis 5, L..C8 at u(2)
-; LARGE32-NEXT:    lwz 5, L..C8 at l(5)
+; LARGE32-NEXT:    addis 5, L..C7 at u(2)
+; LARGE32-NEXT:    lwz 5, L..C7 at l(5)
 ; LARGE32-NEXT:    lwz 6, 4(5)
 ; LARGE32-NEXT:    lwz 5, 0(5)
 ; LARGE32-NEXT:    addc 4, 6, 4
@@ -626,11 +642,11 @@ define i64 @loadsTWInit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C6(2)
-; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    ld 3, L..C5(2)
+; SMALL64-NEXT:    ld 4, L..C6(2)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    ld 4, L..C7(2)
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -643,15 +659,15 @@ define i64 @loadsTWInit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    addis 3, L..C6 at u(2)
-; LARGE64-NEXT:    addis 4, L..C7 at u(2)
+; LARGE64-NEXT:    addis 3, L..C5 at u(2)
+; LARGE64-NEXT:    addis 4, L..C6 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 3, L..C6 at l(3)
-; LARGE64-NEXT:    ld 4, L..C7 at l(4)
+; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    ld 4, L..C6 at l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
-; LARGE64-NEXT:    addis 4, L..C8 at u(2)
+; LARGE64-NEXT:    addis 4, L..C7 at u(2)
 ; LARGE64-NEXT:    ld 3, 0(3)
-; LARGE64-NEXT:    ld 4, L..C8 at l(4)
+; LARGE64-NEXT:    ld 4, L..C7 at l(4)
 ; LARGE64-NEXT:    ld 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -665,12 +681,16 @@ entry:
   ret i64 %add
 }
 
-; External symbol reference checks for .__tls_get_addr
+; External symbol reference checks for .__tls_get_addr/.__tls_get_mod
 
 ; SMALL32: .extern .__tls_get_addr[PR]
+; SMALL32: .extern .__tls_get_mod[PR]
 ; SMALL64: .extern .__tls_get_addr[PR]
+; SMALL64: .extern .__tls_get_mod[PR]
 ; LARGE32: .extern .__tls_get_addr[PR]
+; LARGE32: .extern .__tls_get_mod[PR]
 ; LARGE64: .extern .__tls_get_addr[PR]
+; LARGE64: .extern .__tls_get_mod[PR]
 
 ; TOC entry checks
 
@@ -680,18 +700,17 @@ entry:
 ; SMALL32-LABEL:  L..C1:
 ; SMALL32-NEXT:  .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL32-LABEL:  L..C2:
-; SMALL32-NEXT:  .tc .TIUninit[TC],TIUninit[UL]@m
+; SMALL32-NEXT:  .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL32-LABEL:  L..C3:
-; SMALL32-NEXT:  .tc TIUninit[TC],TIUninit[UL]@gd
+; SMALL32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; SMALL32-LABEL:  L..C4:
-; SMALL32-NEXT:  .tc .TIInit[TC],TIInit[TL]@m
+; SMALL32-NEXT:  .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL32-LABEL:  L..C5:
-; SMALL32-NEXT:  .tc TIInit[TC],TIInit[TL]@gd
-; SMALL32-LABEL:  L..C6:
 ; SMALL32-NEXT:  .tc .TWInit[TC],TWInit[TL]@m
-; SMALL32-LABEL:  L..C7:
+; SMALL32-LABEL:  L..C6:
 ; SMALL32-NEXT:  .tc TWInit[TC],TWInit[TL]@gd
-; SMALL32-LABEL:  L..C8:
+; SMALL32-LABEL:  L..C7:
 ; SMALL32-NEXT:  .tc GInit[TC],GInit[RW]
 
 ; LARGE32-LABEL:  .toc
@@ -700,18 +719,17 @@ entry:
 ; LARGE32-LABEL:  L..C1:
 ; LARGE32-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL:  L..C2:
-; LARGE32-NEXT:  .tc .TIUninit[TE],TIUninit[UL]@m
+; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE32-LABEL:  L..C3:
-; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@gd
+; LARGE32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; LARGE32-LABEL:  L..C4:
-; LARGE32-NEXT:  .tc .TIInit[TE],TIInit[TL]@m
+; LARGE32-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE32-LABEL:  L..C5:
-; LARGE32-NEXT:  .tc TIInit[TE],TIInit[TL]@gd
-; LARGE32-LABEL:  L..C6:
 ; LARGE32-NEXT:  .tc .TWInit[TE],TWInit[TL]@m
-; LARGE32-LABEL:  L..C7:
+; LARGE32-LABEL:  L..C6:
 ; LARGE32-NEXT:  .tc TWInit[TE],TWInit[TL]@gd
-; LARGE32-LABEL:  L..C8:
+; LARGE32-LABEL:  L..C7:
 ; LARGE32-NEXT:  .tc GInit[TE],GInit[RW]
 
 ; SMALL64-LABEL:  .toc
@@ -720,18 +738,17 @@ entry:
 ; SMALL64-LABEL:  L..C1:
 ; SMALL64-NEXT:  .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL64-LABEL:  L..C2:
-; SMALL64-NEXT:  .tc .TIUninit[TC],TIUninit[UL]@m
+; SMALL64-NEXT:  .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL64-LABEL:  L..C3:
-; SMALL64-NEXT:  .tc TIUninit[TC],TIUninit[UL]@gd
+; SMALL64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; SMALL64-LABEL:  L..C4:
-; SMALL64-NEXT:  .tc .TIInit[TC],TIInit[TL]@m
+; SMALL64-NEXT:  .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL64-LABEL:  L..C5:
-; SMALL64-NEXT:  .tc TIInit[TC],TIInit[TL]@gd
-; SMALL64-LABEL:  L..C6:
 ; SMALL64-NEXT:  .tc .TWInit[TC],TWInit[TL]@m
-; SMALL64-LABEL:  L..C7:
+; SMALL64-LABEL:  L..C6:
 ; SMALL64-NEXT:  .tc TWInit[TC],TWInit[TL]@gd
-; SMALL64-LABEL:  L..C8:
+; SMALL64-LABEL:  L..C7:
 ; SMALL64-NEXT:  .tc GInit[TC],GInit[RW]
 
 ; LARGE64-LABEL:  .toc
@@ -740,18 +757,17 @@ entry:
 ; LARGE64-LABEL:  L..C1:
 ; LARGE64-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE64-LABEL:  L..C2:
-; LARGE64-NEXT:  .tc .TIUninit[TE],TIUninit[UL]@m
+; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE64-LABEL:  L..C3:
-; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@gd
+; LARGE64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
 ; LARGE64-LABEL:  L..C4:
-; LARGE64-NEXT:  .tc .TIInit[TE],TIInit[TL]@m
+; LARGE64-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE64-LABEL:  L..C5:
-; LARGE64-NEXT:  .tc TIInit[TE],TIInit[TL]@gd
-; LARGE64-LABEL:  L..C6:
 ; LARGE64-NEXT:  .tc .TWInit[TE],TWInit[TL]@m
-; LARGE64-LABEL:  L..C7:
+; LARGE64-LABEL:  L..C6:
 ; LARGE64-NEXT:  .tc TWInit[TE],TWInit[TL]@gd
-; LARGE64-LABEL:  L..C8:
+; LARGE64-LABEL:  L..C7:
 ; LARGE64-NEXT:  .tc GInit[TE],GInit[RW]
 
 attributes #0 = { nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" }
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
new file mode 100644
index 00000000000000..9faa99b6eb2ba7
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
@@ -0,0 +1,364 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
+; RUN:     --code-model=small < %s | FileCheck %s --check-prefixes=SMALL64,SMALL
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
+; RUN:     --code-model=large < %s | FileCheck %s --check-prefixes=LARGE64,LARGE
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \
+; RUN:     --code-model=small < %s | FileCheck %s --check-prefixes=SMALL32,SMALL
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \
+; RUN:     --code-model=large < %s | FileCheck %s --check-prefixes=LARGE32,LARGE
+
+ at TGInit = thread_local(localdynamic) global i32 42, align 4
+ at TGUninit = thread_local(localdynamic) global i32 0, align 4
+ at TIInit = internal thread_local(localdynamic) global i32 42, align 4
+ at TIUninit = internal thread_local(localdynamic) global i32 0, align 4
+ at TWInit = weak thread_local(localdynamic) global i32 42, align 4
+ at TWUninit = weak thread_local(localdynamic) global i32 0, align 4
+
+define i32 @loadTGInit() {
+; SMALL-LABEL:  loadTGInit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        lwz [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+;
+; LARGE-LABEL:  loadTGInit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        lwz [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGInit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define void @storeTGInit(i32 noundef signext %i) {
+; SMALL-LABEL:  storeTGInit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        stw [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+;
+; LARGE-LABEL:  storeTGInit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        stw [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGInit)
+  store i32 %i, ptr %0, align 4
+  ret void
+}
+
+define i32 @loadTGUninit() {
+; SMALL-LABEL:  loadTGUninit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        lwz [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+;
+; LARGE-LABEL:  loadTGUninit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        lwz [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGUninit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define void @storeTGUninit(i32 noundef signext %i) {
+; SMALL-LABEL:  storeTGUninit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        stw [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+;
+; LARGE-LABEL:  storeTGUninit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        stw [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGUninit)
+  store i32 %i, ptr %0, align 4
+  ret void
+}
+
+define i32 @loadTIInit() {
+; SMALL-LABEL:  loadTIInit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        lwz [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+;
+; LARGE-LABEL:  loadTIInit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        lwz [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIInit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define void @storeTIInit(i32 noundef signext %i) {
+; SMALL-LABEL:  storeTIInit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        stw [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+;
+; LARGE-LABEL:  storeTIInit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        stw [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIInit)
+  store i32 %i, ptr %0, align 4
+  ret void
+}
+
+define i32 @loadTIUninit() {
+; SMALL-LABEL:  loadTIUninit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        lwz [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+;
+; LARGE-LABEL:  loadTIUninit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        lwz [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIUninit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define void @storeTIUninit(i32 noundef signext %i) {
+; SMALL-LABEL:  storeTIUninit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        stw [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+;
+; LARGE-LABEL:  storeTIUninit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        stw [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIUninit)
+  store i32 %i, ptr %0, align 4
+  ret void
+}
+
+define i32 @loadTWInit() {
+; SMALL-LABEL:  loadTWInit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        lwz [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+;
+; LARGE-LABEL:  loadTWInit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        lwz [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWInit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define void @storeTWInit(i32 noundef signext %i) {
+; SMALL-LABEL:  storeTWInit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        stw [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+;
+; LARGE-LABEL:  storeTWInit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        stw [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWInit)
+  store i32 %i, ptr %0, align 4
+  ret void
+}
+
+define i32 @loadTWUninit() {
+; SMALL-LABEL:  loadTWUninit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        lwz [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+;
+; LARGE-LABEL:  loadTWUninit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        lwz [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWUninit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define void @storeTWUninit(i32 noundef signext %i) {
+; SMALL-LABEL:  storeTWUninit:
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
+; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; SMALL:        bla .__tls_get_mod[PR]
+; SMALL:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; SMALL:        stw [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+;
+; LARGE-LABEL:  storeTWUninit:
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
+; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
+; LARGE:        stw [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWUninit)
+  store i32 %i, ptr %0, align 4
+  ret void
+}
+
+; SMALL:          .extern .__tls_get_mod[PR]
+; LARGE:          .extern .__tls_get_mod[PR]
+
+; SMALL:        [[TGInitL]]:
+; SMALL-NEXT:   .tc TGInit[TC],TGInit[TL]@ld
+; SMALL:        [[ModuleHandleL]]:
+; SMALL-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; SMALL-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL:        [[TGUninitL]]:
+; SMALL-NEXT:   .tc TGUninit[TC],TGUninit[TL]@ld
+; SMALL:        [[TIInitL]]:
+; SMALL-NEXT:   .tc TIInit[TC],TIInit[TL]@ld
+; SMALL:        [[TIUninitL]]:
+; SMALL-NEXT:   .tc TIUninit[TC],TIUninit[UL]@ld
+; SMALL:        [[TWInitL]]:
+; SMALL-NEXT:   .tc TWInit[TC],TWInit[TL]@ld
+; SMALL:        [[TWUninitL]]:
+; SMALL-NEXT:   .tc TWUninit[TC],TWUninit[TL]@ld
+
+; LARGE:        [[TGInitL]]:
+; LARGE-NEXT:   .tc TGInit[TE],TGInit[TL]@ld
+; LARGE:        [[ModuleHandleL]]:
+; LARGE-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE:        [[TGUninitL]]:
+; LARGE-NEXT:   .tc TGUninit[TE],TGUninit[TL]@ld
+; LARGE:        [[TIInitL]]:
+; LARGE-NEXT:   .tc TIInit[TE],TIInit[TL]@ld
+; LARGE:        [[TIUninitL]]:
+; LARGE-NEXT:   .tc TIUninit[TE],TIUninit[UL]@ld
+; LARGE:        [[TWInitL]]:
+; LARGE-NEXT:   .tc TWInit[TE],TWInit[TL]@ld
+; LARGE:        [[TWUninitL]]:
+; LARGE-NEXT:   .tc TWUninit[TE],TWUninit[TL]@ld
+
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
index 059924f392f6b7..b1749511b910cf 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
@@ -5,6 +5,7 @@
 ; RUN: llvm-objdump -D -r --symbol-description %t.o | FileCheck -D#NFA=2 --check-prefix=DIS %s
 
 @GInit = global double 1.000000e+00, align 8
+; @TIInit is local-dynamic indeed
 @TIInit = internal thread_local global i64 1, align 8
 @TWInit = weak thread_local global double 1.000000e+00, align 8
 
@@ -32,7 +33,7 @@ entry:
 ; RELOC-NEXT:   Section (index: 1) .text {
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x16
-; RELOC-NEXT:     Symbol: .TIInit ([[#NFA+17]])
+; RELOC-NEXT:     Symbol: TIInit ([[#NFA+19]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -44,19 +45,19 @@ entry:
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
-; RELOC-NEXT:     Type: R_TOCU (0x30)
+; RELOC-NEXT:     Type: R_TOCL (0x31)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x1E
-; RELOC-NEXT:     Symbol: .TIInit ([[#NFA+17]])
+; RELOC-NEXT:     Symbol: _$TLSML ([[#NFA+21]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
-; RELOC-NEXT:     Type: R_TOCL (0x31)
+; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x22
-; RELOC-NEXT:     Symbol: TIInit ([[#NFA+19]])
+; RELOC-NEXT:     Symbol: _$TLSML ([[#NFA+21]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -64,63 +65,63 @@ entry:
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x24
-; RELOC-NEXT:     Symbol: .__tls_get_addr ([[#NFA+1]])
+; RELOC-NEXT:     Symbol: .__tls_get_mod ([[#NFA+1]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 26
 ; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x4E
-; RELOC-NEXT:     Symbol: .TWInit ([[#NFA+21]])
+; RELOC-NEXT:     Virtual Address: 0x5E
+; RELOC-NEXT:     Symbol: .TWInit ([[#NFA+23]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x52
-; RELOC-NEXT:     Symbol: TWInit ([[#NFA+23]])
+; RELOC-NEXT:     Virtual Address: 0x62
+; RELOC-NEXT:     Symbol: TWInit ([[#NFA+25]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x56
-; RELOC-NEXT:     Symbol: .TWInit ([[#NFA+21]])
+; RELOC-NEXT:     Virtual Address: 0x66
+; RELOC-NEXT:     Symbol: .TWInit ([[#NFA+23]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOCL (0x31)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x5A
-; RELOC-NEXT:     Symbol: TWInit ([[#NFA+23]])
+; RELOC-NEXT:     Virtual Address: 0x6A
+; RELOC-NEXT:     Symbol: TWInit ([[#NFA+25]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOCL (0x31)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x5C
-; RELOC-NEXT:     Symbol: .__tls_get_addr ([[#NFA+1]])
+; RELOC-NEXT:     Virtual Address: 0x6C
+; RELOC-NEXT:     Symbol: .__tls_get_addr ([[#NFA+3]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 26
 ; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x66
-; RELOC-NEXT:     Symbol: GInit ([[#NFA+25]])
+; RELOC-NEXT:     Virtual Address: 0x76
+; RELOC-NEXT:     Symbol: GInit ([[#NFA+27]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x6A
-; RELOC-NEXT:     Symbol: GInit ([[#NFA+25]])
+; RELOC-NEXT:     Virtual Address: 0x7A
+; RELOC-NEXT:     Symbol: GInit ([[#NFA+27]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -129,72 +130,72 @@ entry:
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Section (index: 2) .data {
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x90
-; RELOC-NEXT:   Symbol: .storesTIInit ([[#NFA+5]])
+; RELOC-NEXT:   Virtual Address: 0xA0
+; RELOC-NEXT:   Symbol: .storesTIInit ([[#NFA+7]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x94
-; RELOC-NEXT:   Symbol: TOC ([[#NFA+15]])
+; RELOC-NEXT:   Virtual Address: 0xA4
+; RELOC-NEXT:   Symbol: TOC ([[#NFA+17]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x9C
-; RELOC-NEXT:   Symbol: .loadsTWInit ([[#NFA+7]])
+; RELOC-NEXT:   Virtual Address: 0xAC
+; RELOC-NEXT:   Symbol: .loadsTWInit ([[#NFA+9]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xA0
-; RELOC-NEXT:   Symbol: TOC ([[#NFA+15]])
+; RELOC-NEXT:   Virtual Address: 0xB0
+; RELOC-NEXT:   Symbol: TOC ([[#NFA+17]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xA8
-; RELOC-NEXT:   Symbol: TIInit ([[#NFA+27]])
+; RELOC-NEXT:   Virtual Address: 0xB8
+; RELOC-NEXT:   Symbol: TIInit ([[#NFA+29]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLSM (0x24)
+; RELOC-NEXT:   Type: R_TLS_LD (0x22)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xAC
-; RELOC-NEXT:   Symbol: TIInit ([[#NFA+27]])
+; RELOC-NEXT:   Virtual Address: 0xBC
+; RELOC-NEXT:   Symbol: _$TLSML ([[#NFA+21]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLS (0x20)
+; RELOC-NEXT:   Type: R_TLSML (0x25)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xB0
-; RELOC-NEXT:   Symbol: TWInit ([[#NFA+29]])
+; RELOC-NEXT:   Virtual Address: 0xC0
+; RELOC-NEXT:   Symbol: TWInit ([[#NFA+31]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_TLSM (0x24)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xB4
-; RELOC-NEXT:   Symbol: TWInit ([[#NFA+29]])
+; RELOC-NEXT:   Virtual Address: 0xC4
+; RELOC-NEXT:   Symbol: TWInit ([[#NFA+31]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_TLS (0x20)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xB8
-; RELOC-NEXT:   Symbol: GInit ([[#NFA+9]])
+; RELOC-NEXT:   Virtual Address: 0xC8
+; RELOC-NEXT:   Symbol: GInit ([[#NFA+11]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
@@ -230,7 +231,7 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+1]]
-; SYM-NEXT:     Name: .__tls_get_addr
+; SYM-NEXT:     Name: .__tls_get_mod
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: N_UNDEF
 ; SYM-NEXT:     Type: 0x0
@@ -250,6 +251,26 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+3]]
+; SYM-NEXT:     Name: .__tls_get_addr
+; SYM-NEXT:     Value (RelocatableAddress): 0x0
+; SYM-NEXT:     Section: N_UNDEF
+; SYM-NEXT:     Type: 0x0
+; SYM-NEXT:     StorageClass: C_EXT (0x2)
+; SYM-NEXT:     NumberOfAuxEntries: 1
+; SYM-NEXT:     CSECT Auxiliary Entry {
+; SYM-NEXT:       Index: [[#NFA+4]]
+; SYM-NEXT:       SectionLen: 0
+; SYM-NEXT:       ParameterHashIndex: 0x0
+; SYM-NEXT:       TypeChkSectNum: 0x0
+; SYM-NEXT:       SymbolAlignmentLog2: 0
+; SYM-NEXT:       SymbolType: XTY_ER (0x0)
+; SYM-NEXT:       StorageMappingClass: XMC_PR (0x0)
+; SYM-NEXT:       StabInfoIndex: 0x0
+; SYM-NEXT:       StabSectNum: 0x0
+; SYM-NEXT:     }
+; SYM-NEXT:   }
+; SYM-NEXT:   Symbol {
+; SYM-NEXT:     Index: [[#NFA+5]]
 ; SYM-NEXT:     Name:
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .text
@@ -257,8 +278,8 @@ entry:
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+4]]
-; SYM-NEXT:       SectionLen: 132
+; SYM-NEXT:       Index: [[#NFA+6]]
+; SYM-NEXT:       SectionLen: 148
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 5
@@ -269,7 +290,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+5]]
+; SYM-NEXT:     Index: [[#NFA+7]]
 ; SYM-NEXT:     Name: .storesTIInit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .text
@@ -277,8 +298,8 @@ entry:
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+6]]
-; SYM-NEXT:       ContainingCsectSymbolIndex: [[#NFA+3]]
+; SYM-NEXT:       Index: [[#NFA+8]]
+; SYM-NEXT:       ContainingCsectSymbolIndex: [[#NFA+5]]
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -289,16 +310,16 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+7]]
+; SYM-NEXT:     Index: [[#NFA+9]]
 ; SYM-NEXT:     Name: .loadsTWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x40
+; SYM-NEXT:     Value (RelocatableAddress): 0x50
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+8]]
-; SYM-NEXT:       ContainingCsectSymbolIndex:  [[#NFA+3]]
+; SYM-NEXT:       Index: [[#NFA+10]]
+; SYM-NEXT:       ContainingCsectSymbolIndex:  [[#NFA+5]]
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -309,15 +330,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+9]]
+; SYM-NEXT:     Index: [[#NFA+11]]
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x88
+; SYM-NEXT:     Value (RelocatableAddress): 0x98
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+10]]
+; SYM-NEXT:       Index: [[#NFA+12]]
 ; SYM-NEXT:       SectionLen: 8
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -329,15 +350,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+11]]
+; SYM-NEXT:     Index: [[#NFA+13]]
 ; SYM-NEXT:     Name: storesTIInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x90
+; SYM-NEXT:     Value (RelocatableAddress): 0xA0
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+12]]
+; SYM-NEXT:       Index: [[#NFA+14]]
 ; SYM-NEXT:       SectionLen: 12
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -349,15 +370,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+13]]
+; SYM-NEXT:     Index: [[#NFA+15]]
 ; SYM-NEXT:     Name: loadsTWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x9C
+; SYM-NEXT:     Value (RelocatableAddress): 0xAC
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+14]]
+; SYM-NEXT:       Index: [[#NFA+16]]
 ; SYM-NEXT:       SectionLen: 12
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -369,15 +390,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+15]]
+; SYM-NEXT:     Index: [[#NFA+17]]
 ; SYM-NEXT:     Name: TOC
-; SYM-NEXT:     Value (RelocatableAddress): 0xA8
+; SYM-NEXT:     Value (RelocatableAddress): 0xB8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+16]]
+; SYM-NEXT:       Index: [[#NFA+18]]
 ; SYM-NEXT:       SectionLen: 0
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -389,15 +410,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+17]]
-; SYM-NEXT:     Name: .TIInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xA8
+; SYM-NEXT:     Index: [[#NFA+19]]
+; SYM-NEXT:     Name: TIInit
+; SYM-NEXT:     Value (RelocatableAddress): 0xB8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+18]]
+; SYM-NEXT:       Index: [[#NFA+20]]
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -409,35 +430,35 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+19]]
-; SYM-NEXT:     Name: TIInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xAC
+; SYM-NEXT:     Index: [[#NFA+21]]
+; SYM-NEXT:     Name: _$TLSML
+; SYM-NEXT:     Value (RelocatableAddress): 0xBC
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+20]]
+; SYM-NEXT:       Index: [[#NFA+22]]
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 2
 ; SYM-NEXT:       SymbolType: XTY_SD (0x1)
-; SYM-NEXT:       StorageMappingClass: XMC_TE (0x16)
+; SYM-NEXT:       StorageMappingClass: XMC_TC (0x3)
 ; SYM-NEXT:       StabInfoIndex: 0x0
 ; SYM-NEXT:       StabSectNum: 0x0
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+21]]
+; SYM-NEXT:     Index: [[#NFA+23]]
 ; SYM-NEXT:     Name: .TWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xB0
+; SYM-NEXT:     Value (RelocatableAddress): 0xC0
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+22]]
+; SYM-NEXT:       Index: [[#NFA+24]]
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -449,15 +470,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+23]]
+; SYM-NEXT:     Index: [[#NFA+25]]
 ; SYM-NEXT:     Name: TWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xB4
+; SYM-NEXT:     Value (RelocatableAddress): 0xC4
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+24]]
+; SYM-NEXT:       Index: [[#NFA+26]]
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -469,15 +490,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+25]]
+; SYM-NEXT:     Index: [[#NFA+27]]
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xB8
+; SYM-NEXT:     Value (RelocatableAddress): 0xC8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+26]]
+; SYM-NEXT:       Index: [[#NFA+28]]
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -489,7 +510,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+27]]
+; SYM-NEXT:     Index: [[#NFA+29]]
 ; SYM-NEXT:     Name: TIInit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .tdata
@@ -497,7 +518,7 @@ entry:
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+28]]
+; SYM-NEXT:       Index: [[#NFA+30]]
 ; SYM-NEXT:       SectionLen: 8
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -509,7 +530,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+29]]
+; SYM-NEXT:     Index: [[#NFA+31]]
 ; SYM-NEXT:     Name: TWInit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x8
 ; SYM-NEXT:     Section: .tdata
@@ -517,7 +538,7 @@ entry:
 ; SYM-NEXT:     StorageClass: C_WEAKEXT (0x6F)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+30]]
+; SYM-NEXT:       Index: [[#NFA+32]]
 ; SYM-NEXT:       SectionLen: 8
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -536,43 +557,44 @@ entry:
 ; DIS-NEXT:                                       mflr 0
 ; DIS-NEXT:                                       stwu 1, -32(1)
 ; DIS-NEXT:                                       stw 0, 40(1)
-; DIS-NEXT:                                       mr 6, 4
 ; DIS-NEXT:                                       mr 7, 3
+; DIS-NEXT:                                       mr 6, 4
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: [[#NFA+17]]) .TIInit[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 4, 2, 0
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: [[#NFA+19]]) TIInit[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 0(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: [[#NFA+17]]) .TIInit[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 4, 4(4)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 8, 0(3)
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: [[#NFA+19]]) TIInit[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0
-; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA  (idx: [[#NFA+1]])      .__tls_get_addr[PR]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: [[#NFA+21]]) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 4(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: [[#NFA+21]]) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0x0
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: [[#NFA+1]]) .__tls_get_mod[PR]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 3, 3, 8
 ; DIS-NEXT:                                       stw 6, 4(3)
 ; DIS-NEXT:                                       stw 7, 0(3)
 ; DIS-NEXT:                                       addi 1, 1, 32
 ; DIS-NEXT:                                       lwz 0, 8(1)
 ; DIS-NEXT:                                       mtlr 0
 ; DIS-NEXT:                                       blr
-; DIS:      00000040 (idx: [[#INDX+2]]) .loadsTWInit:
+; DIS:      00000050 (idx: [[#INDX+2]]) .loadsTWInit:
 ; DIS-NEXT:                                       mflr 0
 ; DIS-NEXT:                                       stwu 1, -32(1)
 ; DIS-NEXT:                                       stw 0, 40(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: [[#NFA+21]]) .TWInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: [[#NFA+23]]) .TWInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 4, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: [[#NFA+23]]) TWInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: [[#NFA+25]]) TWInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 8(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: [[#NFA+21]]) .TWInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: [[#NFA+23]]) .TWInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 4, 12(4)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: [[#NFA+23]]) TWInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: [[#NFA+25]]) TWInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0
-; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA  (idx: [[#NFA+1]])      .__tls_get_addr[PR]
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA  (idx: [[#NFA+3]])      .__tls_get_addr[PR]
 ; DIS-NEXT:                                       lfd 0, 0(3)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: [[#NFA+25]]) GInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: [[#NFA+27]]) GInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 16(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: [[#NFA+25]]) GInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: [[#NFA+27]]) GInit[TE]
 ; DIS-NEXT:                                       lfd 1, 0(3)
 ; DIS-NEXT:                                       fadd 1, 0, 1
 ; DIS-NEXT:                                       addi 1, 1, 32
@@ -581,42 +603,42 @@ entry:
 ; DIS-NEXT:                                       blr
 
 ; DIS:      Disassembly of section .data:
-; DIS:      00000088  (idx: [[#NFA+9]]) GInit[RW]:
-; DIS-NEXT:       88: 3f f0 00 00
-; DIS-NEXT:       8c: 00 00 00 00
-; DIS:      00000090  (idx: [[#NFA+11]]) storesTIInit[DS]:
-; DIS-NEXT:       90: 00 00 00 00
-; DIS-NEXT: 00000090: R_POS (idx: [[#NFA+5]]) .storesTIInit
-; DIS-NEXT:       94: 00 00 00 a8
-; DIS-NEXT: 00000094: R_POS (idx: [[#NFA+15]]) TOC[TC0]
-; DIS-NEXT:       98: 00 00 00 00
-; DIS:      0000009c  (idx: [[#NFA+13]]) loadsTWInit[DS]:
-; DIS-NEXT:       9c: 00 00 00 40
-; DIS-NEXT: 0000009c: R_POS (idx: [[#NFA+7]]) .loadsTWInit
-; DIS-NEXT:       a0: 00 00 00 a8
-; DIS-NEXT: 000000a0: R_POS (idx: [[#NFA+15]]) TOC[TC0]
-; DIS-NEXT:       a4: 00 00 00 00
-; DIS:      000000a8  (idx: [[#NFA+17]]) .TIInit[TE]:
+; DIS:      00000098  (idx: [[#NFA+11]]) GInit[RW]:
+; DIS-NEXT:       98: 3f f0 00 00
+; DIS-NEXT:       9c: 00 00 00 00
+; DIS:      000000a0  (idx: [[#NFA+13]]) storesTIInit[DS]:
+; DIS-NEXT:       a0: 00 00 00 00
+; DIS-NEXT: 000000a0: R_POS (idx: [[#NFA+7]]) .storesTIInit
+; DIS-NEXT:       a4: 00 00 00 b8
+; DIS-NEXT: 000000a4: R_POS (idx: [[#NFA+17]]) TOC[TC0]
 ; DIS-NEXT:       a8: 00 00 00 00
-; DIS-NEXT: 000000a8: R_TLSM (idx: [[#NFA+27]]) TIInit[TL]
-; DIS:      000000ac  (idx: [[#NFA+19]]) TIInit[TE]:
-; DIS-NEXT:       ac: 00 00 00 00
-; DIS-NEXT: 000000ac: R_TLS (idx: [[#NFA+27]]) TIInit[TL]
-; DIS:      000000b0  (idx: [[#NFA+21]]) .TWInit[TE]:
-; DIS-NEXT:       b0: 00 00 00 00
-; DIS-NEXT: 000000b0: R_TLSM (idx: [[#NFA+29]]) TWInit[TL]
-; DIS:      000000b4  (idx: [[#NFA+23]]) TWInit[TE]:
-; DIS-NEXT:       b4: 00 00 00 08
-; DIS-NEXT: 000000b4: R_TLS (idx: [[#NFA+29]]) TWInit[TL]
-; DIS:      000000b8  (idx: [[#NFA+25]]) GInit[TE]:
-; DIS-NEXT:       b8: 00 00 00 88
-; DIS-NEXT: 000000b8: R_POS (idx: [[#NFA+9]]) GInit[RW]
+; DIS:      000000ac  (idx: [[#NFA+15]]) loadsTWInit[DS]:
+; DIS-NEXT:       ac: 00 00 00 50
+; DIS-NEXT: 000000ac: R_POS (idx: [[#NFA+9]]) .loadsTWInit
+; DIS-NEXT:       b0: 00 00 00 b8
+; DIS-NEXT: 000000b0: R_POS (idx: [[#NFA+17]]) TOC[TC0]
+; DIS-NEXT:       b4: 00 00 00 00
+; DIS:      000000b8  (idx: [[#NFA+19]]) TIInit[TE]:
+; DIS-NEXT:       b8: 00 00 00 00
+; DIS-NEXT: 000000b8: R_TLS_LD (idx: [[#NFA+29]]) TIInit[TL]
+; DIS:      000000bc (idx: [[#NFA+21]]) _$TLSML[TC]:
+; DIS-NEXT:       bc: 00 00 00 00
+; DIS-NEXT: 000000bc:  R_TLSML (idx: [[#NFA+21]]) _$TLSML[TC]
+; DIS:      000000c0  (idx: [[#NFA+23]]) .TWInit[TE]:
+; DIS-NEXT:       c0: 00 00 00 00
+; DIS-NEXT: 000000c0: R_TLSM (idx: [[#NFA+31]]) TWInit[TL]
+; DIS:      000000c4  (idx: [[#NFA+25]]) TWInit[TE]:
+; DIS-NEXT:       c4: 00 00 00 08
+; DIS-NEXT: 000000c4: R_TLS (idx: [[#NFA+31]]) TWInit[TL]
+; DIS:      000000c8  (idx: [[#NFA+27]]) GInit[TE]:
+; DIS-NEXT:       c8: 00 00 00 98
+; DIS-NEXT: 000000c8: R_POS (idx: [[#NFA+11]]) GInit[RW]
 
 ; DIS:      Disassembly of section .tdata:
-; DIS:      00000000  (idx: [[#NFA+27]]) TIInit[TL]:
+; DIS:      00000000  (idx: [[#NFA+29]]) TIInit[TL]:
 ; DIS-NEXT:        0: 00 00 00 00
 ; DIS-NEXT:        4: 00 00 00 01
-; DIS:      00000008  (idx: [[#NFA+29]]) TWInit[TL]:
+; DIS:      00000008  (idx: [[#NFA+31]]) TWInit[TL]:
 ; DIS-NEXT:        8: 3f f0 00 00
 ; DIS-NEXT:        c: 00 00 00 00
 
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
index eb7a0e277a5652..10ccb520a94aba 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
@@ -7,6 +7,7 @@
 @const_ivar = constant i32 6, align 4
 @GInit = global i32 1, align 4
 @TGInit = thread_local global i32 1, align 4
+; @TIUninit is local-dynamic indeed
 @TIUninit = internal thread_local global i32 0, align 4
 
 ; Function Attrs: nofree norecurse nounwind willreturn writeonly
@@ -32,16 +33,16 @@ entry:
 ; RELOC-NEXT: Relocations [
 ; RELOC-NEXT:   Section (index: 1) .text {
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0xE
-; RELOC-NEXT:     Symbol: .TIUninit ([[#NFA+23]])
+; RELOC-NEXT:     Virtual Address: 0x12
+; RELOC-NEXT:     Symbol: TIUninit ([[#NFA+25]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x12
-; RELOC-NEXT:     Symbol: TIUninit ([[#NFA+25]])
+; RELOC-NEXT:     Virtual Address: 0x16
+; RELOC-NEXT:     Symbol: _$TLSML ([[#NFA+27]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -49,39 +50,39 @@ entry:
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x18
-; RELOC-NEXT:     Symbol: .__tls_get_addr ([[#NFA+1]])
+; RELOC-NEXT:     Symbol: .__tls_get_mod ([[#NFA+1]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 26
 ; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x3A
-; RELOC-NEXT:     Symbol: .TGInit ([[#NFA+27]])
+; RELOC-NEXT:     Virtual Address: 0x4A
+; RELOC-NEXT:     Symbol: .TGInit ([[#NFA+29]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x3E
-; RELOC-NEXT:     Symbol: TGInit ([[#NFA+29]])
+; RELOC-NEXT:     Virtual Address: 0x4E
+; RELOC-NEXT:     Symbol: TGInit ([[#NFA+31]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x44
-; RELOC-NEXT:     Symbol: .__tls_get_addr ([[#NFA+1]])
+; RELOC-NEXT:     Virtual Address: 0x54
+; RELOC-NEXT:     Symbol: .__tls_get_addr ([[#NFA+3]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 26
 ; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x4A
-; RELOC-NEXT:     Symbol: GInit ([[#NFA+31]])
+; RELOC-NEXT:     Virtual Address: 0x5A
+; RELOC-NEXT:     Symbol: GInit ([[#NFA+33]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -90,72 +91,72 @@ entry:
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Section (index: 2) .data {
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x70
-; RELOC-NEXT:   Symbol: .storesTIUninit ([[#NFA+5]])
+; RELOC-NEXT:   Virtual Address: 0x80
+; RELOC-NEXT:   Symbol: .storesTIUninit ([[#NFA+7]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x74
-; RELOC-NEXT:   Symbol: TOC ([[#NFA+21]])
+; RELOC-NEXT:   Virtual Address: 0x84
+; RELOC-NEXT:   Symbol: TOC ([[#NFA+23]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x7C
-; RELOC-NEXT:   Symbol: .loadsTGInit ([[#NFA+7]])
+; RELOC-NEXT:   Virtual Address: 0x8C
+; RELOC-NEXT:   Symbol: .loadsTGInit ([[#NFA+9]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x80
-; RELOC-NEXT:   Symbol: TOC ([[#NFA+21]])
+; RELOC-NEXT:   Virtual Address: 0x90
+; RELOC-NEXT:   Symbol: TOC ([[#NFA+23]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x88
-; RELOC-NEXT:   Symbol: TIUninit ([[#NFA+37]])
+; RELOC-NEXT:   Virtual Address: 0x98
+; RELOC-NEXT:   Symbol: TIUninit ([[#NFA+39]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLSM (0x24)
+; RELOC-NEXT:   Type: R_TLS_LD (0x22)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x8C
-; RELOC-NEXT:   Symbol: TIUninit ([[#NFA+37]])
+; RELOC-NEXT:   Virtual Address: 0x9C
+; RELOC-NEXT:   Symbol: _$TLSML ([[#NFA+27]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLS (0x20)
+; RELOC-NEXT:   Type: R_TLSML (0x25)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x90
-; RELOC-NEXT:   Symbol: TGInit ([[#NFA+35]])
+; RELOC-NEXT:   Virtual Address: 0xA0
+; RELOC-NEXT:   Symbol: TGInit ([[#NFA+37]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_TLSM (0x24)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x94
-; RELOC-NEXT:   Symbol: TGInit ([[#NFA+35]])
+; RELOC-NEXT:   Virtual Address: 0xA4
+; RELOC-NEXT:   Symbol: TGInit ([[#NFA+37]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
 ; RELOC-NEXT:   Type: R_TLS (0x20)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x98
-; RELOC-NEXT:   Symbol: GInit ([[#NFA+15]])
+; RELOC-NEXT:   Virtual Address: 0xA8
+; RELOC-NEXT:   Symbol: GInit ([[#NFA+17]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
@@ -178,9 +179,9 @@ entry:
 ; SYM-NEXT:     CPU Version ID: TCPU_COM (0x3)
 ; SYM-NEXT:     StorageClass: C_FILE (0x67)
 ; SYM-NEXT:     NumberOfAuxEntries: 2
-; SYM:         Symbol {
+; SYM:        Symbol {
 ; SYM-NEXT:     Index: [[#NFA+1]]
-; SYM-NEXT:     Name: .__tls_get_addr
+; SYM-NEXT:     Name: .__tls_get_mod
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: N_UNDEF
 ; SYM-NEXT:     Type: 0x0
@@ -200,6 +201,26 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+3]]
+; SYM-NEXT:     Name: .__tls_get_addr
+; SYM-NEXT:     Value (RelocatableAddress): 0x0
+; SYM-NEXT:     Section: N_UNDEF
+; SYM-NEXT:     Type: 0x0
+; SYM-NEXT:     StorageClass: C_EXT (0x2)
+; SYM-NEXT:     NumberOfAuxEntries: 1
+; SYM-NEXT:     CSECT Auxiliary Entry {
+; SYM-NEXT:       Index: 6
+; SYM-NEXT:       SectionLen: 0
+; SYM-NEXT:       ParameterHashIndex: 0x0
+; SYM-NEXT:       TypeChkSectNum: 0x0
+; SYM-NEXT:       SymbolAlignmentLog2: 0
+; SYM-NEXT:       SymbolType: XTY_ER (0x0)
+; SYM-NEXT:       StorageMappingClass: XMC_PR (0x0)
+; SYM-NEXT:       StabInfoIndex: 0x0
+; SYM-NEXT:       StabSectNum: 0x0
+; SYM-NEXT:     }
+; SYM-NEXT:   }
+; SYM-NEXT:   Symbol {
+; SYM-NEXT:     Index: [[#NFA+5]]
 ; SYM-NEXT:     Name:
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .text
@@ -207,8 +228,8 @@ entry:
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+4]]
-; SYM-NEXT:       SectionLen: 104
+; SYM-NEXT:       Index: [[#NFA+6]]
+; SYM-NEXT:       SectionLen: 120
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 5
@@ -219,7 +240,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+5]]
+; SYM-NEXT:     Index: [[#NFA+7]]
 ; SYM-NEXT:     Name: .storesTIUninit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .text
@@ -227,8 +248,8 @@ entry:
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+6]]
-; SYM-NEXT:       ContainingCsectSymbolIndex: [[#NFA+3]]
+; SYM-NEXT:       Index: [[#NFA+8]]
+; SYM-NEXT:       ContainingCsectSymbolIndex: [[#NFA+5]]
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -239,16 +260,16 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+7]]
+; SYM-NEXT:     Index: [[#NFA+9]]
 ; SYM-NEXT:     Name: .loadsTGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x30
+; SYM-NEXT:     Value (RelocatableAddress): 0x40
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+8]]
-; SYM-NEXT:       ContainingCsectSymbolIndex: [[#NFA+3]]
+; SYM-NEXT:       Index: [[#NFA+10]]
+; SYM-NEXT:       ContainingCsectSymbolIndex: [[#NFA+5]]
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -259,15 +280,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+9]]
+; SYM-NEXT:     Index: [[#NFA+11]]
 ; SYM-NEXT:     Name: .rodata
-; SYM-NEXT:     Value (RelocatableAddress): 0x68
+; SYM-NEXT:     Value (RelocatableAddress): 0x78
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+10]]
+; SYM-NEXT:       Index: [[#NFA+12]]
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -279,16 +300,16 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+11]]
+; SYM-NEXT:     Index: [[#NFA+13]]
 ; SYM-NEXT:     Name: const_ivar
-; SYM-NEXT:     Value (RelocatableAddress): 0x68
+; SYM-NEXT:     Value (RelocatableAddress): 0x78
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+12]]
-; SYM-NEXT:       ContainingCsectSymbolIndex: [[#NFA+9]]
+; SYM-NEXT:       Index: [[#NFA+14]]
+; SYM-NEXT:       ContainingCsectSymbolIndex: [[#NFA+11]]
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -299,15 +320,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+13]]
+; SYM-NEXT:     Index: [[#NFA+15]]
 ; SYM-NEXT:     Name: .data
-; SYM-NEXT:     Value (RelocatableAddress): 0x6C
+; SYM-NEXT:     Value (RelocatableAddress): 0x7C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+14]]
+; SYM-NEXT:       Index: [[#NFA+16]]
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -319,16 +340,16 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+15]]
+; SYM-NEXT:     Index: [[#NFA+17]]
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x6C
+; SYM-NEXT:     Value (RelocatableAddress): 0x7C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+16]]
-; SYM-NEXT:       ContainingCsectSymbolIndex: [[#NFA+13]]
+; SYM-NEXT:       Index: [[#NFA+18]]
+; SYM-NEXT:       ContainingCsectSymbolIndex: [[#NFA+15]]
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -339,15 +360,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+17]]
+; SYM-NEXT:     Index: [[#NFA+19]]
 ; SYM-NEXT:     Name: storesTIUninit
-; SYM-NEXT:     Value (RelocatableAddress): 0x70
+; SYM-NEXT:     Value (RelocatableAddress): 0x80
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+18]]
+; SYM-NEXT:       Index: [[#NFA+20]]
 ; SYM-NEXT:       SectionLen: 12
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -359,15 +380,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+19]]
+; SYM-NEXT:     Index: [[#NFA+21]]
 ; SYM-NEXT:     Name: loadsTGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x7C
+; SYM-NEXT:     Value (RelocatableAddress): 0x8C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+20]]
+; SYM-NEXT:       Index: [[#NFA+22]]
 ; SYM-NEXT:       SectionLen: 12
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -379,15 +400,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+21]]
+; SYM-NEXT:     Index: [[#NFA+23]]
 ; SYM-NEXT:     Name: TOC
-; SYM-NEXT:     Value (RelocatableAddress): 0x88
+; SYM-NEXT:     Value (RelocatableAddress): 0x98
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+22]]
+; SYM-NEXT:       Index: [[#NFA+24]]
 ; SYM-NEXT:       SectionLen: 0
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -399,15 +420,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+23]]
-; SYM-NEXT:     Name: .TIUninit
-; SYM-NEXT:     Value (RelocatableAddress): 0x88
+; SYM-NEXT:     Index: [[#NFA+25]]
+; SYM-NEXT:     Name: TIUninit
+; SYM-NEXT:     Value (RelocatableAddress): 0x98
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+24]]
+; SYM-NEXT:       Index: [[#NFA+26]]
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -419,15 +440,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+25]]
-; SYM-NEXT:     Name: TIUninit
-; SYM-NEXT:     Value (RelocatableAddress): 0x8C
+; SYM-NEXT:     Index: [[#NFA+27]]
+; SYM-NEXT:     Name: _$TLSML
+; SYM-NEXT:     Value (RelocatableAddress): 0x9C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+26]]
+; SYM-NEXT:       Index: [[#NFA+28]]
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -439,15 +460,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+27]]
+; SYM-NEXT:     Index: [[#NFA+29]]
 ; SYM-NEXT:     Name: .TGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x90
+; SYM-NEXT:     Value (RelocatableAddress): 0xA0
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+28]]
+; SYM-NEXT:       Index: [[#NFA+30]]
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -459,15 +480,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+29]]
+; SYM-NEXT:     Index: [[#NFA+31]]
 ; SYM-NEXT:     Name: TGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x94
+; SYM-NEXT:     Value (RelocatableAddress): 0xA4
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+30]]
+; SYM-NEXT:       Index: [[#NFA+32]]
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -479,15 +500,15 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+31]]
+; SYM-NEXT:     Index: [[#NFA+33]]
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x98
+; SYM-NEXT:     Value (RelocatableAddress): 0xA8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+32]]
+; SYM-NEXT:       Index: [[#NFA+34]]
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -499,7 +520,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+33]]
+; SYM-NEXT:     Index: [[#NFA+35]]
 ; SYM-NEXT:     Name: .tdata
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .tdata
@@ -507,7 +528,7 @@ entry:
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+34]]
+; SYM-NEXT:       Index: [[#NFA+36]]
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -519,7 +540,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+35]]
+; SYM-NEXT:     Index: [[#NFA+37]]
 ; SYM-NEXT:     Name: TGInit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x0
 ; SYM-NEXT:     Section: .tdata
@@ -527,8 +548,8 @@ entry:
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+36]]
-; SYM-NEXT:       ContainingCsectSymbolIndex: [[#NFA+33]]
+; SYM-NEXT:       Index: [[#NFA+38]]
+; SYM-NEXT:       ContainingCsectSymbolIndex: [[#NFA+35]]
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 0
@@ -539,7 +560,7 @@ entry:
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
-; SYM-NEXT:     Index: [[#NFA+37]]
+; SYM-NEXT:     Index: [[#NFA+39]]
 ; SYM-NEXT:     Name: TIUninit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x4
 ; SYM-NEXT:     Section: .tbss
@@ -547,7 +568,7 @@ entry:
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
-; SYM-NEXT:       Index: [[#NFA+38]]
+; SYM-NEXT:       Index: [[#NFA+40]]
 ; SYM-NEXT:       SectionLen: 4
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
@@ -562,34 +583,35 @@ entry:
 
 ; DIS:      {{.*}}aix-tls-xcoff-reloc.ll.tmp.o:	file format aixcoff-rs6000
 ; DIS:      Disassembly of section .text:
-; DIS:      00000000 (idx: [[#NFA+5]]) .storesTIUninit:
+; DIS:      00000000 (idx: [[#NFA+7]]) .storesTIUninit:
 ; DIS-NEXT:                                      mflr 0
 ; DIS-NEXT:                                      stwu 1, -32(1)
+; DIS-NEXT:                                      stw 0, 40(1)
 ; DIS-NEXT:                                      mr 6, 3
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 0(2)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: [[#NFA+23]]) .TIUninit[TC]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 4, 4(2)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 7, 0(2)
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: [[#NFA+25]]) TIUninit[TC]
-; DIS-NEXT:                                      stw 0, 40(1)
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               bla 0
-; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: [[#NFA+1]])      .__tls_get_addr[PR]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 4(2)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: [[#NFA+27]]) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               bla 0x0
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: [[#NFA+1]]) .__tls_get_mod[PR]
+; DIS-NEXT:                                      add 3, 3, 7
 ; DIS-NEXT:                                      stw 6, 0(3)
 ; DIS-NEXT:                                      addi 1, 1, 32
 ; DIS-NEXT:                                      lwz 0, 8(1)
 ; DIS-NEXT:                                      mtlr 0
 ; DIS-NEXT:                                      blr
-; DIS:      00000030 (idx: [[#NFA+7]]) .loadsTGInit:
+; DIS:      00000040 (idx: [[#NFA+9]]) .loadsTGInit:
 ; DIS-NEXT:                                      mflr 0
 ; DIS-NEXT:                                      stwu 1, -32(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 8(2)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: [[#NFA+27]]) .TGInit[TC]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: [[#NFA+29]]) .TGInit[TC]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 4, 12(2)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: [[#NFA+29]]) TGInit[TC]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: [[#NFA+31]]) TGInit[TC]
 ; DIS-NEXT:                                      stw 0, 40(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               bla 0
-; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: [[#NFA+1]])      .__tls_get_addr[PR]
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: [[#NFA+3]])      .__tls_get_addr[PR]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 4, 16(2)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: [[#NFA+31]]) GInit[TC]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: [[#NFA+33]]) GInit[TC]
 ; DIS-NEXT:                                      lwz 3, 0(3)
 ; DIS-NEXT:                                      lwz 4, 0(4)
 ; DIS-NEXT:                                      add 3, 4, 3
@@ -597,46 +619,46 @@ entry:
 ; DIS-NEXT:                                      lwz 0, 8(1)
 ; DIS-NEXT:                                      mtlr 0
 ; DIS-NEXT:                                      blr
-; DIS:      00000068 (idx: [[#NFA+11]]) const_ivar:
-; DIS-NEXT:       68: 00 00 00 06
+; DIS:      00000078 (idx: [[#NFA+13]]) const_ivar:
+; DIS-NEXT:       78: 00 00 00 06
 
 ; DIS:      Disassembly of section .data:
-; DIS:      0000006c  (idx: [[#NFA+15]]) GInit:
-; DIS-NEXT:       6c: 00 00 00 01
-; DIS:      00000070  (idx: [[#NFA+17]]) storesTIUninit[DS]:
-; DIS-NEXT:       70: 00 00 00 00
-; DIS-NEXT: 00000070: R_POS (idx: [[#NFA+5]]) .storesTIUninit
-; DIS-NEXT:       74: 00 00 00 88
-; DIS-NEXT: 00000074: R_POS (idx: [[#NFA+21]]) TOC[TC0]
-; DIS-NEXT:       78: 00 00 00 00
-; DIS:      0000007c  (idx: [[#NFA+19]]) loadsTGInit[DS]:
-; DIS-NEXT:       7c: 00 00 00 30
-; DIS-NEXT: 0000007c: R_POS (idx: [[#NFA+7]]) .loadsTGInit
-; DIS-NEXT:       80: 00 00 00 88
-; DIS-NEXT: 00000080: R_POS (idx: [[#NFA+21]]) TOC[TC0]
-; DIS-NEXT:       84: 00 00 00 00
-; DIS:      00000088  (idx: [[#NFA+23]]) .TIUninit[TC]:
+; DIS:      0000007c  (idx: [[#NFA+17]]) GInit:
+; DIS-NEXT:       7c: 00 00 00 01
+; DIS:      00000080  (idx: [[#NFA+19]]) storesTIUninit[DS]:
+; DIS-NEXT:       80: 00 00 00 00
+; DIS-NEXT: 00000080: R_POS (idx: [[#NFA+7]]) .storesTIUninit
+; DIS-NEXT:       84: 00 00 00 98
+; DIS-NEXT: 00000084: R_POS (idx: [[#NFA+23]]) TOC[TC0]
 ; DIS-NEXT:       88: 00 00 00 00
-; DIS-NEXT: 00000088: R_TLSM (idx: [[#NFA+37]]) TIUninit[UL]
-; DIS:      0000008c  (idx: [[#NFA+25]]) TIUninit[TC]:
-; DIS-NEXT:       8c: 00 00 00 04
-; DIS-NEXT: 0000008c: R_TLS (idx: [[#NFA+37]]) TIUninit[UL]
-; DIS:      00000090  (idx: [[#NFA+27]]) .TGInit[TC]:
-; DIS-NEXT:       90: 00 00 00 00
-; DIS-NEXT: 00000090: R_TLSM (idx: [[#NFA+35]]) TGInit
-; DIS:      00000094  (idx: [[#NFA+29]]) TGInit[TC]:
+; DIS:      0000008c  (idx: [[#NFA+21]]) loadsTGInit[DS]:
+; DIS-NEXT:       8c: 00 00 00 40
+; DIS-NEXT: 0000008c: R_POS (idx: [[#NFA+9]]) .loadsTGInit
+; DIS-NEXT:       90: 00 00 00 98
+; DIS-NEXT: 00000090: R_POS (idx: [[#NFA+23]]) TOC[TC0]
 ; DIS-NEXT:       94: 00 00 00 00
-; DIS-NEXT: 00000094: R_TLS (idx: [[#NFA+35]]) TGInit
-; DIS:      00000098  (idx: [[#NFA+31]]) GInit[TC]:
-; DIS-NEXT:       98: 00 00 00 6c
-; DIS-NEXT: 00000098: R_POS (idx: [[#NFA+15]]) GInit
+; DIS:      00000098  (idx: [[#NFA+25]]) TIUninit[TC]:
+; DIS-NEXT:       98: 00 00 00 00
+; DIS-NEXT: 00000098: R_TLS_LD (idx: [[#NFA+39]]) TIUninit[UL]
+; DIS:      0000009c (idx: [[#NFA+27]]) _$TLSML[TC]:
+; DIS-NEXT:       9c: 00 00 00 00
+; DIS-NEXT: 0000009c: R_TLSML (idx: [[#NFA+27]]) _$TLSML[TC]
+; DIS:      000000a0  (idx: [[#NFA+29]]) .TGInit[TC]:
+; DIS-NEXT:       a0: 00 00 00 00
+; DIS-NEXT: 000000a0: R_TLSM (idx: [[#NFA+37]]) TGInit
+; DIS:      000000a4  (idx: [[#NFA+31]]) TGInit[TC]:
+; DIS-NEXT:       a4: 00 00 00 00
+; DIS-NEXT: 000000a4: R_TLS (idx: [[#NFA+37]]) TGInit
+; DIS:      000000a8  (idx: [[#NFA+33]]) GInit[TC]:
+; DIS-NEXT:       a8: 00 00 00 7c
+; DIS-NEXT: 000000a8: R_POS (idx: [[#NFA+17]]) GInit
 
 ; DIS:      Disassembly of section .tdata:
-; DIS:      00000000 (idx: [[#NFA+35]]) TGInit:
+; DIS:      00000000 (idx: [[#NFA+37]]) TGInit:
 ; DIS-NEXT:        0: 00 00 00 01
 
 ; DIS:      Disassembly of section .tbss:
-; DIS:      00000004 (idx: [[#NFA+37]]) TIUninit[UL]:
+; DIS:      00000004 (idx: [[#NFA+39]]) TIUninit[UL]:
 ; DIS-NEXT: ...
 
 attributes #0 = { nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" }

>From cff590752633bfa6beba33afdec67c52655c2c97 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Thu, 14 Sep 2023 03:46:38 -0400
Subject: [PATCH 02/31] address comments

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  4 ++--
 llvm/lib/Target/PowerPC/PPCISelLowering.h     | 12 ++++++------
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       |  2 +-
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 12 +++++++-----
 4 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 2d2a4b008ade06..26d11456992e3f 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1774,8 +1774,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
-  case PPCISD::GET_TLS_MOD:
-    return "PPCISD::GET_TLS_MOD";
+  case PPCISD::GET_TLS_MOD_AIX:
+    return "PPCISD::GET_TLS_MOD_AIX";
   case PPCISD::GET_TPOINTER:    return "PPCISD::GET_TPOINTER";
   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
   case PPCISD::TLSGD_AIX:       return "PPCISD::TLSGD_AIX";
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 73a4e1ad3a190d..573de7816e2420 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -375,16 +375,16 @@ namespace llvm {
     /// G8RC and inputs are X3/X4).
     TLSGD_AIX,
 
-    /// %x3 = GET_TLS_MOD _$TLSML - For the AIX local-dynamic TLS model,
+    /// %x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model,
     /// produces a call to __tls_get_mod(_$TLSML\@ml).
-    GET_TLS_MOD,
+    GET_TLS_MOD_AIX,
 
     /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(variable offset)
     /// Op that internally creates TOC entry for the "_$TLSML" symbol, generates
-    /// GET_TLS_MOD node which will be expanded into a call to __tls_get_mod,
-    /// and then add the variable offset with the result from the call.
-    /// This node is used in both 32-bit and 64-bit modes. The only difference
-    /// is register class.
+    /// GET_TLS_MOD_AIX node which will be expanded into a call to
+    /// __tls_get_mod, and then add the variable offset with the result from the
+    /// call. This node is used in both 32-bit and 64-bit modes. The only
+    /// difference is register class.
     TLSLD_AIX,
 
     /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 98d34a3000d473..416cae1754a328 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -213,7 +213,7 @@ def PPCaddTls     : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
 def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
 def PPCaddiTlsgdL   : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
 def PPCgetTlsAddr   : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
-def PPCgetTlsMod   : SDNode<"PPCISD::GET_TLS_MOD", SDTIntUnaryOp>;
+def PPCgetTlsMod   : SDNode<"PPCISD::GET_TLS_MOD_AIX", SDTIntUnaryOp>;
 def PPCgetTpointer : SDNode<"PPCISD::GET_TPOINTER", SDTIntLeaf, []>;
 def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR",
                                SDTypeProfile<1, 3, [
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 32a75a32b9d87f..339c8d4ad383e7 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -160,8 +160,10 @@ namespace {
         if (IsAIX) {
           if (MI.getOpcode() == PPC::TLSLDAIX8 ||
               MI.getOpcode() == PPC::TLSLDAIX) {
-            // For Local-Dynamic
-            auto &Subtarget = MBB.getParent()->getSubtarget<PPCSubtarget>();
+            // For Local-Dynamic, the module handle is copied in r3. The copy is
+            // followed by GETtlsMOD32AIX/GETtlsMOD64AIX.
+            const PPCSubtarget &Subtarget =
+                MBB.getParent()->getSubtarget<PPCSubtarget>();
             bool IsLargeModel =
                 Subtarget.getTargetMachine().getCodeModel() == CodeModel::Large;
             Register ModuleHandleHReg;
@@ -190,9 +192,9 @@ namespace {
                 .addReg(GPR3)
                 .addReg(MI.getOperand(1).getReg());
           } else {
-            // For Global-Dynamic
-            // The variable offset and region handle are copied in r4 and r3.
-            // The copies are followed by GETtlsADDR32AIX/GETtlsADDR64AIX.
+            // For Global-Dynamic, the variable offset and region handle are
+            // copied in r4 and r3. The copies are followed by
+            // GETtlsADDR32AIX/GETtlsADDR64AIX.
             if (!IsTLSTPRelMI) {
               BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
                   .addReg(MI.getOperand(1).getReg());

>From c3aff48f88f3fa819a75ccc75ed265f95239927e Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Mon, 18 Sep 2023 04:03:32 -0400
Subject: [PATCH 03/31] [NFC] address comments.

---
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     | 9 +++++----
 llvm/lib/Target/PowerPC/PPCISelLowering.h     | 8 ++++----
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 2 +-
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 5f40c0b46368d5..d7226485addfb5 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -685,10 +685,11 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
     // in R3, need to generate branch to .__tls_get_mod.
     Register VarOffsetReg = Subtarget->isPPC64() ? PPC::X4 : PPC::R4;
     (void)VarOffsetReg;
-    assert((MI->getNumExplicitOperands() < 3 ||
-            (MI->getOperand(2).isReg() &&
-             MI->getOperand(2).getReg() == VarOffsetReg)) &&
-           "GETtls[ld]ADDR[32] must read GPR4");
+    assert(MI->getOpcode() == PPC::GETtlsMOD32AIX ||
+           MI->getOpcode() == PPC::GETtlsMOD64AIX ||
+           (MI->getOperand(2).isReg() &&
+            MI->getOperand(2).getReg() == VarOffsetReg) &&
+               "GETtls[ld]ADDR[32] must read GPR4");
     EmitAIXTlsCallHelper(MI);
     return;
   }
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 573de7816e2420..d2e207a04908b1 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -370,20 +370,20 @@ namespace llvm {
     /// G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY
     /// Op that combines two register copies of TOC entries
     /// (region handle into R3 and variable offset into R4) followed by a
-    /// GET_TLS_ADDR node which will be expanded to a call to __tls_get_addr.
+    /// GET_TLS_ADDR node which will be expanded to a call to .__tls_get_addr.
     /// This node is used in 64-bit mode as well (in which case the result is
     /// G8RC and inputs are X3/X4).
     TLSGD_AIX,
 
     /// %x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model,
-    /// produces a call to __tls_get_mod(_$TLSML\@ml).
+    /// produces a call to .__tls_get_mod(_$TLSML\@ml).
     GET_TLS_MOD_AIX,
 
     /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(variable offset)
     /// Op that internally creates TOC entry for the "_$TLSML" symbol, generates
     /// GET_TLS_MOD_AIX node which will be expanded into a call to
-    /// __tls_get_mod, and then add the variable offset with the result from the
-    /// call. This node is used in both 32-bit and 64-bit modes. The only
+    /// .__tls_get_mod, and then add the variable offset with the result from
+    /// the call. This node is used in both 32-bit and 64-bit modes. The only
     /// difference is register class.
     TLSLD_AIX,
 
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 339c8d4ad383e7..3fd57f4159d3cd 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -186,7 +186,7 @@ namespace {
                             : Register(Subtarget.getTOCPointerRegister()));
             BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
                 .addReg(MHReg);
-            // The call to __tls_get_mod.
+            // The call to .__tls_get_mod.
             BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3);
             BuildMI(MBB, I, DL, TII->get(Is64Bit ? PPC::ADD8 : PPC::ADD4), GPR3)
                 .addReg(GPR3)

>From efa671fb66363ff98972bb34200c2a121cdcb15f Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Wed, 20 Sep 2023 03:44:10 -0400
Subject: [PATCH 04/31] [NFC] address comments

---
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp   | 18 +++++++++---------
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 14 ++++++--------
 2 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index d7226485addfb5..ff1330e14a1829 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -679,17 +679,17 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
          "GETtls[ld]ADDR[32] must read GPR3");
 
   if (Subtarget->isAIXABI()) {
-    // On AIX, for TLSGD the variable offset should already be in R4 and the
-    // region handle should already be in R3, need to generate an absolute
-    // branch to .__tls_get_addr. For TLSLD the module handle should already be
-    // in R3, need to generate branch to .__tls_get_mod.
+    // For TLSGD, the variable offset should already be in R4 and the region
+    // handle should already be in R3, generate absolute branch to
+    // .__tls_get_addr. For TLSLD, the module handle should already be in R3,
+    // generate branch to .__tls_get_mod.
     Register VarOffsetReg = Subtarget->isPPC64() ? PPC::X4 : PPC::R4;
     (void)VarOffsetReg;
-    assert(MI->getOpcode() == PPC::GETtlsMOD32AIX ||
-           MI->getOpcode() == PPC::GETtlsMOD64AIX ||
-           (MI->getOperand(2).isReg() &&
-            MI->getOperand(2).getReg() == VarOffsetReg) &&
-               "GETtls[ld]ADDR[32] must read GPR4");
+    assert((MI->getOpcode() == PPC::GETtlsMOD32AIX ||
+            MI->getOpcode() == PPC::GETtlsMOD64AIX ||
+            (MI->getOperand(2).isReg() &&
+             MI->getOperand(2).getReg() == VarOffsetReg)) &&
+           "GETtls[ld]ADDR[32] must read GPR4");
     EmitAIXTlsCallHelper(MI);
     return;
   }
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 26d11456992e3f..36bb939c41d25c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3430,14 +3430,12 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
     return DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, VariableOffset);
   }
 
-  // The Local-Exec, Initial-Exec, Local-Dynamic, and General-Dynamic TLS models
-  // are currently supported access models. If Local- or Initial-exec or
-  // local-dynamic is not possible or specified, all GlobalTLSAddress nodes are
-  // lowered using the general-dynamic model. We need to generate two TOC
-  // entries, one for the variable offset, one for the region handle. The global
-  // address for the TOC entry of the region handle is created with the
-  // MO_TLSGDM_FLAG flag and the global address for the TOC entry of the
-  // variable offset is created with MO_TLSGD_FLAG.
+  // If Local- or Initial-exec or Local-dynamic is not possible or specified,
+  // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
+  // need to generate two TOC entries, one for the variable offset, one for the
+  // region handle. The global address for the TOC entry of the region handle is
+  // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
+  // entry of the variable offset is created with MO_TLSGD_FLAG.
   SDValue VariableOffsetTGA =
       DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
   SDValue RegionHandleTGA =

>From 58de62bf07f9e06b15595142bf318abe8f011ec1 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Wed, 20 Sep 2023 23:09:46 -0400
Subject: [PATCH 05/31] [NFC] address comment

---
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index e4fee9c2fff23e..a3590580777f3a 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1563,7 +1563,7 @@ def GETtlsADDR64AIX :
                     "GETtlsADDR64AIX",
                     [(set i64:$rD,
                       (PPCgetTlsAddr i64:$offset, i64:$handle))]>, isPPC64;
-// On AIX, the call to __tls_get_mod need one input in X3 for the module handle.
+// On AIX, the call to .__tls_get_mod need one input in X3 for the module handle.
 def GETtlsMOD64AIX :
   PPCEmitTimePseudo<(outs g8rc:$rD),(ins g8rc:$handle),
                     "GETtlsMOD64AIX",

>From f52e9ce52a5a9597e774124210bda2ba6c9af783 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Thu, 21 Sep 2023 23:03:58 -0400
Subject: [PATCH 06/31] Attempt to address comment: use r4 for LoadOffsetToc

However looks like machine-scheduler is interfering, and still schedule
LoadOffsetToc ahead of the .__tls_get_mod call.
---
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp |  40 ++++-
 .../test/CodeGen/PowerPC/aix-tls-gd-double.ll |  78 +++++----
 llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll   |  78 +++++----
 .../CodeGen/PowerPC/aix-tls-gd-longlong.ll    | 162 +++++++++---------
 .../CodeGen/PowerPC/aix-tls-local-dynamic.ll  | 128 +++++++-------
 .../PowerPC/aix-tls-xcoff-reloc-large.ll      |  49 +++---
 .../CodeGen/PowerPC/aix-tls-xcoff-reloc.ll    |  48 +++---
 7 files changed, 308 insertions(+), 275 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 3fd57f4159d3cd..bb03f8f911c491 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -160,37 +160,61 @@ namespace {
         if (IsAIX) {
           if (MI.getOpcode() == PPC::TLSLDAIX8 ||
               MI.getOpcode() == PPC::TLSLDAIX) {
-            // For Local-Dynamic, the module handle is copied in r3. The copy is
-            // followed by GETtlsMOD32AIX/GETtlsMOD64AIX.
+            // For Local-Dynamic, need to swap the position of VarOffsetInst and
+            // MI, so that VarOffsetInst can use R/X4 to reduce register
+            // pressure.
             const PPCSubtarget &Subtarget =
                 MBB.getParent()->getSubtarget<PPCSubtarget>();
             bool IsLargeModel =
                 Subtarget.getTargetMachine().getCodeModel() == CodeModel::Large;
-            Register ModuleHandleHReg;
             unsigned LDTocOp =
                 Is64Bit ? (IsLargeModel ? PPC::LDtocL : PPC::LDtoc)
                         : (IsLargeModel ? PPC::LWZtocL : PPC::LWZtoc);
+            assert(RegInfo.hasOneDef(MI.getOperand(1).getReg()) &&
+                   "TLSLDAIX expects single def of its operand.");
+            MachineInstr *VarOffsetInst =
+                RegInfo.getOneDef(MI.getOperand(1).getReg())->getParent();
+            assert(VarOffsetInst->getOpcode() == LDTocOp &&
+                   "Unexpected LDTocOp.");
+            if (IsLargeModel) {
+              // Get the ADDIS instruction when using large model.
+              assert(RegInfo.hasOneDef(VarOffsetInst->getOperand(2).getReg()) &&
+                     "LDTocOp expects single def of its operand.");
+              VarOffsetInst =
+                  RegInfo.getOneDef(VarOffsetInst->getOperand(2).getReg())
+                      ->getParent();
+              assert(VarOffsetInst->getOpcode() ==
+                         (Is64Bit ? PPC::ADDIStocHA8 : PPC::ADDIStocHA) &&
+                     "Unexpected ADDIStocHA.");
+              // FIXME: machine-scheduler could schedule ADDIStocHA ahead of
+              // GETtlsMODAIX, and still has to use extra register.
+            }
+            Register ModuleHandleHReg;
             if (IsLargeModel) {
               ModuleHandleHReg = RegInfo.createVirtualRegister(GPRNoZero);
-              BuildMI(MBB, I, DL,
+              BuildMI(MBB, *VarOffsetInst, DL,
                       TII->get(Is64Bit ? PPC::ADDIStocHA8 : PPC::ADDIStocHA),
                       ModuleHandleHReg)
                   .addReg(Subtarget.getTOCPointerRegister())
                   .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG);
             }
             Register MHReg = RegInfo.createVirtualRegister(GPRNoZero);
-            BuildMI(MBB, I, DL, TII->get(LDTocOp), MHReg)
+            BuildMI(MBB, *VarOffsetInst, DL, TII->get(LDTocOp), MHReg)
                 .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG)
                 .addReg(IsLargeModel
                             ? ModuleHandleHReg
                             : Register(Subtarget.getTOCPointerRegister()));
-            BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
+            // The module handle is copied in r3.
+            BuildMI(MBB, *VarOffsetInst, DL, TII->get(TargetOpcode::COPY), GPR3)
                 .addReg(MHReg);
             // The call to .__tls_get_mod.
-            BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3);
+            BuildMI(MBB, *VarOffsetInst, DL, TII->get(Opc2), GPR3).addReg(GPR3);
+            // Copy VarOffset to R/X4
+            BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
+                .addReg(MI.getOperand(1).getReg());
             BuildMI(MBB, I, DL, TII->get(Is64Bit ? PPC::ADD8 : PPC::ADD4), GPR3)
                 .addReg(GPR3)
-                .addReg(MI.getOperand(1).getReg());
+                .addReg(GPR4);
           } else {
             // For Global-Dynamic, the variable offset and region handle are
             // copied in r4 and r3. The copies are followed by
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
index 13bef83cf50fd7..ea1a2b8fe9ade7 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
@@ -157,10 +157,10 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    lwz 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL32-NEXT:    lwz 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
-; SMALL32-NEXT:    add 3, 3, 6
+; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL32-NEXT:    add 3, 3, 4
 ; SMALL32-NEXT:    stfd 1, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
@@ -173,11 +173,12 @@ define void @storesTIInit(double %Val) #0 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 6, L..C4 at l(3)
-; LARGE32-NEXT:    addis 3, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
+; LARGE32-NEXT:    addis 4, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 6, L..C5 at l(4)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    add 3, 3, 6
+; LARGE32-NEXT:    mr 4, 6
+; LARGE32-NEXT:    add 3, 3, 4
 ; LARGE32-NEXT:    stfd 1, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
@@ -189,10 +190,10 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    ld 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL64-NEXT:    ld 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL64-NEXT:    add 3, 3, 6
+; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL64-NEXT:    add 3, 3, 4
 ; SMALL64-NEXT:    stfd 1, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -203,13 +204,13 @@ define void @storesTIInit(double %Val) #0 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    addis 3, L..C4 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 6, L..C4 at l(3)
-; LARGE64-NEXT:    addis 3, L..C5 at u(2)
-; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    addis 3, L..C4 at u(2)
+; LARGE64-NEXT:    addis 6, L..C5 at u(2)
+; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
-; LARGE64-NEXT:    add 3, 3, 6
+; LARGE64-NEXT:    ld 4, L..C5 at l(6)
+; LARGE64-NEXT:    add 3, 3, 4
 ; LARGE64-NEXT:    stfd 1, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -457,11 +458,11 @@ define double @loadsTIInit() #1 {
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    lwz 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL32-NEXT:    lwz 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL32-NEXT:    add 3, 3, 4
 ; SMALL32-NEXT:    lwz 4, L..C8(2) # @GInit
-; SMALL32-NEXT:    add 3, 3, 6
 ; SMALL32-NEXT:    lfd 0, 0(3)
 ; SMALL32-NEXT:    lfd 1, 0(4)
 ; SMALL32-NEXT:    fadd 1, 0, 1
@@ -476,11 +477,12 @@ define double @loadsTIInit() #1 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 6, L..C4 at l(3)
-; LARGE32-NEXT:    addis 3, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
+; LARGE32-NEXT:    addis 4, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 6, L..C5 at l(4)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    add 3, 3, 6
+; LARGE32-NEXT:    mr 4, 6
+; LARGE32-NEXT:    add 3, 3, 4
 ; LARGE32-NEXT:    lfd 0, 0(3)
 ; LARGE32-NEXT:    addis 3, L..C8 at u(2)
 ; LARGE32-NEXT:    lwz 3, L..C8 at l(3)
@@ -496,11 +498,11 @@ define double @loadsTIInit() #1 {
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    ld 6, L..C4(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL64-NEXT:    ld 3, L..C5(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
+; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
+; SMALL64-NEXT:    add 3, 3, 4
 ; SMALL64-NEXT:    ld 4, L..C8(2) # @GInit
-; SMALL64-NEXT:    add 3, 3, 6
 ; SMALL64-NEXT:    lfd 0, 0(3)
 ; SMALL64-NEXT:    lfd 1, 0(4)
 ; SMALL64-NEXT:    fadd 1, 0, 1
@@ -513,13 +515,13 @@ define double @loadsTIInit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    addis 3, L..C4 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 6, L..C4 at l(3)
-; LARGE64-NEXT:    addis 3, L..C5 at u(2)
-; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    addis 3, L..C4 at u(2)
+; LARGE64-NEXT:    addis 6, L..C5 at u(2)
+; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
-; LARGE64-NEXT:    add 3, 3, 6
+; LARGE64-NEXT:    ld 4, L..C5 at l(6)
+; LARGE64-NEXT:    add 3, 3, 4
 ; LARGE64-NEXT:    addis 4, L..C8 at u(2)
 ; LARGE64-NEXT:    lfd 0, 0(3)
 ; LARGE64-NEXT:    ld 3, L..C8 at l(4)
@@ -641,10 +643,10 @@ entry:
 ; SMALL32-LABEL:  L..C3:
 ; SMALL32-NEXT:   .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL32-LABEL:  L..C4:
-; SMALL32-NEXT:   .tc TIInit[TC],TIInit[TL]@ld
-; SMALL32-LABEL:  L..C5:
 ; SMALL32-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; SMALL32-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL32-LABEL:  L..C5:
+; SMALL32-NEXT:   .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL32-LABEL:  L..C6:
 ; SMALL32-NEXT:   .tc .TWInit[TC],TWInit[TL]@m
 ; SMALL32-LABEL:  L..C7:
@@ -662,10 +664,10 @@ entry:
 ; LARGE32-LABEL:  L..C3:
 ; LARGE32-NEXT:   .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL:  L..C4:
-; LARGE32-NEXT:   .tc TIInit[TE],TIInit[TL]@ld
-; LARGE32-LABEL:  L..C5:
 ; LARGE32-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE32-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE32-LABEL:  L..C5:
+; LARGE32-NEXT:   .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE32-LABEL:  L..C6:
 ; LARGE32-NEXT:   .tc .TWInit[TE],TWInit[TL]@m
 ; LARGE32-LABEL:  L..C7:
@@ -683,10 +685,10 @@ entry:
 ; SMALL64-LABEL:  L..C3:
 ; SMALL64-NEXT:  .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL64-LABEL:  L..C4:
-; SMALL64-NEXT:  .tc TIInit[TC],TIInit[TL]@ld
-; SMALL64-LABEL:  L..C5:
 ; SMALL64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; SMALL64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL64-LABEL:  L..C5:
+; SMALL64-NEXT:  .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL64-LABEL:  L..C6:
 ; SMALL64-NEXT:  .tc .TWInit[TC],TWInit[TL]@m
 ; SMALL64-LABEL:  L..C7:
@@ -704,10 +706,10 @@ entry:
 ; LARGE64-LABEL:  L..C3:
 ; LARGE64-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE64-LABEL:  L..C4:
-; LARGE64-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
-; LARGE64-LABEL:  L..C5:
 ; LARGE64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE64-LABEL:  L..C5:
+; LARGE64-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE64-LABEL:  L..C6:
 ; LARGE64-NEXT:  .tc .TWInit[TE],TWInit[TL]@m
 ; LARGE64-LABEL:  L..C7:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
index dc75db43d38c70..e1cfb6fd767cbf 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
@@ -165,10 +165,10 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    mr 6, 3
-; SMALL32-NEXT:    lwz 7, L..C4(2)
-; SMALL32-NEXT:    lwz 3, L..C5(2)
+; SMALL32-NEXT:    lwz 3, L..C4(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
-; SMALL32-NEXT:    add 3, 3, 7
+; SMALL32-NEXT:    lwz 4, L..C5(2)
+; SMALL32-NEXT:    add 3, 3, 4
 ; SMALL32-NEXT:    stw 6, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
@@ -182,11 +182,12 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 6, 3
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 7, L..C4 at l(3)
-; LARGE32-NEXT:    addis 3, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
+; LARGE32-NEXT:    addis 4, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 7, L..C5 at l(4)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    add 3, 3, 7
+; LARGE32-NEXT:    mr 4, 7
+; LARGE32-NEXT:    add 3, 3, 4
 ; LARGE32-NEXT:    stw 6, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
@@ -199,10 +200,10 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    mr 6, 3
-; SMALL64-NEXT:    ld 7, L..C4(2)
-; SMALL64-NEXT:    ld 3, L..C5(2)
+; SMALL64-NEXT:    ld 3, L..C4(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL64-NEXT:    add 3, 3, 7
+; SMALL64-NEXT:    ld 4, L..C5(2)
+; SMALL64-NEXT:    add 3, 3, 4
 ; SMALL64-NEXT:    stw 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -213,14 +214,14 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
-; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 7, L..C4 at l(3)
-; LARGE64-NEXT:    addis 3, L..C5 at u(2)
-; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    addis 7, L..C5 at u(2)
+; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
-; LARGE64-NEXT:    add 3, 3, 7
+; LARGE64-NEXT:    ld 4, L..C5 at l(7)
+; LARGE64-NEXT:    add 3, 3, 4
 ; LARGE64-NEXT:    stw 6, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -472,11 +473,11 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    lwz 6, L..C4(2)
-; SMALL32-NEXT:    lwz 3, L..C5(2)
+; SMALL32-NEXT:    lwz 3, L..C4(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
+; SMALL32-NEXT:    lwz 4, L..C5(2)
+; SMALL32-NEXT:    add 3, 3, 4
 ; SMALL32-NEXT:    lwz 4, L..C8(2)
-; SMALL32-NEXT:    add 3, 3, 6
 ; SMALL32-NEXT:    lwz 3, 0(3)
 ; SMALL32-NEXT:    lwz 4, 0(4)
 ; SMALL32-NEXT:    add 3, 4, 3
@@ -491,11 +492,12 @@ define i32 @loadsTIUninit() #1 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 6, L..C4 at l(3)
-; LARGE32-NEXT:    addis 3, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
+; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
+; LARGE32-NEXT:    addis 4, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 6, L..C5 at l(4)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    add 3, 3, 6
+; LARGE32-NEXT:    mr 4, 6
+; LARGE32-NEXT:    add 3, 3, 4
 ; LARGE32-NEXT:    lwz 3, 0(3)
 ; LARGE32-NEXT:    addis 4, L..C8 at u(2)
 ; LARGE32-NEXT:    lwz 4, L..C8 at l(4)
@@ -511,11 +513,11 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    ld 6, L..C4(2)
-; SMALL64-NEXT:    ld 3, L..C5(2)
+; SMALL64-NEXT:    ld 3, L..C4(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL64-NEXT:    ld 4, L..C5(2)
+; SMALL64-NEXT:    add 3, 3, 4
 ; SMALL64-NEXT:    ld 4, L..C8(2)
-; SMALL64-NEXT:    add 3, 3, 6
 ; SMALL64-NEXT:    lwz 3, 0(3)
 ; SMALL64-NEXT:    lwz 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -528,14 +530,14 @@ define i32 @loadsTIUninit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    addis 3, L..C4 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 6, L..C4 at l(3)
-; LARGE64-NEXT:    addis 3, L..C5 at u(2)
-; LARGE64-NEXT:    ld 3, L..C5 at l(3)
+; LARGE64-NEXT:    addis 3, L..C4 at u(2)
+; LARGE64-NEXT:    addis 6, L..C5 at u(2)
+; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    ld 4, L..C5 at l(6)
+; LARGE64-NEXT:    add 3, 3, 4
 ; LARGE64-NEXT:    addis 4, L..C8 at u(2)
-; LARGE64-NEXT:    add 3, 3, 6
 ; LARGE64-NEXT:    ld 4, L..C8 at l(4)
 ; LARGE64-NEXT:    lwz 3, 0(3)
 ; LARGE64-NEXT:    lwz 4, 0(4)
@@ -656,10 +658,10 @@ entry:
 ; SMALL32-LABEL: L..C3:
 ; SMALL32-NEXT:	 .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL32-LABEL: L..C4:
-; SMALL32-NEXT:	 .tc TIUninit[TC],TIUninit[UL]@ld
-; SMALL32-LABEL: L..C5:
 ; SMALL32-NEXT:	 .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; SMALL32-NEXT:	 .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL32-LABEL: L..C5:
+; SMALL32-NEXT:	 .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL32-LABEL: L..C6:
 ; SMALL32-NEXT:	 .tc .TWUninit[TC],TWUninit[TL]@m
 ; SMALL32-LABEL: L..C7:
@@ -677,10 +679,10 @@ entry:
 ; LARGE32-LABEL: L..C3:
 ; LARGE32-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL: L..C4:
-; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
-; LARGE32-LABEL: L..C5:
 ; LARGE32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE32-LABEL: L..C5:
+; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE32-LABEL: L..C6:
 ; LARGE32-NEXT:  .tc .TWUninit[TE],TWUninit[TL]@m
 ; LARGE32-LABEL: L..C7:
@@ -698,10 +700,10 @@ entry:
 ; SMALL64-LABEL:  L..C3:
 ; SMALL64-NEXT:   .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL64-LABEL:  L..C4:
-; SMALL64-NEXT:   .tc TIUninit[TC],TIUninit[UL]@ld
-; SMALL64-LABEL:  L..C5:
 ; SMALL64-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; SMALL64-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL64-LABEL:  L..C5:
+; SMALL64-NEXT:   .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL64-LABEL:  L..C6:
 ; SMALL64-NEXT:   .tc .TWUninit[TC],TWUninit[TL]@m
 ; SMALL64-LABEL:  L..C7:
@@ -719,10 +721,10 @@ entry:
 ; LARGE64-LABEL:  L..C3:
 ; LARGE64-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE64-LABEL:  L..C4:
-; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
-; LARGE64-LABEL:  L..C5:
 ; LARGE64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE64-LABEL:  L..C5:
+; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE64-LABEL:  L..C6:
 ; LARGE64-NEXT:  .tc .TWUninit[TE],TWUninit[TL]@m
 ; LARGE64-LABEL:  L..C7:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
index d19b5ad9b5e50a..2e18d6c9bfd7e5 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
@@ -99,11 +99,11 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    mr 7, 3
-; SMALL32-NEXT:    lwz 8, L..C2(2)
 ; SMALL32-NEXT:    mr 6, 4
-; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
-; SMALL32-NEXT:    add 3, 3, 8
+; SMALL32-NEXT:    lwz 4, L..C3(2)
+; SMALL32-NEXT:    add 3, 3, 4
 ; SMALL32-NEXT:    stw 6, 4(3)
 ; SMALL32-NEXT:    stw 7, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
@@ -119,11 +119,12 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; LARGE32-NEXT:    mr 7, 3
 ; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    addis 3, L..C2 at u(2)
-; LARGE32-NEXT:    lwz 8, L..C2 at l(3)
-; LARGE32-NEXT:    addis 3, L..C3 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
+; LARGE32-NEXT:    lwz 3, L..C2 at l(3)
+; LARGE32-NEXT:    addis 4, L..C3 at u(2)
+; LARGE32-NEXT:    lwz 8, L..C3 at l(4)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    add 3, 3, 8
+; LARGE32-NEXT:    mr 4, 8
+; LARGE32-NEXT:    add 3, 3, 4
 ; LARGE32-NEXT:    stw 6, 4(3)
 ; LARGE32-NEXT:    stw 7, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
@@ -137,10 +138,10 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    mr 6, 3
-; SMALL64-NEXT:    ld 7, L..C2(2)
-; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL64-NEXT:    add 3, 3, 7
+; SMALL64-NEXT:    ld 4, L..C3(2)
+; SMALL64-NEXT:    add 3, 3, 4
 ; SMALL64-NEXT:    std 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -151,14 +152,14 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C2 at u(2)
-; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 7, L..C2 at l(3)
-; LARGE64-NEXT:    addis 3, L..C3 at u(2)
-; LARGE64-NEXT:    ld 3, L..C3 at l(3)
+; LARGE64-NEXT:    addis 7, L..C3 at u(2)
+; LARGE64-NEXT:    ld 3, L..C2 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
-; LARGE64-NEXT:    add 3, 3, 7
+; LARGE64-NEXT:    ld 4, L..C3 at l(7)
+; LARGE64-NEXT:    add 3, 3, 4
 ; LARGE64-NEXT:    std 6, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -177,11 +178,11 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    mr 7, 3
-; SMALL32-NEXT:    lwz 8, L..C4(2)
 ; SMALL32-NEXT:    mr 6, 4
-; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
-; SMALL32-NEXT:    add 3, 3, 8
+; SMALL32-NEXT:    lwz 4, L..C4(2)
+; SMALL32-NEXT:    add 3, 3, 4
 ; SMALL32-NEXT:    stw 6, 4(3)
 ; SMALL32-NEXT:    stw 7, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
@@ -196,12 +197,13 @@ define void @storesTIInit(i64 %Val) #0 {
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 7, 3
 ; LARGE32-NEXT:    mr 6, 4
-; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 8, L..C4 at l(3)
-; LARGE32-NEXT:    addis 3, L..C3 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
+; LARGE32-NEXT:    addis 3, L..C2 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C2 at l(3)
+; LARGE32-NEXT:    addis 4, L..C4 at u(2)
+; LARGE32-NEXT:    lwz 8, L..C4 at l(4)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    add 3, 3, 8
+; LARGE32-NEXT:    mr 4, 8
+; LARGE32-NEXT:    add 3, 3, 4
 ; LARGE32-NEXT:    stw 6, 4(3)
 ; LARGE32-NEXT:    stw 7, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
@@ -215,10 +217,10 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    mr 6, 3
-; SMALL64-NEXT:    ld 7, L..C4(2)
-; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL64-NEXT:    add 3, 3, 7
+; SMALL64-NEXT:    ld 4, L..C4(2)
+; SMALL64-NEXT:    add 3, 3, 4
 ; SMALL64-NEXT:    std 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -229,14 +231,14 @@ define void @storesTIInit(i64 %Val) #0 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    mr 6, 3
-; LARGE64-NEXT:    addis 3, L..C4 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 7, L..C4 at l(3)
-; LARGE64-NEXT:    addis 3, L..C3 at u(2)
-; LARGE64-NEXT:    ld 3, L..C3 at l(3)
+; LARGE64-NEXT:    mr 6, 3
+; LARGE64-NEXT:    addis 3, L..C2 at u(2)
+; LARGE64-NEXT:    addis 7, L..C4 at u(2)
+; LARGE64-NEXT:    ld 3, L..C2 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
-; LARGE64-NEXT:    add 3, 3, 7
+; LARGE64-NEXT:    ld 4, L..C4 at l(7)
+; LARGE64-NEXT:    add 3, 3, 4
 ; LARGE64-NEXT:    std 6, 0(3)
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
@@ -331,7 +333,7 @@ define i64 @loadsTGInit() #1 {
 ; SMALL32-NEXT:    lwz 4, L..C1(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    lwz 4, L..C7(2) # @GInit
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -374,7 +376,7 @@ define i64 @loadsTGInit() #1 {
 ; SMALL64-NEXT:    ld 4, L..C1(2)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    ld 4, L..C7(2) # @GInit
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -416,11 +418,11 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    lwz 6, L..C2(2)
-; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
-; SMALL32-NEXT:    lwz 4, L..C7(2)
-; SMALL32-NEXT:    add 3, 3, 6
+; SMALL32-NEXT:    lwz 4, L..C3(2)
+; SMALL32-NEXT:    add 3, 3, 4
+; SMALL32-NEXT:    lwz 4, L..C7(2) # @GInit
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -438,11 +440,12 @@ define i64 @loadsTIUninit() #1 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C2 at u(2)
-; LARGE32-NEXT:    lwz 6, L..C2 at l(3)
-; LARGE32-NEXT:    addis 3, L..C3 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
+; LARGE32-NEXT:    lwz 3, L..C2 at l(3)
+; LARGE32-NEXT:    addis 4, L..C3 at u(2)
+; LARGE32-NEXT:    lwz 6, L..C3 at l(4)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    add 3, 3, 6
+; LARGE32-NEXT:    mr 4, 6
+; LARGE32-NEXT:    add 3, 3, 4
 ; LARGE32-NEXT:    lwz 4, 4(3)
 ; LARGE32-NEXT:    lwz 3, 0(3)
 ; LARGE32-NEXT:    addis 5, L..C7 at u(2)
@@ -461,11 +464,11 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    ld 6, L..C2(2)
-; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL64-NEXT:    ld 4, L..C7(2)
-; SMALL64-NEXT:    add 3, 3, 6
+; SMALL64-NEXT:    ld 4, L..C3(2)
+; SMALL64-NEXT:    add 3, 3, 4
+; SMALL64-NEXT:    ld 4, L..C7(2) # @GInit
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -478,14 +481,14 @@ define i64 @loadsTIUninit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    addis 3, L..C2 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 6, L..C2 at l(3)
-; LARGE64-NEXT:    addis 3, L..C3 at u(2)
-; LARGE64-NEXT:    ld 3, L..C3 at l(3)
+; LARGE64-NEXT:    addis 3, L..C2 at u(2)
+; LARGE64-NEXT:    addis 6, L..C3 at u(2)
+; LARGE64-NEXT:    ld 3, L..C2 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    ld 4, L..C3 at l(6)
+; LARGE64-NEXT:    add 3, 3, 4
 ; LARGE64-NEXT:    addis 4, L..C7 at u(2)
-; LARGE64-NEXT:    add 3, 3, 6
 ; LARGE64-NEXT:    ld 4, L..C7 at l(4)
 ; LARGE64-NEXT:    ld 3, 0(3)
 ; LARGE64-NEXT:    ld 4, 0(4)
@@ -508,11 +511,11 @@ define i64 @loadsTIInit() #1 {
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    lwz 6, L..C4(2)
-; SMALL32-NEXT:    lwz 3, L..C3(2)
+; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
-; SMALL32-NEXT:    lwz 4, L..C7(2)
-; SMALL32-NEXT:    add 3, 3, 6
+; SMALL32-NEXT:    lwz 4, L..C4(2)
+; SMALL32-NEXT:    add 3, 3, 4
+; SMALL32-NEXT:    lwz 4, L..C7(2) # @GInit
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -529,12 +532,13 @@ define i64 @loadsTIInit() #1 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 6, L..C4 at l(3)
-; LARGE32-NEXT:    addis 3, L..C3 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
+; LARGE32-NEXT:    addis 3, L..C2 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C2 at l(3)
+; LARGE32-NEXT:    addis 4, L..C4 at u(2)
+; LARGE32-NEXT:    lwz 6, L..C4 at l(4)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    add 3, 3, 6
+; LARGE32-NEXT:    mr 4, 6
+; LARGE32-NEXT:    add 3, 3, 4
 ; LARGE32-NEXT:    lwz 4, 4(3)
 ; LARGE32-NEXT:    lwz 3, 0(3)
 ; LARGE32-NEXT:    addis 5, L..C7 at u(2)
@@ -553,11 +557,11 @@ define i64 @loadsTIInit() #1 {
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    ld 6, L..C4(2)
-; SMALL64-NEXT:    ld 3, L..C3(2)
+; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL64-NEXT:    ld 4, L..C7(2)
-; SMALL64-NEXT:    add 3, 3, 6
+; SMALL64-NEXT:    ld 4, L..C4(2)
+; SMALL64-NEXT:    add 3, 3, 4
+; SMALL64-NEXT:    ld 4, L..C7(2) # @GInit
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -570,14 +574,14 @@ define i64 @loadsTIInit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    addis 3, L..C4 at u(2)
 ; LARGE64-NEXT:    std 0, 64(1)
-; LARGE64-NEXT:    ld 6, L..C4 at l(3)
-; LARGE64-NEXT:    addis 3, L..C3 at u(2)
-; LARGE64-NEXT:    ld 3, L..C3 at l(3)
+; LARGE64-NEXT:    addis 3, L..C2 at u(2)
+; LARGE64-NEXT:    addis 6, L..C4 at u(2)
+; LARGE64-NEXT:    ld 3, L..C2 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
+; LARGE64-NEXT:    ld 4, L..C4 at l(6)
+; LARGE64-NEXT:    add 3, 3, 4
 ; LARGE64-NEXT:    addis 4, L..C7 at u(2)
-; LARGE64-NEXT:    add 3, 3, 6
 ; LARGE64-NEXT:    ld 4, L..C7 at l(4)
 ; LARGE64-NEXT:    ld 3, 0(3)
 ; LARGE64-NEXT:    ld 4, 0(4)
@@ -603,7 +607,7 @@ define i64 @loadsTWInit() #1 {
 ; SMALL32-NEXT:    lwz 4, L..C6(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    lwz 4, L..C7(2) # @GInit
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -646,7 +650,7 @@ define i64 @loadsTWInit() #1 {
 ; SMALL64-NEXT:    ld 4, L..C6(2)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    ld 4, L..C7(2) # @GInit
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -700,10 +704,10 @@ entry:
 ; SMALL32-LABEL:  L..C1:
 ; SMALL32-NEXT:  .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL32-LABEL:  L..C2:
-; SMALL32-NEXT:  .tc TIUninit[TC],TIUninit[UL]@ld
-; SMALL32-LABEL:  L..C3:
 ; SMALL32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; SMALL32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL32-LABEL:  L..C3:
+; SMALL32-NEXT:  .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL32-LABEL:  L..C4:
 ; SMALL32-NEXT:  .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL32-LABEL:  L..C5:
@@ -719,10 +723,10 @@ entry:
 ; LARGE32-LABEL:  L..C1:
 ; LARGE32-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL:  L..C2:
-; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
-; LARGE32-LABEL:  L..C3:
 ; LARGE32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE32-LABEL:  L..C3:
+; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE32-LABEL:  L..C4:
 ; LARGE32-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE32-LABEL:  L..C5:
@@ -738,10 +742,10 @@ entry:
 ; SMALL64-LABEL:  L..C1:
 ; SMALL64-NEXT:  .tc TGInit[TC],TGInit[TL]@gd
 ; SMALL64-LABEL:  L..C2:
-; SMALL64-NEXT:  .tc TIUninit[TC],TIUninit[UL]@ld
-; SMALL64-LABEL:  L..C3:
 ; SMALL64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; SMALL64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL64-LABEL:  L..C3:
+; SMALL64-NEXT:  .tc TIUninit[TC],TIUninit[UL]@ld
 ; SMALL64-LABEL:  L..C4:
 ; SMALL64-NEXT:  .tc TIInit[TC],TIInit[TL]@ld
 ; SMALL64-LABEL:  L..C5:
@@ -757,10 +761,10 @@ entry:
 ; LARGE64-LABEL:  L..C1:
 ; LARGE64-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE64-LABEL:  L..C2:
-; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
-; LARGE64-LABEL:  L..C3:
 ; LARGE64-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE64-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE64-LABEL:  L..C3:
+; LARGE64-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE64-LABEL:  L..C4:
 ; LARGE64-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE64-LABEL:  L..C5:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
index 9faa99b6eb2ba7..0513f8de616201 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
@@ -16,22 +16,22 @@
 
 define i32 @loadTGInit() {
 ; SMALL-LABEL:  loadTGInit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        lwz [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
 ;
 ; LARGE-LABEL:  loadTGInit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        lwz [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
 entry:
@@ -42,22 +42,22 @@ entry:
 
 define void @storeTGInit(i32 noundef signext %i) {
 ; SMALL-LABEL:  storeTGInit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        stw [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
 ;
 ; LARGE-LABEL:  storeTGInit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        stw [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
 entry:
@@ -68,22 +68,22 @@ entry:
 
 define i32 @loadTGUninit() {
 ; SMALL-LABEL:  loadTGUninit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        lwz [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
 ;
 ; LARGE-LABEL:  loadTGUninit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        lwz [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
 entry:
@@ -94,22 +94,22 @@ entry:
 
 define void @storeTGUninit(i32 noundef signext %i) {
 ; SMALL-LABEL:  storeTGUninit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        stw [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
 ;
 ; LARGE-LABEL:  storeTGUninit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        stw [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
 entry:
@@ -120,22 +120,22 @@ entry:
 
 define i32 @loadTIInit() {
 ; SMALL-LABEL:  loadTIInit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        lwz [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
 ;
 ; LARGE-LABEL:  loadTIInit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        lwz [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
 entry:
@@ -146,22 +146,22 @@ entry:
 
 define void @storeTIInit(i32 noundef signext %i) {
 ; SMALL-LABEL:  storeTIInit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        stw [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
 ;
 ; LARGE-LABEL:  storeTIInit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        stw [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
 entry:
@@ -172,22 +172,22 @@ entry:
 
 define i32 @loadTIUninit() {
 ; SMALL-LABEL:  loadTIUninit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        lwz [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
 ;
 ; LARGE-LABEL:  loadTIUninit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        lwz [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
 entry:
@@ -198,22 +198,22 @@ entry:
 
 define void @storeTIUninit(i32 noundef signext %i) {
 ; SMALL-LABEL:  storeTIUninit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        stw [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
 ;
 ; LARGE-LABEL:  storeTIUninit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        stw [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
 entry:
@@ -224,22 +224,22 @@ entry:
 
 define i32 @loadTWInit() {
 ; SMALL-LABEL:  loadTWInit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        lwz [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
 ;
 ; LARGE-LABEL:  loadTWInit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        lwz [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
 entry:
@@ -250,22 +250,22 @@ entry:
 
 define void @storeTWInit(i32 noundef signext %i) {
 ; SMALL-LABEL:  storeTWInit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        stw [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
 ;
 ; LARGE-LABEL:  storeTWInit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        stw [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
 entry:
@@ -276,22 +276,22 @@ entry:
 
 define i32 @loadTWUninit() {
 ; SMALL-LABEL:  loadTWUninit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        lwz [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
 ;
 ; LARGE-LABEL:  loadTWUninit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        lwz [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
 entry:
@@ -302,22 +302,22 @@ entry:
 
 define void @storeTWUninit(i32 noundef signext %i) {
 ; SMALL-LABEL:  storeTWUninit:
-; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
-; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
 ; SMALL64:      ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; SMALL:        bla .__tls_get_mod[PR]
+; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
+; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
 ; SMALL:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; SMALL:        stw [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
 ;
 ; LARGE-LABEL:  storeTWUninit:
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
-; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
+; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
+; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ; LARGE:        stw [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
 entry:
@@ -329,11 +329,11 @@ entry:
 ; SMALL:          .extern .__tls_get_mod[PR]
 ; LARGE:          .extern .__tls_get_mod[PR]
 
-; SMALL:        [[TGInitL]]:
-; SMALL-NEXT:   .tc TGInit[TC],TGInit[TL]@ld
 ; SMALL:        [[ModuleHandleL]]:
 ; SMALL-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; SMALL-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; SMALL:        [[TGInitL]]:
+; SMALL-NEXT:   .tc TGInit[TC],TGInit[TL]@ld
 ; SMALL:        [[TGUninitL]]:
 ; SMALL-NEXT:   .tc TGUninit[TC],TGUninit[TL]@ld
 ; SMALL:        [[TIInitL]]:
@@ -345,11 +345,11 @@ entry:
 ; SMALL:        [[TWUninitL]]:
 ; SMALL-NEXT:   .tc TWUninit[TC],TWUninit[TL]@ld
 
-; LARGE:        [[TGInitL]]:
-; LARGE-NEXT:   .tc TGInit[TE],TGInit[TL]@ld
 ; LARGE:        [[ModuleHandleL]]:
 ; LARGE-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE:        [[TGInitL]]:
+; LARGE-NEXT:   .tc TGInit[TE],TGInit[TL]@ld
 ; LARGE:        [[TGUninitL]]:
 ; LARGE-NEXT:   .tc TGUninit[TE],TGUninit[TL]@ld
 ; LARGE:        [[TIInitL]]:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
index b1749511b910cf..fe843e5dcb0ea7 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
@@ -33,7 +33,7 @@ entry:
 ; RELOC-NEXT:   Section (index: 1) .text {
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x16
-; RELOC-NEXT:     Symbol: TIInit ([[#NFA+19]])
+; RELOC-NEXT:     Symbol: _$TLSML ([[#NFA+19]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -41,7 +41,7 @@ entry:
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x1A
-; RELOC-NEXT:     Symbol: TIInit ([[#NFA+19]])
+; RELOC-NEXT:     Symbol: _$TLSML ([[#NFA+19]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -49,7 +49,7 @@ entry:
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x1E
-; RELOC-NEXT:     Symbol: _$TLSML ([[#NFA+21]])
+; RELOC-NEXT:     Symbol: TIInit ([[#NFA+21]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -57,7 +57,7 @@ entry:
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x22
-; RELOC-NEXT:     Symbol: _$TLSML ([[#NFA+21]])
+; RELOC-NEXT:     Symbol: TIInit ([[#NFA+21]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -163,19 +163,19 @@ entry:
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
 ; RELOC-NEXT:   Virtual Address: 0xB8
-; RELOC-NEXT:   Symbol: TIInit ([[#NFA+29]])
+; RELOC-NEXT:   Symbol: _$TLSML ([[#NFA+19]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLS_LD (0x22)
+; RELOC-NEXT:   Type: R_TLSML (0x25)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
 ; RELOC-NEXT:   Virtual Address: 0xBC
-; RELOC-NEXT:   Symbol: _$TLSML ([[#NFA+21]])
+; RELOC-NEXT:   Symbol: TIInit ([[#NFA+29]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLSML (0x25)
+; RELOC-NEXT:   Type: R_TLS_LD (0x22)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
 ; RELOC-NEXT:   Virtual Address: 0xC0
@@ -411,7 +411,7 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+19]]
-; SYM-NEXT:     Name: TIInit
+; SYM-NEXT:     Name: _$TLSML
 ; SYM-NEXT:     Value (RelocatableAddress): 0xB8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
@@ -424,14 +424,14 @@ entry:
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 2
 ; SYM-NEXT:       SymbolType: XTY_SD (0x1)
-; SYM-NEXT:       StorageMappingClass: XMC_TE (0x16)
+; SYM-NEXT:       StorageMappingClass: XMC_TC (0x3)
 ; SYM-NEXT:       StabInfoIndex: 0x0
 ; SYM-NEXT:       StabSectNum: 0x0
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+21]]
-; SYM-NEXT:     Name: _$TLSML
+; SYM-NEXT:     Name: TIInit
 ; SYM-NEXT:     Value (RelocatableAddress): 0xBC
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
@@ -444,7 +444,7 @@ entry:
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 2
 ; SYM-NEXT:       SymbolType: XTY_SD (0x1)
-; SYM-NEXT:       StorageMappingClass: XMC_TC (0x3)
+; SYM-NEXT:       StorageMappingClass: XMC_TE (0x16)
 ; SYM-NEXT:       StabInfoIndex: 0x0
 ; SYM-NEXT:       StabSectNum: 0x0
 ; SYM-NEXT:     }
@@ -560,16 +560,17 @@ entry:
 ; DIS-NEXT:                                       mr 7, 3
 ; DIS-NEXT:                                       mr 6, 4
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: [[#NFA+19]]) TIInit[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 8, 0(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: [[#NFA+19]]) TIInit[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: [[#NFA+21]]) _$TLSML[TC]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 4(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: [[#NFA+21]]) _$TLSML[TC]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: [[#NFA+19]]) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 0(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: [[#NFA+19]]) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 4, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: [[#NFA+21]]) TIInit[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 8, 4(4)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: [[#NFA+21]]) TIInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0x0
 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: [[#NFA+1]]) .__tls_get_mod[PR]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 3, 3, 8
+; DIS-NEXT:                                       mr      4, 8
+; DIS-NEXT:                                       add 3, 3, 4
 ; DIS-NEXT:                                       stw 6, 4(3)
 ; DIS-NEXT:                                       stw 7, 0(3)
 ; DIS-NEXT:                                       addi 1, 1, 32
@@ -618,12 +619,12 @@ entry:
 ; DIS-NEXT:       b0: 00 00 00 b8
 ; DIS-NEXT: 000000b0: R_POS (idx: [[#NFA+17]]) TOC[TC0]
 ; DIS-NEXT:       b4: 00 00 00 00
-; DIS:      000000b8  (idx: [[#NFA+19]]) TIInit[TE]:
+; DIS:      000000b8 (idx: [[#NFA+19]]) _$TLSML[TC]:
 ; DIS-NEXT:       b8: 00 00 00 00
-; DIS-NEXT: 000000b8: R_TLS_LD (idx: [[#NFA+29]]) TIInit[TL]
-; DIS:      000000bc (idx: [[#NFA+21]]) _$TLSML[TC]:
+; DIS-NEXT: 000000b8:  R_TLSML (idx: [[#NFA+19]]) _$TLSML[TC]
+; DIS:      000000bc  (idx: [[#NFA+21]]) TIInit[TE]:
 ; DIS-NEXT:       bc: 00 00 00 00
-; DIS-NEXT: 000000bc:  R_TLSML (idx: [[#NFA+21]]) _$TLSML[TC]
+; DIS-NEXT: 000000bc: R_TLS_LD (idx: [[#NFA+29]]) TIInit[TL]
 ; DIS:      000000c0  (idx: [[#NFA+23]]) .TWInit[TE]:
 ; DIS-NEXT:       c0: 00 00 00 00
 ; DIS-NEXT: 000000c0: R_TLSM (idx: [[#NFA+31]]) TWInit[TL]
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
index 10ccb520a94aba..feeb1da35ecbad 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
@@ -34,27 +34,27 @@ entry:
 ; RELOC-NEXT:   Section (index: 1) .text {
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x12
-; RELOC-NEXT:     Symbol: TIUninit ([[#NFA+25]])
+; RELOC-NEXT:     Symbol: _$TLSML ([[#NFA+25]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x16
-; RELOC-NEXT:     Symbol: _$TLSML ([[#NFA+27]])
+; RELOC-NEXT:     Virtual Address: 0x14
+; RELOC-NEXT:     Symbol: .__tls_get_mod ([[#NFA+1]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
-; RELOC-NEXT:     Length: 16
-; RELOC-NEXT:     Type: R_TOC (0x3)
+; RELOC-NEXT:     Length: 26
+; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x18
-; RELOC-NEXT:     Symbol: .__tls_get_mod ([[#NFA+1]])
+; RELOC-NEXT:     Virtual Address: 0x1A
+; RELOC-NEXT:     Symbol: TIUninit ([[#NFA+27]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
-; RELOC-NEXT:     Length: 26
-; RELOC-NEXT:     Type: R_RBA (0x18)
+; RELOC-NEXT:     Length: 16
+; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x4A
@@ -124,19 +124,19 @@ entry:
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
 ; RELOC-NEXT:   Virtual Address: 0x98
-; RELOC-NEXT:   Symbol: TIUninit ([[#NFA+39]])
+; RELOC-NEXT:   Symbol: _$TLSML ([[#NFA+25]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLS_LD (0x22)
+; RELOC-NEXT:   Type: R_TLSML (0x25)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
 ; RELOC-NEXT:   Virtual Address: 0x9C
-; RELOC-NEXT:   Symbol: _$TLSML ([[#NFA+27]])
+; RELOC-NEXT:   Symbol: TIUninit ([[#NFA+39]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLSML (0x25)
+; RELOC-NEXT:   Type: R_TLS_LD (0x22)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
 ; RELOC-NEXT:   Virtual Address: 0xA0
@@ -421,7 +421,7 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+25]]
-; SYM-NEXT:     Name: TIUninit
+; SYM-NEXT:     Name: _$TLSML
 ; SYM-NEXT:     Value (RelocatableAddress): 0x98
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
@@ -441,7 +441,7 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+27]]
-; SYM-NEXT:     Name: _$TLSML
+; SYM-NEXT:     Name: TIUninit
 ; SYM-NEXT:     Value (RelocatableAddress): 0x9C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
@@ -588,13 +588,13 @@ entry:
 ; DIS-NEXT:                                      stwu 1, -32(1)
 ; DIS-NEXT:                                      stw 0, 40(1)
 ; DIS-NEXT:                                      mr 6, 3
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 7, 0(2)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: [[#NFA+25]]) TIUninit[TC]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 4(2)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: [[#NFA+27]]) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 0(2)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: [[#NFA+25]]) _$TLSML[TC]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               bla 0x0
 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: [[#NFA+1]]) .__tls_get_mod[PR]
-; DIS-NEXT:                                      add 3, 3, 7
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 4, 4(2)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: [[#NFA+27]]) TIUninit[TC]
+; DIS-NEXT:                                      add 3, 3, 4
 ; DIS-NEXT:                                      stw 6, 0(3)
 ; DIS-NEXT:                                      addi 1, 1, 32
 ; DIS-NEXT:                                      lwz 0, 8(1)
@@ -637,12 +637,12 @@ entry:
 ; DIS-NEXT:       90: 00 00 00 98
 ; DIS-NEXT: 00000090: R_POS (idx: [[#NFA+23]]) TOC[TC0]
 ; DIS-NEXT:       94: 00 00 00 00
-; DIS:      00000098  (idx: [[#NFA+25]]) TIUninit[TC]:
+; DIS:      00000098 (idx: [[#NFA+25]]) _$TLSML[TC]:
 ; DIS-NEXT:       98: 00 00 00 00
-; DIS-NEXT: 00000098: R_TLS_LD (idx: [[#NFA+39]]) TIUninit[UL]
-; DIS:      0000009c (idx: [[#NFA+27]]) _$TLSML[TC]:
+; DIS-NEXT: 00000098: R_TLSML (idx: [[#NFA+25]]) _$TLSML[TC]
+; DIS:      0000009c  (idx: [[#NFA+27]]) TIUninit[TC]:
 ; DIS-NEXT:       9c: 00 00 00 00
-; DIS-NEXT: 0000009c: R_TLSML (idx: [[#NFA+27]]) _$TLSML[TC]
+; DIS-NEXT: 0000009c: R_TLS_LD (idx: [[#NFA+39]]) TIUninit[UL]
 ; DIS:      000000a0  (idx: [[#NFA+29]]) .TGInit[TC]:
 ; DIS-NEXT:       a0: 00 00 00 00
 ; DIS-NEXT: 000000a0: R_TLSM (idx: [[#NFA+37]]) TGInit

>From 3a5113da98a5cf62509078fac803f27ddab09f5e Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Thu, 21 Sep 2023 23:40:30 -0400
Subject: [PATCH 07/31] Remove TLS local-dynamic mode guards for AIX.

---
 clang/include/clang/Basic/DiagnosticDriverKinds.td | 1 -
 clang/lib/Frontend/CompilerInvocation.cpp          | 8 --------
 clang/lib/Sema/SemaDeclAttr.cpp                    | 6 ------
 clang/test/CodeGen/PowerPC/aix-tls-model.cpp       | 9 ++++++---
 clang/test/Sema/aix-attr-tls_model.c               | 2 +-
 5 files changed, 7 insertions(+), 19 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 0807d8877591a6..b8d583f1970f33 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -693,7 +693,6 @@ def err_drv_cannot_mix_options : Error<"cannot specify '%1' along with '%0'">;
 def err_drv_invalid_object_mode : Error<
   "OBJECT_MODE setting %0 is not recognized and is not a valid setting">;
 
-def err_aix_unsupported_tls_model : Error<"TLS model '%0' is not yet supported on AIX">;
 def err_roptr_requires_data_sections: Error<"-mxcoff-roptr is supported only with -fdata-sections">;
 def err_roptr_cannot_build_shared: Error<"-mxcoff-roptr is not supported with -shared">;
 
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index bcb31243056b7e..ad70b44d7793fa 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1978,14 +1978,6 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
     Opts.LinkBitcodeFiles.push_back(F);
   }
 
-  if (Arg *A = Args.getLastArg(OPT_ftlsmodel_EQ)) {
-    if (T.isOSAIX()) {
-      StringRef Name = A->getValue();
-      if (Name == "local-dynamic")
-        Diags.Report(diag::err_aix_unsupported_tls_model) << Name;
-    }
-  }
-
   if (Arg *A = Args.getLastArg(OPT_fdenormal_fp_math_EQ)) {
     StringRef Val = A->getValue();
     Opts.FPDenormalMode = llvm::parseDenormalFPAttribute(Val);
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 8a204b1d3b88d9..4bd30b1c0989e7 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -2053,12 +2053,6 @@ static void handleTLSModelAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
     return;
   }
 
-  if (S.Context.getTargetInfo().getTriple().isOSAIX() &&
-      Model == "local-dynamic") {
-    S.Diag(LiteralLoc, diag::err_aix_attr_unsupported_tls_model) << Model;
-    return;
-  }
-
   D->addAttr(::new (S.Context) TLSModelAttr(S.Context, AL, Model));
 }
 
diff --git a/clang/test/CodeGen/PowerPC/aix-tls-model.cpp b/clang/test/CodeGen/PowerPC/aix-tls-model.cpp
index 9fdd6855a89ee9..cd0a08aa9a3b77 100644
--- a/clang/test/CodeGen/PowerPC/aix-tls-model.cpp
+++ b/clang/test/CodeGen/PowerPC/aix-tls-model.cpp
@@ -1,11 +1,11 @@
 // RUN: %clang_cc1 %s -triple powerpc-unknown-aix -target-cpu pwr8 -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-GD
 // RUN: %clang_cc1 %s -triple powerpc-unknown-aix -target-cpu pwr8 -ftls-model=global-dynamic -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-GD
-// RUN: not %clang_cc1 %s -triple powerpc-unknown-aix -target-cpu pwr8 -ftls-model=local-dynamic -emit-llvm 2>&1 | FileCheck %s -check-prefix=CHECK-LD-ERROR
+// RUN: %clang_cc1 %s -triple powerpc-unknown-aix -target-cpu pwr8 -ftls-model=local-dynamic -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-LD
 // RUN: %clang_cc1 %s -triple powerpc-unknown-aix -target-cpu pwr8 -ftls-model=initial-exec -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-IE
 // RUN: %clang_cc1 %s -triple powerpc-unknown-aix -target-cpu pwr8 -ftls-model=local-exec -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-LE
 // RUN: %clang_cc1 %s -triple powerpc64-unknown-aix -target-cpu pwr8 -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-GD
 // RUN: %clang_cc1 %s -triple powerpc64-unknown-aix -target-cpu pwr8 -ftls-model=global-dynamic -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-GD
-// RUN: not %clang_cc1 %s -triple powerpc64-unknown-aix -target-cpu pwr8 -ftls-model=local-dynamic -emit-llvm 2>&1 | FileCheck %s -check-prefix=CHECK-LD-ERROR
+// RUN: %clang_cc1 %s -triple powerpc64-unknown-aix -target-cpu pwr8 -ftls-model=local-dynamic -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-LD
 // RUN: %clang_cc1 %s -triple powerpc64-unknown-aix -target-cpu pwr8 -ftls-model=initial-exec -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-IE
 // RUN: %clang_cc1 %s -triple powerpc64-unknown-aix -target-cpu pwr8 -ftls-model=local-exec -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-LE
 
@@ -21,7 +21,10 @@ int f() {
 // CHECK-GD: @z2 ={{.*}} global i32 0
 // CHECK-GD: @x ={{.*}} thread_local global i32 0
 // CHECK-GD: @_ZZ1fvE1y = internal thread_local global i32 0
-// CHECK-LD-ERROR:  error: TLS model 'local-dynamic' is not yet supported on AIX
+// CHECK-LD: @z1 ={{.*}} global i32 0
+// CHECK-LD: @z2 ={{.*}} global i32 0
+// CHECK-LD: @x ={{.*}} thread_local(localdynamic) global i32 0
+// CHECK-LD: @_ZZ1fvE1y = internal thread_local(localdynamic) global i32 0
 // CHECK-IE: @z1 ={{.*}} global i32 0
 // CHECK-IE: @z2 ={{.*}} global i32 0
 // CHECK-IE: @x ={{.*}} thread_local(initialexec) global i32 0
diff --git a/clang/test/Sema/aix-attr-tls_model.c b/clang/test/Sema/aix-attr-tls_model.c
index 9c22d6cceed81c..7c2047bced9391 100644
--- a/clang/test/Sema/aix-attr-tls_model.c
+++ b/clang/test/Sema/aix-attr-tls_model.c
@@ -6,6 +6,6 @@
 #endif
 
 static __thread int y __attribute((tls_model("global-dynamic"))); // no-warning
-static __thread int y __attribute((tls_model("local-dynamic"))); // expected-error {{TLS model 'local-dynamic' is not yet supported on AIX}}
+static __thread int y __attribute((tls_model("local-dynamic"))); // expected-no-diagnostics
 static __thread int y __attribute((tls_model("initial-exec"))); // no-warning
 static __thread int y __attribute((tls_model("local-exec"))); // no-warning

>From 7ef99338a05342091ccb3892154982213e6e8e6c Mon Sep 17 00:00:00 2001
From: tingwang <tingwang at tingwangs-MBP.lan>
Date: Sun, 24 Sep 2023 13:11:06 +0800
Subject: [PATCH 08/31] Fixed issues raised by comments and incorporated
 suggested changes. (1) Removed TLSLDAIX argument so that duplicated nodes can
 be eliminated. (2) Try to reuse clobber r4 by moving TLSLDAIX ahead of
 LoadOffsetToc node. (3) Add FIXME comments. (4) Add test case to show
 duplicated .__tls_get_mod can be eliminated.

Below two cases are not updated yet due to environment issue. I will fix
those in next update.
Failed Tests:
  LLVM :: CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
  LLVM :: CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
---
 .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp  |   9 +-
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     |  28 ++-
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  21 +-
 llvm/lib/Target/PowerPC/PPCISelLowering.h     |   4 +-
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td      |  10 +-
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       |   8 +-
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 123 +++++++-----
 .../test/CodeGen/PowerPC/aix-tls-gd-double.ll |  58 +++---
 llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll   |  58 +++---
 .../CodeGen/PowerPC/aix-tls-gd-longlong.ll    | 164 +++++++---------
 .../CodeGen/PowerPC/aix-tls-local-dynamic.ll  | 184 ++++++++++--------
 11 files changed, 348 insertions(+), 319 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 22cd2fc03ef7c3..00dda306a584c7 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -231,9 +231,12 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer {
       MCSymbolXCOFF *TCSym =
           cast<MCSectionXCOFF>(Streamer.getCurrentSectionOnly())
               ->getQualNameSymbol();
-      // On AIX, we have a region handle (symbol at m), module handle
-      // (__TLSML[TC]@ml) and the variable offset (symbol@{gd|ie|le|ld}) for TLS
-      // variables, depending on the TLS model.
+      // On AIX, we have TLS variable offsets (symbol@({gd|ie|le|ld}) depending
+      // on the TLS access method (or model). For the general-dynamic access
+      // method, we also have region handle (symbol at m) for each variable. For
+      // local-dynamic, a module handle (__TLSML[TC]@ml) for all variables. For
+      // local-exec and initial-exec, we have a thread pointer, in r13 for
+      // 64-bit mode and returned by .__get_tpointer for 32-bit mode.
       if (Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSIE ||
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index ff1330e14a1829..769656d9d0f10c 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -680,9 +680,9 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
 
   if (Subtarget->isAIXABI()) {
     // For TLSGD, the variable offset should already be in R4 and the region
-    // handle should already be in R3, generate absolute branch to
-    // .__tls_get_addr. For TLSLD, the module handle should already be in R3,
-    // generate branch to .__tls_get_mod.
+    // handle should already be in R3. We generate an absolute branch to
+    // .__tls_get_addr. For TLSLD, the module handle should already be in R3.
+    // We generate an absolute branch to .__tls_get_mod.
     Register VarOffsetReg = Subtarget->isPPC64() ? PPC::X4 : PPC::R4;
     (void)VarOffsetReg;
     assert((MI->getOpcode() == PPC::GETtlsMOD32AIX ||
@@ -767,9 +767,10 @@ getTOCEntryTypeForMO(const MachineOperand &MO) {
   }
 }
 
-// On AIX, TLS-local-dynamic requires that symbol for the module handle must
+// FIXME: find alternative approach to get rid of this hack.
+// On AIX, TLS-local-dynamic requires that the symbol for the module handle must
 // have the name "_$TLSML". This symbol is used as one TOC symbol reference
-// itself with ML relocation type, thus it has "[TC]" attached to its name.
+// itself with an ML relocation type, thus it has "[TC]" attached to its name.
 static inline bool isSpecialAIXSymbolTLSML(const MachineOperand &MO,
                                            const bool IsAIX) {
   return IsAIX && MO.isSymbol() &&
@@ -871,9 +872,11 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD;
     if (MO.getTargetFlags() & PPCII::MO_TLSLD_FLAG) {
       if (isSpecialAIXSymbolTLSML(MO, IsAIX))
-        // FIXME: On AIX the ML relocation type is only valid for a reference to
-        // a TOC symbol from the symbol itself, and right now its only user is
-        // symbol "_$TLSML". Use symbol name to decide that R_TLSML is expected.
+        // FIXME: Due to the size limit of MachineOperand::SubReg_TargetFlags,
+        // hacked this flag which should have been named MO_TLSLDM_FLAG: on AIX
+        // the ML relocation type is only valid for a reference to a TOC symbol
+        // from the symbol itself, and right now its only user is the symbol
+        // "_$TLSML". Use symbol name to decide that R_TLSML is expected.
         return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML;
       if (IsAIX)
         return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD;
@@ -1394,14 +1397,17 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
                    .addExpr(SymGotTlsGD));
     return;
   }
+  case PPC::GETtlsMOD32AIX:
+  case PPC::GETtlsMOD64AIX:
+    // Transform: %r3 = GETtlsMODNNAIX %r3 (for NN == 32/64).
+    // Into: BLA .__tls_get_mod()
+    // Input parameter is a module handle (__TLSML[TC]@ml) for all variables.
   case PPC::GETtlsADDR:
     // Transform: %x3 = GETtlsADDR %x3, @sym
     // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsgd)
   case PPC::GETtlsADDRPCREL:
   case PPC::GETtlsADDR32AIX:
   case PPC::GETtlsADDR64AIX:
-  case PPC::GETtlsMOD32AIX:
-  case PPC::GETtlsMOD64AIX:
     // Transform: %r3 = GETtlsADDRNNAIX %r3, %r4 (for NN == 32/64).
     // Into: BLA .__tls_get_addr()
     // Unlike on Linux, there is no symbol or relocation needed for this call.
@@ -3034,6 +3040,8 @@ void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) {
   }
   case PPC::GETtlsMOD32AIX:
   case PPC::GETtlsMOD64AIX:
+    // A reference to .__tls_get_mod is unknown to the assembler so we need to
+    // emit an external symbol reference.
   case PPC::GETtlsTpointer32AIX:
   case PPC::GETtlsADDR64AIX:
   case PPC::GETtlsADDR32AIX: {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 36bb939c41d25c..3ccc0754fcd099 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1774,13 +1774,11 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
-  case PPCISD::GET_TLS_MOD_AIX:
-    return "PPCISD::GET_TLS_MOD_AIX";
+  case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
   case PPCISD::GET_TPOINTER:    return "PPCISD::GET_TPOINTER";
   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
   case PPCISD::TLSGD_AIX:       return "PPCISD::TLSGD_AIX";
-  case PPCISD::TLSLD_AIX:
-    return "PPCISD::TLSLD_AIX";
+  case PPCISD::TLSLD_AIX:       return "PPCISD::TLSLD_AIX";
   case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
   case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
   case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
@@ -3420,14 +3418,19 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
   }
 
   if (Model == TLSModel::LocalDynamic) {
-    // For local-dynamic on AIX, we need to generate two TOC entries, one for
-    // the variable offset, the other for the module handle. The module handle
-    // is encapsulated inside the TLSLD_AIX pseudo node, and will be expanded by
-    // PPCTLSDynamicCall.
+    // For local-dynamic on AIX, we need to generate one TOC entry for each
+    // variable offset, and single module-handle TOC entry for the entire file.
+
+    // We are not (1) create GV node (2) call getTOCEntry for the module-handle
+    // due to the reason that the module-handle should not be materialized (i.e.
+    // there should be no symbol-table entry referring to the module-handle).
+    // Instead we will create reference to __TLSML[TC]@ml in PPCTLSDynamicCall
+    // when processing the TLSLD_AIX pseudo node.
+    SDValue ModuleHandle = DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT);
     SDValue VariableOffsetTGA =
         DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
     SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
-    return DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, VariableOffset);
+    return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
   }
 
   // If Local- or Initial-exec or Local-dynamic is not possible or specified,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index d2e207a04908b1..e9a38f3601e91f 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -381,8 +381,8 @@ namespace llvm {
 
     /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(variable offset)
     /// Op that internally creates TOC entry for the "_$TLSML" symbol, generates
-    /// GET_TLS_MOD_AIX node which will be expanded into a call to
-    /// .__tls_get_mod, and then add the variable offset with the result from
+    /// a GET_TLS_MOD_AIX node which will be expanded into a call to
+    /// .__tls_get_mod, and then adds the variable offset with the result from
     /// the call. This node is used in both 32-bit and 64-bit modes. The only
     /// difference is register class.
     TLSLD_AIX,
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index a3590580777f3a..e4a6e19dabd348 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1563,7 +1563,7 @@ def GETtlsADDR64AIX :
                     "GETtlsADDR64AIX",
                     [(set i64:$rD,
                       (PPCgetTlsAddr i64:$offset, i64:$handle))]>, isPPC64;
-// On AIX, the call to .__tls_get_mod need one input in X3 for the module handle.
+// On AIX, the call to .__tls_get_mod needs one input in X3 for the module handle.
 def GETtlsMOD64AIX :
   PPCEmitTimePseudo<(outs g8rc:$rD),(ins g8rc:$handle),
                     "GETtlsMOD64AIX",
@@ -1602,10 +1602,10 @@ def TLSGDAIX8 :
                      "#TLSGDAIX8",
                      [(set i64:$rD,
                        (PPCTlsgdAIX i64:$offset, i64:$handle))]>;
-// This pseudo is expanded to one copy to put the module handle in R3, then call
-// GETtlsMOD64AIX, and then add variable offset to the output from the call.
-def TLSLDAIX8 : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$handle),
-                     "#TLSLDAIX8", [(set i64:$rD, (PPCTlsldAIX i64:$handle))]>;
+// This pseudo is expanded to one copy to put the module handle in R3, the call
+// to GETtlsMOD64AIX, and then adds the variable offset to the output from the call.
+def TLSLDAIX8 : PPCEmitTimePseudo<(outs g8rc:$rD), (ins),
+                     "#TLSLDAIX8", [(set i64:$rD, (PPCTlsldAIX))]>;
 // Combined op for ADDItlsldL and GETtlsADDR, late expanded.  X3 and LR8
 // are true defines, while the rest of the Defs are clobbers.
 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 416cae1754a328..9fbdd5728d12ff 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -220,7 +220,7 @@ def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR",
                                  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                                  SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>;
 def PPCTlsgdAIX     : SDNode<"PPCISD::TLSGD_AIX", SDTIntBinOp>;
-def PPCTlsldAIX     : SDNode<"PPCISD::TLSLD_AIX", SDTIntUnaryOp>;
+def PPCTlsldAIX     : SDNode<"PPCISD::TLSLD_AIX", SDTIntLeaf>;
 def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
 def PPCaddiTlsldL   : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
 def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
@@ -3302,10 +3302,8 @@ def TLSGDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$offset, gprc:$handl
                             (PPCTlsgdAIX i32:$offset, i32:$handle))]>;
 // This pseudo is expanded to one copy to put the module handle in R3, then call
 // GETtlsMOD32AIX, and then add variable offset to the output from the call.
-def TLSLDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$handle),
-                          "#TLSLDAIX",
-                          [(set i32:$rD,
-                            (PPCTlsldAIX i32:$handle))]>;
+def TLSLDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins),
+                          "#TLSLDAIX", [(set i32:$rD, (PPCTlsldAIX))]>;
 // LR is a true define, while the rest of the Defs are clobbers.  R3 is
 // explicitly defined when this op is created, so not mentioned here.
 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index bb03f8f911c491..ba3f9f04697d79 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -92,7 +92,9 @@ namespace {
         Register InReg = PPC::NoRegister;
         Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
         Register GPR4 = Is64Bit ? PPC::X4 : PPC::R4;
-        if (!IsPCREL && !IsTLSTPRelMI)
+        if (!IsPCREL && !IsTLSTPRelMI &&
+            !(MI.getOpcode() == PPC::TLSLDAIX8 ||
+              MI.getOpcode() == PPC::TLSLDAIX))
           InReg = MI.getOperand(1).getReg();
         DebugLoc DL = MI.getDebugLoc();
 
@@ -160,9 +162,9 @@ namespace {
         if (IsAIX) {
           if (MI.getOpcode() == PPC::TLSLDAIX8 ||
               MI.getOpcode() == PPC::TLSLDAIX) {
-            // For Local-Dynamic, need to swap the position of VarOffsetInst and
-            // MI, so that VarOffsetInst can use R/X4 to reduce register
-            // pressure.
+            // It is better to put TLSLDAIX node before LoadOffsetToc node,
+            // because LoadOffsetToc node can use clobbers r4/r5. Search for the
+            // first paired LoadOffsetToc node within the same BB.
             const PPCSubtarget &Subtarget =
                 MBB.getParent()->getSubtarget<PPCSubtarget>();
             bool IsLargeModel =
@@ -170,69 +172,94 @@ namespace {
             unsigned LDTocOp =
                 Is64Bit ? (IsLargeModel ? PPC::LDtocL : PPC::LDtoc)
                         : (IsLargeModel ? PPC::LWZtocL : PPC::LWZtoc);
-            assert(RegInfo.hasOneDef(MI.getOperand(1).getReg()) &&
-                   "TLSLDAIX expects single def of its operand.");
-            MachineInstr *VarOffsetInst =
-                RegInfo.getOneDef(MI.getOperand(1).getReg())->getParent();
-            assert(VarOffsetInst->getOpcode() == LDTocOp &&
-                   "Unexpected LDTocOp.");
-            if (IsLargeModel) {
-              // Get the ADDIS instruction when using large model.
-              assert(RegInfo.hasOneDef(VarOffsetInst->getOperand(2).getReg()) &&
-                     "LDTocOp expects single def of its operand.");
-              VarOffsetInst =
-                  RegInfo.getOneDef(VarOffsetInst->getOperand(2).getReg())
-                      ->getParent();
-              assert(VarOffsetInst->getOpcode() ==
-                         (Is64Bit ? PPC::ADDIStocHA8 : PPC::ADDIStocHA) &&
-                     "Unexpected ADDIStocHA.");
-              // FIXME: machine-scheduler could schedule ADDIStocHA ahead of
-              // GETtlsMODAIX, and still has to use extra register.
+            MachineBasicBlock::iterator Anchor = I;
+            if (!RegInfo.use_empty(OutReg)) {
+              std::set<MachineInstr *> Uses;
+              // Collect all instructions that use OutReg
+              for (MachineOperand &MO : RegInfo.use_operands(OutReg)) {
+                if (Uses.count(MO.getParent()))
+                  continue;
+                Uses.insert(MO.getParent());
+              }
+              // Find the first Add within current BB.
+              MachineBasicBlock::iterator UseIter = MBB.begin();
+              for (MachineBasicBlock::iterator AE = MBB.end(); UseIter != AE;
+                   ++UseIter)
+                if (Uses.count(&*UseIter))
+                  break;
+
+              if (UseIter != MBB.end()) {
+                // Get the instruction that defines the other used register
+                // operand of UseIter. The match pattern is that: UseIter has
+                // exactly one used-operand defined by LDTocOp
+                // (LDtocL/LDtoc/LWZtocL/LWZtoc).
+                MachineInstr *LoadOffsetToc = nullptr;
+                int MatchCount = 0;
+                for (MachineOperand &MO : UseIter->operands()) {
+                  if (MO.isReg() && MO.isUse()) {
+                    if (RegInfo.hasOneDef(MO.getReg())) {
+                      if (RegInfo.getOneDef(MO.getReg())
+                              ->getParent()
+                              ->getOpcode() == LDTocOp) {
+                        LoadOffsetToc =
+                            RegInfo.getOneDef(MO.getReg())->getParent();
+                        ++MatchCount;
+                      }
+                    } else {
+                      // FIXME: analyze this scenario if there is one.
+                      MatchCount = 0;
+                      break;
+                    }
+                  }
+                }
+                // Get the iterator.
+                if (MatchCount == 1 && LoadOffsetToc) {
+                  Anchor = MBB.begin();
+                  for (MachineBasicBlock::iterator AE = MBB.end(); Anchor != AE;
+                       ++Anchor)
+                    if (&*Anchor == LoadOffsetToc)
+                      break;
+
+                  if (Anchor == MBB.end())
+                    Anchor = I;
+                }
+              }
             }
+
+            // Generate instructions refer to the "_$TLSML" symbol
             Register ModuleHandleHReg;
             if (IsLargeModel) {
               ModuleHandleHReg = RegInfo.createVirtualRegister(GPRNoZero);
-              BuildMI(MBB, *VarOffsetInst, DL,
+              BuildMI(MBB, Anchor, DL,
                       TII->get(Is64Bit ? PPC::ADDIStocHA8 : PPC::ADDIStocHA),
                       ModuleHandleHReg)
                   .addReg(Subtarget.getTOCPointerRegister())
                   .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG);
             }
             Register MHReg = RegInfo.createVirtualRegister(GPRNoZero);
-            BuildMI(MBB, *VarOffsetInst, DL, TII->get(LDTocOp), MHReg)
+            BuildMI(MBB, Anchor, DL, TII->get(LDTocOp), MHReg)
                 .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG)
                 .addReg(IsLargeModel
                             ? ModuleHandleHReg
                             : Register(Subtarget.getTOCPointerRegister()));
             // The module handle is copied in r3.
-            BuildMI(MBB, *VarOffsetInst, DL, TII->get(TargetOpcode::COPY), GPR3)
+            BuildMI(MBB, Anchor, DL, TII->get(TargetOpcode::COPY), GPR3)
                 .addReg(MHReg);
             // The call to .__tls_get_mod.
-            BuildMI(MBB, *VarOffsetInst, DL, TII->get(Opc2), GPR3).addReg(GPR3);
-            // Copy VarOffset to R/X4
+            BuildMI(MBB, Anchor, DL, TII->get(Opc2), GPR3).addReg(GPR3);
+          } else if (!IsTLSTPRelMI) {
+            // The variable offset and region handle are copied in r4 and r3.
+            // The copies are followed by GETtlsADDR32AIX/GETtlsADDR64AIX.
             BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
                 .addReg(MI.getOperand(1).getReg());
-            BuildMI(MBB, I, DL, TII->get(Is64Bit ? PPC::ADD8 : PPC::ADD4), GPR3)
-                .addReg(GPR3)
-                .addReg(GPR4);
-          } else {
-            // For Global-Dynamic, the variable offset and region handle are
-            // copied in r4 and r3. The copies are followed by
-            // GETtlsADDR32AIX/GETtlsADDR64AIX.
-            if (!IsTLSTPRelMI) {
-              BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
-                  .addReg(MI.getOperand(1).getReg());
-              BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
-                  .addReg(MI.getOperand(2).getReg());
-              BuildMI(MBB, I, DL, TII->get(Opc2), GPR3)
-                  .addReg(GPR3)
-                  .addReg(GPR4);
-            } else
-              // The opcode of GETtlsTpointer32AIX does not change, because
-              // later this instruction will be expanded into a call to
-              // .__get_tpointer, which will return the thread pointer into r3.
-              BuildMI(MBB, I, DL, TII->get(Opc2), GPR3);
-          }
+            BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
+                .addReg(MI.getOperand(2).getReg());
+            BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3).addReg(GPR4);
+          } else
+            // The opcode of GETtlsTpointer32AIX does not change, because later
+            // this instruction will be expanded into a call to .__get_tpointer,
+            // which will return the thread pointer into r3.
+            BuildMI(MBB, I, DL, TII->get(Opc2), GPR3);
         } else {
           MachineInstr *Addi;
           if (IsPCREL) {
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
index ea1a2b8fe9ade7..84ddb83bef457e 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
@@ -160,8 +160,7 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL32-NEXT:    add 3, 3, 4
-; SMALL32-NEXT:    stfd 1, 0(3)
+; SMALL32-NEXT:    stfdx 1, 3, 4
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
 ; SMALL32-NEXT:    mtlr 0
@@ -172,14 +171,12 @@ define void @storesTIInit(double %Val) #0 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 6, L..C5 at l(4)
+; LARGE32-NEXT:    addis 6, L..C4 at u(2)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    mr 4, 6
-; LARGE32-NEXT:    add 3, 3, 4
-; LARGE32-NEXT:    stfd 1, 0(3)
+; LARGE32-NEXT:    lwz 4, L..C4 at l(6)
+; LARGE32-NEXT:    stfdx 1, 3, 4
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
 ; LARGE32-NEXT:    mtlr 0
@@ -193,8 +190,7 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL64-NEXT:    add 3, 3, 4
-; SMALL64-NEXT:    stfd 1, 0(3)
+; SMALL64-NEXT:    stfdx 1, 3, 4
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
 ; SMALL64-NEXT:    mtlr 0
@@ -210,8 +206,7 @@ define void @storesTIInit(double %Val) #0 {
 ; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    ld 4, L..C5 at l(6)
-; LARGE64-NEXT:    add 3, 3, 4
-; LARGE64-NEXT:    stfd 1, 0(3)
+; LARGE64-NEXT:    stfdx 1, 3, 4
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
 ; LARGE64-NEXT:    mtlr 0
@@ -461,10 +456,9 @@ define double @loadsTIInit() #1 {
 ; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL32-NEXT:    add 3, 3, 4
-; SMALL32-NEXT:    lwz 4, L..C8(2) # @GInit
-; SMALL32-NEXT:    lfd 0, 0(3)
-; SMALL32-NEXT:    lfd 1, 0(4)
+; SMALL32-NEXT:    lwz 5, L..C8(2) # @GInit
+; SMALL32-NEXT:    lfdx 0, 3, 4
+; SMALL32-NEXT:    lfd 1, 0(5)
 ; SMALL32-NEXT:    fadd 1, 0, 1
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
@@ -476,14 +470,12 @@ define double @loadsTIInit() #1 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 6, L..C5 at l(4)
+; LARGE32-NEXT:    addis 6, L..C4 at u(2)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    mr 4, 6
-; LARGE32-NEXT:    add 3, 3, 4
-; LARGE32-NEXT:    lfd 0, 0(3)
+; LARGE32-NEXT:    lwz 4, L..C4 at l(6)
+; LARGE32-NEXT:    lfdx 0, 3, 4
 ; LARGE32-NEXT:    addis 3, L..C8 at u(2)
 ; LARGE32-NEXT:    lwz 3, L..C8 at l(3)
 ; LARGE32-NEXT:    lfd 1, 0(3)
@@ -501,10 +493,9 @@ define double @loadsTIInit() #1 {
 ; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL64-NEXT:    add 3, 3, 4
-; SMALL64-NEXT:    ld 4, L..C8(2) # @GInit
-; SMALL64-NEXT:    lfd 0, 0(3)
-; SMALL64-NEXT:    lfd 1, 0(4)
+; SMALL64-NEXT:    ld 5, L..C8(2) # @GInit
+; SMALL64-NEXT:    lfdx 0, 3, 4
+; SMALL64-NEXT:    lfd 1, 0(5)
 ; SMALL64-NEXT:    fadd 1, 0, 1
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -521,10 +512,9 @@ define double @loadsTIInit() #1 {
 ; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    ld 4, L..C5 at l(6)
-; LARGE64-NEXT:    add 3, 3, 4
-; LARGE64-NEXT:    addis 4, L..C8 at u(2)
-; LARGE64-NEXT:    lfd 0, 0(3)
-; LARGE64-NEXT:    ld 3, L..C8 at l(4)
+; LARGE64-NEXT:    addis 5, L..C8 at u(2)
+; LARGE64-NEXT:    lfdx 0, 3, 4
+; LARGE64-NEXT:    ld 3, L..C8 at l(5)
 ; LARGE64-NEXT:    lfd 1, 0(3)
 ; LARGE64-NEXT:    fadd 1, 0, 1
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -664,10 +654,10 @@ entry:
 ; LARGE32-LABEL:  L..C3:
 ; LARGE32-NEXT:   .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL:  L..C4:
+; LARGE32-NEXT:   .tc TIInit[TE],TIInit[TL]@ld
+; LARGE32-LABEL:  L..C5:
 ; LARGE32-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE32-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
-; LARGE32-LABEL:  L..C5:
-; LARGE32-NEXT:   .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE32-LABEL:  L..C6:
 ; LARGE32-NEXT:   .tc .TWInit[TE],TWInit[TL]@m
 ; LARGE32-LABEL:  L..C7:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
index e1cfb6fd767cbf..3001a915da4851 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
@@ -168,8 +168,7 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL32-NEXT:    lwz 3, L..C4(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2)
-; SMALL32-NEXT:    add 3, 3, 4
-; SMALL32-NEXT:    stw 6, 0(3)
+; SMALL32-NEXT:    stwx 6, 3, 4
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
 ; SMALL32-NEXT:    mtlr 0
@@ -181,14 +180,12 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 6, 3
-; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 7, L..C5 at l(4)
+; LARGE32-NEXT:    addis 7, L..C4 at u(2)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    mr 4, 7
-; LARGE32-NEXT:    add 3, 3, 4
-; LARGE32-NEXT:    stw 6, 0(3)
+; LARGE32-NEXT:    lwz 4, L..C4 at l(7)
+; LARGE32-NEXT:    stwx 6, 3, 4
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
 ; LARGE32-NEXT:    mtlr 0
@@ -203,8 +200,7 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL64-NEXT:    ld 3, L..C4(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2)
-; SMALL64-NEXT:    add 3, 3, 4
-; SMALL64-NEXT:    stw 6, 0(3)
+; SMALL64-NEXT:    stwx 6, 3, 4
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
 ; SMALL64-NEXT:    mtlr 0
@@ -221,8 +217,7 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    ld 4, L..C5 at l(7)
-; LARGE64-NEXT:    add 3, 3, 4
-; LARGE64-NEXT:    stw 6, 0(3)
+; LARGE64-NEXT:    stwx 6, 3, 4
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
 ; LARGE64-NEXT:    mtlr 0
@@ -476,10 +471,9 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL32-NEXT:    lwz 3, L..C4(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2)
-; SMALL32-NEXT:    add 3, 3, 4
-; SMALL32-NEXT:    lwz 4, L..C8(2)
-; SMALL32-NEXT:    lwz 3, 0(3)
-; SMALL32-NEXT:    lwz 4, 0(4)
+; SMALL32-NEXT:    lwz 5, L..C8(2)
+; SMALL32-NEXT:    lwzx 3, 3, 4
+; SMALL32-NEXT:    lwz 4, 0(5)
 ; SMALL32-NEXT:    add 3, 4, 3
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
@@ -491,14 +485,12 @@ define i32 @loadsTIUninit() #1 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    addis 3, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C4 at l(3)
-; LARGE32-NEXT:    addis 4, L..C5 at u(2)
-; LARGE32-NEXT:    lwz 6, L..C5 at l(4)
+; LARGE32-NEXT:    addis 6, L..C4 at u(2)
+; LARGE32-NEXT:    addis 3, L..C5 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C5 at l(3)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    mr 4, 6
-; LARGE32-NEXT:    add 3, 3, 4
-; LARGE32-NEXT:    lwz 3, 0(3)
+; LARGE32-NEXT:    lwz 4, L..C4 at l(6)
+; LARGE32-NEXT:    lwzx 3, 3, 4
 ; LARGE32-NEXT:    addis 4, L..C8 at u(2)
 ; LARGE32-NEXT:    lwz 4, L..C8 at l(4)
 ; LARGE32-NEXT:    lwz 4, 0(4)
@@ -516,10 +508,9 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL64-NEXT:    ld 3, L..C4(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2)
-; SMALL64-NEXT:    add 3, 3, 4
-; SMALL64-NEXT:    ld 4, L..C8(2)
-; SMALL64-NEXT:    lwz 3, 0(3)
-; SMALL64-NEXT:    lwz 4, 0(4)
+; SMALL64-NEXT:    ld 5, L..C8(2)
+; SMALL64-NEXT:    lwzx 3, 3, 4
+; SMALL64-NEXT:    lwz 4, 0(5)
 ; SMALL64-NEXT:    add 3, 4, 3
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -536,10 +527,9 @@ define i32 @loadsTIUninit() #1 {
 ; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    ld 4, L..C5 at l(6)
-; LARGE64-NEXT:    add 3, 3, 4
-; LARGE64-NEXT:    addis 4, L..C8 at u(2)
-; LARGE64-NEXT:    ld 4, L..C8 at l(4)
-; LARGE64-NEXT:    lwz 3, 0(3)
+; LARGE64-NEXT:    addis 5, L..C8 at u(2)
+; LARGE64-NEXT:    lwzx 3, 3, 4
+; LARGE64-NEXT:    ld 4, L..C8 at l(5)
 ; LARGE64-NEXT:    lwz 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -679,10 +669,10 @@ entry:
 ; LARGE32-LABEL: L..C3:
 ; LARGE32-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL: L..C4:
+; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
+; LARGE32-LABEL: L..C5:
 ; LARGE32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
-; LARGE32-LABEL: L..C5:
-; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE32-LABEL: L..C6:
 ; LARGE32-NEXT:  .tc .TWUninit[TE],TWUninit[TL]@m
 ; LARGE32-LABEL: L..C7:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
index 2e18d6c9bfd7e5..136c62ff509f9c 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
@@ -103,9 +103,8 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C3(2)
-; SMALL32-NEXT:    add 3, 3, 4
+; SMALL32-NEXT:    stwux 7, 3, 4
 ; SMALL32-NEXT:    stw 6, 4(3)
-; SMALL32-NEXT:    stw 7, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
 ; SMALL32-NEXT:    mtlr 0
@@ -118,15 +117,13 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 7, 3
 ; LARGE32-NEXT:    mr 6, 4
-; LARGE32-NEXT:    addis 3, L..C2 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C2 at l(3)
-; LARGE32-NEXT:    addis 4, L..C3 at u(2)
-; LARGE32-NEXT:    lwz 8, L..C3 at l(4)
+; LARGE32-NEXT:    addis 8, L..C2 at u(2)
+; LARGE32-NEXT:    addis 3, L..C3 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    mr 4, 8
-; LARGE32-NEXT:    add 3, 3, 4
+; LARGE32-NEXT:    lwz 4, L..C2 at l(8)
+; LARGE32-NEXT:    stwux 7, 3, 4
 ; LARGE32-NEXT:    stw 6, 4(3)
-; LARGE32-NEXT:    stw 7, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
 ; LARGE32-NEXT:    mtlr 0
@@ -141,8 +138,7 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C3(2)
-; SMALL64-NEXT:    add 3, 3, 4
-; SMALL64-NEXT:    std 6, 0(3)
+; SMALL64-NEXT:    stdx 6, 3, 4
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
 ; SMALL64-NEXT:    mtlr 0
@@ -159,8 +155,7 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; LARGE64-NEXT:    ld 3, L..C2 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    ld 4, L..C3 at l(7)
-; LARGE64-NEXT:    add 3, 3, 4
-; LARGE64-NEXT:    std 6, 0(3)
+; LARGE64-NEXT:    stdx 6, 3, 4
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
 ; LARGE64-NEXT:    mtlr 0
@@ -182,9 +177,8 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C4(2)
-; SMALL32-NEXT:    add 3, 3, 4
+; SMALL32-NEXT:    stwux 7, 3, 4
 ; SMALL32-NEXT:    stw 6, 4(3)
-; SMALL32-NEXT:    stw 7, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
 ; SMALL32-NEXT:    mtlr 0
@@ -197,15 +191,13 @@ define void @storesTIInit(i64 %Val) #0 {
 ; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 7, 3
 ; LARGE32-NEXT:    mr 6, 4
-; LARGE32-NEXT:    addis 3, L..C2 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C2 at l(3)
-; LARGE32-NEXT:    addis 4, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 8, L..C4 at l(4)
+; LARGE32-NEXT:    addis 8, L..C4 at u(2)
+; LARGE32-NEXT:    addis 3, L..C3 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    mr 4, 8
-; LARGE32-NEXT:    add 3, 3, 4
+; LARGE32-NEXT:    lwz 4, L..C4 at l(8)
+; LARGE32-NEXT:    stwux 7, 3, 4
 ; LARGE32-NEXT:    stw 6, 4(3)
-; LARGE32-NEXT:    stw 7, 0(3)
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
 ; LARGE32-NEXT:    mtlr 0
@@ -220,8 +212,7 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C4(2)
-; SMALL64-NEXT:    add 3, 3, 4
-; SMALL64-NEXT:    std 6, 0(3)
+; SMALL64-NEXT:    stdx 6, 3, 4
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
 ; SMALL64-NEXT:    mtlr 0
@@ -238,8 +229,7 @@ define void @storesTIInit(i64 %Val) #0 {
 ; LARGE64-NEXT:    ld 3, L..C2 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    ld 4, L..C4 at l(7)
-; LARGE64-NEXT:    add 3, 3, 4
-; LARGE64-NEXT:    std 6, 0(3)
+; LARGE64-NEXT:    stdx 6, 3, 4
 ; LARGE64-NEXT:    addi 1, 1, 48
 ; LARGE64-NEXT:    ld 0, 16(1)
 ; LARGE64-NEXT:    mtlr 0
@@ -333,7 +323,7 @@ define i64 @loadsTGInit() #1 {
 ; SMALL32-NEXT:    lwz 4, L..C1(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C7(2) # @GInit
+; SMALL32-NEXT:    lwz 4, L..C7(2)
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -376,7 +366,7 @@ define i64 @loadsTGInit() #1 {
 ; SMALL64-NEXT:    ld 4, L..C1(2)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C7(2) # @GInit
+; SMALL64-NEXT:    ld 4, L..C7(2)
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -421,14 +411,13 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C3(2)
-; SMALL32-NEXT:    add 3, 3, 4
-; SMALL32-NEXT:    lwz 4, L..C7(2) # @GInit
-; SMALL32-NEXT:    lwz 5, 4(3)
-; SMALL32-NEXT:    lwz 6, 4(4)
-; SMALL32-NEXT:    lwz 3, 0(3)
-; SMALL32-NEXT:    lwz 7, 0(4)
-; SMALL32-NEXT:    addc 4, 6, 5
-; SMALL32-NEXT:    adde 3, 7, 3
+; SMALL32-NEXT:    lwz 5, L..C7(2)
+; SMALL32-NEXT:    lwzux 6, 3, 4
+; SMALL32-NEXT:    lwz 4, 4(5)
+; SMALL32-NEXT:    lwz 3, 4(3)
+; SMALL32-NEXT:    lwz 5, 0(5)
+; SMALL32-NEXT:    addc 4, 4, 3
+; SMALL32-NEXT:    adde 3, 5, 6
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
 ; SMALL32-NEXT:    mtlr 0
@@ -439,21 +428,19 @@ define i64 @loadsTIUninit() #1 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    addis 3, L..C2 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C2 at l(3)
-; LARGE32-NEXT:    addis 4, L..C3 at u(2)
-; LARGE32-NEXT:    lwz 6, L..C3 at l(4)
+; LARGE32-NEXT:    addis 6, L..C2 at u(2)
+; LARGE32-NEXT:    addis 3, L..C3 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    mr 4, 6
-; LARGE32-NEXT:    add 3, 3, 4
-; LARGE32-NEXT:    lwz 4, 4(3)
-; LARGE32-NEXT:    lwz 3, 0(3)
-; LARGE32-NEXT:    addis 5, L..C7 at u(2)
-; LARGE32-NEXT:    lwz 5, L..C7 at l(5)
-; LARGE32-NEXT:    lwz 6, 4(5)
-; LARGE32-NEXT:    lwz 5, 0(5)
-; LARGE32-NEXT:    addc 4, 6, 4
-; LARGE32-NEXT:    adde 3, 5, 3
+; LARGE32-NEXT:    lwz 4, L..C2 at l(6)
+; LARGE32-NEXT:    lwzux 5, 3, 4
+; LARGE32-NEXT:    lwz 3, 4(3)
+; LARGE32-NEXT:    addis 4, L..C7 at u(2)
+; LARGE32-NEXT:    lwz 4, L..C7 at l(4)
+; LARGE32-NEXT:    lwz 6, 4(4)
+; LARGE32-NEXT:    lwz 7, 0(4)
+; LARGE32-NEXT:    addc 4, 6, 3
+; LARGE32-NEXT:    adde 3, 7, 5
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
 ; LARGE32-NEXT:    mtlr 0
@@ -467,10 +454,9 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C3(2)
-; SMALL64-NEXT:    add 3, 3, 4
-; SMALL64-NEXT:    ld 4, L..C7(2) # @GInit
-; SMALL64-NEXT:    ld 3, 0(3)
-; SMALL64-NEXT:    ld 4, 0(4)
+; SMALL64-NEXT:    ld 5, L..C7(2)
+; SMALL64-NEXT:    ldx 3, 3, 4
+; SMALL64-NEXT:    ld 4, 0(5)
 ; SMALL64-NEXT:    add 3, 4, 3
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -487,10 +473,9 @@ define i64 @loadsTIUninit() #1 {
 ; LARGE64-NEXT:    ld 3, L..C2 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    ld 4, L..C3 at l(6)
-; LARGE64-NEXT:    add 3, 3, 4
-; LARGE64-NEXT:    addis 4, L..C7 at u(2)
-; LARGE64-NEXT:    ld 4, L..C7 at l(4)
-; LARGE64-NEXT:    ld 3, 0(3)
+; LARGE64-NEXT:    addis 5, L..C7 at u(2)
+; LARGE64-NEXT:    ldx 3, 3, 4
+; LARGE64-NEXT:    ld 4, L..C7 at l(5)
 ; LARGE64-NEXT:    ld 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -514,14 +499,13 @@ define i64 @loadsTIInit() #1 {
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C4(2)
-; SMALL32-NEXT:    add 3, 3, 4
-; SMALL32-NEXT:    lwz 4, L..C7(2) # @GInit
-; SMALL32-NEXT:    lwz 5, 4(3)
-; SMALL32-NEXT:    lwz 6, 4(4)
-; SMALL32-NEXT:    lwz 3, 0(3)
-; SMALL32-NEXT:    lwz 7, 0(4)
-; SMALL32-NEXT:    addc 4, 6, 5
-; SMALL32-NEXT:    adde 3, 7, 3
+; SMALL32-NEXT:    lwz 5, L..C7(2)
+; SMALL32-NEXT:    lwzux 6, 3, 4
+; SMALL32-NEXT:    lwz 4, 4(5)
+; SMALL32-NEXT:    lwz 3, 4(3)
+; SMALL32-NEXT:    lwz 5, 0(5)
+; SMALL32-NEXT:    addc 4, 4, 3
+; SMALL32-NEXT:    adde 3, 5, 6
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
 ; SMALL32-NEXT:    mtlr 0
@@ -532,21 +516,19 @@ define i64 @loadsTIInit() #1 {
 ; LARGE32-NEXT:    mflr 0
 ; LARGE32-NEXT:    stwu 1, -32(1)
 ; LARGE32-NEXT:    stw 0, 40(1)
-; LARGE32-NEXT:    addis 3, L..C2 at u(2)
-; LARGE32-NEXT:    lwz 3, L..C2 at l(3)
-; LARGE32-NEXT:    addis 4, L..C4 at u(2)
-; LARGE32-NEXT:    lwz 6, L..C4 at l(4)
+; LARGE32-NEXT:    addis 6, L..C4 at u(2)
+; LARGE32-NEXT:    addis 3, L..C3 at u(2)
+; LARGE32-NEXT:    lwz 3, L..C3 at l(3)
 ; LARGE32-NEXT:    bla .__tls_get_mod[PR]
-; LARGE32-NEXT:    mr 4, 6
-; LARGE32-NEXT:    add 3, 3, 4
-; LARGE32-NEXT:    lwz 4, 4(3)
-; LARGE32-NEXT:    lwz 3, 0(3)
-; LARGE32-NEXT:    addis 5, L..C7 at u(2)
-; LARGE32-NEXT:    lwz 5, L..C7 at l(5)
-; LARGE32-NEXT:    lwz 6, 4(5)
-; LARGE32-NEXT:    lwz 5, 0(5)
-; LARGE32-NEXT:    addc 4, 6, 4
-; LARGE32-NEXT:    adde 3, 5, 3
+; LARGE32-NEXT:    lwz 4, L..C4 at l(6)
+; LARGE32-NEXT:    lwzux 5, 3, 4
+; LARGE32-NEXT:    lwz 3, 4(3)
+; LARGE32-NEXT:    addis 4, L..C7 at u(2)
+; LARGE32-NEXT:    lwz 4, L..C7 at l(4)
+; LARGE32-NEXT:    lwz 6, 4(4)
+; LARGE32-NEXT:    lwz 7, 0(4)
+; LARGE32-NEXT:    addc 4, 6, 3
+; LARGE32-NEXT:    adde 3, 7, 5
 ; LARGE32-NEXT:    addi 1, 1, 32
 ; LARGE32-NEXT:    lwz 0, 8(1)
 ; LARGE32-NEXT:    mtlr 0
@@ -560,10 +542,9 @@ define i64 @loadsTIInit() #1 {
 ; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C4(2)
-; SMALL64-NEXT:    add 3, 3, 4
-; SMALL64-NEXT:    ld 4, L..C7(2) # @GInit
-; SMALL64-NEXT:    ld 3, 0(3)
-; SMALL64-NEXT:    ld 4, 0(4)
+; SMALL64-NEXT:    ld 5, L..C7(2)
+; SMALL64-NEXT:    ldx 3, 3, 4
+; SMALL64-NEXT:    ld 4, 0(5)
 ; SMALL64-NEXT:    add 3, 4, 3
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -580,10 +561,9 @@ define i64 @loadsTIInit() #1 {
 ; LARGE64-NEXT:    ld 3, L..C2 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
 ; LARGE64-NEXT:    ld 4, L..C4 at l(6)
-; LARGE64-NEXT:    add 3, 3, 4
-; LARGE64-NEXT:    addis 4, L..C7 at u(2)
-; LARGE64-NEXT:    ld 4, L..C7 at l(4)
-; LARGE64-NEXT:    ld 3, 0(3)
+; LARGE64-NEXT:    addis 5, L..C7 at u(2)
+; LARGE64-NEXT:    ldx 3, 3, 4
+; LARGE64-NEXT:    ld 4, L..C7 at l(5)
 ; LARGE64-NEXT:    ld 4, 0(4)
 ; LARGE64-NEXT:    add 3, 4, 3
 ; LARGE64-NEXT:    addi 1, 1, 48
@@ -607,7 +587,7 @@ define i64 @loadsTWInit() #1 {
 ; SMALL32-NEXT:    lwz 4, L..C6(2)
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C7(2) # @GInit
+; SMALL32-NEXT:    lwz 4, L..C7(2)
 ; SMALL32-NEXT:    lwz 5, 4(3)
 ; SMALL32-NEXT:    lwz 6, 4(4)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -650,7 +630,7 @@ define i64 @loadsTWInit() #1 {
 ; SMALL64-NEXT:    ld 4, L..C6(2)
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C7(2) # @GInit
+; SMALL64-NEXT:    ld 4, L..C7(2)
 ; SMALL64-NEXT:    ld 3, 0(3)
 ; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
@@ -723,10 +703,10 @@ entry:
 ; LARGE32-LABEL:  L..C1:
 ; LARGE32-NEXT:  .tc TGInit[TE],TGInit[TL]@gd
 ; LARGE32-LABEL:  L..C2:
+; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
+; LARGE32-LABEL:  L..C3:
 ; LARGE32-NEXT:  .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
 ; LARGE32-NEXT:  .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
-; LARGE32-LABEL:  L..C3:
-; LARGE32-NEXT:  .tc TIUninit[TE],TIUninit[UL]@ld
 ; LARGE32-LABEL:  L..C4:
 ; LARGE32-NEXT:  .tc TIInit[TE],TIInit[TL]@ld
 ; LARGE32-LABEL:  L..C5:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
index 0513f8de616201..f7cd73ae609b35 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
@@ -6,6 +6,10 @@
 ; RUN:     --code-model=small < %s | FileCheck %s --check-prefixes=SMALL32,SMALL
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \
 ; RUN:     --code-model=large < %s | FileCheck %s --check-prefixes=LARGE32,LARGE
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
+; RUN:     --code-model=small -O0 < %s | FileCheck %s --check-prefixes=WITHDUP
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
+; RUN:     --code-model=small -O1 < %s | FileCheck %s --check-prefixes=NODUP
 
 @TGInit = thread_local(localdynamic) global i32 42, align 4
 @TGUninit = thread_local(localdynamic) global i32 0, align 4
@@ -13,6 +17,8 @@
 @TIUninit = internal thread_local(localdynamic) global i32 0, align 4
 @TWInit = weak thread_local(localdynamic) global i32 42, align 4
 @TWUninit = weak thread_local(localdynamic) global i32 0, align 4
+ at x = thread_local(localdynamic) global i32 42, align 4
+ at y = thread_local(localdynamic) global i32 42, align 4
 
 define i32 @loadTGInit() {
 ; SMALL-LABEL:  loadTGInit:
@@ -21,19 +27,18 @@ define i32 @loadTGInit() {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
-; SMALL:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        lwz [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+; SMALL:        lwzx [[TGInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  loadTGInit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        lwz [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+; LARGE:        lwzx [[TGInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGInit)
   %1 = load i32, ptr %0, align 4
@@ -47,19 +52,18 @@ define void @storeTGInit(i32 noundef signext %i) {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]](2)
-; SMALL:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        stw [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+; SMALL:        stwx [[TGInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  storeTGInit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TGInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        stw [[TGInitValR:[0-9]+]], 0([[TGInitAddrR]])
+; LARGE:        stwx [[TGInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGInit)
   store i32 %i, ptr %0, align 4
@@ -73,19 +77,18 @@ define i32 @loadTGUninit() {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
-; SMALL:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        lwz [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+; SMALL:        lwzx [[TGInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  loadTGUninit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        lwz [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+; LARGE:        lwzx [[TGUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGUninit)
   %1 = load i32, ptr %0, align 4
@@ -99,19 +102,18 @@ define void @storeTGUninit(i32 noundef signext %i) {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]](2)
-; SMALL:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        stw [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+; SMALL:        stwx [[TGUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  storeTGUninit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TGUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TGUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        stw [[TGUninitValR:[0-9]+]], 0([[TGUninitAddrR]])
+; LARGE:        stwx [[TGUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TGUninit)
   store i32 %i, ptr %0, align 4
@@ -125,19 +127,18 @@ define i32 @loadTIInit() {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
-; SMALL:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        lwz [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+; SMALL:        lwzx [[TIInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  loadTIInit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        lwz [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+; LARGE:        lwzx [[TIInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIInit)
   %1 = load i32, ptr %0, align 4
@@ -151,19 +152,18 @@ define void @storeTIInit(i32 noundef signext %i) {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]](2)
-; SMALL:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        stw [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+; SMALL:        stwx [[TIInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  storeTIInit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TIInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        stw [[TIInitValR:[0-9]+]], 0([[TIInitAddrR]])
+; LARGE:        stwx [[TIInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIInit)
   store i32 %i, ptr %0, align 4
@@ -177,19 +177,18 @@ define i32 @loadTIUninit() {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
-; SMALL:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        lwz [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+; SMALL:        lwzx [[TIUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  loadTIUninit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        lwz [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+; LARGE:        lwzx [[TIUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIUninit)
   %1 = load i32, ptr %0, align 4
@@ -203,19 +202,18 @@ define void @storeTIUninit(i32 noundef signext %i) {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]](2)
-; SMALL:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        stw [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+; SMALL:        stwx [[TIUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  storeTIUninit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TIUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TIUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        stw [[TIUninitValR:[0-9]+]], 0([[TIUninitAddrR]])
+; LARGE:        stwx [[TIUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TIUninit)
   store i32 %i, ptr %0, align 4
@@ -229,19 +227,18 @@ define i32 @loadTWInit() {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
-; SMALL:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        lwz [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+; SMALL:        lwzx [[TWInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  loadTWInit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        lwz [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+; LARGE:        lwzx [[TWInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWInit)
   %1 = load i32, ptr %0, align 4
@@ -255,19 +252,18 @@ define void @storeTWInit(i32 noundef signext %i) {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]](2)
-; SMALL:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        stw [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+; SMALL:        stwx [[TWInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  storeTWInit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWInitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TWInitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        stw [[TWInitValR:[0-9]+]], 0([[TWInitAddrR]])
+; LARGE:        stwx [[TWInitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWInit)
   store i32 %i, ptr %0, align 4
@@ -281,19 +277,18 @@ define i32 @loadTWUninit() {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
-; SMALL:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        lwz [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+; SMALL:        lwzx [[TWUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  loadTWUninit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        lwz [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+; LARGE:        lwzx [[TWUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWUninit)
   %1 = load i32, ptr %0, align 4
@@ -307,25 +302,53 @@ define void @storeTWUninit(i32 noundef signext %i) {
 ; SMALL:        bla .__tls_get_mod[PR]
 ; SMALL64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
 ; SMALL32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]](2)
-; SMALL:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; SMALL:        stw [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+; SMALL:        stwx [[TWUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 ;
 ; LARGE-LABEL:  storeTWUninit:
-; LARGE:        addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE64:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
+; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
+; LARGE32:      addis [[ModuleHandleHR:[0-9]+]], [[ModuleHandleL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE32:      lwz [[ModuleHandleR:3]], [[ModuleHandleL]]@l([[ModuleHandleHR]])
 ; LARGE:        bla .__tls_get_mod[PR]
-; LARGE:        addis [[OffsetHR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@u(2)
 ; LARGE64:      ld [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
 ; LARGE32:      lwz [[OffsetR:[0-9]+]], [[TWUninitL:L..C[0-9]+]]@l([[OffsetHR]])
-; LARGE:        add [[TWUninitAddrR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
-; LARGE:        stw [[TWUninitValR:[0-9]+]], 0([[TWUninitAddrR]])
+; LARGE:        stwx [[TWUninitValR:[0-9]+]], [[ModuleHandleR]], [[OffsetR]]
 entry:
   %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @TWUninit)
   store i32 %i, ptr %0, align 4
   ret void
 }
 
+define i32 @DedupTlsGetMod() #0 {
+; WITHDUP-LABEL:  DedupTlsGetMod:
+; WITHDUP:        ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; WITHDUP-NEXT:   bla .__tls_get_mod[PR]
+; WITHDUP-NEXT:   ld [[OffsetXR:[0-9]+]], [[X:L..C[0-9]+]](2)
+; WITHDUP:        ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; WITHDUP-NEXT:   bla .__tls_get_mod[PR]
+; WITHDUP-NEXT:   ld [[OffsetYR:[0-9]+]], [[Y:L..C[0-9]+]](2)
+; WITHDUP-LABEL:  L..DedupTlsGetMod0:
+;
+; NODUP-LABEL:  DedupTlsGetMod:
+; NODUP:        ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
+; NODUP-NEXT:   bla .__tls_get_mod[PR]
+; NODUP-NEXT:   ld [[OffsetXR:[0-9]+]], [[X:L..C[0-9]+]](2)
+; NODUP-NEXT:   ld [[OffsetYR:[0-9]+]], [[Y:L..C[0-9]+]](2)
+; NODUP-NEXT:   lwzx [[XValR:[0-9]+]], [[ModuleHandleR]], [[OffsetXR]]
+; NODUP-NEXT:   lwzx [[YValR:[0-9]+]], [[ModuleHandleR]], [[OffsetYR]]
+; NODUP-LABEL:  L..DedupTlsGetMod0:
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, ptr %retval, align 4
+  %0 = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x)
+  %1 = load i32, ptr %0, align 4
+  %2 = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @y)
+  %3 = load i32, ptr %2, align 4
+  %add = add nsw i32 %1, %3
+  ret i32 %add
+}
+
 ; SMALL:          .extern .__tls_get_mod[PR]
 ; LARGE:          .extern .__tls_get_mod[PR]
 
@@ -345,11 +368,18 @@ entry:
 ; SMALL:        [[TWUninitL]]:
 ; SMALL-NEXT:   .tc TWUninit[TC],TWUninit[TL]@ld
 
-; LARGE:        [[ModuleHandleL]]:
-; LARGE-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
-; LARGE-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
-; LARGE:        [[TGInitL]]:
-; LARGE-NEXT:   .tc TGInit[TE],TGInit[TL]@ld
+; LARGE64:        [[ModuleHandleL]]:
+; LARGE64-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE64-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+; LARGE64:        [[TGInitL]]:
+; LARGE64-NEXT:   .tc TGInit[TE],TGInit[TL]@ld
+;
+; LARGE32:        [[TGInitL]]:
+; LARGE32-NEXT:   .tc TGInit[TE],TGInit[TL]@ld
+; LARGE32:        [[ModuleHandleL]]:
+; LARGE32-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
+; LARGE32-NEXT:   .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
+;
 ; LARGE:        [[TGUninitL]]:
 ; LARGE-NEXT:   .tc TGUninit[TE],TGUninit[TL]@ld
 ; LARGE:        [[TIInitL]]:

>From 377b787e41b893e4220813a1de439b77381b4cba Mon Sep 17 00:00:00 2001
From: tingwang <tingwang at tingwangs-MacBook-Pro.local>
Date: Mon, 25 Sep 2023 08:52:45 +0800
Subject: [PATCH 09/31] (1) Use GPR3 directly in LoadModuleHandle. (2) Update
 comments.

---
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td      | 3 +--
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       | 3 +--
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 8 ++------
 3 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index e4a6e19dabd348..b51af57b796f81 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1602,8 +1602,7 @@ def TLSGDAIX8 :
                      "#TLSGDAIX8",
                      [(set i64:$rD,
                        (PPCTlsgdAIX i64:$offset, i64:$handle))]>;
-// This pseudo is expanded to one copy to put the module handle in R3, the call
-// to GETtlsMOD64AIX, and then adds the variable offset to the output from the call.
+// This pseudo is expanded to load module-handle in X3, and the call to GETtlsMOD64AIX.
 def TLSLDAIX8 : PPCEmitTimePseudo<(outs g8rc:$rD), (ins),
                      "#TLSLDAIX8", [(set i64:$rD, (PPCTlsldAIX))]>;
 // Combined op for ADDItlsldL and GETtlsADDR, late expanded.  X3 and LR8
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 9fbdd5728d12ff..26e562ea3ba046 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -3300,8 +3300,7 @@ def TLSGDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$offset, gprc:$handl
                           "#TLSGDAIX",
                           [(set i32:$rD,
                             (PPCTlsgdAIX i32:$offset, i32:$handle))]>;
-// This pseudo is expanded to one copy to put the module handle in R3, then call
-// GETtlsMOD32AIX, and then add variable offset to the output from the call.
+// This pseudo is expanded to load module-handle in R3, and the call to GETtlsMOD32AIX.
 def TLSLDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins),
                           "#TLSLDAIX", [(set i32:$rD, (PPCTlsldAIX))]>;
 // LR is a true define, while the rest of the Defs are clobbers.  R3 is
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index ba3f9f04697d79..71825dca77bcf0 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -226,7 +226,7 @@ namespace {
               }
             }
 
-            // Generate instructions refer to the "_$TLSML" symbol
+            // Generate instructions to load module-handle.
             Register ModuleHandleHReg;
             if (IsLargeModel) {
               ModuleHandleHReg = RegInfo.createVirtualRegister(GPRNoZero);
@@ -236,15 +236,11 @@ namespace {
                   .addReg(Subtarget.getTOCPointerRegister())
                   .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG);
             }
-            Register MHReg = RegInfo.createVirtualRegister(GPRNoZero);
-            BuildMI(MBB, Anchor, DL, TII->get(LDTocOp), MHReg)
+            BuildMI(MBB, Anchor, DL, TII->get(LDTocOp), GPR3)
                 .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG)
                 .addReg(IsLargeModel
                             ? ModuleHandleHReg
                             : Register(Subtarget.getTOCPointerRegister()));
-            // The module handle is copied in r3.
-            BuildMI(MBB, Anchor, DL, TII->get(TargetOpcode::COPY), GPR3)
-                .addReg(MHReg);
             // The call to .__tls_get_mod.
             BuildMI(MBB, Anchor, DL, TII->get(Opc2), GPR3).addReg(GPR3);
           } else if (!IsTLSTPRelMI) {

>From 05ef3f48b33eb84333308ed85f54d1ee15464efa Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Mon, 25 Sep 2023 09:43:41 -0400
Subject: [PATCH 10/31] Fix obj mode var access.

---
 llvm/lib/MC/XCOFFObjectWriter.cpp             |   3 +-
 .../PowerPC/aix-tls-ld-xcoff-reloc-large.ll   | 328 ++++++++++++++++++
 .../PowerPC/aix-tls-xcoff-reloc-large.ll      | 174 +++++-----
 .../CodeGen/PowerPC/aix-tls-xcoff-reloc.ll    | 117 ++++---
 4 files changed, 474 insertions(+), 148 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-tls-ld-xcoff-reloc-large.ll

diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index 8809af2e5e0c18..d46bbaf7576571 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -715,7 +715,8 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
   if (Type == XCOFF::RelocationType::R_POS ||
       Type == XCOFF::RelocationType::R_TLS ||
       Type == XCOFF::RelocationType::R_TLS_LE ||
-      Type == XCOFF::RelocationType::R_TLS_IE)
+      Type == XCOFF::RelocationType::R_TLS_IE ||
+      Type == XCOFF::RelocationType::R_TLS_LD)
     // The FixedValue should be symbol's virtual address in this object file
     // plus any constant value that we might get.
     FixedValue = getVirtualAddress(SymA, SymASec) + Target.getConstant();
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-ld-xcoff-reloc-large.ll b/llvm/test/CodeGen/PowerPC/aix-tls-ld-xcoff-reloc-large.ll
new file mode 100644
index 00000000000000..ecb05a5821bc11
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-ld-xcoff-reloc-large.ll
@@ -0,0 +1,328 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
+; RUN:     -xcoff-traceback-table=false --code-model=large -filetype=obj -o %t.o < %s
+; RUN: llvm-readobj --relocs --expand-relocs %t.o | FileCheck -D#NFA=2 --check-prefix=RELOC %s
+; RUN: llvm-readobj --syms %t.o | FileCheck -D#NFA=2 --check-prefix=SYM %s
+; RUN: llvm-objdump -D -r --symbol-description %t.o | FileCheck -D#NFA=2 --check-prefix=DIS %s
+
+ at ThreadLocalVarInit = thread_local(localdynamic) global i64 1, align 8
+ at IThreadLocalVarUninit = internal thread_local(localdynamic) global i64 0, align 8
+ at IThreadLocalVarUninit2 = internal thread_local(localdynamic) global i64 0, align 8
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
+
+define void @storeITLUninit(i64 noundef %x) {
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit)
+  store i64 %x, ptr %0, align 8
+  ret void
+}
+
+define i64 @loadTLInit() {
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
+  %1 = load i64, ptr %0, align 8
+  ret i64 %1
+}
+
+define signext i64 @loadTLUninit() {
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit)
+  store i64 1, ptr %0, align 8
+  %1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit2)
+  %2 = load i64, ptr %1, align 8
+  %add = add nsw i64 %2, 1
+  ret i64 %add
+}
+
+; RELOC:      File: {{.*}}aix-tls-ld-xcoff-reloc-large.ll.tmp.o
+; RELOC-NEXT: Format: aix5coff64-rs6000
+; RELOC-NEXT: Arch: powerpc64
+; RELOC-NEXT: AddressSize: 64bit
+; RELOC-NEXT: Relocations [
+; RELOC:      Virtual Address: 0x12
+; RELOC-NEXT:      Symbol: _$TLSML ([[#NFA+19]])
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCU (0x30)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x16
+; RELOC-NEXT:      Symbol: IThreadLocalVarUninit ([[#NFA+21]])
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCU (0x30)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x1A
+; RELOC-NEXT:      Symbol: _$TLSML ([[#NFA+19]])
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCL (0x31)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x22
+; RELOC-NEXT:      Symbol: IThreadLocalVarUninit ([[#NFA+21]])
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCL (0x31)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x4E
+; RELOC-NEXT:      Symbol: _$TLSML ([[#NFA+19]])
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCU (0x30)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x52
+; RELOC-NEXT:      Symbol: ThreadLocalVarInit ([[#NFA+23]])
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCU (0x30)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x56
+; RELOC-NEXT:      Symbol: _$TLSML ([[#NFA+19]])
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCL (0x31)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x5E
+; RELOC-NEXT:      Symbol: ThreadLocalVarInit ([[#NFA+23]])
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCL (0x31)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x8E
+; RELOC-NEXT:      Symbol: _$TLSML ([[#NFA+19]])
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCU (0x30)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x92
+; RELOC-NEXT:      Symbol: IThreadLocalVarUninit ([[#NFA+21]])
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCU (0x30)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x96
+; RELOC-NEXT:      Symbol: _$TLSML ([[#NFA+19]])
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCL (0x31)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x9E
+; RELOC-NEXT:      Symbol: IThreadLocalVarUninit ([[#NFA+21]])
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCL (0x31)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0xAA
+; RELOC-NEXT:      Symbol: IThreadLocalVarUninit2 ([[#NFA+25]])
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCU (0x30)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0xAE
+; RELOC-NEXT:      Symbol: IThreadLocalVarUninit2 ([[#NFA+25]])
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 16
+; RELOC-NEXT:      Type: R_TOCL (0x31)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x110
+; RELOC-NEXT:      Symbol: _$TLSML ([[#NFA+19]])
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 64
+; RELOC-NEXT:      Type: R_TLSML (0x25)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x118
+; RELOC-NEXT:      Symbol: IThreadLocalVarUninit ([[#NFA+29]])
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 64
+; RELOC-NEXT:      Type: R_TLS_LD (0x22)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x120
+; RELOC-NEXT:      Symbol: ThreadLocalVarInit ([[#NFA+27]])
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 64
+; RELOC-NEXT:      Type: R_TLS_LD (0x22)
+; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x128
+; RELOC-NEXT:      Symbol: IThreadLocalVarUninit2 ([[#NFA+31]])
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 64
+; RELOC-NEXT:      Type: R_TLS_LD (0x22)
+; RELOC-NEXT:    }
+
+; SYM:      File: {{.*}}aix-tls-ld-xcoff-reloc-large.ll.tmp.o
+; SYM-NEXT: Format: aix5coff64-rs6000
+; SYM-NEXT: Arch: powerpc64
+; SYM-NEXT: AddressSize: 64bit
+; SYM-NEXT: Symbols [
+; SYM:    Index: [[#NFA+19]]
+; SYM-NEXT:    Name: _$TLSML
+; SYM-NEXT:    Value (RelocatableAddress): 0x110
+; SYM-NEXT:    Section: .data
+; SYM-NEXT:    Type: 0x0
+; SYM-NEXT:    StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT:    NumberOfAuxEntries: 1
+; SYM-NEXT:    CSECT Auxiliary Entry {
+; SYM-NEXT:      Index: [[#NFA+20]]
+; SYM-NEXT:      SectionLen: 8
+; SYM-NEXT:      ParameterHashIndex: 0x0
+; SYM-NEXT:      TypeChkSectNum: 0x0
+; SYM-NEXT:      SymbolAlignmentLog2: 3
+; SYM-NEXT:      SymbolType: XTY_SD (0x1)
+; SYM-NEXT:      StorageMappingClass: XMC_TC (0x3)
+; SYM-NEXT:      Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:    }
+; SYM-NEXT:  }
+; SYM:    Index: [[#NFA+21]]
+; SYM-NEXT:    Name: IThreadLocalVarUninit
+; SYM-NEXT:    Value (RelocatableAddress): 0x118
+; SYM-NEXT:    Section: .data
+; SYM-NEXT:    Type: 0x0
+; SYM-NEXT:    StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT:    NumberOfAuxEntries: 1
+; SYM-NEXT:    CSECT Auxiliary Entry {
+; SYM-NEXT:      Index: [[#NFA+22]]
+; SYM-NEXT:      SectionLen: 8
+; SYM-NEXT:      ParameterHashIndex: 0x0
+; SYM-NEXT:      TypeChkSectNum: 0x0
+; SYM-NEXT:      SymbolAlignmentLog2: 3
+; SYM-NEXT:      SymbolType: XTY_SD (0x1)
+; SYM-NEXT:      StorageMappingClass: XMC_TE (0x16)
+; SYM-NEXT:      Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:    }
+; SYM-NEXT:  }
+; SYM:    Index: [[#NFA+23]]
+; SYM-NEXT:    Name: ThreadLocalVarInit
+; SYM-NEXT:    Value (RelocatableAddress): 0x120
+; SYM-NEXT:    Section: .data
+; SYM-NEXT:    Type: 0x0
+; SYM-NEXT:    StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT:    NumberOfAuxEntries: 1
+; SYM-NEXT:    CSECT Auxiliary Entry {
+; SYM-NEXT:      Index: [[#NFA+24]]
+; SYM-NEXT:      SectionLen: 8
+; SYM-NEXT:      ParameterHashIndex: 0x0
+; SYM-NEXT:      TypeChkSectNum: 0x0
+; SYM-NEXT:      SymbolAlignmentLog2: 3
+; SYM-NEXT:      SymbolType: XTY_SD (0x1)
+; SYM-NEXT:      StorageMappingClass: XMC_TE (0x16)
+; SYM-NEXT:      Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:    }
+; SYM-NEXT:  }
+; SYM:    Index: [[#NFA+25]]
+; SYM-NEXT:    Name: IThreadLocalVarUninit2
+; SYM-NEXT:    Value (RelocatableAddress): 0x128
+; SYM-NEXT:    Section: .data
+; SYM-NEXT:    Type: 0x0
+; SYM-NEXT:    StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT:    NumberOfAuxEntries: 1
+; SYM-NEXT:    CSECT Auxiliary Entry {
+; SYM-NEXT:      Index: [[#NFA+26]]
+; SYM-NEXT:      SectionLen: 8
+; SYM-NEXT:      ParameterHashIndex: 0x0
+; SYM-NEXT:      TypeChkSectNum: 0x0
+; SYM-NEXT:      SymbolAlignmentLog2: 3
+; SYM-NEXT:      SymbolType: XTY_SD (0x1)
+; SYM-NEXT:      StorageMappingClass: XMC_TE (0x16)
+; SYM-NEXT:      Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:    }
+; SYM-NEXT:  }
+; SYM:    Index: [[#NFA+27]]
+; SYM-NEXT:    Name: ThreadLocalVarInit
+; SYM-NEXT:    Value (RelocatableAddress): 0x0
+; SYM-NEXT:    Section: .tdata
+; SYM-NEXT:    Type: 0x0
+; SYM-NEXT:    StorageClass: C_EXT (0x2)
+; SYM-NEXT:    NumberOfAuxEntries: 1
+; SYM-NEXT:    CSECT Auxiliary Entry {
+; SYM-NEXT:      Index: [[#NFA+28]]
+; SYM-NEXT:      SectionLen: 8
+; SYM-NEXT:      ParameterHashIndex: 0x0
+; SYM-NEXT:      TypeChkSectNum: 0x0
+; SYM-NEXT:      SymbolAlignmentLog2: 3
+; SYM-NEXT:      SymbolType: XTY_SD (0x1)
+; SYM-NEXT:      StorageMappingClass: XMC_TL (0x14)
+; SYM-NEXT:      Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:    }
+; SYM-NEXT:  }
+; SYM:    Index: [[#NFA+29]]
+; SYM-NEXT:    Name: IThreadLocalVarUninit
+; SYM-NEXT:    Value (RelocatableAddress): 0x8
+; SYM-NEXT:    Section: .tbss
+; SYM-NEXT:    Type: 0x0
+; SYM-NEXT:    StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT:    NumberOfAuxEntries: 1
+; SYM-NEXT:    CSECT Auxiliary Entry {
+; SYM-NEXT:      Index: [[#NFA+30]]
+; SYM-NEXT:      SectionLen: 8
+; SYM-NEXT:      ParameterHashIndex: 0x0
+; SYM-NEXT:      TypeChkSectNum: 0x0
+; SYM-NEXT:      SymbolAlignmentLog2: 3
+; SYM-NEXT:      SymbolType: XTY_CM (0x3)
+; SYM-NEXT:      StorageMappingClass: XMC_UL (0x15)
+; SYM-NEXT:      Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:    }
+; SYM-NEXT:  }
+; SYM:    Index: [[#NFA+31]]
+; SYM-NEXT:    Name: IThreadLocalVarUninit2
+; SYM-NEXT:    Value (RelocatableAddress): 0x10
+; SYM-NEXT:    Section: .tbss
+; SYM-NEXT:    Type: 0x0
+; SYM-NEXT:    StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT:    NumberOfAuxEntries: 1
+; SYM-NEXT:    CSECT Auxiliary Entry {
+; SYM-NEXT:      Index: [[#NFA+32]]
+; SYM-NEXT:      SectionLen: 8
+; SYM-NEXT:      ParameterHashIndex: 0x0
+; SYM-NEXT:      TypeChkSectNum: 0x0
+; SYM-NEXT:      SymbolAlignmentLog2: 3
+; SYM-NEXT:      SymbolType: XTY_CM (0x3)
+; SYM-NEXT:      StorageMappingClass: XMC_UL (0x15)
+; SYM-NEXT:      Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:    }
+; SYM-NEXT:  }
+
+; DIS:      {{.*}}aix-tls-ld-xcoff-reloc-large.ll.tmp.o:	file format aix5coff64-rs6000
+; DIS:      Disassembly of section .data:
+; DIS:      0000000000000110 (idx: [[#NFA+19]]) _$TLSML[TC]:
+; DIS-NEXT:     110: 00 00 00 00
+; DIS-NEXT:     0000000000000110:  R_TLSML	(idx: [[#NFA+19]]) _$TLSML[TC]
+; DIS-NEXT:     114: 00 00 00 00
+; DIS:      0000000000000118 (idx: [[#NFA+21]]) IThreadLocalVarUninit[TE]:
+; DIS-NEXT:     118: 00 00 00 00
+; DIS-NEXT:     0000000000000118:  R_TLS_LD	(idx: [[#NFA+29]]) IThreadLocalVarUninit[UL]
+; DIS-NEXT:     11c: 00 00 00 08
+; DIS:      0000000000000120 (idx: [[#NFA+23]]) ThreadLocalVarInit[TE]:
+; DIS-NEXT:     120: 00 00 00 00
+; DIS-NEXT:     0000000000000120:  R_TLS_LD	(idx: [[#NFA+27]]) ThreadLocalVarInit[TL]
+; DIS-NEXT:     124: 00 00 00 00
+; DIS:      0000000000000128 (idx: [[#NFA+25]]) IThreadLocalVarUninit2[TE]:
+; DIS-NEXT:     128: 00 00 00 00
+; DIS-NEXT:     0000000000000128:  R_TLS_LD	(idx: [[#NFA+31]]) IThreadLocalVarUninit2[UL]
+; DIS-NEXT:     12c: 00 00 00 10
+
+; DIS:      Disassembly of section .tdata:
+; DIS:      0000000000000000 (idx: [[#NFA+27]]) ThreadLocalVarInit[TL]:
+; DIS-NEXT:        0: 00 00 00 00
+; DIS-NEXT:        4: 00 00 00 01
+
+; DIS:      Disassembly of section .tbss:
+; DIS:      0000000000000008 (idx: [[#NFA+29]]) IThreadLocalVarUninit[UL]:
+; DIS-NEXT: ...
+; DIS:      0000000000000010 (idx: [[#NFA+31]]) IThreadLocalVarUninit2[UL]:
+; DIS-NEXT: ...
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
index fe843e5dcb0ea7..1f7b497bb6c612 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
@@ -33,7 +33,7 @@ entry:
 ; RELOC-NEXT:   Section (index: 1) .text {
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x16
-; RELOC-NEXT:     Symbol: _$TLSML ([[#NFA+19]])
+; RELOC-NEXT:     Symbol: TIInit ([[#NFA+19]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
@@ -41,30 +41,22 @@ entry:
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
 ; RELOC-NEXT:     Virtual Address: 0x1A
-; RELOC-NEXT:     Symbol: _$TLSML ([[#NFA+19]])
-; RELOC-NEXT:     IsSigned: No
-; RELOC-NEXT:     FixupBitValue: 0
-; RELOC-NEXT:     Length: 16
-; RELOC-NEXT:     Type: R_TOCL (0x31)
-; RELOC-NEXT:   }
-; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x1E
-; RELOC-NEXT:     Symbol: TIInit ([[#NFA+21]])
+; RELOC-NEXT:     Symbol: _$TLSML ([[#NFA+21]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x22
-; RELOC-NEXT:     Symbol: TIInit ([[#NFA+21]])
+; RELOC-NEXT:     Virtual Address: 0x1E
+; RELOC-NEXT:     Symbol: _$TLSML ([[#NFA+21]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
 ; RELOC-NEXT:     Length: 16
 ; RELOC-NEXT:     Type: R_TOCL (0x31)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x24
+; RELOC-NEXT:     Virtual Address: 0x20
 ; RELOC-NEXT:     Symbol: .__tls_get_mod ([[#NFA+1]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -72,7 +64,15 @@ entry:
 ; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x5E
+; RELOC-NEXT:     Virtual Address: 0x26
+; RELOC-NEXT:     Symbol: TIInit ([[#NFA+19]])
+; RELOC-NEXT:     IsSigned: No
+; RELOC-NEXT:     FixupBitValue: 0
+; RELOC-NEXT:     Length: 16
+; RELOC-NEXT:     Type: R_TOCL (0x31)
+; RELOC-NEXT:   }
+; RELOC-NEXT:   Relocation {
+; RELOC-NEXT:     Virtual Address: 0x4E
 ; RELOC-NEXT:     Symbol: .TWInit ([[#NFA+23]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -80,7 +80,7 @@ entry:
 ; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x62
+; RELOC-NEXT:     Virtual Address: 0x52
 ; RELOC-NEXT:     Symbol: TWInit ([[#NFA+25]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -88,7 +88,7 @@ entry:
 ; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x66
+; RELOC-NEXT:     Virtual Address: 0x56
 ; RELOC-NEXT:     Symbol: .TWInit ([[#NFA+23]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -96,7 +96,7 @@ entry:
 ; RELOC-NEXT:     Type: R_TOCL (0x31)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x6A
+; RELOC-NEXT:     Virtual Address: 0x5A
 ; RELOC-NEXT:     Symbol: TWInit ([[#NFA+25]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -104,7 +104,7 @@ entry:
 ; RELOC-NEXT:     Type: R_TOCL (0x31)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x6C
+; RELOC-NEXT:     Virtual Address: 0x5C
 ; RELOC-NEXT:     Symbol: .__tls_get_addr ([[#NFA+3]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -112,7 +112,7 @@ entry:
 ; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x76
+; RELOC-NEXT:     Virtual Address: 0x66
 ; RELOC-NEXT:     Symbol: GInit ([[#NFA+27]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -120,7 +120,7 @@ entry:
 ; RELOC-NEXT:     Type: R_TOCU (0x30)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x7A
+; RELOC-NEXT:     Virtual Address: 0x6A
 ; RELOC-NEXT:     Symbol: GInit ([[#NFA+27]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -130,7 +130,7 @@ entry:
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Section (index: 2) .data {
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xA0
+; RELOC-NEXT:   Virtual Address: 0x90
 ; RELOC-NEXT:   Symbol: .storesTIInit ([[#NFA+7]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -138,7 +138,7 @@ entry:
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xA4
+; RELOC-NEXT:   Virtual Address: 0x94
 ; RELOC-NEXT:   Symbol: TOC ([[#NFA+17]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -146,7 +146,7 @@ entry:
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xAC
+; RELOC-NEXT:   Virtual Address: 0x9C
 ; RELOC-NEXT:   Symbol: .loadsTWInit ([[#NFA+9]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -154,7 +154,7 @@ entry:
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xB0
+; RELOC-NEXT:   Virtual Address: 0xA0
 ; RELOC-NEXT:   Symbol: TOC ([[#NFA+17]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -162,23 +162,23 @@ entry:
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xB8
-; RELOC-NEXT:   Symbol: _$TLSML ([[#NFA+19]])
+; RELOC-NEXT:   Virtual Address: 0xA8
+; RELOC-NEXT:   Symbol: TIInit ([[#NFA+29]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLSML (0x25)
+; RELOC-NEXT:   Type: R_TLS_LD (0x22)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xBC
-; RELOC-NEXT:   Symbol: TIInit ([[#NFA+29]])
+; RELOC-NEXT:   Virtual Address: 0xAC
+; RELOC-NEXT:   Symbol: _$TLSML ([[#NFA+21]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
 ; RELOC-NEXT:   Length: 32
-; RELOC-NEXT:   Type: R_TLS_LD (0x22)
+; RELOC-NEXT:   Type: R_TLSML (0x25)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xC0
+; RELOC-NEXT:   Virtual Address: 0xB0
 ; RELOC-NEXT:   Symbol: TWInit ([[#NFA+31]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -186,7 +186,7 @@ entry:
 ; RELOC-NEXT:   Type: R_TLSM (0x24)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xC4
+; RELOC-NEXT:   Virtual Address: 0xB4
 ; RELOC-NEXT:   Symbol: TWInit ([[#NFA+31]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -194,7 +194,7 @@ entry:
 ; RELOC-NEXT:   Type: R_TLS (0x20)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xC8
+; RELOC-NEXT:   Virtual Address: 0xB8
 ; RELOC-NEXT:   Symbol: GInit ([[#NFA+11]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -279,7 +279,7 @@ entry:
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
 ; SYM-NEXT:       Index: [[#NFA+6]]
-; SYM-NEXT:       SectionLen: 148
+; SYM-NEXT:       SectionLen: 132
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 5
@@ -312,7 +312,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+9]]
 ; SYM-NEXT:     Name: .loadsTWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x50
+; SYM-NEXT:     Value (RelocatableAddress): 0x40
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -332,7 +332,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+11]]
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x98
+; SYM-NEXT:     Value (RelocatableAddress): 0x88
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -352,7 +352,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+13]]
 ; SYM-NEXT:     Name: storesTIInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xA0
+; SYM-NEXT:     Value (RelocatableAddress): 0x90
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -372,7 +372,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+15]]
 ; SYM-NEXT:     Name: loadsTWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xAC
+; SYM-NEXT:     Value (RelocatableAddress): 0x9C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -392,7 +392,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+17]]
 ; SYM-NEXT:     Name: TOC
-; SYM-NEXT:     Value (RelocatableAddress): 0xB8
+; SYM-NEXT:     Value (RelocatableAddress): 0xA8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -411,8 +411,8 @@ entry:
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+19]]
-; SYM-NEXT:     Name: _$TLSML
-; SYM-NEXT:     Value (RelocatableAddress): 0xB8
+; SYM-NEXT:     Name: TIInit
+; SYM-NEXT:     Value (RelocatableAddress): 0xA8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -424,15 +424,15 @@ entry:
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 2
 ; SYM-NEXT:       SymbolType: XTY_SD (0x1)
-; SYM-NEXT:       StorageMappingClass: XMC_TC (0x3)
+; SYM-NEXT:       StorageMappingClass: XMC_TE (0x16)
 ; SYM-NEXT:       StabInfoIndex: 0x0
 ; SYM-NEXT:       StabSectNum: 0x0
 ; SYM-NEXT:     }
 ; SYM-NEXT:   }
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+21]]
-; SYM-NEXT:     Name: TIInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xBC
+; SYM-NEXT:     Name: _$TLSML
+; SYM-NEXT:     Value (RelocatableAddress): 0xAC
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -444,7 +444,7 @@ entry:
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 2
 ; SYM-NEXT:       SymbolType: XTY_SD (0x1)
-; SYM-NEXT:       StorageMappingClass: XMC_TE (0x16)
+; SYM-NEXT:       StorageMappingClass: XMC_TC (0x3)
 ; SYM-NEXT:       StabInfoIndex: 0x0
 ; SYM-NEXT:       StabSectNum: 0x0
 ; SYM-NEXT:     }
@@ -452,7 +452,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+23]]
 ; SYM-NEXT:     Name: .TWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xC0
+; SYM-NEXT:     Value (RelocatableAddress): 0xB0
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -472,7 +472,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+25]]
 ; SYM-NEXT:     Name: TWInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xC4
+; SYM-NEXT:     Value (RelocatableAddress): 0xB4
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -492,7 +492,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+27]]
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xC8
+; SYM-NEXT:     Value (RelocatableAddress): 0xB8
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -559,25 +559,23 @@ entry:
 ; DIS-NEXT:                                       stw 0, 40(1)
 ; DIS-NEXT:                                       mr 7, 3
 ; DIS-NEXT:                                       mr 6, 4
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 8, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: [[#NFA+19]]) TIInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: [[#NFA+19]]) _$TLSML[TC]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 0(3)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: [[#NFA+19]]) _$TLSML[TC]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 4, 2, 0
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: [[#NFA+21]]) TIInit[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 8, 4(4)
-; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: [[#NFA+21]]) TIInit[TE]
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: [[#NFA+21]]) _$TLSML[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 3, 4(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: [[#NFA+21]]) _$TLSML[TC]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                bla 0x0
 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: [[#NFA+1]]) .__tls_get_mod[PR]
-; DIS-NEXT:                                       mr      4, 8
-; DIS-NEXT:                                       add 3, 3, 4
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                lwz 4, 0(8)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: [[#NFA+19]]) TIInit[TE]
+; DIS-NEXT:                                       stwux 7, 3, 4
 ; DIS-NEXT:                                       stw 6, 4(3)
-; DIS-NEXT:                                       stw 7, 0(3)
 ; DIS-NEXT:                                       addi 1, 1, 32
 ; DIS-NEXT:                                       lwz 0, 8(1)
 ; DIS-NEXT:                                       mtlr 0
 ; DIS-NEXT:                                       blr
-; DIS:      00000050 (idx: [[#INDX+2]]) .loadsTWInit:
+; DIS:      00000040 (idx: [[#INDX+2]]) .loadsTWInit:
 ; DIS-NEXT:                                       mflr 0
 ; DIS-NEXT:                                       stwu 1, -32(1)
 ; DIS-NEXT:                                       stw 0, 40(1)
@@ -604,36 +602,36 @@ entry:
 ; DIS-NEXT:                                       blr
 
 ; DIS:      Disassembly of section .data:
-; DIS:      00000098  (idx: [[#NFA+11]]) GInit[RW]:
-; DIS-NEXT:       98: 3f f0 00 00
-; DIS-NEXT:       9c: 00 00 00 00
-; DIS:      000000a0  (idx: [[#NFA+13]]) storesTIInit[DS]:
-; DIS-NEXT:       a0: 00 00 00 00
-; DIS-NEXT: 000000a0: R_POS (idx: [[#NFA+7]]) .storesTIInit
-; DIS-NEXT:       a4: 00 00 00 b8
-; DIS-NEXT: 000000a4: R_POS (idx: [[#NFA+17]]) TOC[TC0]
+; DIS:      00000088  (idx: [[#NFA+11]]) GInit[RW]:
+; DIS-NEXT:       88: 3f f0 00 00
+; DIS-NEXT:       8c: 00 00 00 00
+; DIS:      00000090  (idx: [[#NFA+13]]) storesTIInit[DS]:
+; DIS-NEXT:       90: 00 00 00 00
+; DIS-NEXT: 00000090: R_POS (idx: [[#NFA+7]]) .storesTIInit
+; DIS-NEXT:       94: 00 00 00 a8
+; DIS-NEXT: 00000094: R_POS (idx: [[#NFA+17]]) TOC[TC0]
+; DIS-NEXT:       98: 00 00 00 00
+; DIS:      0000009c  (idx: [[#NFA+15]]) loadsTWInit[DS]:
+; DIS-NEXT:       9c: 00 00 00 40
+; DIS-NEXT: 0000009c: R_POS (idx: [[#NFA+9]]) .loadsTWInit
+; DIS-NEXT:       a0: 00 00 00 a8
+; DIS-NEXT: 000000a0: R_POS (idx: [[#NFA+17]]) TOC[TC0]
+; DIS-NEXT:       a4: 00 00 00 00
+; DIS:      000000a8  (idx: [[#NFA+19]]) TIInit[TE]:
 ; DIS-NEXT:       a8: 00 00 00 00
-; DIS:      000000ac  (idx: [[#NFA+15]]) loadsTWInit[DS]:
-; DIS-NEXT:       ac: 00 00 00 50
-; DIS-NEXT: 000000ac: R_POS (idx: [[#NFA+9]]) .loadsTWInit
-; DIS-NEXT:       b0: 00 00 00 b8
-; DIS-NEXT: 000000b0: R_POS (idx: [[#NFA+17]]) TOC[TC0]
-; DIS-NEXT:       b4: 00 00 00 00
-; DIS:      000000b8 (idx: [[#NFA+19]]) _$TLSML[TC]:
-; DIS-NEXT:       b8: 00 00 00 00
-; DIS-NEXT: 000000b8:  R_TLSML (idx: [[#NFA+19]]) _$TLSML[TC]
-; DIS:      000000bc  (idx: [[#NFA+21]]) TIInit[TE]:
-; DIS-NEXT:       bc: 00 00 00 00
-; DIS-NEXT: 000000bc: R_TLS_LD (idx: [[#NFA+29]]) TIInit[TL]
-; DIS:      000000c0  (idx: [[#NFA+23]]) .TWInit[TE]:
-; DIS-NEXT:       c0: 00 00 00 00
-; DIS-NEXT: 000000c0: R_TLSM (idx: [[#NFA+31]]) TWInit[TL]
-; DIS:      000000c4  (idx: [[#NFA+25]]) TWInit[TE]:
-; DIS-NEXT:       c4: 00 00 00 08
-; DIS-NEXT: 000000c4: R_TLS (idx: [[#NFA+31]]) TWInit[TL]
-; DIS:      000000c8  (idx: [[#NFA+27]]) GInit[TE]:
-; DIS-NEXT:       c8: 00 00 00 98
-; DIS-NEXT: 000000c8: R_POS (idx: [[#NFA+11]]) GInit[RW]
+; DIS-NEXT: 000000a8: R_TLS_LD (idx: [[#NFA+29]]) TIInit[TL]
+; DIS:      000000ac (idx: [[#NFA+21]]) _$TLSML[TC]:
+; DIS-NEXT:       ac: 00 00 00 00
+; DIS-NEXT: 000000ac:  R_TLSML (idx: [[#NFA+21]]) _$TLSML[TC]
+; DIS:      000000b0  (idx: [[#NFA+23]]) .TWInit[TE]:
+; DIS-NEXT:       b0: 00 00 00 00
+; DIS-NEXT: 000000b0: R_TLSM (idx: [[#NFA+31]]) TWInit[TL]
+; DIS:      000000b4  (idx: [[#NFA+25]]) TWInit[TE]:
+; DIS-NEXT:       b4: 00 00 00 08
+; DIS-NEXT: 000000b4: R_TLS (idx: [[#NFA+31]]) TWInit[TL]
+; DIS:      000000b8  (idx: [[#NFA+27]]) GInit[TE]:
+; DIS-NEXT:       b8: 00 00 00 88
+; DIS-NEXT: 000000b8: R_POS (idx: [[#NFA+11]]) GInit[RW]
 
 ; DIS:      Disassembly of section .tdata:
 ; DIS:      00000000  (idx: [[#NFA+29]]) TIInit[TL]:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
index feeb1da35ecbad..8e26c6a2f3bbfd 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
@@ -57,7 +57,7 @@ entry:
 ; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x4A
+; RELOC-NEXT:     Virtual Address: 0x3A
 ; RELOC-NEXT:     Symbol: .TGInit ([[#NFA+29]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -65,7 +65,7 @@ entry:
 ; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x4E
+; RELOC-NEXT:     Virtual Address: 0x3E
 ; RELOC-NEXT:     Symbol: TGInit ([[#NFA+31]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -73,7 +73,7 @@ entry:
 ; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x54
+; RELOC-NEXT:     Virtual Address: 0x44
 ; RELOC-NEXT:     Symbol: .__tls_get_addr ([[#NFA+3]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -81,7 +81,7 @@ entry:
 ; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x5A
+; RELOC-NEXT:     Virtual Address: 0x4A
 ; RELOC-NEXT:     Symbol: GInit ([[#NFA+33]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -91,7 +91,7 @@ entry:
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Section (index: 2) .data {
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x80
+; RELOC-NEXT:   Virtual Address: 0x70
 ; RELOC-NEXT:   Symbol: .storesTIUninit ([[#NFA+7]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -99,7 +99,7 @@ entry:
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x84
+; RELOC-NEXT:   Virtual Address: 0x74
 ; RELOC-NEXT:   Symbol: TOC ([[#NFA+23]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -107,7 +107,7 @@ entry:
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x8C
+; RELOC-NEXT:   Virtual Address: 0x7C
 ; RELOC-NEXT:   Symbol: .loadsTGInit ([[#NFA+9]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -115,7 +115,7 @@ entry:
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x90
+; RELOC-NEXT:   Virtual Address: 0x80
 ; RELOC-NEXT:   Symbol: TOC ([[#NFA+23]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -123,7 +123,7 @@ entry:
 ; RELOC-NEXT:   Type: R_POS (0x0)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x98
+; RELOC-NEXT:   Virtual Address: 0x88
 ; RELOC-NEXT:   Symbol: _$TLSML ([[#NFA+25]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -131,7 +131,7 @@ entry:
 ; RELOC-NEXT:   Type: R_TLSML (0x25)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0x9C
+; RELOC-NEXT:   Virtual Address: 0x8C
 ; RELOC-NEXT:   Symbol: TIUninit ([[#NFA+39]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -139,7 +139,7 @@ entry:
 ; RELOC-NEXT:   Type: R_TLS_LD (0x22)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xA0
+; RELOC-NEXT:   Virtual Address: 0x90
 ; RELOC-NEXT:   Symbol: TGInit ([[#NFA+37]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -147,7 +147,7 @@ entry:
 ; RELOC-NEXT:   Type: R_TLSM (0x24)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xA4
+; RELOC-NEXT:   Virtual Address: 0x94
 ; RELOC-NEXT:   Symbol: TGInit ([[#NFA+37]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -155,7 +155,7 @@ entry:
 ; RELOC-NEXT:   Type: R_TLS (0x20)
 ; RELOC-NEXT: }
 ; RELOC-NEXT: Relocation {
-; RELOC-NEXT:   Virtual Address: 0xA8
+; RELOC-NEXT:   Virtual Address: 0x98
 ; RELOC-NEXT:   Symbol: GInit ([[#NFA+17]])
 ; RELOC-NEXT:   IsSigned: No
 ; RELOC-NEXT:   FixupBitValue: 0
@@ -229,7 +229,7 @@ entry:
 ; SYM-NEXT:     NumberOfAuxEntries: 1
 ; SYM-NEXT:     CSECT Auxiliary Entry {
 ; SYM-NEXT:       Index: [[#NFA+6]]
-; SYM-NEXT:       SectionLen: 120
+; SYM-NEXT:       SectionLen: 104
 ; SYM-NEXT:       ParameterHashIndex: 0x0
 ; SYM-NEXT:       TypeChkSectNum: 0x0
 ; SYM-NEXT:       SymbolAlignmentLog2: 5
@@ -262,7 +262,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+9]]
 ; SYM-NEXT:     Name: .loadsTGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x40
+; SYM-NEXT:     Value (RelocatableAddress): 0x30
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -282,7 +282,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+11]]
 ; SYM-NEXT:     Name: .rodata
-; SYM-NEXT:     Value (RelocatableAddress): 0x78
+; SYM-NEXT:     Value (RelocatableAddress): 0x68
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -302,7 +302,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+13]]
 ; SYM-NEXT:     Name: const_ivar
-; SYM-NEXT:     Value (RelocatableAddress): 0x78
+; SYM-NEXT:     Value (RelocatableAddress): 0x68
 ; SYM-NEXT:     Section: .text
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -322,7 +322,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+15]]
 ; SYM-NEXT:     Name: .data
-; SYM-NEXT:     Value (RelocatableAddress): 0x7C
+; SYM-NEXT:     Value (RelocatableAddress): 0x6C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -342,7 +342,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+17]]
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x7C
+; SYM-NEXT:     Value (RelocatableAddress): 0x6C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -362,7 +362,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+19]]
 ; SYM-NEXT:     Name: storesTIUninit
-; SYM-NEXT:     Value (RelocatableAddress): 0x80
+; SYM-NEXT:     Value (RelocatableAddress): 0x70
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -382,7 +382,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+21]]
 ; SYM-NEXT:     Name: loadsTGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0x8C
+; SYM-NEXT:     Value (RelocatableAddress): 0x7C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_EXT (0x2)
@@ -402,7 +402,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+23]]
 ; SYM-NEXT:     Name: TOC
-; SYM-NEXT:     Value (RelocatableAddress): 0x98
+; SYM-NEXT:     Value (RelocatableAddress): 0x88
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -422,7 +422,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+25]]
 ; SYM-NEXT:     Name: _$TLSML
-; SYM-NEXT:     Value (RelocatableAddress): 0x98
+; SYM-NEXT:     Value (RelocatableAddress): 0x88
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -442,7 +442,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+27]]
 ; SYM-NEXT:     Name: TIUninit
-; SYM-NEXT:     Value (RelocatableAddress): 0x9C
+; SYM-NEXT:     Value (RelocatableAddress): 0x8C
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -462,7 +462,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+29]]
 ; SYM-NEXT:     Name: .TGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xA0
+; SYM-NEXT:     Value (RelocatableAddress): 0x90
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -482,7 +482,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+31]]
 ; SYM-NEXT:     Name: TGInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xA4
+; SYM-NEXT:     Value (RelocatableAddress): 0x94
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -502,7 +502,7 @@ entry:
 ; SYM-NEXT:   Symbol {
 ; SYM-NEXT:     Index: [[#NFA+33]]
 ; SYM-NEXT:     Name: GInit
-; SYM-NEXT:     Value (RelocatableAddress): 0xA8
+; SYM-NEXT:     Value (RelocatableAddress): 0x98
 ; SYM-NEXT:     Section: .data
 ; SYM-NEXT:     Type: 0x0
 ; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
@@ -594,13 +594,12 @@ entry:
 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: [[#NFA+1]]) .__tls_get_mod[PR]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 4, 4(2)
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: [[#NFA+27]]) TIUninit[TC]
-; DIS-NEXT:                                      add 3, 3, 4
-; DIS-NEXT:                                      stw 6, 0(3)
+; DIS-NEXT:                                      stwx 6, 3, 4
 ; DIS-NEXT:                                      addi 1, 1, 32
 ; DIS-NEXT:                                      lwz 0, 8(1)
 ; DIS-NEXT:                                      mtlr 0
 ; DIS-NEXT:                                      blr
-; DIS:      00000040 (idx: [[#NFA+9]]) .loadsTGInit:
+; DIS:      00000030 (idx: [[#NFA+9]]) .loadsTGInit:
 ; DIS-NEXT:                                      mflr 0
 ; DIS-NEXT:                                      stwu 1, -32(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 8(2)
@@ -619,39 +618,39 @@ entry:
 ; DIS-NEXT:                                      lwz 0, 8(1)
 ; DIS-NEXT:                                      mtlr 0
 ; DIS-NEXT:                                      blr
-; DIS:      00000078 (idx: [[#NFA+13]]) const_ivar:
-; DIS-NEXT:       78: 00 00 00 06
+; DIS:      00000068 (idx: [[#NFA+13]]) const_ivar:
+; DIS-NEXT:       68: 00 00 00 06
 
 ; DIS:      Disassembly of section .data:
-; DIS:      0000007c  (idx: [[#NFA+17]]) GInit:
-; DIS-NEXT:       7c: 00 00 00 01
-; DIS:      00000080  (idx: [[#NFA+19]]) storesTIUninit[DS]:
-; DIS-NEXT:       80: 00 00 00 00
-; DIS-NEXT: 00000080: R_POS (idx: [[#NFA+7]]) .storesTIUninit
-; DIS-NEXT:       84: 00 00 00 98
-; DIS-NEXT: 00000084: R_POS (idx: [[#NFA+23]]) TOC[TC0]
+; DIS:      0000006c  (idx: [[#NFA+17]]) GInit:
+; DIS-NEXT:       6c: 00 00 00 01
+; DIS:      00000070  (idx: [[#NFA+19]]) storesTIUninit[DS]:
+; DIS-NEXT:       70: 00 00 00 00
+; DIS-NEXT: 00000070: R_POS (idx: [[#NFA+7]]) .storesTIUninit
+; DIS-NEXT:       74: 00 00 00 88
+; DIS-NEXT: 00000074: R_POS (idx: [[#NFA+23]]) TOC[TC0]
+; DIS-NEXT:       78: 00 00 00 00
+; DIS:      0000007c  (idx: [[#NFA+21]]) loadsTGInit[DS]:
+; DIS-NEXT:       7c: 00 00 00 30
+; DIS-NEXT: 0000007c: R_POS (idx: [[#NFA+9]]) .loadsTGInit
+; DIS-NEXT:       80: 00 00 00 88
+; DIS-NEXT: 00000080: R_POS (idx: [[#NFA+23]]) TOC[TC0]
+; DIS-NEXT:       84: 00 00 00 00
+; DIS:      00000088 (idx: [[#NFA+25]]) _$TLSML[TC]:
 ; DIS-NEXT:       88: 00 00 00 00
-; DIS:      0000008c  (idx: [[#NFA+21]]) loadsTGInit[DS]:
-; DIS-NEXT:       8c: 00 00 00 40
-; DIS-NEXT: 0000008c: R_POS (idx: [[#NFA+9]]) .loadsTGInit
-; DIS-NEXT:       90: 00 00 00 98
-; DIS-NEXT: 00000090: R_POS (idx: [[#NFA+23]]) TOC[TC0]
+; DIS-NEXT: 00000088: R_TLSML (idx: [[#NFA+25]]) _$TLSML[TC]
+; DIS:      0000008c  (idx: [[#NFA+27]]) TIUninit[TC]:
+; DIS-NEXT:       8c: 00 00 00 04
+; DIS-NEXT: 0000008c: R_TLS_LD (idx: [[#NFA+39]]) TIUninit[UL]
+; DIS:      00000090  (idx: [[#NFA+29]]) .TGInit[TC]:
+; DIS-NEXT:       90: 00 00 00 00
+; DIS-NEXT: 00000090: R_TLSM (idx: [[#NFA+37]]) TGInit
+; DIS:      00000094  (idx: [[#NFA+31]]) TGInit[TC]:
 ; DIS-NEXT:       94: 00 00 00 00
-; DIS:      00000098 (idx: [[#NFA+25]]) _$TLSML[TC]:
-; DIS-NEXT:       98: 00 00 00 00
-; DIS-NEXT: 00000098: R_TLSML (idx: [[#NFA+25]]) _$TLSML[TC]
-; DIS:      0000009c  (idx: [[#NFA+27]]) TIUninit[TC]:
-; DIS-NEXT:       9c: 00 00 00 00
-; DIS-NEXT: 0000009c: R_TLS_LD (idx: [[#NFA+39]]) TIUninit[UL]
-; DIS:      000000a0  (idx: [[#NFA+29]]) .TGInit[TC]:
-; DIS-NEXT:       a0: 00 00 00 00
-; DIS-NEXT: 000000a0: R_TLSM (idx: [[#NFA+37]]) TGInit
-; DIS:      000000a4  (idx: [[#NFA+31]]) TGInit[TC]:
-; DIS-NEXT:       a4: 00 00 00 00
-; DIS-NEXT: 000000a4: R_TLS (idx: [[#NFA+37]]) TGInit
-; DIS:      000000a8  (idx: [[#NFA+33]]) GInit[TC]:
-; DIS-NEXT:       a8: 00 00 00 7c
-; DIS-NEXT: 000000a8: R_POS (idx: [[#NFA+17]]) GInit
+; DIS-NEXT: 00000094: R_TLS (idx: [[#NFA+37]]) TGInit
+; DIS:      00000098  (idx: [[#NFA+33]]) GInit[TC]:
+; DIS-NEXT:       98: 00 00 00 6c
+; DIS-NEXT: 00000098: R_POS (idx: [[#NFA+17]]) GInit
 
 ; DIS:      Disassembly of section .tdata:
 ; DIS:      00000000 (idx: [[#NFA+37]]) TGInit:

>From 5d6ff2cae84311f71286c55bdd67997d4db2b5ed Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Wed, 27 Sep 2023 02:57:07 -0400
Subject: [PATCH 11/31] [NFC] Incorporate comments.

---
 .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp  |  7 ++--
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     |  5 +--
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   | 15 +++++----
 llvm/lib/Target/PowerPC/PPCISelLowering.h     |  4 +--
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       |  2 +-
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 33 +++++++++----------
 6 files changed, 34 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 00dda306a584c7..5b8f7f06619047 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -234,9 +234,10 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer {
       // On AIX, we have TLS variable offsets (symbol@({gd|ie|le|ld}) depending
       // on the TLS access method (or model). For the general-dynamic access
       // method, we also have region handle (symbol at m) for each variable. For
-      // local-dynamic, a module handle (__TLSML[TC]@ml) for all variables. For
-      // local-exec and initial-exec, we have a thread pointer, in r13 for
-      // 64-bit mode and returned by .__get_tpointer for 32-bit mode.
+      // local-dynamic, there is a module handle (__TLSML[TC]@ml) for all
+      // variables. Finally for local-exec and initial-exec, we have a thread
+      // pointer, in r13 for 64-bit mode and returned by .__get_tpointer for
+      // 32-bit mode.
       if (Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSIE ||
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 769656d9d0f10c..05309355a60119 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -876,7 +876,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
         // hacked this flag which should have been named MO_TLSLDM_FLAG: on AIX
         // the ML relocation type is only valid for a reference to a TOC symbol
         // from the symbol itself, and right now its only user is the symbol
-        // "_$TLSML". Use symbol name to decide that R_TLSML is expected.
+        // "_$TLSML". The symbol name is used to decide that R_TLSML is
+        // expected.
         return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML;
       if (IsAIX)
         return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD;
@@ -2893,7 +2894,7 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
           getObjFileLowering().getSectionForTOCEntry(S, TM));
     } else if (I.first.second ==
                MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML) {
-      // AIX assembler expects TC storage-mapping class for the "_$TLSML"
+      // AIX assembler expects the TC storage-mapping class for the "_$TLSML"
       // symbol.
       MCSection *MCSect = getObjFileLowering().getContext().getXCOFFSection(
           cast<MCSymbolXCOFF>(I.first.first)->getSymbolTableName(),
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 3ccc0754fcd099..01a366ea4dda8f 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3419,13 +3419,14 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
 
   if (Model == TLSModel::LocalDynamic) {
     // For local-dynamic on AIX, we need to generate one TOC entry for each
-    // variable offset, and single module-handle TOC entry for the entire file.
-
-    // We are not (1) create GV node (2) call getTOCEntry for the module-handle
-    // due to the reason that the module-handle should not be materialized (i.e.
-    // there should be no symbol-table entry referring to the module-handle).
-    // Instead we will create reference to __TLSML[TC]@ml in PPCTLSDynamicCall
-    // when processing the TLSLD_AIX pseudo node.
+    // variable offset, and a single module-handle TOC entry for the entire
+    // file.
+
+    // We are not able to (1) create a GV node, and (2) call getTOCEntry for the
+    // module-handle due to the reason that the module-handle should not be
+    // materialized (i.e. there should be no symbol-table entry referring to the
+    // module-handle). Instead we will create reference to __TLSML[TC]@ml in
+    // PPCTLSDynamicCall when processing the TLSLD_AIX pseudo node.
     SDValue ModuleHandle = DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT);
     SDValue VariableOffsetTGA =
         DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index e9a38f3601e91f..4efa68e99a63b9 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -380,8 +380,8 @@ namespace llvm {
     GET_TLS_MOD_AIX,
 
     /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(variable offset)
-    /// Op that internally creates TOC entry for the "_$TLSML" symbol, generates
-    /// a GET_TLS_MOD_AIX node which will be expanded into a call to
+    /// Op that internally creates a TOC entry for the "_$TLSML" symbol, and
+    /// generates a GET_TLS_MOD_AIX node which will be expanded into a call to
     /// .__tls_get_mod, and then adds the variable offset with the result from
     /// the call. This node is used in both 32-bit and 64-bit modes. The only
     /// difference is register class.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 26e562ea3ba046..d47cb6f3ae4265 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -3300,7 +3300,7 @@ def TLSGDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$offset, gprc:$handl
                           "#TLSGDAIX",
                           [(set i32:$rD,
                             (PPCTlsgdAIX i32:$offset, i32:$handle))]>;
-// This pseudo is expanded to load module-handle in R3, and the call to GETtlsMOD32AIX.
+// This pseudo is expanded to load the module-handle in R3, and the call to GETtlsMOD32AIX.
 def TLSLDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins),
                           "#TLSLDAIX", [(set i32:$rD, (PPCTlsldAIX))]>;
 // LR is a true define, while the rest of the Defs are clobbers.  R3 is
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 71825dca77bcf0..8a839af1f2eb71 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -48,8 +48,12 @@ namespace {
     bool processBlock(MachineBasicBlock &MBB) {
       bool Changed = false;
       bool NeedFence = true;
-      bool Is64Bit = MBB.getParent()->getSubtarget<PPCSubtarget>().isPPC64();
-      bool IsAIX = MBB.getParent()->getSubtarget<PPCSubtarget>().isAIXABI();
+      const PPCSubtarget &Subtarget =
+          MBB.getParent()->getSubtarget<PPCSubtarget>();
+      bool Is64Bit = Subtarget.isPPC64();
+      bool IsAIX = Subtarget.isAIXABI();
+      bool IsLargeModel =
+          Subtarget.getTargetMachine().getCodeModel() == CodeModel::Large;
       bool IsPCREL = false;
       MachineFunction *MF = MBB.getParent();
       MachineRegisterInfo &RegInfo = MF->getRegInfo();
@@ -64,15 +68,16 @@ namespace {
         // There are a number of slight differences in code generation
         // when we call .__get_tpointer (32-bit AIX TLS).
         bool IsTLSTPRelMI = MI.getOpcode() == PPC::GETtlsTpointer32AIX;
+        bool IsTLSLDAIXMI = (MI.getOpcode() == PPC::TLSLDAIX8 ||
+                             MI.getOpcode() == PPC::TLSLDAIX);
 
         if (MI.getOpcode() != PPC::ADDItlsgdLADDR &&
             MI.getOpcode() != PPC::ADDItlsldLADDR &&
             MI.getOpcode() != PPC::ADDItlsgdLADDR32 &&
             MI.getOpcode() != PPC::ADDItlsldLADDR32 &&
-            MI.getOpcode() != PPC::TLSLDAIX &&
-            MI.getOpcode() != PPC::TLSLDAIX8 &&
             MI.getOpcode() != PPC::TLSGDAIX &&
-            MI.getOpcode() != PPC::TLSGDAIX8 && !IsTLSTPRelMI && !IsPCREL) {
+            MI.getOpcode() != PPC::TLSGDAIX8 && !IsTLSTPRelMI && !IsPCREL &&
+            !IsTLSLDAIXMI) {
           // Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP
           // as scheduling fences, we skip creating fences if we already
           // have existing ADJCALLSTACKDOWN/UP to avoid nesting,
@@ -92,9 +97,7 @@ namespace {
         Register InReg = PPC::NoRegister;
         Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
         Register GPR4 = Is64Bit ? PPC::X4 : PPC::R4;
-        if (!IsPCREL && !IsTLSTPRelMI &&
-            !(MI.getOpcode() == PPC::TLSLDAIX8 ||
-              MI.getOpcode() == PPC::TLSLDAIX))
+        if (!IsPCREL && !IsTLSTPRelMI && !IsTLSLDAIXMI)
           InReg = MI.getOperand(1).getReg();
         DebugLoc DL = MI.getDebugLoc();
 
@@ -160,15 +163,10 @@ namespace {
                                                               .addImm(0);
 
         if (IsAIX) {
-          if (MI.getOpcode() == PPC::TLSLDAIX8 ||
-              MI.getOpcode() == PPC::TLSLDAIX) {
+          if (IsTLSLDAIXMI) {
             // It is better to put TLSLDAIX node before LoadOffsetToc node,
             // because LoadOffsetToc node can use clobbers r4/r5. Search for the
             // first paired LoadOffsetToc node within the same BB.
-            const PPCSubtarget &Subtarget =
-                MBB.getParent()->getSubtarget<PPCSubtarget>();
-            bool IsLargeModel =
-                Subtarget.getTargetMachine().getCodeModel() == CodeModel::Large;
             unsigned LDTocOp =
                 Is64Bit ? (IsLargeModel ? PPC::LDtocL : PPC::LDtoc)
                         : (IsLargeModel ? PPC::LWZtocL : PPC::LWZtoc);
@@ -197,12 +195,13 @@ namespace {
                 int MatchCount = 0;
                 for (MachineOperand &MO : UseIter->operands()) {
                   if (MO.isReg() && MO.isUse()) {
-                    if (RegInfo.hasOneDef(MO.getReg())) {
-                      if (RegInfo.getOneDef(MO.getReg())
+                    Register OffsetReg = MO.getReg();
+                    if (RegInfo.hasOneDef(OffsetReg)) {
+                      if (RegInfo.getOneDef(OffsetReg)
                               ->getParent()
                               ->getOpcode() == LDTocOp) {
                         LoadOffsetToc =
-                            RegInfo.getOneDef(MO.getReg())->getParent();
+                            RegInfo.getOneDef(OffsetReg)->getParent();
                         ++MatchCount;
                       }
                     } else {

>From 644783f84412023ef145beda9db02a28e1d31161 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Wed, 27 Sep 2023 21:11:49 -0400
Subject: [PATCH 12/31] [NFC] Add FIXME to highlight existing issue: The
 "_$TLSML" symbol did not lower through getTOCEntry().

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 01a366ea4dda8f..f0e513f71df104 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3422,10 +3422,10 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
     // variable offset, and a single module-handle TOC entry for the entire
     // file.
 
-    // We are not able to (1) create a GV node, and (2) call getTOCEntry for the
-    // module-handle due to the reason that the module-handle should not be
-    // materialized (i.e. there should be no symbol-table entry referring to the
-    // module-handle). Instead we will create reference to __TLSML[TC]@ml in
+    // FIXME: We are not able to (1) create a GV node, and (2) call getTOCEntry
+    // for the module-handle due to the reason that the module-handle should not
+    // be materialized (i.e. there should be no symbol-table entry referring to
+    // the module-handle). Instead we will create reference to __TLSML[TC]@ml in
     // PPCTLSDynamicCall when processing the TLSLD_AIX pseudo node.
     SDValue ModuleHandle = DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT);
     SDValue VariableOffsetTGA =

>From 2f48597d68e56609e417def5862894a895d0c5d6 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Wed, 25 Oct 2023 21:07:42 -0400
Subject: [PATCH 13/31] Address the following FIXME: The "_$TLSML" symbol did
 not lower through getTOCEntry().

The pseudo node "TLSLDAIX" now takes the _$TLSML GV node, and the
ppc-tls-dynamic-call pass is updated to fine tune the relative order
between the LoadOffset at toc node and the .__tls_get_mod node.
---
 llvm/lib/MC/XCOFFObjectWriter.cpp             |  14 +++
 .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp  |   6 +-
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     |  43 +++----
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  17 ++-
 llvm/lib/Target/PowerPC/PPCISelLowering.h     |   2 +-
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td      |   6 +-
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       |   8 +-
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 105 ++++++++----------
 .../test/CodeGen/PowerPC/aix-tls-gd-double.ll |  20 ++--
 llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll   |  20 ++--
 .../CodeGen/PowerPC/aix-tls-gd-longlong.ll    |  36 +++---
 .../CodeGen/PowerPC/aix-tls-local-dynamic.ll  |   2 +-
 .../CodeGen/PowerPC/aix-tls-xcoff-reloc.ll    |   1 -
 13 files changed, 137 insertions(+), 143 deletions(-)

diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index d46bbaf7576571..207c3e65757aa1 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -666,6 +666,14 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
                                          uint64_t &FixedValue) {
   auto getIndex = [this](const MCSymbol *Sym,
                          const MCSectionXCOFF *ContainingCsect) {
+    // Fixup relocation flag for AIX TLS local-dynamic mode.
+    if (Sym->getName().equals("_Renamed..5f24__TLSML[UA]")) {
+      for (auto Iter : SymbolIndexMap)
+        if (Iter.first->getName().equals("_Renamed..5f24__TLSML[TC]"))
+          return Iter.second;
+      llvm_unreachable("For AIX TLS local-dynamic mode: "
+                       "_Renamed..5f24__TLSML[TC] not found.");
+    }
     // If we could not find the symbol directly in SymbolIndexMap, this symbol
     // could either be a temporary symbol or an undefined symbol. In this case,
     // we would need to have the relocation reference its csect instead.
@@ -1202,6 +1210,9 @@ void XCOFFObjectWriter::writeSymbolTable(MCAssembler &Asm,
                      /*NumberOfAuxEntries=*/0);
 
   for (const auto &Csect : UndefinedCsects) {
+    // AIX does not need to emit for the _$TLSML symbol.
+    if (Csect.getSymbolTableName().equals(StringRef("_$TLSML")))
+      continue;
     writeSymbolEntryForControlSection(Csect, XCOFF::ReservedSectionNum::N_UNDEF,
                                       Csect.MCSec->getStorageClass());
   }
@@ -1420,6 +1431,9 @@ void XCOFFObjectWriter::assignAddressesAndIndices(MCAssembler &Asm,
 
   // Calculate indices for undefined symbols.
   for (auto &Csect : UndefinedCsects) {
+    // AIX does not need to emit for the _$TLSML symbol.
+    if (Csect.getSymbolTableName().equals(StringRef("_$TLSML")))
+      continue;
     Csect.Size = 0;
     Csect.Address = 0;
     Csect.SymbolTableIndex = SymbolTableIndex;
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 5b8f7f06619047..cc50a29d67a723 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -242,10 +242,12 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer {
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSIE ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLE ||
-          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD ||
-          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML)
+          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD)
         OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << "@"
            << MCSymbolRefExpr::getVariantKindName(Kind) << '\n';
+      else if (Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML)
+        OS << "\t.tc " << TCSym->getName() << "," << TCSym->getName() << "@"
+           << MCSymbolRefExpr::getVariantKindName(Kind) << '\n';
       else
         OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << '\n';
 
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 05309355a60119..f8f097fe38994e 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -731,8 +731,6 @@ static MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO,
     return AP.GetJTISymbol(MO.getIndex());
   case MachineOperand::MO_BlockAddress:
     return AP.GetBlockAddressSymbol(MO.getBlockAddress());
-  case MachineOperand::MO_ExternalSymbol:
-    return AP.OutContext.getOrCreateSymbol(MO.getSymbolName());
   default:
     llvm_unreachable("Unexpected operand type to get symbol.");
   }
@@ -766,17 +764,6 @@ getTOCEntryTypeForMO(const MachineOperand &MO) {
     llvm_unreachable("Unexpected operand type to get TOC type.");
   }
 }
-
-// FIXME: find alternative approach to get rid of this hack.
-// On AIX, TLS-local-dynamic requires that the symbol for the module handle must
-// have the name "_$TLSML". This symbol is used as one TOC symbol reference
-// itself with an ML relocation type, thus it has "[TC]" attached to its name.
-static inline bool isSpecialAIXSymbolTLSML(const MachineOperand &MO,
-                                           const bool IsAIX) {
-  return IsAIX && MO.isSymbol() &&
-         (std::strcmp(MO.getSymbolName(), "_$TLSML[TC]") == 0);
-}
-
 /// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
 /// the current output stream.
 ///
@@ -871,13 +858,15 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     if (Flag == PPCII::MO_TLSGD_FLAG || Flag == PPCII::MO_GOT_TLSGD_PCREL_FLAG)
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD;
     if (MO.getTargetFlags() & PPCII::MO_TLSLD_FLAG) {
-      if (isSpecialAIXSymbolTLSML(MO, IsAIX))
+      if (IsAIX && MO.isGlobal() &&
+          (MO.getGlobal()->getName().equals("_$TLSML")))
         // FIXME: Due to the size limit of MachineOperand::SubReg_TargetFlags,
         // hacked this flag which should have been named MO_TLSLDM_FLAG: on AIX
         // the ML relocation type is only valid for a reference to a TOC symbol
         // from the symbol itself, and right now its only user is the symbol
         // "_$TLSML". The symbol name is used to decide that R_TLSML is
         // expected.
+        // FIX this once #69695 committed.
         return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML;
       if (IsAIX)
         return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD;
@@ -1000,8 +989,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LWZ);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
-            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
            "Invalid operand for LWZtoc.");
 
     // Map the operand to its corresponding MCSymbol.
@@ -1090,8 +1078,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LD);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
-            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
            "Invalid operand!");
 
     // Map the operand to its corresponding MCSymbol.
@@ -1129,8 +1116,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::ADDIS);
 
     const MachineOperand &MO = MI->getOperand(2);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
-            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
            "Invalid operand for ADDIStocHA.");
 
     // Map the machine operand to its corresponding MCSymbol.
@@ -1163,8 +1149,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LWZ);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
-            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
            "Invalid operand for LWZtocL.");
 
     // Map the machine operand to its corresponding MCSymbol.
@@ -1196,8 +1181,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::ADDIS8);
 
     const MachineOperand &MO = MI->getOperand(2);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
-            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
            "Invalid operand for ADDIStocHA8!");
 
     const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
@@ -1207,8 +1191,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     const bool GlobalToc =
         MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal());
     if (GlobalToc || MO.isJTI() || MO.isBlockAddress() ||
-        (MO.isCPI() && TM.getCodeModel() == CodeModel::Large) ||
-        isSpecialAIXSymbolTLSML(MO, IsAIX))
+        (MO.isCPI() && TM.getCodeModel() == CodeModel::Large))
       MOSymbol = lookUpOrCreateTOCEntry(MOSymbol, getTOCEntryTypeForMO(MO), VK);
 
     VK = IsAIX ? MCSymbolRefExpr::VK_PPC_U : MCSymbolRefExpr::VK_PPC_TOC_HA;
@@ -1237,8 +1220,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     TmpInst.setOpcode(PPC::LD);
 
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress() ||
-            isSpecialAIXSymbolTLSML(MO, IsAIX)) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() ||
+            MO.isBlockAddress()) &&
            "Invalid operand for LDtocL!");
 
     LLVM_DEBUG(assert(
@@ -2216,6 +2199,10 @@ void PPCAIXAsmPrinter::emitLinkage(const GlobalValue *GV,
     }
   }
 
+  // Do not emit _$TLSML symbol.
+  if (GVSym->getName().equals(StringRef("_Renamed..5f24__TLSML[UA]")))
+    return;
+
   OutStreamer->emitXCOFFSymbolLinkageWithVisibility(GVSym, LinkageAttr,
                                                     VisibilityAttr);
 }
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index f0e513f71df104..a15057cd05e8a6 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3422,14 +3422,19 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
     // variable offset, and a single module-handle TOC entry for the entire
     // file.
 
-    // FIXME: We are not able to (1) create a GV node, and (2) call getTOCEntry
-    // for the module-handle due to the reason that the module-handle should not
-    // be materialized (i.e. there should be no symbol-table entry referring to
-    // the module-handle). Instead we will create reference to __TLSML[TC]@ml in
-    // PPCTLSDynamicCall when processing the TLSLD_AIX pseudo node.
-    SDValue ModuleHandle = DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT);
     SDValue VariableOffsetTGA =
         DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
+    Module *M = DAG.getMachineFunction().getFunction().getParent();
+    GlobalVariable *TLSGV =
+        dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
+            StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
+    assert(TLSGV && "Not able to create GV for _$TLSML.");
+    // FIXME: create MO_TLSML_FLAG once #69695 committed.
+    SDValue ModuleHandleTGA =
+        DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
+    SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
+    SDValue ModuleHandle =
+        DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
     SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
     return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
   }
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 4efa68e99a63b9..f03e59ccf78008 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -384,7 +384,7 @@ namespace llvm {
     /// generates a GET_TLS_MOD_AIX node which will be expanded into a call to
     /// .__tls_get_mod, and then adds the variable offset with the result from
     /// the call. This node is used in both 32-bit and 64-bit modes. The only
-    /// difference is register class.
+    /// difference is the register class.
     TLSLD_AIX,
 
     /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index b51af57b796f81..2949d58ab66479 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1602,9 +1602,9 @@ def TLSGDAIX8 :
                      "#TLSGDAIX8",
                      [(set i64:$rD,
                        (PPCTlsgdAIX i64:$offset, i64:$handle))]>;
-// This pseudo is expanded to load module-handle in X3, and the call to GETtlsMOD64AIX.
-def TLSLDAIX8 : PPCEmitTimePseudo<(outs g8rc:$rD), (ins),
-                     "#TLSLDAIX8", [(set i64:$rD, (PPCTlsldAIX))]>;
+// This pseudo is expanded to the call to GETtlsMOD64AIX.
+def TLSLDAIX8 : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$handle),
+                     "#TLSLDAIX8", [(set i64:$rD, (PPCTlsldAIX i64:$handle))]>;
 // Combined op for ADDItlsldL and GETtlsADDR, late expanded.  X3 and LR8
 // are true defines, while the rest of the Defs are clobbers.
 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index d47cb6f3ae4265..82da1a3c305983 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -220,7 +220,7 @@ def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR",
                                  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                                  SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>;
 def PPCTlsgdAIX     : SDNode<"PPCISD::TLSGD_AIX", SDTIntBinOp>;
-def PPCTlsldAIX     : SDNode<"PPCISD::TLSLD_AIX", SDTIntLeaf>;
+def PPCTlsldAIX     : SDNode<"PPCISD::TLSLD_AIX", SDTIntUnaryOp>;
 def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
 def PPCaddiTlsldL   : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
 def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
@@ -3300,9 +3300,9 @@ def TLSGDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$offset, gprc:$handl
                           "#TLSGDAIX",
                           [(set i32:$rD,
                             (PPCTlsgdAIX i32:$offset, i32:$handle))]>;
-// This pseudo is expanded to load the module-handle in R3, and the call to GETtlsMOD32AIX.
-def TLSLDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins),
-                          "#TLSLDAIX", [(set i32:$rD, (PPCTlsldAIX))]>;
+// This pseudo is expanded to the call to GETtlsMOD32AIX.
+def TLSLDAIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$handle),
+                          "#TLSLDAIX", [(set i32:$rD, (PPCTlsldAIX i32:$handle))]>;
 // LR is a true define, while the rest of the Defs are clobbers.  R3 is
 // explicitly defined when this op is created, so not mentioned here.
 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 8a839af1f2eb71..a2843ca0a350d8 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -57,9 +57,6 @@ namespace {
       bool IsPCREL = false;
       MachineFunction *MF = MBB.getParent();
       MachineRegisterInfo &RegInfo = MF->getRegInfo();
-      const TargetRegisterClass *GPRNoZero =
-          Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
-                  : &PPC::GPRC_and_GPRC_NOR0RegClass;
 
       for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
            I != IE;) {
@@ -97,7 +94,7 @@ namespace {
         Register InReg = PPC::NoRegister;
         Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
         Register GPR4 = Is64Bit ? PPC::X4 : PPC::R4;
-        if (!IsPCREL && !IsTLSTPRelMI && !IsTLSLDAIXMI)
+        if (!IsPCREL && !IsTLSTPRelMI)
           InReg = MI.getOperand(1).getReg();
         DebugLoc DL = MI.getDebugLoc();
 
@@ -164,84 +161,74 @@ namespace {
 
         if (IsAIX) {
           if (IsTLSLDAIXMI) {
-            // It is better to put TLSLDAIX node before LoadOffsetToc node,
-            // because LoadOffsetToc node can use clobbers r4/r5. Search for the
-            // first paired LoadOffsetToc node within the same BB.
+            // The relative order between the LoadOffset at toc node, and the
+            // ._tls_get_mod node is being tuned here. It is better to put the
+            // LoadOffset at toc node after the call, since the LoadOffset at toc node
+            // can use clobbers r4/r5. Search for the pattern of two Load at toc
+            // nodes, and then move the LoadOffset at toc node right before the
+            // node that uses the OutReg of the ._tls_get_mod node.
             unsigned LDTocOp =
                 Is64Bit ? (IsLargeModel ? PPC::LDtocL : PPC::LDtoc)
                         : (IsLargeModel ? PPC::LWZtocL : PPC::LWZtoc);
-            MachineBasicBlock::iterator Anchor = I;
             if (!RegInfo.use_empty(OutReg)) {
               std::set<MachineInstr *> Uses;
-              // Collect all instructions that use OutReg
-              for (MachineOperand &MO : RegInfo.use_operands(OutReg)) {
-                if (Uses.count(MO.getParent()))
-                  continue;
+              // Collect all instructions that use the OutReg.
+              for (MachineOperand &MO : RegInfo.use_operands(OutReg))
                 Uses.insert(MO.getParent());
-              }
-              // Find the first Add within current BB.
+              // Find the first user (e.g.: lwax/stfdx) within the current BB.
               MachineBasicBlock::iterator UseIter = MBB.begin();
-              for (MachineBasicBlock::iterator AE = MBB.end(); UseIter != AE;
+              for (MachineBasicBlock::iterator IE = MBB.end(); UseIter != IE;
                    ++UseIter)
                 if (Uses.count(&*UseIter))
                   break;
 
               if (UseIter != MBB.end()) {
-                // Get the instruction that defines the other used register
-                // operand of UseIter. The match pattern is that: UseIter has
-                // exactly one used-operand defined by LDTocOp
-                // (LDtocL/LDtoc/LWZtocL/LWZtoc).
-                MachineInstr *LoadOffsetToc = nullptr;
-                int MatchCount = 0;
-                for (MachineOperand &MO : UseIter->operands()) {
+                // Collect associated Load at toc nodes.
+                std::set<MachineInstr *> LoadFromTocs;
+                for (MachineOperand &MO : UseIter->operands())
                   if (MO.isReg() && MO.isUse()) {
-                    Register OffsetReg = MO.getReg();
-                    if (RegInfo.hasOneDef(OffsetReg)) {
-                      if (RegInfo.getOneDef(OffsetReg)
-                              ->getParent()
-                              ->getOpcode() == LDTocOp) {
-                        LoadOffsetToc =
-                            RegInfo.getOneDef(OffsetReg)->getParent();
-                        ++MatchCount;
-                      }
+                    if (RegInfo.hasOneDef(MO.getReg())) {
+                      MachineInstr *Temp =
+                          RegInfo.getOneDef(MO.getReg())->getParent();
+                      if (Temp == &MI && RegInfo.hasOneDef(InReg))
+                        Temp = RegInfo.getOneDef(InReg)->getParent();
+                      if (Temp->getOpcode() == LDTocOp)
+                        LoadFromTocs.insert(Temp);
                     } else {
                       // FIXME: analyze this scenario if there is one.
-                      MatchCount = 0;
+                      LoadFromTocs.clear();
                       break;
                     }
                   }
-                }
-                // Get the iterator.
-                if (MatchCount == 1 && LoadOffsetToc) {
-                  Anchor = MBB.begin();
-                  for (MachineBasicBlock::iterator AE = MBB.end(); Anchor != AE;
-                       ++Anchor)
-                    if (&*Anchor == LoadOffsetToc)
-                      break;
 
-                  if (Anchor == MBB.end())
-                    Anchor = I;
+                // Check the two Load at toc: one should be _$TLSML, and the other
+                // will be moved before the node that uses the OutReg of the
+                // ._tls_get_mod node.
+                if (LoadFromTocs.size() == 2) {
+                  MachineBasicBlock::iterator TLSMLIter = MBB.end();
+                  MachineBasicBlock::iterator OffsetIter = MBB.end();
+                  for (MachineBasicBlock::iterator I = MBB.begin(),
+                                                   IE = MBB.end();
+                       I != IE; ++I)
+                    if (LoadFromTocs.count(&*I)) {
+                      if (I->getOperand(1).isGlobal() &&
+                          I->getOperand(1).getGlobal()->getName().equals(
+                              "_$TLSML"))
+                        TLSMLIter = I;
+                      else
+                        OffsetIter = I;
+                    }
+                  if (TLSMLIter != MBB.end() && OffsetIter != MBB.end())
+                    OffsetIter->moveBefore(&*UseIter);
                 }
               }
             }
-
-            // Generate instructions to load module-handle.
-            Register ModuleHandleHReg;
-            if (IsLargeModel) {
-              ModuleHandleHReg = RegInfo.createVirtualRegister(GPRNoZero);
-              BuildMI(MBB, Anchor, DL,
-                      TII->get(Is64Bit ? PPC::ADDIStocHA8 : PPC::ADDIStocHA),
-                      ModuleHandleHReg)
-                  .addReg(Subtarget.getTOCPointerRegister())
-                  .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG);
-            }
-            BuildMI(MBB, Anchor, DL, TII->get(LDTocOp), GPR3)
-                .addExternalSymbol("_$TLSML[TC]", PPCII::MO_TLSLD_FLAG)
-                .addReg(IsLargeModel
-                            ? ModuleHandleHReg
-                            : Register(Subtarget.getTOCPointerRegister()));
+            // The module-handle is copied in r3. The copy is followed by
+            // GETtlsMOD32AIX/GETtlsMOD64AIX.
+            BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
+                .addReg(InReg);
             // The call to .__tls_get_mod.
-            BuildMI(MBB, Anchor, DL, TII->get(Opc2), GPR3).addReg(GPR3);
+            BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3);
           } else if (!IsTLSTPRelMI) {
             // The variable offset and region handle are copied in r4 and r3.
             // The copies are followed by GETtlsADDR32AIX/GETtlsADDR64AIX.
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
index 84ddb83bef457e..194273738c6499 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
@@ -156,8 +156,8 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
+; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) @"_$TLSML"
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
 ; SMALL32-NEXT:    stfdx 1, 3, 4
@@ -186,8 +186,8 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
+; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) @"_$TLSML"
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
 ; SMALL64-NEXT:    stfdx 1, 3, 4
@@ -200,8 +200,8 @@ define void @storesTIInit(double %Val) #0 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 6, L..C5 at u(2)
 ; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
@@ -452,13 +452,13 @@ define double @loadsTIInit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
+; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) @"_$TLSML"
 ; SMALL32-NEXT:    stw 0, 40(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL32-NEXT:    lwz 5, L..C8(2) # @GInit
 ; SMALL32-NEXT:    lfdx 0, 3, 4
-; SMALL32-NEXT:    lfd 1, 0(5)
+; SMALL32-NEXT:    lwz 3, L..C8(2) # @GInit
+; SMALL32-NEXT:    lfd 1, 0(3)
 ; SMALL32-NEXT:    fadd 1, 0, 1
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
@@ -489,13 +489,13 @@ define double @loadsTIInit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
+; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) @"_$TLSML"
 ; SMALL64-NEXT:    std 0, 64(1)
-; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) &"_$TLSML[TC]"
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
-; SMALL64-NEXT:    ld 5, L..C8(2) # @GInit
 ; SMALL64-NEXT:    lfdx 0, 3, 4
-; SMALL64-NEXT:    lfd 1, 0(5)
+; SMALL64-NEXT:    ld 3, L..C8(2) # @GInit
+; SMALL64-NEXT:    lfd 1, 0(3)
 ; SMALL64-NEXT:    fadd 1, 0, 1
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -506,8 +506,8 @@ define double @loadsTIInit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 6, L..C5 at u(2)
 ; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
index 3001a915da4851..bbb8e04b67b95e 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
@@ -163,9 +163,9 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    mr 6, 3
 ; SMALL32-NEXT:    lwz 3, L..C4(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2)
 ; SMALL32-NEXT:    stwx 6, 3, 4
@@ -195,9 +195,9 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    mr 6, 3
 ; SMALL64-NEXT:    ld 3, L..C4(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2)
 ; SMALL64-NEXT:    stwx 6, 3, 4
@@ -210,9 +210,9 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 7, L..C5 at u(2)
 ; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
@@ -467,13 +467,13 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    lwz 3, L..C4(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2)
-; SMALL32-NEXT:    lwz 5, L..C8(2)
 ; SMALL32-NEXT:    lwzx 3, 3, 4
-; SMALL32-NEXT:    lwz 4, 0(5)
+; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    lwz 4, 0(4)
 ; SMALL32-NEXT:    add 3, 4, 3
 ; SMALL32-NEXT:    addi 1, 1, 32
 ; SMALL32-NEXT:    lwz 0, 8(1)
@@ -504,13 +504,13 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    ld 3, L..C4(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2)
-; SMALL64-NEXT:    ld 5, L..C8(2)
 ; SMALL64-NEXT:    lwzx 3, 3, 4
-; SMALL64-NEXT:    lwz 4, 0(5)
+; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    lwz 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -521,8 +521,8 @@ define i32 @loadsTIUninit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 3, L..C4 at u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 6, L..C5 at u(2)
 ; LARGE64-NEXT:    ld 3, L..C4 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
index 136c62ff509f9c..ff087a2144488c 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
@@ -97,10 +97,10 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    mr 7, 3
-; SMALL32-NEXT:    mr 6, 4
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
+; SMALL32-NEXT:    stw 0, 40(1)
+; SMALL32-NEXT:    mr 6, 4
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C3(2)
 ; SMALL32-NEXT:    stwux 7, 3, 4
@@ -133,9 +133,9 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    mr 6, 3
 ; SMALL64-NEXT:    ld 3, L..C2(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C3(2)
 ; SMALL64-NEXT:    stdx 6, 3, 4
@@ -148,9 +148,9 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C2 at u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 7, L..C3 at u(2)
 ; LARGE64-NEXT:    ld 3, L..C2 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
@@ -171,10 +171,10 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    mr 7, 3
-; SMALL32-NEXT:    mr 6, 4
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
+; SMALL32-NEXT:    stw 0, 40(1)
+; SMALL32-NEXT:    mr 6, 4
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C4(2)
 ; SMALL32-NEXT:    stwux 7, 3, 4
@@ -207,9 +207,9 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    mr 6, 3
 ; SMALL64-NEXT:    ld 3, L..C2(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C4(2)
 ; SMALL64-NEXT:    stdx 6, 3, 4
@@ -222,9 +222,9 @@ define void @storesTIInit(i64 %Val) #0 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C2 at u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 7, L..C4 at u(2)
 ; LARGE64-NEXT:    ld 3, L..C2 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
@@ -407,8 +407,8 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C3(2)
 ; SMALL32-NEXT:    lwz 5, L..C7(2)
@@ -450,13 +450,13 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    ld 3, L..C2(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C3(2)
-; SMALL64-NEXT:    ld 5, L..C7(2)
 ; SMALL64-NEXT:    ldx 3, 3, 4
-; SMALL64-NEXT:    ld 4, 0(5)
+; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -467,8 +467,8 @@ define i64 @loadsTIUninit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 3, L..C2 at u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 6, L..C3 at u(2)
 ; LARGE64-NEXT:    ld 3, L..C2 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
@@ -495,8 +495,8 @@ define i64 @loadsTIInit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C4(2)
 ; SMALL32-NEXT:    lwz 5, L..C7(2)
@@ -538,13 +538,13 @@ define i64 @loadsTIInit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    ld 3, L..C2(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C4(2)
-; SMALL64-NEXT:    ld 5, L..C7(2)
 ; SMALL64-NEXT:    ldx 3, 3, 4
-; SMALL64-NEXT:    ld 4, 0(5)
+; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    ld 4, 0(4)
 ; SMALL64-NEXT:    add 3, 4, 3
 ; SMALL64-NEXT:    addi 1, 1, 48
 ; SMALL64-NEXT:    ld 0, 16(1)
@@ -555,8 +555,8 @@ define i64 @loadsTIInit() #1 {
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
 ; LARGE64-NEXT:    stdu 1, -48(1)
-; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 3, L..C2 at u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    addis 6, L..C4 at u(2)
 ; LARGE64-NEXT:    ld 3, L..C2 at l(3)
 ; LARGE64-NEXT:    bla .__tls_get_mod[PR]
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
index f7cd73ae609b35..c745e17636da11 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
@@ -327,7 +327,7 @@ define i32 @DedupTlsGetMod() #0 {
 ; WITHDUP-NEXT:   ld [[OffsetXR:[0-9]+]], [[X:L..C[0-9]+]](2)
 ; WITHDUP:        ld [[ModuleHandleR:3]], [[ModuleHandleL:L..C[0-9]+]](2)
 ; WITHDUP-NEXT:   bla .__tls_get_mod[PR]
-; WITHDUP-NEXT:   ld [[OffsetYR:[0-9]+]], [[Y:L..C[0-9]+]](2)
+; WITHDUP:        ld [[OffsetYR:[0-9]+]], [[Y:L..C[0-9]+]](2)
 ; WITHDUP-LABEL:  L..DedupTlsGetMod0:
 ;
 ; NODUP-LABEL:  DedupTlsGetMod:
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
index 8e26c6a2f3bbfd..0a6d57ee212fb6 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
@@ -586,7 +586,6 @@ entry:
 ; DIS:      00000000 (idx: [[#NFA+7]]) .storesTIUninit:
 ; DIS-NEXT:                                      mflr 0
 ; DIS-NEXT:                                      stwu 1, -32(1)
-; DIS-NEXT:                                      stw 0, 40(1)
 ; DIS-NEXT:                                      mr 6, 3
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 0(2)
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: [[#NFA+25]]) _$TLSML[TC]

>From 49666bde7331e2a62a1d3ea26b7a18e9df887b0b Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Thu, 26 Oct 2023 03:42:38 -0400
Subject: [PATCH 14/31] Address comment: use == to compare StringRef names

---
 llvm/lib/MC/XCOFFObjectWriter.cpp             | 8 ++++----
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     | 5 ++---
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 3 +--
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index 207c3e65757aa1..41bb312946b89c 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -667,9 +667,9 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
   auto getIndex = [this](const MCSymbol *Sym,
                          const MCSectionXCOFF *ContainingCsect) {
     // Fixup relocation flag for AIX TLS local-dynamic mode.
-    if (Sym->getName().equals("_Renamed..5f24__TLSML[UA]")) {
+    if (Sym->getName() == "_Renamed..5f24__TLSML[UA]") {
       for (auto Iter : SymbolIndexMap)
-        if (Iter.first->getName().equals("_Renamed..5f24__TLSML[TC]"))
+        if (Iter.first->getName() == "_Renamed..5f24__TLSML[TC]")
           return Iter.second;
       llvm_unreachable("For AIX TLS local-dynamic mode: "
                        "_Renamed..5f24__TLSML[TC] not found.");
@@ -1211,7 +1211,7 @@ void XCOFFObjectWriter::writeSymbolTable(MCAssembler &Asm,
 
   for (const auto &Csect : UndefinedCsects) {
     // AIX does not need to emit for the _$TLSML symbol.
-    if (Csect.getSymbolTableName().equals(StringRef("_$TLSML")))
+    if (Csect.getSymbolTableName() == "_$TLSML")
       continue;
     writeSymbolEntryForControlSection(Csect, XCOFF::ReservedSectionNum::N_UNDEF,
                                       Csect.MCSec->getStorageClass());
@@ -1432,7 +1432,7 @@ void XCOFFObjectWriter::assignAddressesAndIndices(MCAssembler &Asm,
   // Calculate indices for undefined symbols.
   for (auto &Csect : UndefinedCsects) {
     // AIX does not need to emit for the _$TLSML symbol.
-    if (Csect.getSymbolTableName().equals(StringRef("_$TLSML")))
+    if (Csect.getSymbolTableName() == "_$TLSML")
       continue;
     Csect.Size = 0;
     Csect.Address = 0;
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index f8f097fe38994e..87fc30279bcd36 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -858,8 +858,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     if (Flag == PPCII::MO_TLSGD_FLAG || Flag == PPCII::MO_GOT_TLSGD_PCREL_FLAG)
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD;
     if (MO.getTargetFlags() & PPCII::MO_TLSLD_FLAG) {
-      if (IsAIX && MO.isGlobal() &&
-          (MO.getGlobal()->getName().equals("_$TLSML")))
+      if (IsAIX && MO.isGlobal() && MO.getGlobal()->getName() == "_$TLSML")
         // FIXME: Due to the size limit of MachineOperand::SubReg_TargetFlags,
         // hacked this flag which should have been named MO_TLSLDM_FLAG: on AIX
         // the ML relocation type is only valid for a reference to a TOC symbol
@@ -2200,7 +2199,7 @@ void PPCAIXAsmPrinter::emitLinkage(const GlobalValue *GV,
   }
 
   // Do not emit _$TLSML symbol.
-  if (GVSym->getName().equals(StringRef("_Renamed..5f24__TLSML[UA]")))
+  if (GVSym->getName() == "_Renamed..5f24__TLSML[UA]")
     return;
 
   OutStreamer->emitXCOFFSymbolLinkageWithVisibility(GVSym, LinkageAttr,
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index a2843ca0a350d8..452a7a2dcf4168 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -212,8 +212,7 @@ namespace {
                        I != IE; ++I)
                     if (LoadFromTocs.count(&*I)) {
                       if (I->getOperand(1).isGlobal() &&
-                          I->getOperand(1).getGlobal()->getName().equals(
-                              "_$TLSML"))
+                          I->getOperand(1).getGlobal()->getName() == "_$TLSML")
                         TLSMLIter = I;
                       else
                         OffsetIter = I;

>From 696a337df65303a4c1004990b4dc1000bd79f0ff Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Fri, 27 Oct 2023 10:34:55 -0400
Subject: [PATCH 15/31] Correct SMC setting for the _$TLSML symbol

---
 llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 7 +++++++
 llvm/lib/MC/XCOFFObjectWriter.cpp                 | 8 --------
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp         | 2 +-
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 3cf59bcd09720a..835bc03150495e 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -2403,6 +2403,13 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForExternalReference(
   SmallString<128> Name;
   getNameWithPrefix(Name, GO, TM);
 
+  // AIX TLS local-dynamic requires the setting for the specific symbol name.
+  if (GO->hasName() && GO->getName() == "_$TLSML") {
+    return getContext().getXCOFFSection(
+        Name, SectionKind::getData(),
+        XCOFF::CsectProperties(XCOFF::XMC_TC, XCOFF::XTY_SD));
+  }
+
   XCOFF::StorageMappingClass SMC =
       isa<Function>(GO) ? XCOFF::XMC_DS : XCOFF::XMC_UA;
   if (GO->isThreadLocal())
diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index 41bb312946b89c..f483e3d5fecad8 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -666,14 +666,6 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
                                          uint64_t &FixedValue) {
   auto getIndex = [this](const MCSymbol *Sym,
                          const MCSectionXCOFF *ContainingCsect) {
-    // Fixup relocation flag for AIX TLS local-dynamic mode.
-    if (Sym->getName() == "_Renamed..5f24__TLSML[UA]") {
-      for (auto Iter : SymbolIndexMap)
-        if (Iter.first->getName() == "_Renamed..5f24__TLSML[TC]")
-          return Iter.second;
-      llvm_unreachable("For AIX TLS local-dynamic mode: "
-                       "_Renamed..5f24__TLSML[TC] not found.");
-    }
     // If we could not find the symbol directly in SymbolIndexMap, this symbol
     // could either be a temporary symbol or an undefined symbol. In this case,
     // we would need to have the relocation reference its csect instead.
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 87fc30279bcd36..cbff54c3a9e033 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -2199,7 +2199,7 @@ void PPCAIXAsmPrinter::emitLinkage(const GlobalValue *GV,
   }
 
   // Do not emit _$TLSML symbol.
-  if (GVSym->getName() == "_Renamed..5f24__TLSML[UA]")
+  if (GVSym->getName() == "_Renamed..5f24__TLSML[TC]")
     return;
 
   OutStreamer->emitXCOFFSymbolLinkageWithVisibility(GVSym, LinkageAttr,

>From fd03235e23069b10e943bb6651ebca662bea0cc7 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Fri, 27 Oct 2023 10:52:51 -0400
Subject: [PATCH 16/31] Simplify and add check

---
 llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 6 ++----
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp                | 3 ++-
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index cc50a29d67a723..5b8f7f06619047 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -242,12 +242,10 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer {
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSIE ||
           Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLE ||
-          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD)
+          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD ||
+          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML)
         OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << "@"
            << MCSymbolRefExpr::getVariantKindName(Kind) << '\n';
-      else if (Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML)
-        OS << "\t.tc " << TCSym->getName() << "," << TCSym->getName() << "@"
-           << MCSymbolRefExpr::getVariantKindName(Kind) << '\n';
       else
         OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << '\n';
 
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index cbff54c3a9e033..daa4f5cc86911a 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -858,7 +858,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     if (Flag == PPCII::MO_TLSGD_FLAG || Flag == PPCII::MO_GOT_TLSGD_PCREL_FLAG)
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD;
     if (MO.getTargetFlags() & PPCII::MO_TLSLD_FLAG) {
-      if (IsAIX && MO.isGlobal() && MO.getGlobal()->getName() == "_$TLSML")
+      if (IsAIX && MO.isGlobal() && MO.getGlobal()->hasName() &&
+          MO.getGlobal()->getName() == "_$TLSML")
         // FIXME: Due to the size limit of MachineOperand::SubReg_TargetFlags,
         // hacked this flag which should have been named MO_TLSLDM_FLAG: on AIX
         // the ML relocation type is only valid for a reference to a TOC symbol

>From 3eb604e5b4b171388515ad81b971dc70f14b662e Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Fri, 27 Oct 2023 11:01:32 -0400
Subject: [PATCH 17/31] misc

---
 llvm/lib/MC/XCOFFObjectWriter.cpp         | 4 ++--
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index f483e3d5fecad8..89d1b667437032 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -1202,7 +1202,7 @@ void XCOFFObjectWriter::writeSymbolTable(MCAssembler &Asm,
                      /*NumberOfAuxEntries=*/0);
 
   for (const auto &Csect : UndefinedCsects) {
-    // AIX does not need to emit for the _$TLSML symbol.
+    // AIX does not need to emit the _$TLSML symbol.
     if (Csect.getSymbolTableName() == "_$TLSML")
       continue;
     writeSymbolEntryForControlSection(Csect, XCOFF::ReservedSectionNum::N_UNDEF,
@@ -1423,7 +1423,7 @@ void XCOFFObjectWriter::assignAddressesAndIndices(MCAssembler &Asm,
 
   // Calculate indices for undefined symbols.
   for (auto &Csect : UndefinedCsects) {
-    // AIX does not need to emit for the _$TLSML symbol.
+    // AIX does not need to emit the _$TLSML symbol.
     if (Csect.getSymbolTableName() == "_$TLSML")
       continue;
     Csect.Size = 0;
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index daa4f5cc86911a..d8f2c9bb6ed45e 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -2199,7 +2199,7 @@ void PPCAIXAsmPrinter::emitLinkage(const GlobalValue *GV,
     }
   }
 
-  // Do not emit _$TLSML symbol.
+  // Do not emit the _$TLSML symbol.
   if (GVSym->getName() == "_Renamed..5f24__TLSML[TC]")
     return;
 

>From aa7609ca937dd5d3b2c80133411bc3c03d32a8ee Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Mon, 30 Oct 2023 04:44:54 -0400
Subject: [PATCH 18/31] Simplify logic by move the XMC_TC setting for the
 _$TLSML symbol into getSectionForTOCEntry()

---
 llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 13 ++++++++-----
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp         |  9 ---------
 2 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 835bc03150495e..acb0eeb2ee9683 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -2667,13 +2667,16 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry(
   // the chance of needing -bbigtoc is decreased. Also, the toc-entry for
   // EH info is never referenced directly using instructions so it can be
   // allocated with TE storage-mapping class.
+  // The "_$TLSML" symbol for TLS local-dynamic mode requires XMC_TC.
   return getContext().getXCOFFSection(
       cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(), SectionKind::getData(),
-      XCOFF::CsectProperties((TM.getCodeModel() == CodeModel::Large ||
-                              cast<MCSymbolXCOFF>(Sym)->isEHInfo())
-                                 ? XCOFF::XMC_TE
-                                 : XCOFF::XMC_TC,
-                             XCOFF::XTY_SD));
+      XCOFF::CsectProperties(
+          ((TM.getCodeModel() == CodeModel::Large &&
+            cast<MCSymbolXCOFF>(Sym)->getSymbolTableName() != "_$TLSML") ||
+           cast<MCSymbolXCOFF>(Sym)->isEHInfo())
+              ? XCOFF::XMC_TE
+              : XCOFF::XMC_TC,
+          XCOFF::XTY_SD));
 }
 
 MCSection *TargetLoweringObjectFileXCOFF::getSectionForLSDA(
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index d8f2c9bb6ed45e..fdab52afe2d81d 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -2879,15 +2879,6 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
       MCSymbol *S = OutContext.getOrCreateSymbol(Name);
       TCEntry = cast<MCSectionXCOFF>(
           getObjFileLowering().getSectionForTOCEntry(S, TM));
-    } else if (I.first.second ==
-               MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML) {
-      // AIX assembler expects the TC storage-mapping class for the "_$TLSML"
-      // symbol.
-      MCSection *MCSect = getObjFileLowering().getContext().getXCOFFSection(
-          cast<MCSymbolXCOFF>(I.first.first)->getSymbolTableName(),
-          SectionKind::getData(),
-          XCOFF::CsectProperties(XCOFF::XMC_TC, XCOFF::XTY_SD));
-      TCEntry = cast<MCSectionXCOFF>(MCSect);
     } else {
       TCEntry = cast<MCSectionXCOFF>(
           getObjFileLowering().getSectionForTOCEntry(I.first.first, TM));

>From bb82150c01484705c20f8715cfd6ba94e1f1c8fc Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Mon, 30 Oct 2023 04:50:47 -0400
Subject: [PATCH 19/31] Add check to make sure GV's name is defined before
 access

---
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 452a7a2dcf4168..93701d340f8001 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -212,6 +212,7 @@ namespace {
                        I != IE; ++I)
                     if (LoadFromTocs.count(&*I)) {
                       if (I->getOperand(1).isGlobal() &&
+                          I->getOperand(1).getGlobal()->hasName() &&
                           I->getOperand(1).getGlobal()->getName() == "_$TLSML")
                         TLSMLIter = I;
                       else

>From 335186cf8eecd8d9e189a474c857cfa05a3a828f Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Mon, 30 Oct 2023 20:40:27 -0400
Subject: [PATCH 20/31] update

---
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp   | 1 -
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 1 -
 2 files changed, 2 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index fdab52afe2d81d..063a3b88ae1d19 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -866,7 +866,6 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
         // from the symbol itself, and right now its only user is the symbol
         // "_$TLSML". The symbol name is used to decide that R_TLSML is
         // expected.
-        // FIX this once #69695 committed.
         return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML;
       if (IsAIX)
         return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index a15057cd05e8a6..e0caf73601972b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3429,7 +3429,6 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
         dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
             StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
     assert(TLSGV && "Not able to create GV for _$TLSML.");
-    // FIXME: create MO_TLSML_FLAG once #69695 committed.
     SDValue ModuleHandleTGA =
         DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
     SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);

>From df99beba80687844d31cfa1a6e1503e638680416 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Thu, 7 Dec 2023 08:59:06 -0500
Subject: [PATCH 21/31] Apply target flag change && update according to
 comments

---
 .../CodeGen/TargetLoweringObjectFileImpl.cpp  |  3 ++-
 .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp  |  2 +-
 llvm/lib/Target/PowerPC/PPC.h                 |  6 ++++++
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     | 19 +++++--------------
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  2 +-
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp      |  1 +
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 12 ++++++------
 .../test/CodeGen/PowerPC/aix-tls-gd-double.ll |  8 ++++----
 8 files changed, 26 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index acb0eeb2ee9683..550358fda1eb04 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -2403,7 +2403,8 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForExternalReference(
   SmallString<128> Name;
   getNameWithPrefix(Name, GO, TM);
 
-  // AIX TLS local-dynamic requires the setting for the specific symbol name.
+  // AIX TLS local-dynamic does not need the external reference for the
+  // "_$TLSML" symbol.
   if (GO->hasName() && GO->getName() == "_$TLSML") {
     return getContext().getXCOFFSection(
         Name, SectionKind::getData(),
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 5b8f7f06619047..b849b7be7b7be8 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -234,7 +234,7 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer {
       // On AIX, we have TLS variable offsets (symbol@({gd|ie|le|ld}) depending
       // on the TLS access method (or model). For the general-dynamic access
       // method, we also have region handle (symbol at m) for each variable. For
-      // local-dynamic, there is a module handle (__TLSML[TC]@ml) for all
+      // local-dynamic, there is a module handle (_$TLSML[TC]@ml) for all
       // variables. Finally for local-exec and initial-exec, we have a thread
       // pointer, in r13 for 64-bit mode and returned by .__get_tpointer for
       // 32-bit mode.
diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
index 3d9ea560819385..b625af775ec0a1 100644
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -139,6 +139,12 @@ class ModulePass;
     /// and Local Exec models.
     MO_TPREL_FLAG,
 
+    /// MO_TLSLDM_FLAG - on AIX the ML relocation type is only valid for a
+    /// reference to a TOC symbol from the symbol itself, and right now its only
+    /// user is the symbol "_$TLSML". The symbol name is used to decide that
+    /// R_TLSML is expected.
+    MO_TLSLDM_FLAG,
+
     /// MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to
     /// TLS Local Dynamic model.
     MO_TLSLD_FLAG,
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 063a3b88ae1d19..14b5b3f13beaa5 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -857,19 +857,10 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM;
     if (Flag == PPCII::MO_TLSGD_FLAG || Flag == PPCII::MO_GOT_TLSGD_PCREL_FLAG)
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD;
-    if (MO.getTargetFlags() & PPCII::MO_TLSLD_FLAG) {
-      if (IsAIX && MO.isGlobal() && MO.getGlobal()->hasName() &&
-          MO.getGlobal()->getName() == "_$TLSML")
-        // FIXME: Due to the size limit of MachineOperand::SubReg_TargetFlags,
-        // hacked this flag which should have been named MO_TLSLDM_FLAG: on AIX
-        // the ML relocation type is only valid for a reference to a TOC symbol
-        // from the symbol itself, and right now its only user is the symbol
-        // "_$TLSML". The symbol name is used to decide that R_TLSML is
-        // expected.
-        return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML;
-      if (IsAIX)
-        return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD;
-    }
+    if (Flag == PPCII::MO_TLSLD_FLAG && IsAIX)
+      return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD;
+    if (Flag == PPCII::MO_TLSLDM_FLAG && IsAIX)
+      return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSML;
     return MCSymbolRefExpr::VariantKind::VK_None;
   };
 
@@ -1384,7 +1375,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
   case PPC::GETtlsMOD64AIX:
     // Transform: %r3 = GETtlsMODNNAIX %r3 (for NN == 32/64).
     // Into: BLA .__tls_get_mod()
-    // Input parameter is a module handle (__TLSML[TC]@ml) for all variables.
+    // Input parameter is a module handle (_$TLSML[TC]@ml) for all variables.
   case PPC::GETtlsADDR:
     // Transform: %x3 = GETtlsADDR %x3, @sym
     // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsgd)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index e0caf73601972b..afbb6df193fd15 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3430,7 +3430,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
             StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
     assert(TLSGV && "Not able to create GV for _$TLSML.");
     SDValue ModuleHandleTGA =
-        DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
+        DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
     SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
     SDValue ModuleHandle =
         DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 68cc76a98ff839..1c610b269d32d8 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2965,6 +2965,7 @@ PPCInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
       {MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"},
       {MO_TLSGD_FLAG, "ppc-tlsgd"},
       {MO_TPREL_FLAG, "ppc-tprel"},
+      {MO_TLSLDM_FLAG, "ppc-tlsldm"},
       {MO_TLSLD_FLAG, "ppc-tlsld"},
       {MO_TLSGDM_FLAG, "ppc-tlsgdm"},
       {MO_GOT_TLSGD_PCREL_FLAG, "ppc-got-tlsgd-pcrel"},
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 93701d340f8001..dbe8676e62a1ac 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -162,11 +162,11 @@ namespace {
         if (IsAIX) {
           if (IsTLSLDAIXMI) {
             // The relative order between the LoadOffset at toc node, and the
-            // ._tls_get_mod node is being tuned here. It is better to put the
+            // .__tls_get_mod node is being tuned here. It is better to put the
             // LoadOffset at toc node after the call, since the LoadOffset at toc node
             // can use clobbers r4/r5. Search for the pattern of two Load at toc
             // nodes, and then move the LoadOffset at toc node right before the
-            // node that uses the OutReg of the ._tls_get_mod node.
+            // node that uses the OutReg of the .__tls_get_mod node.
             unsigned LDTocOp =
                 Is64Bit ? (IsLargeModel ? PPC::LDtocL : PPC::LDtoc)
                         : (IsLargeModel ? PPC::LWZtocL : PPC::LWZtoc);
@@ -203,7 +203,7 @@ namespace {
 
                 // Check the two Load at toc: one should be _$TLSML, and the other
                 // will be moved before the node that uses the OutReg of the
-                // ._tls_get_mod node.
+                // .__tls_get_mod node.
                 if (LoadFromTocs.size() == 2) {
                   MachineBasicBlock::iterator TLSMLIter = MBB.end();
                   MachineBasicBlock::iterator OffsetIter = MBB.end();
@@ -211,9 +211,9 @@ namespace {
                                                    IE = MBB.end();
                        I != IE; ++I)
                     if (LoadFromTocs.count(&*I)) {
-                      if (I->getOperand(1).isGlobal() &&
-                          I->getOperand(1).getGlobal()->hasName() &&
-                          I->getOperand(1).getGlobal()->getName() == "_$TLSML")
+                      MachineOperand MO = I->getOperand(1);
+                      if (MO.isGlobal() && MO.getGlobal()->hasName() &&
+                          MO.getGlobal()->getName() == "_$TLSML")
                         TLSMLIter = I;
                       else
                         OffsetIter = I;
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
index 194273738c6499..ae41b6b1301064 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
@@ -156,7 +156,7 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) @"_$TLSML"
+; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
@@ -186,7 +186,7 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) @"_$TLSML"
+; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
@@ -452,7 +452,7 @@ define double @loadsTIInit() #1 {
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsld) @"_$TLSML"
+; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit
@@ -489,7 +489,7 @@ define double @loadsTIInit() #1 {
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsld) @"_$TLSML"
+; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsld) @TIInit

>From 368e3e8ac4fae6072db7968fd581b38b56795266 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Mon, 11 Dec 2023 00:59:54 -0500
Subject: [PATCH 22/31] Address comment: Add flag check before string compare.

---
 llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 3 ++-
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp         | 3 ++-
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp       | 1 +
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 550358fda1eb04..160beb1f1480d7 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -2405,7 +2405,8 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForExternalReference(
 
   // AIX TLS local-dynamic does not need the external reference for the
   // "_$TLSML" symbol.
-  if (GO->hasName() && GO->getName() == "_$TLSML") {
+  if (GO->getThreadLocalMode() == GlobalVariable::LocalDynamicTLSModel &&
+      GO->hasName() && GO->getName() == "_$TLSML") {
     return getContext().getXCOFFSection(
         Name, SectionKind::getData(),
         XCOFF::CsectProperties(XCOFF::XMC_TC, XCOFF::XTY_SD));
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 14b5b3f13beaa5..a74bcb653ed067 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -2190,7 +2190,8 @@ void PPCAIXAsmPrinter::emitLinkage(const GlobalValue *GV,
   }
 
   // Do not emit the _$TLSML symbol.
-  if (GVSym->getName() == "_Renamed..5f24__TLSML[TC]")
+  if (GV->getThreadLocalMode() == GlobalVariable::LocalDynamicTLSModel &&
+      GV->hasName() && GV->getName() == "_$TLSML")
     return;
 
   OutStreamer->emitXCOFFSymbolLinkageWithVisibility(GVSym, LinkageAttr,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index afbb6df193fd15..68d72320fb54f8 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3428,6 +3428,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
     GlobalVariable *TLSGV =
         dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
             StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
+    TLSGV->setThreadLocalMode(GlobalVariable::LocalDynamicTLSModel);
     assert(TLSGV && "Not able to create GV for _$TLSML.");
     SDValue ModuleHandleTGA =
         DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);

>From 6a2bfd851efad58b4a0ab84025547369b623ebcb Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Tue, 12 Dec 2023 01:54:55 -0500
Subject: [PATCH 23/31] Update comment.

---
 llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 160beb1f1480d7..58b475368fe789 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -2669,7 +2669,8 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry(
   // the chance of needing -bbigtoc is decreased. Also, the toc-entry for
   // EH info is never referenced directly using instructions so it can be
   // allocated with TE storage-mapping class.
-  // The "_$TLSML" symbol for TLS local-dynamic mode requires XMC_TC.
+  // The "_$TLSML" symbol for TLS local-dynamic mode requires XMC_TC, otherwise
+  // the AIX assembler will complain.
   return getContext().getXCOFFSection(
       cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(), SectionKind::getData(),
       XCOFF::CsectProperties(

>From 9b0b1d2092f01c71d8c4d08ba88cac807ffd4342 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Mon, 1 Jan 2024 21:55:30 -0500
Subject: [PATCH 24/31] [NFC] Add comments

---
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index dbe8676e62a1ac..7fc5ea2cacbfab 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -119,9 +119,13 @@ namespace {
           Opc2 = PPC::GETtlsldADDR32;
           break;
         case PPC::TLSLDAIX:
+          // TLSLDAIX is expanded to one copy and GET_TLS_MOD, so we only set
+          // Opc2 here.
           Opc2 = PPC::GETtlsMOD32AIX;
           break;
         case PPC::TLSLDAIX8:
+          // TLSLDAIX8 is expanded to one copy and GET_TLS_MOD, so we only set
+          // Opc2 here.
           Opc2 = PPC::GETtlsMOD64AIX;
           break;
         case PPC::TLSGDAIX8:
@@ -182,14 +186,20 @@ namespace {
                 if (Uses.count(&*UseIter))
                   break;
 
+              // Got some work to do when UseIter pointing to valid node. Check
+              // the pattern and do the movement if match.
               if (UseIter != MBB.end()) {
-                // Collect associated Load at toc nodes.
+                // Collect associated Load at toc nodes. Use hasOneDef to guard
+                // against unexpected scenarios.
                 std::set<MachineInstr *> LoadFromTocs;
                 for (MachineOperand &MO : UseIter->operands())
                   if (MO.isReg() && MO.isUse()) {
                     if (RegInfo.hasOneDef(MO.getReg())) {
                       MachineInstr *Temp =
                           RegInfo.getOneDef(MO.getReg())->getParent();
+                      // For current TLSLDAIX node, get the Load at toc node for
+                      // the InReg. Otherwise, Temp probably pointed to the
+                      // LoadOffset at toc node that we would like to move.
                       if (Temp == &MI && RegInfo.hasOneDef(InReg))
                         Temp = RegInfo.getOneDef(InReg)->getParent();
                       if (Temp->getOpcode() == LDTocOp)
@@ -207,6 +217,9 @@ namespace {
                 if (LoadFromTocs.size() == 2) {
                   MachineBasicBlock::iterator TLSMLIter = MBB.end();
                   MachineBasicBlock::iterator OffsetIter = MBB.end();
+                  // Make sure the two LoadFromTocs are within current BB, and
+                  // one of them from the "_$TLSML" pseudo symbol, while the
+                  // other from the variable.
                   for (MachineBasicBlock::iterator I = MBB.begin(),
                                                    IE = MBB.end();
                        I != IE; ++I)
@@ -218,6 +231,8 @@ namespace {
                       else
                         OffsetIter = I;
                     }
+                  // If both two iterators are valid, we should have identified
+                  // the scenario, and do the movement.
                   if (TLSMLIter != MBB.end() && OffsetIter != MBB.end())
                     OffsetIter->moveBefore(&*UseIter);
                 }

>From 62c1bfec6860080930e2951a00199bb1ce3f543f Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Wed, 17 Jan 2024 03:40:40 -0500
Subject: [PATCH 25/31] Remove dummy logic: Since
 `getSectionForExternalReference` explicitly set XCOFF::XTY_SD for the
 `_$TLSML` symbol reference, and that only CSType `XCOFF::XTY_ER` symbol
 references are added into `UndefinedCsects`, the checks inside
 `executePostLayoutBinding` are redundant.

Update test case to make sure external reference to the `_$TLSML` symbol
is not observed.
---
 llvm/lib/MC/XCOFFObjectWriter.cpp                  | 6 ------
 llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll | 2 ++
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index 89d1b667437032..d46bbaf7576571 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -1202,9 +1202,6 @@ void XCOFFObjectWriter::writeSymbolTable(MCAssembler &Asm,
                      /*NumberOfAuxEntries=*/0);
 
   for (const auto &Csect : UndefinedCsects) {
-    // AIX does not need to emit the _$TLSML symbol.
-    if (Csect.getSymbolTableName() == "_$TLSML")
-      continue;
     writeSymbolEntryForControlSection(Csect, XCOFF::ReservedSectionNum::N_UNDEF,
                                       Csect.MCSec->getStorageClass());
   }
@@ -1423,9 +1420,6 @@ void XCOFFObjectWriter::assignAddressesAndIndices(MCAssembler &Asm,
 
   // Calculate indices for undefined symbols.
   for (auto &Csect : UndefinedCsects) {
-    // AIX does not need to emit the _$TLSML symbol.
-    if (Csect.getSymbolTableName() == "_$TLSML")
-      continue;
     Csect.Size = 0;
     Csect.Address = 0;
     Csect.SymbolTableIndex = SymbolTableIndex;
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
index c745e17636da11..22349337f18908 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-local-dynamic.ll
@@ -351,6 +351,8 @@ entry:
 
 ; SMALL:          .extern .__tls_get_mod[PR]
 ; LARGE:          .extern .__tls_get_mod[PR]
+; SMALL-NOT:      .extern _Renamed..5f24__TLSML[TC]
+; LARGE-NOT:      .extern _Renamed..5f24__TLSML[TC]
 
 ; SMALL:        [[ModuleHandleL]]:
 ; SMALL-NEXT:   .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml

>From a87fe3ec4ee947a92e1885ca3c33f68a742a4e82 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Wed, 7 Feb 2024 08:40:58 -0500
Subject: [PATCH 26/31] [NFC] Update comments.

---
 llvm/lib/Target/PowerPC/PPC.h                 |  2 +-
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     |  9 ++---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  4 ++-
 llvm/lib/Target/PowerPC/PPCISelLowering.h     |  9 +++--
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 35 +++++++++++--------
 5 files changed, 33 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
index b625af775ec0a1..eb8886dcc9075c 100644
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -142,7 +142,7 @@ class ModulePass;
     /// MO_TLSLDM_FLAG - on AIX the ML relocation type is only valid for a
     /// reference to a TOC symbol from the symbol itself, and right now its only
     /// user is the symbol "_$TLSML". The symbol name is used to decide that
-    /// R_TLSML is expected.
+    /// the R_TLSML relocation is expected.
     MO_TLSLDM_FLAG,
 
     /// MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index a74bcb653ed067..9396ca22dacf86 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -857,6 +857,9 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM;
     if (Flag == PPCII::MO_TLSGD_FLAG || Flag == PPCII::MO_GOT_TLSGD_PCREL_FLAG)
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD;
+    // For local-dynamic TLS access on AIX, we have one TOC entry for the symbol
+    // (the variable offset) and one shared TOC entry for the module handle.
+    // They are differentiated by MO_TLSLD_FLAG and MO_TLSLDM_FLAG.
     if (Flag == PPCII::MO_TLSLD_FLAG && IsAIX)
       return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD;
     if (Flag == PPCII::MO_TLSLDM_FLAG && IsAIX)
@@ -3010,13 +3013,11 @@ void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) {
   }
   case PPC::GETtlsMOD32AIX:
   case PPC::GETtlsMOD64AIX:
-    // A reference to .__tls_get_mod is unknown to the assembler so we need to
-    // emit an external symbol reference.
   case PPC::GETtlsTpointer32AIX:
   case PPC::GETtlsADDR64AIX:
   case PPC::GETtlsADDR32AIX: {
-    // A reference to .__tls_get_addr/.__get_tpointer is unknown to the
-    // assembler so we need to emit an external symbol reference.
+    // A reference to .__tls_get_mod/.__tls_get_addr/.__get_tpointer is unknown
+    // to the assembler so we need to emit an external symbol reference.
     MCSymbol *TlsGetAddr =
         createMCSymbolForTlsGetAddr(OutContext, MI->getOpcode());
     ExtSymSDNodeSymbols.insert(TlsGetAddr);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 68d72320fb54f8..8206e90e6f8f8b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3424,6 +3424,8 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
 
     SDValue VariableOffsetTGA =
         DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
+    SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
+
     Module *M = DAG.getMachineFunction().getFunction().getParent();
     GlobalVariable *TLSGV =
         dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
@@ -3435,7 +3437,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
     SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
     SDValue ModuleHandle =
         DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
-    SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
+
     return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
   }
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index f03e59ccf78008..b64af2ff6c0bfe 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -379,12 +379,11 @@ namespace llvm {
     /// produces a call to .__tls_get_mod(_$TLSML\@ml).
     GET_TLS_MOD_AIX,
 
-    /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(variable offset)
-    /// Op that internally creates a TOC entry for the "_$TLSML" symbol, and
+    /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle)
+    /// Op that with single input of module handle TOC entry in R3, and
     /// generates a GET_TLS_MOD_AIX node which will be expanded into a call to
-    /// .__tls_get_mod, and then adds the variable offset with the result from
-    /// the call. This node is used in both 32-bit and 64-bit modes. The only
-    /// difference is the register class.
+    /// .__tls_get_mod. This node is used in both 32-bit and 64-bit modes. The
+    /// only difference is the register class.
     TLSLD_AIX,
 
     /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 7fc5ea2cacbfab..5f318edf23655a 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -165,12 +165,14 @@ namespace {
 
         if (IsAIX) {
           if (IsTLSLDAIXMI) {
-            // The relative order between the LoadOffset at toc node, and the
-            // .__tls_get_mod node is being tuned here. It is better to put the
-            // LoadOffset at toc node after the call, since the LoadOffset at toc node
-            // can use clobbers r4/r5. Search for the pattern of two Load at toc
-            // nodes, and then move the LoadOffset at toc node right before the
-            // node that uses the OutReg of the .__tls_get_mod node.
+            // The relative order between the LoadOffset at toc node (for the
+            // variable offset), and the .__tls_get_mod node is being tuned
+            // here. It is better to put the LoadOffset at toc node after the call,
+            // since the LoadOffset at toc node can use clobbers r4/r5. Search for
+            // the pattern of two Load at toc nodes (either for the variable offset
+            // or for the module handle), and then move the LoadOffset at toc node
+            // right before the node that uses the OutReg of the .__tls_get_mod
+            // node.
             unsigned LDTocOp =
                 Is64Bit ? (IsLargeModel ? PPC::LDtocL : PPC::LDtoc)
                         : (IsLargeModel ? PPC::LWZtocL : PPC::LWZtoc);
@@ -179,26 +181,29 @@ namespace {
               // Collect all instructions that use the OutReg.
               for (MachineOperand &MO : RegInfo.use_operands(OutReg))
                 Uses.insert(MO.getParent());
-              // Find the first user (e.g.: lwax/stfdx) within the current BB.
+              // Find the first user (e.g.: lwax/stfdx) of the OutReg within the
+              // current BB.
               MachineBasicBlock::iterator UseIter = MBB.begin();
               for (MachineBasicBlock::iterator IE = MBB.end(); UseIter != IE;
                    ++UseIter)
                 if (Uses.count(&*UseIter))
                   break;
 
-              // Got some work to do when UseIter pointing to valid node. Check
-              // the pattern and do the movement if match.
+              // Additional handling is required when UserIter (the first user
+              // of OutReg) is pointing to a valid node. Check the pattern and
+              // do the movement if the pattern matches.
               if (UseIter != MBB.end()) {
-                // Collect associated Load at toc nodes. Use hasOneDef to guard
+                // Collect associated Load at toc nodes. Use hasOneDef() to guard
                 // against unexpected scenarios.
                 std::set<MachineInstr *> LoadFromTocs;
                 for (MachineOperand &MO : UseIter->operands())
                   if (MO.isReg() && MO.isUse()) {
-                    if (RegInfo.hasOneDef(MO.getReg())) {
+                    Register MOReg = MO.getReg();
+                    if (RegInfo.hasOneDef(MOReg)) {
                       MachineInstr *Temp =
-                          RegInfo.getOneDef(MO.getReg())->getParent();
-                      // For current TLSLDAIX node, get the Load at toc node for
-                      // the InReg. Otherwise, Temp probably pointed to the
+                          RegInfo.getOneDef(MOReg)->getParent();
+                      // For the current TLSLDAIX node, get the Load at toc node
+                      // for the InReg. Otherwise, Temp probably pointed to the
                       // LoadOffset at toc node that we would like to move.
                       if (Temp == &MI && RegInfo.hasOneDef(InReg))
                         Temp = RegInfo.getOneDef(InReg)->getParent();
@@ -238,7 +243,7 @@ namespace {
                 }
               }
             }
-            // The module-handle is copied in r3. The copy is followed by
+            // The module-handle is copied into r3. The copy is followed by
             // GETtlsMOD32AIX/GETtlsMOD64AIX.
             BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
                 .addReg(InReg);

>From 3a3c5b5b1cbb38e18f3a43140d0e19c043aea718 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Mon, 19 Feb 2024 19:49:18 -0500
Subject: [PATCH 27/31] rebase test case

---
 .../PowerPC/aix-tls-ld-xcoff-reloc-large.ll   | 33 +++++++++++++++----
 .../CodeGen/PowerPC/aix-tls-xcoff-reloc.ll    |  3 +-
 2 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-ld-xcoff-reloc-large.ll b/llvm/test/CodeGen/PowerPC/aix-tls-ld-xcoff-reloc-large.ll
index ecb05a5821bc11..73741a210ed298 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-ld-xcoff-reloc-large.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-ld-xcoff-reloc-large.ll
@@ -38,14 +38,14 @@ entry:
 ; RELOC-NEXT: Arch: powerpc64
 ; RELOC-NEXT: AddressSize: 64bit
 ; RELOC-NEXT: Relocations [
-; RELOC:      Virtual Address: 0x12
+; RELOC:      Virtual Address: 0xE
 ; RELOC-NEXT:      Symbol: _$TLSML ([[#NFA+19]])
 ; RELOC-NEXT:      IsSigned: No
 ; RELOC-NEXT:      FixupBitValue: 0
 ; RELOC-NEXT:      Length: 16
 ; RELOC-NEXT:      Type: R_TOCU (0x30)
 ; RELOC-NEXT:    }
-; RELOC:      Virtual Address: 0x16
+; RELOC:      Virtual Address: 0x12
 ; RELOC-NEXT:      Symbol: IThreadLocalVarUninit ([[#NFA+21]])
 ; RELOC-NEXT:      IsSigned: No
 ; RELOC-NEXT:      FixupBitValue: 0
@@ -59,6 +59,13 @@ entry:
 ; RELOC-NEXT:      Length: 16
 ; RELOC-NEXT:      Type: R_TOCL (0x31)
 ; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x1C
+; RELOC-NEXT:      Symbol: .__tls_get_mod ([[#NFA+1]])
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 26
+; RELOC-NEXT:      Type: R_RBA (0x18)
+; RELOC-NEXT:    }
 ; RELOC:      Virtual Address: 0x22
 ; RELOC-NEXT:      Symbol: IThreadLocalVarUninit ([[#NFA+21]])
 ; RELOC-NEXT:      IsSigned: No
@@ -66,14 +73,14 @@ entry:
 ; RELOC-NEXT:      Length: 16
 ; RELOC-NEXT:      Type: R_TOCL (0x31)
 ; RELOC-NEXT:    }
-; RELOC:      Virtual Address: 0x4E
+; RELOC:      Virtual Address: 0x4A
 ; RELOC-NEXT:      Symbol: _$TLSML ([[#NFA+19]])
 ; RELOC-NEXT:      IsSigned: No
 ; RELOC-NEXT:      FixupBitValue: 0
 ; RELOC-NEXT:      Length: 16
 ; RELOC-NEXT:      Type: R_TOCU (0x30)
 ; RELOC-NEXT:    }
-; RELOC:      Virtual Address: 0x52
+; RELOC:      Virtual Address: 0x4E
 ; RELOC-NEXT:      Symbol: ThreadLocalVarInit ([[#NFA+23]])
 ; RELOC-NEXT:      IsSigned: No
 ; RELOC-NEXT:      FixupBitValue: 0
@@ -87,6 +94,13 @@ entry:
 ; RELOC-NEXT:      Length: 16
 ; RELOC-NEXT:      Type: R_TOCL (0x31)
 ; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x58
+; RELOC-NEXT:      Symbol: .__tls_get_mod ([[#NFA+1]])
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 26
+; RELOC-NEXT:      Type: R_RBA (0x18)
+; RELOC-NEXT:    }
 ; RELOC:      Virtual Address: 0x5E
 ; RELOC-NEXT:      Symbol: ThreadLocalVarInit ([[#NFA+23]])
 ; RELOC-NEXT:      IsSigned: No
@@ -94,14 +108,14 @@ entry:
 ; RELOC-NEXT:      Length: 16
 ; RELOC-NEXT:      Type: R_TOCL (0x31)
 ; RELOC-NEXT:    }
-; RELOC:      Virtual Address: 0x8E
+; RELOC:      Virtual Address: 0x8A
 ; RELOC-NEXT:      Symbol: _$TLSML ([[#NFA+19]])
 ; RELOC-NEXT:      IsSigned: No
 ; RELOC-NEXT:      FixupBitValue: 0
 ; RELOC-NEXT:      Length: 16
 ; RELOC-NEXT:      Type: R_TOCU (0x30)
 ; RELOC-NEXT:    }
-; RELOC:      Virtual Address: 0x92
+; RELOC:      Virtual Address: 0x8E
 ; RELOC-NEXT:      Symbol: IThreadLocalVarUninit ([[#NFA+21]])
 ; RELOC-NEXT:      IsSigned: No
 ; RELOC-NEXT:      FixupBitValue: 0
@@ -115,6 +129,13 @@ entry:
 ; RELOC-NEXT:      Length: 16
 ; RELOC-NEXT:      Type: R_TOCL (0x31)
 ; RELOC-NEXT:    }
+; RELOC:      Virtual Address: 0x98
+; RELOC-NEXT:      Symbol: .__tls_get_mod (3)
+; RELOC-NEXT:      IsSigned: No
+; RELOC-NEXT:      FixupBitValue: 0
+; RELOC-NEXT:      Length: 26
+; RELOC-NEXT:      Type: R_RBA (0x18)
+; RELOC-NEXT:    }
 ; RELOC:      Virtual Address: 0x9E
 ; RELOC-NEXT:      Symbol: IThreadLocalVarUninit ([[#NFA+21]])
 ; RELOC-NEXT:      IsSigned: No
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
index 0a6d57ee212fb6..0a3e7637b2e70c 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
@@ -33,7 +33,7 @@ entry:
 ; RELOC-NEXT: Relocations [
 ; RELOC-NEXT:   Section (index: 1) .text {
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x12
+; RELOC-NEXT:     Virtual Address: 0xE
 ; RELOC-NEXT:     Symbol: _$TLSML ([[#NFA+25]])
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -589,6 +589,7 @@ entry:
 ; DIS-NEXT:                                      mr 6, 3
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 0(2)
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: [[#NFA+25]]) _$TLSML[TC]
+; DIS-NEXT:                                      stw 0, 40(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               bla 0x0
 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: [[#NFA+1]]) .__tls_get_mod[PR]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 4, 4(2)

>From 3a80632c883876046ae8c4f80b4937d50ba73367 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Mon, 19 Feb 2024 20:05:20 -0500
Subject: [PATCH 28/31] [NFC] Update comments.

---
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 5f318edf23655a..cc59d708558b51 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -216,15 +216,15 @@ namespace {
                     }
                   }
 
-                // Check the two Load at toc: one should be _$TLSML, and the other
-                // will be moved before the node that uses the OutReg of the
-                // .__tls_get_mod node.
+                // Check the two Load at toc nodes: one should be _$TLSML, and the
+                // other will be moved before the node that uses the OutReg of
+                // the .__tls_get_mod node.
                 if (LoadFromTocs.size() == 2) {
                   MachineBasicBlock::iterator TLSMLIter = MBB.end();
                   MachineBasicBlock::iterator OffsetIter = MBB.end();
-                  // Make sure the two LoadFromTocs are within current BB, and
-                  // one of them from the "_$TLSML" pseudo symbol, while the
-                  // other from the variable.
+                  // Make sure the two LoadFromToc nodes are within the current
+                  // BB, and that one of them is from the "_$TLSML" pseudo
+                  // symbol, while the other is from the variable.
                   for (MachineBasicBlock::iterator I = MBB.begin(),
                                                    IE = MBB.end();
                        I != IE; ++I)
@@ -236,8 +236,9 @@ namespace {
                       else
                         OffsetIter = I;
                     }
-                  // If both two iterators are valid, we should have identified
-                  // the scenario, and do the movement.
+                  // Perform the movement when the desired scenario has been
+                  // identified, which should be when both of the iterators are
+                  // valid.
                   if (TLSMLIter != MBB.end() && OffsetIter != MBB.end())
                     OffsetIter->moveBefore(&*UseIter);
                 }

>From 390020a40fd32b5d57771e983b95be94187b76b2 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Mon, 19 Feb 2024 22:08:09 -0500
Subject: [PATCH 29/31] [NFC] Update comment.

---
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index cc59d708558b51..a0df8e7ab98697 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -251,8 +251,9 @@ namespace {
             // The call to .__tls_get_mod.
             BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3);
           } else if (!IsTLSTPRelMI) {
-            // The variable offset and region handle are copied in r4 and r3.
-            // The copies are followed by GETtlsADDR32AIX/GETtlsADDR64AIX.
+            // The variable offset and region handle (for TLSGD) are copied in
+            // r4 and r3. The copies are followed by
+            // GETtlsADDR32AIX/GETtlsADDR64AIX.
             BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
                 .addReg(MI.getOperand(1).getReg());
             BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)

>From 7f84305eb6cc4ea1df2ca5c773bdb501bb4369f9 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Tue, 27 Feb 2024 02:40:57 -0500
Subject: [PATCH 30/31] [NFC] update comments

---
 llvm/lib/Target/PowerPC/PPCISelLowering.h     |  8 ++--
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 43 ++++++++++---------
 2 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index b64af2ff6c0bfe..0bdfdcd15441f4 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -380,10 +380,10 @@ namespace llvm {
     GET_TLS_MOD_AIX,
 
     /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle)
-    /// Op that with single input of module handle TOC entry in R3, and
-    /// generates a GET_TLS_MOD_AIX node which will be expanded into a call to
-    /// .__tls_get_mod. This node is used in both 32-bit and 64-bit modes. The
-    /// only difference is the register class.
+    /// Op that requires a single input of the module handle TOC entry in R3,
+    /// and generates a GET_TLS_MOD_AIX node which will be expanded into a call
+    /// to .__tls_get_mod. This node is used in both 32-bit and 64-bit modes.
+    /// The only difference is the register class.
     TLSLD_AIX,
 
     /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index a0df8e7ab98697..303130a6cee61d 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -165,14 +165,14 @@ namespace {
 
         if (IsAIX) {
           if (IsTLSLDAIXMI) {
-            // The relative order between the LoadOffset at toc node (for the
-            // variable offset), and the .__tls_get_mod node is being tuned
-            // here. It is better to put the LoadOffset at toc node after the call,
-            // since the LoadOffset at toc node can use clobbers r4/r5. Search for
-            // the pattern of two Load at toc nodes (either for the variable offset
-            // or for the module handle), and then move the LoadOffset at toc node
-            // right before the node that uses the OutReg of the .__tls_get_mod
-            // node.
+            // The relative order between the node that loads the variable
+            // offset from the TOC, and the .__tls_get_mod node is being tuned
+            // here. It is better to put the variable offset TOC load after the
+            // call, since this node can use clobbers r4/r5.
+            // Search for the pattern of the two nodes that load from the TOC
+            // (either for the variable offset or for the module handle), and
+            // then move the variable offset TOC load right before the node that
+            // uses the OutReg of the .__tls_get_mod node.
             unsigned LDTocOp =
                 Is64Bit ? (IsLargeModel ? PPC::LDtocL : PPC::LDtoc)
                         : (IsLargeModel ? PPC::LWZtocL : PPC::LWZtoc);
@@ -190,11 +190,11 @@ namespace {
                   break;
 
               // Additional handling is required when UserIter (the first user
-              // of OutReg) is pointing to a valid node. Check the pattern and
-              // do the movement if the pattern matches.
+              // of OutReg) is pointing to a valid node that loads from the TOC.
+              // Check the pattern and do the movement if the pattern matches.
               if (UseIter != MBB.end()) {
-                // Collect associated Load at toc nodes. Use hasOneDef() to guard
-                // against unexpected scenarios.
+                // Collect all associated nodes that load from the TOC. Use
+                // hasOneDef() to guard against unexpected scenarios.
                 std::set<MachineInstr *> LoadFromTocs;
                 for (MachineOperand &MO : UseIter->operands())
                   if (MO.isReg() && MO.isUse()) {
@@ -202,9 +202,10 @@ namespace {
                     if (RegInfo.hasOneDef(MOReg)) {
                       MachineInstr *Temp =
                           RegInfo.getOneDef(MOReg)->getParent();
-                      // For the current TLSLDAIX node, get the Load at toc node
-                      // for the InReg. Otherwise, Temp probably pointed to the
-                      // LoadOffset at toc node that we would like to move.
+                      // For the current TLSLDAIX node, get the corresponding
+                      // node that loads from the TOC for the InReg. Otherwise,
+                      // Temp probably pointed to the variable offset TOC load
+                      // we would like to move.
                       if (Temp == &MI && RegInfo.hasOneDef(InReg))
                         Temp = RegInfo.getOneDef(InReg)->getParent();
                       if (Temp->getOpcode() == LDTocOp)
@@ -216,15 +217,15 @@ namespace {
                     }
                   }
 
-                // Check the two Load at toc nodes: one should be _$TLSML, and the
-                // other will be moved before the node that uses the OutReg of
-                // the .__tls_get_mod node.
+                // Check the two nodes that load from the TOC: one should be
+                // _$TLSML, and the other will be moved before the node that
+                // uses the OutReg of the .__tls_get_mod node.
                 if (LoadFromTocs.size() == 2) {
                   MachineBasicBlock::iterator TLSMLIter = MBB.end();
                   MachineBasicBlock::iterator OffsetIter = MBB.end();
-                  // Make sure the two LoadFromToc nodes are within the current
-                  // BB, and that one of them is from the "_$TLSML" pseudo
-                  // symbol, while the other is from the variable.
+                  // Make sure the two nodes that loaded from the TOC are within
+                  // the current BB, and that one of them is from the "_$TLSML"
+                  // pseudo symbol, while the other is from the variable.
                   for (MachineBasicBlock::iterator I = MBB.begin(),
                                                    IE = MBB.end();
                        I != IE; ++I)

>From 7740a3fb9beed33e5d10319388a955d527a6a631 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Thu, 29 Feb 2024 01:34:28 -0500
Subject: [PATCH 31/31] [NFC] update comments

---
 llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 303130a6cee61d..147438dfedd87d 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -217,8 +217,8 @@ namespace {
                     }
                   }
 
-                // Check the two nodes that load from the TOC: one should be
-                // _$TLSML, and the other will be moved before the node that
+                // Check the two nodes that loaded from the TOC: one should be
+                // "_$TLSML", and the other will be moved before the node that
                 // uses the OutReg of the .__tls_get_mod node.
                 if (LoadFromTocs.size() == 2) {
                   MachineBasicBlock::iterator TLSMLIter = MBB.end();



More information about the cfe-commits mailing list