[llvm] d565980 - [AIX][TLS] Generate 64-bit local-exec access code sequence

Amy Kwan via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 19 10:17:42 PDT 2023


Author: Amy Kwan
Date: 2023-06-19T12:17:30-05:00
New Revision: d5659808b2fabadd2fff4f19d7aed2e2ff3435f5

URL: https://github.com/llvm/llvm-project/commit/d5659808b2fabadd2fff4f19d7aed2e2ff3435f5
DIFF: https://github.com/llvm/llvm-project/commit/d5659808b2fabadd2fff4f19d7aed2e2ff3435f5.diff

LOG: [AIX][TLS] Generate 64-bit local-exec access code sequence

This patch adds support for the TLS local-exec access model on AIX to allow
for the ability to generate the 64-bit (specifically, non-optimized) code sequence.

For this patch in particular, the sequence that is generated involves a load of the
variable offset, followed by an add of the loaded variable offset to r13 (which holds
the thread pointer). This code sequence looks like the following:
```
ld reg1,var[TC](2)
add reg2, reg1, r13     // r13 contains the thread pointer
```
The TOC (.tc pseudo-op) entries generated in the assembly files are also
changed where we add the @le relocation for the variable offset.

Differential Revision: https://reviews.llvm.org/D149722

Added: 
    llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll
    llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll
    llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll
    llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll
    llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large.ll
    llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc.ll

Modified: 
    llvm/include/llvm/MC/MCExpr.h
    llvm/lib/MC/MCExpr.cpp
    llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
    llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
    llvm/lib/Target/PowerPC/PPC.h
    llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCISelLowering.h
    llvm/lib/Target/PowerPC/PPCInstr64Bit.td

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h
index bf1f32bb91ba7..49bd6883d4297 100644
--- a/llvm/include/llvm/MC/MCExpr.h
+++ b/llvm/include/llvm/MC/MCExpr.h
@@ -299,6 +299,7 @@ class MCSymbolRefExpr : public MCExpr {
     VK_PPC_TLSGD,           // symbol@tlsgd
     VK_PPC_AIX_TLSGD,       // symbol@gd
     VK_PPC_AIX_TLSGDM,      // symbol@m
+    VK_PPC_AIX_TLSLE,       // symbol@le
     VK_PPC_GOT_TLSLD,       // symbol@got@tlsld
     VK_PPC_GOT_TLSLD_LO,    // symbol@got@tlsld@l
     VK_PPC_GOT_TLSLD_HI,    // symbol@got@tlsld@h

diff  --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index d5b5aa8fba4e0..c2ecc00b5796c 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -327,6 +327,8 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
     return "gd";
   case VK_PPC_AIX_TLSGDM:
     return "m";
+  case VK_PPC_AIX_TLSLE:
+    return "le";
   case VK_PPC_GOT_TLSLD: return "got@tlsld";
   case VK_PPC_GOT_TLSLD_LO: return "got@tlsld@l";
   case VK_PPC_GOT_TLSLD_HI: return "got@tlsld@h";

diff  --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 37bb90ab2c248..271f7ab757e16 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -147,12 +147,11 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer {
       MCSymbolXCOFF *TCSym =
           cast<MCSectionXCOFF>(Streamer.getCurrentSectionOnly())
               ->getQualNameSymbol();
-      // If the variant kind is VK_PPC_AIX_TLSGDM the entry represents the
-      // region handle for the symbol, we add the relocation specifier @m.
-      // If the variant kind is VK_PPC_AIX_TLSGD the entry represents the
-      // variable offset for the symbol, we add the relocation specifier @gd.
+      // On AIX, we have a region handle (symbol@m) and the variable offset
+      // (symbol@{gd|le}) for TLS variables, depending on the TLS model.
       if (Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD ||
-          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM)
+          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM ||
+          Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLE)
         OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << "@"
            << MCSymbolRefExpr::getVariantKindName(Kind) << '\n';
       else

diff  --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
index b6e749b781804..df671f53cbd8c 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
@@ -108,6 +108,8 @@ std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize(
       return {XCOFF::RelocationType::R_TLS, SignAndSizeForFKData};
     case MCSymbolRefExpr::VK_PPC_AIX_TLSGDM:
       return {XCOFF::RelocationType::R_TLSM, SignAndSizeForFKData};
+    case MCSymbolRefExpr::VK_PPC_AIX_TLSLE:
+      return {XCOFF::RelocationType::R_TLS_LE, SignAndSizeForFKData};
     case MCSymbolRefExpr::VK_None:
       return {XCOFF::RelocationType::R_POS, SignAndSizeForFKData};
     }

diff  --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
index 8f84ae7efc246..0d3d71742bfb7 100644
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -127,8 +127,9 @@ class ModulePass;
     /// General Dynamic model for AIX.
     MO_TLSGD_FLAG = 32,
 
-    /// MO_TPREL_FLAG - If this bit is set the symbol reference is relative to
-    /// TLS Initial Exec model.
+    /// MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to
+    /// the thread pointer and the symbol can be used for the TLS Initial Exec
+    /// and Local Exec models.
     MO_TPREL_FLAG = 64,
 
     /// MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to

diff  --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index caaea40ae5169..2a192e6555912 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -813,6 +813,18 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     return Expr;
   };
   auto GetVKForMO = [&](const MachineOperand &MO) {
+    // For TLS local-exec accesses on AIX, we have one TOC entry for the symbol
+    // (with the variable offset), which is differentiated by MO_TPREL_FLAG.
+    if (MO.getTargetFlags() & PPCII::MO_TPREL_FLAG) {
+      // TODO: Update the query and the comment above to add a check for initial
+      // exec when this TLS model is supported on AIX in the future, as both
+      // local-exec and initial-exec can use MO_TPREL_FLAG.
+      assert(MO.isGlobal() && "Only expecting a global MachineOperand here!\n");
+      TLSModel::Model Model = TM.getTLSModel(MO.getGlobal());
+      if (Model == TLSModel::LocalExec)
+        return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLE;
+      llvm_unreachable("Only expecting local-exec accesses!");
+    }
     // For GD TLS access on AIX, we have two TOC entries for the symbol (one for
     // the variable offset and the other for the region handle). They are
     // differentiated by MO_TLSGD_FLAG and MO_TLSGDM_FLAG.

diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 012052a1be1d3..f4eb89dda19ce 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3326,9 +3326,31 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
   SDLoc dl(GA);
   const GlobalValue *GV = GA->getGlobal();
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  bool Is64Bit = Subtarget.isPPC64();
+  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
 
-  // The general-dynamic model is the only access model supported for now, so
-  // all the GlobalTLSAddress nodes are lowered with this model.
+  if (Model == TLSModel::LocalExec) {
+    if (Is64Bit) {
+      // For local-exec on AIX (64-bit), the sequence that is generated involves
+      // a load of the variable offset (from the TOC), followed by an add of the
+      // loaded variable offset to R13 (the thread pointer).
+      // This code sequence looks like:
+      //    ld reg1,var[TC](2)
+      //    add reg2, reg1, r13     // r13 contains the thread pointer
+      SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
+      SDValue VariableOffsetTGA =
+          DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
+      SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
+      return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
+    } else {
+      report_fatal_error("On AIX, the local-exec TLS model is only supported "
+                         "on PPC64 for now.");
+    }
+  }
+
+  // The Local-Exec and General-Dynamic TLS models are currently the only
+  // supported access models. If Local-exec is not possible or specified, all
+  // GlobalTLSAddress nodes are lowered using the general-dynamic model.
   // We need to generate two TOC entries, one for the variable offset, one for
   // the region handle. The global address for the TOC entry of the region
   // handle is created with the MO_TLSGDM_FLAG flag and the global address

diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 02f7147dfd6bb..2e2514a5e34a4 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -332,11 +332,11 @@ namespace llvm {
     /// finds the offset of "sym" relative to the thread pointer.
     LD_GOT_TPREL_L,
 
-    /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS
-    /// model, produces an ADD instruction that adds the contents of
-    /// G8RReg to the thread pointer.  Symbol contains a relocation
-    /// sym\@tls which is to be replaced by the thread pointer and
-    /// identifies to the linker that the instruction is part of a
+    /// G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec
+    /// and local-exec TLS models, produces an ADD instruction that adds
+    /// the contents of G8RReg to the thread pointer.  Symbol contains a
+    /// relocation sym\@tls which is to be replaced by the thread pointer
+    /// and identifies to the linker that the instruction is part of a
     /// TLS sequence.
     ADD_TLS,
 

diff  --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 4487082a3bb8c..f4f058aff237f 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1910,9 +1910,20 @@ def : Pat<(add i64:$in, (PPChi tblockaddress:$g, 0)),
           (ADDIS8 $in, tblockaddress:$g)>;
 
 // AIX 64-bit small code model TLS access.
+// This is used for global dynamic accesses when loading the region handle and
+// variable offset, and also for local-exec accesses to load the offset of a
+// TLS variable from the TOC, prior to adding it to r13.
 def : Pat<(i64 (PPCtoc_entry tglobaltlsaddr:$disp, i64:$reg)),
           (i64 (LDtoc tglobaltlsaddr:$disp, i64:$reg))>;
 
+// The following pattern matches 64-bit local-exec TLS accesses on AIX.
+// PPCaddTls is used in local-exec accesses in order to:
+//   - Get the address of a variable (adding the variable offset to the thread
+//     pointer in r13).
+//   - Create an opportunity to optimize the user of the loaded address.
+def : Pat<(PPCaddTls i64:$in, i64:$addr),
+          (ADD8TLS $in, $addr)>;
+
 // 64-bits atomic loads and stores
 def : Pat<(atomic_load_64 DSForm:$src), (LD  memrix:$src)>;
 def : Pat<(atomic_load_64 XForm:$src),  (LDX memrr:$src)>;

diff  --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll
new file mode 100644
index 0000000000000..2e0697d347226
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll
@@ -0,0 +1,328 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s \
+; RUN:      --check-prefix=SMALL64
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \
+; RUN:      | FileCheck %s --check-prefix=LARGE64
+
+@ThreadLocalVarInit = thread_local(localexec) global double 0x4021947AE147AE14, align 8
+@VarInit = global double 8.787000e+01, align 8
+@IThreadLocalVarUninit = internal thread_local(localexec) global double 0.000000e+00, align 8
+@IThreadLocalVarInit = internal thread_local(localexec) global double 5.870000e+00, align 8
+@ThreadLocalVarUninit = thread_local(localexec) global double 0.000000e+00, align 8
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
+
+define void @storeITLUninit(double noundef %x) {
+; SMALL64-LABEL: storeITLUninit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    stfd f1, 0(r3)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: storeITLUninit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C0@u(r2)
+; LARGE64-NEXT:    ld r3, L..C0@l(r3)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    stfd f1, 0(r3)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit)
+  store double %x, ptr %0, align 8
+  ret void
+}
+
+define void @storeITLInit(double noundef %x) {
+; SMALL64-LABEL: storeITLInit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    stfd f1, 0(r3)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: storeITLInit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C1@u(r2)
+; LARGE64-NEXT:    ld r3, L..C1@l(r3)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    stfd f1, 0(r3)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
+  store double %x, ptr %0, align 8
+  ret void
+}
+
+define void @storeTLUninit(double noundef %x) {
+; SMALL64-LABEL: storeTLUninit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    stfd f1, 0(r3)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: storeTLUninit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C2@u(r2)
+; LARGE64-NEXT:    ld r3, L..C2@l(r3)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    stfd f1, 0(r3)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit)
+  store double %x, ptr %0, align 8
+  ret void
+}
+
+define void @storeTLInit(double noundef %x) {
+; SMALL64-LABEL: storeTLInit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    stfd f1, 0(r3)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: storeTLInit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C3@u(r2)
+; LARGE64-NEXT:    ld r3, L..C3@l(r3)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    stfd f1, 0(r3)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
+  store double %x, ptr %0, align 8
+  ret void
+}
+
+define double @loadITLUninit() {
+; SMALL64-LABEL: loadITLUninit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    lfd f1, 0(r3)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadITLUninit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C0@u(r2)
+; LARGE64-NEXT:    ld r3, L..C0@l(r3)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    lfd f1, 0(r3)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit)
+  %1 = load double, ptr %0, align 8
+  ret double %1
+}
+
+define double @loadITLUninit2() {
+; SMALL64-LABEL: loadITLUninit2:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
+; SMALL64-NEXT:    ld r4, L..C4(r2) # @VarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    lfd f1, 0(r4)
+; SMALL64-NEXT:    lfd f0, 0(r3)
+; SMALL64-NEXT:    xsadddp f1, f0, f1
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadITLUninit2:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C0@u(r2)
+; LARGE64-NEXT:    addis r4, L..C4@u(r2)
+; LARGE64-NEXT:    ld r3, L..C0@l(r3)
+; LARGE64-NEXT:    ld r4, L..C4@l(r4)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    lfd f1, 0(r4)
+; LARGE64-NEXT:    lfd f0, 0(r3)
+; LARGE64-NEXT:    xsadddp f1, f0, f1
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit)
+  %1 = load double, ptr %0, align 8
+  %2 = load double, ptr @VarInit, align 8
+  %add = fadd double %1, %2
+  ret double %add
+}
+
+define double @loadITLInit() {
+; SMALL64-LABEL: loadITLInit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    lfd f1, 0(r3)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadITLInit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C1@u(r2)
+; LARGE64-NEXT:    ld r3, L..C1@l(r3)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    lfd f1, 0(r3)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
+  %1 = load double, ptr %0, align 8
+  ret double %1
+}
+
+define double @loadITLInit2() {
+; SMALL64-LABEL: loadITLInit2:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
+; SMALL64-NEXT:    ld r4, L..C4(r2) # @VarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    lfd f1, 0(r4)
+; SMALL64-NEXT:    lfd f0, 0(r3)
+; SMALL64-NEXT:    xsadddp f1, f0, f1
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadITLInit2:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C1@u(r2)
+; LARGE64-NEXT:    addis r4, L..C4@u(r2)
+; LARGE64-NEXT:    ld r3, L..C1@l(r3)
+; LARGE64-NEXT:    ld r4, L..C4@l(r4)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    lfd f1, 0(r4)
+; LARGE64-NEXT:    lfd f0, 0(r3)
+; LARGE64-NEXT:    xsadddp f1, f0, f1
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
+  %1 = load double, ptr %0, align 8
+  %2 = load double, ptr @VarInit, align 8
+  %add = fadd double %1, %2
+  ret double %add
+}
+
+define double @loadTLUninit() {
+; SMALL64-LABEL: loadTLUninit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    lfd f1, 0(r3)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadTLUninit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C2@u(r2)
+; LARGE64-NEXT:    ld r3, L..C2@l(r3)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    lfd f1, 0(r3)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit)
+  %1 = load double, ptr %0, align 8
+  ret double %1
+}
+
+define double @loadTLUninit2() {
+; SMALL64-LABEL: loadTLUninit2:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
+; SMALL64-NEXT:    ld r4, L..C4(r2) # @VarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    lfd f1, 0(r4)
+; SMALL64-NEXT:    lfd f0, 0(r3)
+; SMALL64-NEXT:    xsadddp f1, f0, f1
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadTLUninit2:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C2@u(r2)
+; LARGE64-NEXT:    addis r4, L..C4@u(r2)
+; LARGE64-NEXT:    ld r3, L..C2@l(r3)
+; LARGE64-NEXT:    ld r4, L..C4@l(r4)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    lfd f1, 0(r4)
+; LARGE64-NEXT:    lfd f0, 0(r3)
+; LARGE64-NEXT:    xsadddp f1, f0, f1
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit)
+  %1 = load double, ptr %0, align 8
+  %2 = load double, ptr @VarInit, align 8
+  %add = fadd double %1, %2
+  ret double %add
+}
+
+define double @loadTLInit() {
+; SMALL64-LABEL: loadTLInit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    lfd f1, 0(r3)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadTLInit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C3@u(r2)
+; LARGE64-NEXT:    ld r3, L..C3@l(r3)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    lfd f1, 0(r3)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
+  %1 = load double, ptr %0, align 8
+  ret double %1
+}
+
+define double @loadTLInit2() {
+; SMALL64-LABEL: loadTLInit2:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
+; SMALL64-NEXT:    ld r4, L..C4(r2) # @VarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    lfd f1, 0(r4)
+; SMALL64-NEXT:    lfd f0, 0(r3)
+; SMALL64-NEXT:    xsadddp f1, f0, f1
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadTLInit2:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C3@u(r2)
+; LARGE64-NEXT:    addis r4, L..C4@u(r2)
+; LARGE64-NEXT:    ld r3, L..C3@l(r3)
+; LARGE64-NEXT:    ld r4, L..C4@l(r4)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    lfd f1, 0(r4)
+; LARGE64-NEXT:    lfd f0, 0(r3)
+; LARGE64-NEXT:    xsadddp f1, f0, f1
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
+  %1 = load double, ptr %0, align 8
+  %2 = load double, ptr @VarInit, align 8
+  %add = fadd double %1, %2
+  ret double %add
+}
+
+; TOC Entry Checks.
+
+; SMALL64-LABEL: .toc
+; SMALL64-LABEL: L..C0:
+; SMALL64-NEXT: .tc IThreadLocalVarUninit[TC],IThreadLocalVarUninit[UL]@le
+; SMALL64-LABEL: L..C1:
+; SMALL64-NEXT: .tc IThreadLocalVarInit[TC],IThreadLocalVarInit[TL]@le
+; SMALL64-LABEL: L..C2:
+; SMALL64-NEXT: .tc ThreadLocalVarUninit[TC],ThreadLocalVarUninit[TL]@le
+; SMALL64-LABEL: L..C3:
+; SMALL64-NEXT: .tc ThreadLocalVarInit[TC],ThreadLocalVarInit[TL]@le
+; SMALL64-LABEL: L..C4:
+; SMALL64-NEXT: .tc VarInit[TC],VarInit[RW]
+
+; LARGE64-LABEL: .toc
+; LARGE64-LABEL: L..C0:
+; LARGE64-NEXT: .tc IThreadLocalVarUninit[TE],IThreadLocalVarUninit[UL]@le
+; LARGE64-LABEL: L..C1:
+; LARGE64-NEXT: .tc IThreadLocalVarInit[TE],IThreadLocalVarInit[TL]@le
+; LARGE64-LABEL: L..C2:
+; LARGE64-NEXT: .tc ThreadLocalVarUninit[TE],ThreadLocalVarUninit[TL]@le
+; LARGE64-LABEL: L..C3:
+; LARGE64-NEXT: .tc ThreadLocalVarInit[TE],ThreadLocalVarInit[TL]@le
+; LARGE64-LABEL: L..C4:
+; LARGE64-NEXT: .tc VarInit[TE],VarInit[RW]

diff  --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll
new file mode 100644
index 0000000000000..2b93616bfe8bd
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll
@@ -0,0 +1,328 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s \
+; RUN:      --check-prefix=SMALL64
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \
+; RUN:      | FileCheck %s --check-prefix=LARGE64
+
+@ThreadLocalVarInit = thread_local(localexec) global float 0x401D333340000000, align 4
+@VarInit = global float 0x4021666660000000, align 4
+@IThreadLocalVarUninit = internal thread_local(localexec) global float 0.000000e+00, align 4
+@IThreadLocalVarInit = internal thread_local(localexec) global float 0x4018CCCCC0000000, align 4
+@ThreadLocalVarUninit = thread_local(localexec) global float 0.000000e+00, align 4
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
+
+define void @storeITLUninit(float noundef %x) {
+; SMALL64-LABEL: storeITLUninit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    stfs f1, 0(r3)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: storeITLUninit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C0@u(r2)
+; LARGE64-NEXT:    ld r3, L..C0@l(r3)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    stfs f1, 0(r3)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit)
+  store float %x, ptr %0, align 4
+  ret void
+}
+
+define void @storeITLInit(float noundef %x) {
+; SMALL64-LABEL: storeITLInit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    stfs f1, 0(r3)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: storeITLInit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C1@u(r2)
+; LARGE64-NEXT:    ld r3, L..C1@l(r3)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    stfs f1, 0(r3)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
+  store float %x, ptr %0, align 4
+  ret void
+}
+
+define void @storeTLUninit(float noundef %x) {
+; SMALL64-LABEL: storeTLUninit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    stfs f1, 0(r3)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: storeTLUninit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C2@u(r2)
+; LARGE64-NEXT:    ld r3, L..C2@l(r3)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    stfs f1, 0(r3)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit)
+  store float %x, ptr %0, align 4
+  ret void
+}
+
+define void @storeTLInit(float noundef %x) {
+; SMALL64-LABEL: storeTLInit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    stfs f1, 0(r3)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: storeTLInit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C3@u(r2)
+; LARGE64-NEXT:    ld r3, L..C3@l(r3)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    stfs f1, 0(r3)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
+  store float %x, ptr %0, align 4
+  ret void
+}
+
+define float @loadITLUninit() {
+; SMALL64-LABEL: loadITLUninit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    lfs f1, 0(r3)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadITLUninit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C0@u(r2)
+; LARGE64-NEXT:    ld r3, L..C0@l(r3)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    lfs f1, 0(r3)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit)
+  %1 = load float, ptr %0, align 4
+  ret float %1
+}
+
+define float @loadITLUninit2() {
+; SMALL64-LABEL: loadITLUninit2:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
+; SMALL64-NEXT:    ld r4, L..C4(r2) # @VarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    lfs f1, 0(r4)
+; SMALL64-NEXT:    lfs f0, 0(r3)
+; SMALL64-NEXT:    fadds f1, f0, f1
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadITLUninit2:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C0@u(r2)
+; LARGE64-NEXT:    addis r4, L..C4@u(r2)
+; LARGE64-NEXT:    ld r3, L..C0@l(r3)
+; LARGE64-NEXT:    ld r4, L..C4@l(r4)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    lfs f1, 0(r4)
+; LARGE64-NEXT:    lfs f0, 0(r3)
+; LARGE64-NEXT:    fadds f1, f0, f1
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit)
+  %1 = load float, ptr %0, align 4
+  %2 = load float, ptr @VarInit, align 4
+  %add = fadd float %1, %2
+  ret float %add
+}
+
+define float @loadITLInit() {
+; SMALL64-LABEL: loadITLInit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    lfs f1, 0(r3)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadITLInit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C1@u(r2)
+; LARGE64-NEXT:    ld r3, L..C1@l(r3)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    lfs f1, 0(r3)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
+  %1 = load float, ptr %0, align 4
+  ret float %1
+}
+
+define float @loadITLInit2() {
+; SMALL64-LABEL: loadITLInit2:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
+; SMALL64-NEXT:    ld r4, L..C4(r2) # @VarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    lfs f1, 0(r4)
+; SMALL64-NEXT:    lfs f0, 0(r3)
+; SMALL64-NEXT:    fadds f1, f0, f1
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadITLInit2:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C1@u(r2)
+; LARGE64-NEXT:    addis r4, L..C4@u(r2)
+; LARGE64-NEXT:    ld r3, L..C1@l(r3)
+; LARGE64-NEXT:    ld r4, L..C4@l(r4)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    lfs f1, 0(r4)
+; LARGE64-NEXT:    lfs f0, 0(r3)
+; LARGE64-NEXT:    fadds f1, f0, f1
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
+  %1 = load float, ptr %0, align 4
+  %2 = load float, ptr @VarInit, align 4
+  %add = fadd float %1, %2
+  ret float %add
+}
+
+define float @loadTLUninit() {
+; SMALL64-LABEL: loadTLUninit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    lfs f1, 0(r3)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadTLUninit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C2@u(r2)
+; LARGE64-NEXT:    ld r3, L..C2@l(r3)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    lfs f1, 0(r3)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit)
+  %1 = load float, ptr %0, align 4
+  ret float %1
+}
+
+define float @loadTLUninit2() {
+; SMALL64-LABEL: loadTLUninit2:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
+; SMALL64-NEXT:    ld r4, L..C4(r2) # @VarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    lfs f1, 0(r4)
+; SMALL64-NEXT:    lfs f0, 0(r3)
+; SMALL64-NEXT:    fadds f1, f0, f1
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadTLUninit2:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C2@u(r2)
+; LARGE64-NEXT:    addis r4, L..C4@u(r2)
+; LARGE64-NEXT:    ld r3, L..C2@l(r3)
+; LARGE64-NEXT:    ld r4, L..C4@l(r4)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    lfs f1, 0(r4)
+; LARGE64-NEXT:    lfs f0, 0(r3)
+; LARGE64-NEXT:    fadds f1, f0, f1
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit)
+  %1 = load float, ptr %0, align 4
+  %2 = load float, ptr @VarInit, align 4
+  %add = fadd float %1, %2
+  ret float %add
+}
+
+define float @loadTLInit() {
+; SMALL64-LABEL: loadTLInit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    lfs f1, 0(r3)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadTLInit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C3@u(r2)
+; LARGE64-NEXT:    ld r3, L..C3@l(r3)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    lfs f1, 0(r3)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
+  %1 = load float, ptr %0, align 4
+  ret float %1
+}
+
+define float @loadTLInit2() {
+; SMALL64-LABEL: loadTLInit2:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
+; SMALL64-NEXT:    ld r4, L..C4(r2) # @VarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    lfs f1, 0(r4)
+; SMALL64-NEXT:    lfs f0, 0(r3)
+; SMALL64-NEXT:    fadds f1, f0, f1
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadTLInit2:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C3@u(r2)
+; LARGE64-NEXT:    addis r4, L..C4@u(r2)
+; LARGE64-NEXT:    ld r3, L..C3@l(r3)
+; LARGE64-NEXT:    ld r4, L..C4@l(r4)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    lfs f1, 0(r4)
+; LARGE64-NEXT:    lfs f0, 0(r3)
+; LARGE64-NEXT:    fadds f1, f0, f1
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
+  %1 = load float, ptr %0, align 4
+  %2 = load float, ptr @VarInit, align 4
+  %add = fadd float %1, %2
+  ret float %add
+}
+
+; TOC Entry Checks.
+
+; SMALL64-LABEL: .toc
+; SMALL64-LABEL: L..C0:
+; SMALL64-NEXT: .tc IThreadLocalVarUninit[TC],IThreadLocalVarUninit[UL]@le
+; SMALL64-LABEL: L..C1:
+; SMALL64-NEXT: .tc IThreadLocalVarInit[TC],IThreadLocalVarInit[TL]@le
+; SMALL64-LABEL: L..C2:
+; SMALL64-NEXT: .tc ThreadLocalVarUninit[TC],ThreadLocalVarUninit[TL]@le
+; SMALL64-LABEL: L..C3:
+; SMALL64-NEXT: .tc ThreadLocalVarInit[TC],ThreadLocalVarInit[TL]@le
+; SMALL64-LABEL: L..C4:
+; SMALL64-NEXT: .tc VarInit[TC],VarInit[RW]
+
+; LARGE64-LABEL: .toc
+; LARGE64-LABEL: L..C0:
+; LARGE64-NEXT: .tc IThreadLocalVarUninit[TE],IThreadLocalVarUninit[UL]@le
+; LARGE64-LABEL: L..C1:
+; LARGE64-NEXT: .tc IThreadLocalVarInit[TE],IThreadLocalVarInit[TL]@le
+; LARGE64-LABEL: L..C2:
+; LARGE64-NEXT: .tc ThreadLocalVarUninit[TE],ThreadLocalVarUninit[TL]@le
+; LARGE64-LABEL: L..C3:
+; LARGE64-NEXT: .tc ThreadLocalVarInit[TE],ThreadLocalVarInit[TL]@le
+; LARGE64-LABEL: L..C4:
+; LARGE64-NEXT: .tc VarInit[TE],VarInit[RW]

diff  --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll
new file mode 100644
index 0000000000000..1df848481da4b
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll
@@ -0,0 +1,336 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s \
+; RUN:      --check-prefix=SMALL64
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \
+; RUN:      | FileCheck %s --check-prefix=LARGE64
+
+@ThreadLocalVarInit = thread_local(localexec) global i32 1, align 4
+@VarInit = global i32 87, align 4
+@IThreadLocalVarUninit = internal thread_local(localexec) global i32 0, align 4
+@IThreadLocalVarInit = internal thread_local(localexec) global i32 1, align 4
+@ThreadLocalVarUninit = thread_local(localexec) global i32 0, align 4
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
+
+define void @storeITLUninit(i32 noundef signext %x) {
+; SMALL64-LABEL: storeITLUninit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
+; SMALL64-NEXT:    add r4, r13, r4
+; SMALL64-NEXT:    stw r3, 0(r4)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: storeITLUninit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r4, L..C0@u(r2)
+; LARGE64-NEXT:    ld r4, L..C0@l(r4)
+; LARGE64-NEXT:    add r4, r13, r4
+; LARGE64-NEXT:    stw r3, 0(r4)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit)
+  store i32 %x, ptr %0, align 4
+  ret void
+}
+
+define void @storeITLInit(i32 noundef signext %x) {
+; SMALL64-LABEL: storeITLInit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
+; SMALL64-NEXT:    add r4, r13, r4
+; SMALL64-NEXT:    stw r3, 0(r4)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: storeITLInit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r4, L..C1@u(r2)
+; LARGE64-NEXT:    ld r4, L..C1@l(r4)
+; LARGE64-NEXT:    add r4, r13, r4
+; LARGE64-NEXT:    stw r3, 0(r4)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
+  store i32 %x, ptr %0, align 4
+  ret void
+}
+
+define void @storeTLUninit(i32 noundef signext %x) {
+; SMALL64-LABEL: storeTLUninit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
+; SMALL64-NEXT:    add r4, r13, r4
+; SMALL64-NEXT:    stw r3, 0(r4)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: storeTLUninit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r4, L..C2@u(r2)
+; LARGE64-NEXT:    ld r4, L..C2@l(r4)
+; LARGE64-NEXT:    add r4, r13, r4
+; LARGE64-NEXT:    stw r3, 0(r4)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit)
+  store i32 %x, ptr %0, align 4
+  ret void
+}
+
+define void @storeTLInit(i32 noundef signext %x) {
+; SMALL64-LABEL: storeTLInit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
+; SMALL64-NEXT:    add r4, r13, r4
+; SMALL64-NEXT:    stw r3, 0(r4)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: storeTLInit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r4, L..C3@u(r2)
+; LARGE64-NEXT:    ld r4, L..C3@l(r4)
+; LARGE64-NEXT:    add r4, r13, r4
+; LARGE64-NEXT:    stw r3, 0(r4)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
+  store i32 %x, ptr %0, align 4
+  ret void
+}
+
+define signext i32 @loadITLUninit() {
+; SMALL64-LABEL: loadITLUninit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    lwa r3, 0(r3)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadITLUninit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C0@u(r2)
+; LARGE64-NEXT:    ld r3, L..C0@l(r3)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    lwa r3, 0(r3)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define signext i32 @loadITLUninit2() {
+; SMALL64-LABEL: loadITLUninit2:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
+; SMALL64-NEXT:    ld r4, L..C4(r2) # @VarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    lwz r4, 0(r4)
+; SMALL64-NEXT:    lwz r3, 0(r3)
+; SMALL64-NEXT:    add r3, r4, r3
+; SMALL64-NEXT:    extsw r3, r3
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadITLUninit2:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C0@u(r2)
+; LARGE64-NEXT:    addis r4, L..C4@u(r2)
+; LARGE64-NEXT:    ld r3, L..C0@l(r3)
+; LARGE64-NEXT:    ld r4, L..C4@l(r4)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    lwz r4, 0(r4)
+; LARGE64-NEXT:    lwz r3, 0(r3)
+; LARGE64-NEXT:    add r3, r4, r3
+; LARGE64-NEXT:    extsw r3, r3
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit)
+  %1 = load i32, ptr %0, align 4
+  %2 = load i32, ptr @VarInit, align 4
+  %add = add nsw i32 %2, %1
+  ret i32 %add
+}
+
+define signext i32 @loadITLInit() {
+; SMALL64-LABEL: loadITLInit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    lwa r3, 0(r3)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadITLInit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C1@u(r2)
+; LARGE64-NEXT:    ld r3, L..C1@l(r3)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    lwa r3, 0(r3)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define signext i32 @loadITLInit2() {
+; SMALL64-LABEL: loadITLInit2:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
+; SMALL64-NEXT:    ld r4, L..C4(r2) # @VarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    lwz r4, 0(r4)
+; SMALL64-NEXT:    lwz r3, 0(r3)
+; SMALL64-NEXT:    add r3, r4, r3
+; SMALL64-NEXT:    extsw r3, r3
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadITLInit2:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C1@u(r2)
+; LARGE64-NEXT:    addis r4, L..C4@u(r2)
+; LARGE64-NEXT:    ld r3, L..C1@l(r3)
+; LARGE64-NEXT:    ld r4, L..C4@l(r4)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    lwz r4, 0(r4)
+; LARGE64-NEXT:    lwz r3, 0(r3)
+; LARGE64-NEXT:    add r3, r4, r3
+; LARGE64-NEXT:    extsw r3, r3
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
+  %1 = load i32, ptr %0, align 4
+  %2 = load i32, ptr @VarInit, align 4
+  %add = add nsw i32 %2, %1
+  ret i32 %add
+}
+
+define signext i32 @loadTLUninit() {
+; SMALL64-LABEL: loadTLUninit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    lwa r3, 0(r3)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadTLUninit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C2@u(r2)
+; LARGE64-NEXT:    ld r3, L..C2@l(r3)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    lwa r3, 0(r3)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define signext i32 @loadTLUninit2() {
+; SMALL64-LABEL: loadTLUninit2:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
+; SMALL64-NEXT:    ld r4, L..C4(r2) # @VarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    lwz r4, 0(r4)
+; SMALL64-NEXT:    lwz r3, 0(r3)
+; SMALL64-NEXT:    add r3, r4, r3
+; SMALL64-NEXT:    extsw r3, r3
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadTLUninit2:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C2@u(r2)
+; LARGE64-NEXT:    addis r4, L..C4@u(r2)
+; LARGE64-NEXT:    ld r3, L..C2@l(r3)
+; LARGE64-NEXT:    ld r4, L..C4@l(r4)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    lwz r4, 0(r4)
+; LARGE64-NEXT:    lwz r3, 0(r3)
+; LARGE64-NEXT:    add r3, r4, r3
+; LARGE64-NEXT:    extsw r3, r3
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit)
+  %1 = load i32, ptr %0, align 4
+  %2 = load i32, ptr @VarInit, align 4
+  %add = add nsw i32 %2, %1
+  ret i32 %add
+}
+
+define signext i32 @loadTLInit() {
+; SMALL64-LABEL: loadTLInit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    lwa r3, 0(r3)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadTLInit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C3@u(r2)
+; LARGE64-NEXT:    ld r3, L..C3@l(r3)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    lwa r3, 0(r3)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
+  %1 = load i32, ptr %0, align 4
+  ret i32 %1
+}
+
+define signext i32 @loadTLInit2() {
+; SMALL64-LABEL: loadTLInit2:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
+; SMALL64-NEXT:    ld r4, L..C4(r2) # @VarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    lwz r4, 0(r4)
+; SMALL64-NEXT:    lwz r3, 0(r3)
+; SMALL64-NEXT:    add r3, r4, r3
+; SMALL64-NEXT:    extsw r3, r3
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadTLInit2:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C3@u(r2)
+; LARGE64-NEXT:    addis r4, L..C4@u(r2)
+; LARGE64-NEXT:    ld r3, L..C3@l(r3)
+; LARGE64-NEXT:    ld r4, L..C4@l(r4)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    lwz r4, 0(r4)
+; LARGE64-NEXT:    lwz r3, 0(r3)
+; LARGE64-NEXT:    add r3, r4, r3
+; LARGE64-NEXT:    extsw r3, r3
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
+  %1 = load i32, ptr %0, align 4
+  %2 = load i32, ptr @VarInit, align 4
+  %add = add nsw i32 %2, %1
+  ret i32 %add
+}
+
+; TOC Entry Checks.
+
+; SMALL64-LABEL: .toc
+; SMALL64-LABEL: L..C0:
+; SMALL64-NEXT: .tc IThreadLocalVarUninit[TC],IThreadLocalVarUninit[UL]@le
+; SMALL64-LABEL: L..C1:
+; SMALL64-NEXT: .tc IThreadLocalVarInit[TC],IThreadLocalVarInit[TL]@le
+; SMALL64-LABEL: L..C2:
+; SMALL64-NEXT: .tc ThreadLocalVarUninit[TC],ThreadLocalVarUninit[TL]@le
+; SMALL64-LABEL: L..C3:
+; SMALL64-NEXT: .tc ThreadLocalVarInit[TC],ThreadLocalVarInit[TL]@le
+; SMALL64-LABEL: L..C4:
+; SMALL64-NEXT: .tc VarInit[TC],VarInit[RW]
+
+; LARGE64-LABEL: .toc
+; LARGE64-LABEL: L..C0:
+; LARGE64-NEXT: .tc IThreadLocalVarUninit[TE],IThreadLocalVarUninit[UL]@le
+; LARGE64-LABEL: L..C1:
+; LARGE64-NEXT: .tc IThreadLocalVarInit[TE],IThreadLocalVarInit[TL]@le
+; LARGE64-LABEL: L..C2:
+; LARGE64-NEXT: .tc ThreadLocalVarUninit[TE],ThreadLocalVarUninit[TL]@le
+; LARGE64-LABEL: L..C3:
+; LARGE64-NEXT: .tc ThreadLocalVarInit[TE],ThreadLocalVarInit[TL]@le
+; LARGE64-LABEL: L..C4:
+; LARGE64-NEXT: .tc VarInit[TE],VarInit[RW]

diff  --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll
new file mode 100644
index 0000000000000..8ef680909f91b
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll
@@ -0,0 +1,328 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s \
+; RUN:      --check-prefix=SMALL64
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \
+; RUN:      | FileCheck %s --check-prefix=LARGE64
+
+@ThreadLocalVarInit = thread_local(localexec) global i64 1, align 8
+@VarInit = global i64 87, align 8
+@IThreadLocalVarUninit = internal thread_local(localexec) global i64 0, align 8
+@IThreadLocalVarInit = internal thread_local(localexec) global i64 1, align 8
+@ThreadLocalVarUninit = thread_local(localexec) global i64 0, align 8
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
+
+define void @storeITLUninit(i64 noundef %x) {
+; SMALL64-LABEL: storeITLUninit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
+; SMALL64-NEXT:    add r4, r13, r4
+; SMALL64-NEXT:    std r3, 0(r4)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: storeITLUninit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r4, L..C0@u(r2)
+; LARGE64-NEXT:    ld r4, L..C0@l(r4)
+; LARGE64-NEXT:    add r4, r13, r4
+; LARGE64-NEXT:    std r3, 0(r4)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit)
+  store i64 %x, ptr %0, align 8
+  ret void
+}
+
+define void @storeITLInit(i64 noundef %x) {
+; SMALL64-LABEL: storeITLInit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
+; SMALL64-NEXT:    add r4, r13, r4
+; SMALL64-NEXT:    std r3, 0(r4)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: storeITLInit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r4, L..C1@u(r2)
+; LARGE64-NEXT:    ld r4, L..C1@l(r4)
+; LARGE64-NEXT:    add r4, r13, r4
+; LARGE64-NEXT:    std r3, 0(r4)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
+  store i64 %x, ptr %0, align 8
+  ret void
+}
+
+define void @storeTLUninit(i64 noundef %x) {
+; SMALL64-LABEL: storeTLUninit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
+; SMALL64-NEXT:    add r4, r13, r4
+; SMALL64-NEXT:    std r3, 0(r4)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: storeTLUninit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r4, L..C2@u(r2)
+; LARGE64-NEXT:    ld r4, L..C2@l(r4)
+; LARGE64-NEXT:    add r4, r13, r4
+; LARGE64-NEXT:    std r3, 0(r4)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit)
+  store i64 %x, ptr %0, align 8
+  ret void
+}
+
+define void @storeTLInit(i64 noundef %x) {
+; SMALL64-LABEL: storeTLInit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
+; SMALL64-NEXT:    add r4, r13, r4
+; SMALL64-NEXT:    std r3, 0(r4)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: storeTLInit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r4, L..C3@u(r2)
+; LARGE64-NEXT:    ld r4, L..C3@l(r4)
+; LARGE64-NEXT:    add r4, r13, r4
+; LARGE64-NEXT:    std r3, 0(r4)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
+  store i64 %x, ptr %0, align 8
+  ret void
+}
+
+define i64 @loadITLUninit() {
+; SMALL64-LABEL: loadITLUninit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    ld r3, 0(r3)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadITLUninit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C0@u(r2)
+; LARGE64-NEXT:    ld r3, L..C0@l(r3)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    ld r3, 0(r3)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit)
+  %1 = load i64, ptr %0, align 8
+  ret i64 %1
+}
+
+define i64 @loadITLUninit2() {
+; SMALL64-LABEL: loadITLUninit2:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
+; SMALL64-NEXT:    ld r4, L..C4(r2) # @VarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    ld r4, 0(r4)
+; SMALL64-NEXT:    ld r3, 0(r3)
+; SMALL64-NEXT:    add r3, r4, r3
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadITLUninit2:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C0@u(r2)
+; LARGE64-NEXT:    addis r4, L..C4@u(r2)
+; LARGE64-NEXT:    ld r3, L..C0@l(r3)
+; LARGE64-NEXT:    ld r4, L..C4@l(r4)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    ld r4, 0(r4)
+; LARGE64-NEXT:    ld r3, 0(r3)
+; LARGE64-NEXT:    add r3, r4, r3
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit)
+  %1 = load i64, ptr %0, align 8
+  %2 = load i64, ptr @VarInit, align 8
+  %add = add nsw i64 %2, %1
+  ret i64 %add
+}
+
+define i64 @loadITLInit() {
+; SMALL64-LABEL: loadITLInit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    ld r3, 0(r3)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadITLInit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C1@u(r2)
+; LARGE64-NEXT:    ld r3, L..C1@l(r3)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    ld r3, 0(r3)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
+  %1 = load i64, ptr %0, align 8
+  ret i64 %1
+}
+
+define i64 @loadITLInit2() {
+; SMALL64-LABEL: loadITLInit2:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
+; SMALL64-NEXT:    ld r4, L..C4(r2) # @VarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    ld r4, 0(r4)
+; SMALL64-NEXT:    ld r3, 0(r3)
+; SMALL64-NEXT:    add r3, r4, r3
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadITLInit2:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C1@u(r2)
+; LARGE64-NEXT:    addis r4, L..C4@u(r2)
+; LARGE64-NEXT:    ld r3, L..C1@l(r3)
+; LARGE64-NEXT:    ld r4, L..C4@l(r4)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    ld r4, 0(r4)
+; LARGE64-NEXT:    ld r3, 0(r3)
+; LARGE64-NEXT:    add r3, r4, r3
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
+  %1 = load i64, ptr %0, align 8
+  %2 = load i64, ptr @VarInit, align 8
+  %add = add nsw i64 %2, %1
+  ret i64 %add
+}
+
+define i64 @loadTLUninit() {
+; SMALL64-LABEL: loadTLUninit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    ld r3, 0(r3)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadTLUninit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C2@u(r2)
+; LARGE64-NEXT:    ld r3, L..C2@l(r3)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    ld r3, 0(r3)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit)
+  %1 = load i64, ptr %0, align 8
+  ret i64 %1
+}
+
+define i64 @loadTLUninit2() {
+; SMALL64-LABEL: loadTLUninit2:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
+; SMALL64-NEXT:    ld r4, L..C4(r2) # @VarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    ld r4, 0(r4)
+; SMALL64-NEXT:    ld r3, 0(r3)
+; SMALL64-NEXT:    add r3, r4, r3
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadTLUninit2:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C2@u(r2)
+; LARGE64-NEXT:    addis r4, L..C4@u(r2)
+; LARGE64-NEXT:    ld r3, L..C2@l(r3)
+; LARGE64-NEXT:    ld r4, L..C4@l(r4)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    ld r4, 0(r4)
+; LARGE64-NEXT:    ld r3, 0(r3)
+; LARGE64-NEXT:    add r3, r4, r3
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit)
+  %1 = load i64, ptr %0, align 8
+  %2 = load i64, ptr @VarInit, align 8
+  %add = add nsw i64 %2, %1
+  ret i64 %add
+}
+
+define i64 @loadTLInit() {
+; SMALL64-LABEL: loadTLInit:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    ld r3, 0(r3)
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadTLInit:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C3@u(r2)
+; LARGE64-NEXT:    ld r3, L..C3@l(r3)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    ld r3, 0(r3)
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
+  %1 = load i64, ptr %0, align 8
+  ret i64 %1
+}
+
+define i64 @loadTLInit2() {
+; SMALL64-LABEL: loadTLInit2:
+; SMALL64:       # %bb.0: # %entry
+; SMALL64-NEXT:    ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
+; SMALL64-NEXT:    ld r4, L..C4(r2) # @VarInit
+; SMALL64-NEXT:    add r3, r13, r3
+; SMALL64-NEXT:    ld r4, 0(r4)
+; SMALL64-NEXT:    ld r3, 0(r3)
+; SMALL64-NEXT:    add r3, r4, r3
+; SMALL64-NEXT:    blr
+;
+; LARGE64-LABEL: loadTLInit2:
+; LARGE64:       # %bb.0: # %entry
+; LARGE64-NEXT:    addis r3, L..C3@u(r2)
+; LARGE64-NEXT:    addis r4, L..C4@u(r2)
+; LARGE64-NEXT:    ld r3, L..C3@l(r3)
+; LARGE64-NEXT:    ld r4, L..C4@l(r4)
+; LARGE64-NEXT:    add r3, r13, r3
+; LARGE64-NEXT:    ld r4, 0(r4)
+; LARGE64-NEXT:    ld r3, 0(r3)
+; LARGE64-NEXT:    add r3, r4, r3
+; LARGE64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
+  %1 = load i64, ptr %0, align 8
+  %2 = load i64, ptr @VarInit, align 8
+  %add = add nsw i64 %2, %1
+  ret i64 %add
+}
+
+; TOC Entry Checks.
+
+; SMALL64-LABEL: .toc
+; SMALL64-LABEL: L..C0:
+; SMALL64-NEXT: .tc IThreadLocalVarUninit[TC],IThreadLocalVarUninit[UL]@le
+; SMALL64-LABEL: L..C1:
+; SMALL64-NEXT: .tc IThreadLocalVarInit[TC],IThreadLocalVarInit[TL]@le
+; SMALL64-LABEL: L..C2:
+; SMALL64-NEXT: .tc ThreadLocalVarUninit[TC],ThreadLocalVarUninit[TL]@le
+; SMALL64-LABEL: L..C3:
+; SMALL64-NEXT: .tc ThreadLocalVarInit[TC],ThreadLocalVarInit[TL]@le
+; SMALL64-LABEL: L..C4:
+; SMALL64-NEXT: .tc VarInit[TC],VarInit[RW]
+
+; LARGE64-LABEL: .toc
+; LARGE64-LABEL: L..C0:
+; LARGE64-NEXT: .tc IThreadLocalVarUninit[TE],IThreadLocalVarUninit[UL]@le
+; LARGE64-LABEL: L..C1:
+; LARGE64-NEXT: .tc IThreadLocalVarInit[TE],IThreadLocalVarInit[TL]@le
+; LARGE64-LABEL: L..C2:
+; LARGE64-NEXT: .tc ThreadLocalVarUninit[TE],ThreadLocalVarUninit[TL]@le
+; LARGE64-LABEL: L..C3:
+; LARGE64-NEXT: .tc ThreadLocalVarInit[TE],ThreadLocalVarInit[TL]@le
+; LARGE64-LABEL: L..C4:
+; LARGE64-NEXT: .tc VarInit[TE],VarInit[RW]

diff  --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large.ll
new file mode 100644
index 0000000000000..5404fdae4c03a
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large.ll
@@ -0,0 +1,218 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
+; RUN:     -xcoff-traceback-table=false --code-model=large -filetype=obj -o %t.o < %s
+; RUN: llvm-readobj --relocs --expand-relocs %t.o | FileCheck --check-prefix=RELOC %s
+; RUN: llvm-readobj --syms %t.o | FileCheck --check-prefix=SYM %s
+; RUN: llvm-objdump -D -r --symbol-description %t.o | FileCheck --check-prefix=DIS %s
+
+@ThreadLocalVarInit = thread_local(localexec) global i64 1, align 8
+@VarInit = global i64 87, align 8
+@IThreadLocalVarUninit = internal thread_local(localexec) global i64 0, align 8
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
+
+define void @storeITLUninit(i64 noundef %x) {
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit)
+  store i64 %x, ptr %0, align 8
+  ret void
+}
+
+define i64 @loadTLInit() {
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
+  %1 = load i64, ptr %0, align 8
+  %2 = load i64, ptr @VarInit, align 8
+  %add = add nsw i64 %2, %1
+  ret i64 %add
+}
+
+; RELOC:      File: {{.*}}aix-tls-le-xcoff-reloc-large.ll.tmp.o
+; RELOC-NEXT: Format: aix5coff64-rs6000
+; RELOC-NEXT: Arch: powerpc64
+; RELOC-NEXT: AddressSize: 64bit
+; RELOC-NEXT: Relocations [
+; RELOC:       Virtual Address: 0x2
+; RELOC-NEXT:       Symbol: IThreadLocalVarUninit (15)
+; RELOC-NEXT:       IsSigned: No
+; RELOC-NEXT:       FixupBitValue: 0
+; RELOC-NEXT:       Length: 16
+; RELOC-NEXT:       Type: R_TOCU (0x30)
+; RELOC-NEXT:     }
+; RELOC:       Virtual Address: 0x6
+; RELOC-NEXT:       Symbol: IThreadLocalVarUninit (15)
+; RELOC-NEXT:       IsSigned: No
+; RELOC-NEXT:       FixupBitValue: 0
+; RELOC-NEXT:       Length: 16
+; RELOC-NEXT:       Type: R_TOCL (0x31)
+; RELOC-NEXT:     }
+; RELOC:       Virtual Address: 0x22
+; RELOC-NEXT:       Symbol: ThreadLocalVarInit (17)
+; RELOC-NEXT:       IsSigned: No
+; RELOC-NEXT:       FixupBitValue: 0
+; RELOC-NEXT:       Length: 16
+; RELOC-NEXT:       Type: R_TOCU (0x30)
+; RELOC-NEXT:     }
+; RELOC:       Virtual Address: 0x2A
+; RELOC-NEXT:       Symbol: ThreadLocalVarInit (17)
+; RELOC-NEXT:       IsSigned: No
+; RELOC-NEXT:       FixupBitValue: 0
+; RELOC-NEXT:       Length: 16
+; RELOC-NEXT:       Type: R_TOCL (0x31)
+; RELOC-NEXT:     }
+; RELOC:       Virtual Address: 0x80
+; RELOC-NEXT:       Symbol: IThreadLocalVarUninit (23)
+; RELOC-NEXT:       IsSigned: No
+; RELOC-NEXT:       FixupBitValue: 0
+; RELOC-NEXT:       Length: 64
+; RELOC-NEXT:       Type: R_TLS_LE (0x23)
+; RELOC-NEXT:     }
+; RELOC:       Virtual Address: 0x88
+; RELOC-NEXT:       Symbol: ThreadLocalVarInit (21)
+; RELOC-NEXT:       IsSigned: No
+; RELOC-NEXT:       FixupBitValue: 0
+; RELOC-NEXT:       Length: 64
+; RELOC-NEXT:       Type: R_TLS_LE (0x23)
+; RELOC-NEXT:     }
+
+; SYM:      File: {{.*}}aix-tls-le-xcoff-reloc-large.ll.tmp.o
+; SYM-NEXT: Format: aix5coff64-rs6000
+; SYM-NEXT: Arch: powerpc64
+; SYM-NEXT: AddressSize: 64bit
+; SYM-NEXT: Symbols [
+; SYM:     Index: 15
+; SYM-NEXT:     Name: IThreadLocalVarUninit
+; SYM-NEXT:     Value (RelocatableAddress): 0x80
+; SYM-NEXT:     Section: .data
+; SYM-NEXT:     Type: 0x0
+; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT:     NumberOfAuxEntries: 1
+; SYM-NEXT:     CSECT Auxiliary Entry {
+; SYM-NEXT:       Index: 16
+; SYM-NEXT:       SectionLen: 8
+; SYM-NEXT:       ParameterHashIndex: 0x0
+; SYM-NEXT:       TypeChkSectNum: 0x0
+; SYM-NEXT:       SymbolAlignmentLog2: 3
+; SYM-NEXT:       SymbolType: XTY_SD (0x1)
+; SYM-NEXT:       StorageMappingClass: XMC_TE (0x16)
+; SYM-NEXT:       Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:     }
+; SYM-NEXT:   }
+; SYM:     Index: 17
+; SYM-NEXT:     Name: ThreadLocalVarInit
+; SYM-NEXT:     Value (RelocatableAddress): 0x88
+; SYM-NEXT:     Section: .data
+; SYM-NEXT:     Type: 0x0
+; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT:     NumberOfAuxEntries: 1
+; SYM-NEXT:     CSECT Auxiliary Entry {
+; SYM-NEXT:       Index: 18
+; SYM-NEXT:       SectionLen: 8
+; SYM-NEXT:       ParameterHashIndex: 0x0
+; SYM-NEXT:       TypeChkSectNum: 0x0
+; SYM-NEXT:       SymbolAlignmentLog2: 3
+; SYM-NEXT:       SymbolType: XTY_SD (0x1)
+; SYM-NEXT:       StorageMappingClass: XMC_TE (0x16)
+; SYM-NEXT:       Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:     }
+; SYM-NEXT:   }
+; SYM:     Index: 21
+; SYM-NEXT:     Name: ThreadLocalVarInit
+; SYM-NEXT:     Value (RelocatableAddress): 0x0
+; SYM-NEXT:     Section: .tdata
+; SYM-NEXT:     Type: 0x0
+; SYM-NEXT:     StorageClass: C_EXT (0x2)
+; SYM-NEXT:     NumberOfAuxEntries: 1
+; SYM-NEXT:     CSECT Auxiliary Entry {
+; SYM-NEXT:       Index: 22
+; SYM-NEXT:       SectionLen: 8
+; SYM-NEXT:       ParameterHashIndex: 0x0
+; SYM-NEXT:       TypeChkSectNum: 0x0
+; SYM-NEXT:       SymbolAlignmentLog2: 3
+; SYM-NEXT:       SymbolType: XTY_SD (0x1)
+; SYM-NEXT:       StorageMappingClass: XMC_TL (0x14)
+; SYM-NEXT:       Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:     }
+; SYM-NEXT:   }
+; SYM:     Index: 23
+; SYM-NEXT:     Name: IThreadLocalVarUninit
+; SYM-NEXT:     Value (RelocatableAddress): 0x8
+; SYM-NEXT:     Section: .tbss
+; SYM-NEXT:     Type: 0x0
+; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT:     NumberOfAuxEntries: 1
+; SYM-NEXT:     CSECT Auxiliary Entry {
+; SYM-NEXT:       Index: 24
+; SYM-NEXT:       SectionLen: 8
+; SYM-NEXT:       ParameterHashIndex: 0x0
+; SYM-NEXT:       TypeChkSectNum: 0x0
+; SYM-NEXT:       SymbolAlignmentLog2: 3
+; SYM-NEXT:       SymbolType: XTY_CM (0x3)
+; SYM-NEXT:       StorageMappingClass: XMC_UL (0x15)
+; SYM-NEXT:       Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:     }
+; SYM-NEXT:   }
+
+; DIS:      {{.*}}aix-tls-le-xcoff-reloc-large.ll.tmp.o:	file format aix5coff64-rs6000
+; DIS:      Disassembly of section .text:
+; DIS:      0000000000000000 (idx: 3) .storeITLUninit:
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 4, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU	(idx: 15) IThreadLocalVarUninit[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 4, 0(4)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL	(idx: 15) IThreadLocalVarUninit[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 4, 13, 4
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                std 3, 0(4)
+; DIS-NEXT:                                       blr
+; DIS:      0000000000000020 (idx: 5) .loadTLInit:
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU       (idx: 17) ThreadLocalVarInit[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 4, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU       (idx: 19) VarInit[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 8(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL       (idx: 17) ThreadLocalVarInit[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 4, 16(4)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL       (idx: 19) VarInit[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 3, 13, 3
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 4, 0(4)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                ld 3, 0(3)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                add 3, 4, 3
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                blr
+
+; DIS:      Disassembly of section .data:
+; DIS:      0000000000000048 (idx: 7) VarInit[RW]:
+; DIS-NEXT:       48: 00 00 00 00
+; DIS-NEXT:       4c: 00 00 00 57
+; DIS:      0000000000000050 (idx: 9) storeITLUninit[DS]:
+; DIS-NEXT:       50: 00 00 00 00
+; DIS-NEXT: 0000000000000050:  R_POS        (idx: 3) .storeITLUninit
+; DIS-NEXT:       54: 00 00 00 00
+; DIS-NEXT:       58: 00 00 00 00
+; DIS-NEXT: 0000000000000058:  R_POS        (idx: 13) TOC[TC0]
+; DIS-NEXT:       5c: 00 00 00 80
+; DIS:      0000000000000068 (idx: 11) loadTLInit[DS]:
+; DIS-NEXT:       68: 00 00 00 00
+; DIS-NEXT: 0000000000000068:  R_POS        (idx: 5) .loadTLInit
+; DIS-NEXT:       6c: 00 00 00 20
+; DIS-NEXT:       70: 00 00 00 00
+; DIS-NEXT: 0000000000000070:  R_POS        (idx: 13) TOC[TC0]
+; DIS-NEXT:       74: 00 00 00 80
+; DIS:      0000000000000080 (idx: 15) IThreadLocalVarUninit[TE]:
+; DIS-NEXT:       80: 00 00 00 00
+; DIS-NEXT: 0000000000000080:  R_TLS_LE     (idx: 23) IThreadLocalVarUninit[UL]
+; DIS-NEXT:       84: 00 00 00 00
+; DIS:      0000000000000088 (idx: 17) ThreadLocalVarInit[TE]:
+; DIS-NEXT:       88: 00 00 00 00
+; DIS-NEXT: 0000000000000088:  R_TLS_LE     (idx: 21) ThreadLocalVarInit[TL]
+; DIS-NEXT:       8c: 00 00 00 00
+; DIS:      0000000000000090 (idx: 19) VarInit[TE]:
+; DIS-NEXT:       90: 00 00 00 00
+; DIS-NEXT: 0000000000000090:  R_POS        (idx: 7) VarInit[RW]
+; DIS-NEXT:       94: 00 00 00 48
+
+; DIS:      Disassembly of section .tdata:
+; DIS:      0000000000000000 (idx: 21) ThreadLocalVarInit[TL]:
+; DIS-NEXT:        0: 00 00 00 00
+; DIS-NEXT:        4: 00 00 00 01
+
+; DIS:      Disassembly of section .tbss:
+; DIS:      0000000000000008 (idx: 23) IThreadLocalVarUninit[UL]:
+; DIS-NEXT: ...
+

diff  --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc.ll
new file mode 100644
index 0000000000000..73a81a3d6f681
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc.ll
@@ -0,0 +1,194 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
+; RUN:     -xcoff-traceback-table=false -data-sections=false -filetype=obj -o %t.o < %s
+; RUN: llvm-readobj --relocs --expand-relocs %t.o | FileCheck --check-prefix=RELOC %s
+; RUN: llvm-readobj --syms %t.o | FileCheck --check-prefix=SYM %s
+; RUN: llvm-objdump -D -r --symbol-description %t.o | FileCheck --check-prefix=DIS %s
+
+@ThreadLocalVarInit = thread_local(localexec) global i32 1, align 4
+@VarInit = global i32 87, align 4
+@IThreadLocalVarUninit = internal thread_local(localexec) global i32 0, align 4
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
+
+define void @storeITLUninit(i32 noundef signext %x) {
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit)
+  store i32 %x, ptr %0, align 4
+  ret void
+}
+
+define signext i32 @loadTLInit() {
+entry:
+  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
+  %1 = load i32, ptr %0, align 4
+  %2 = load i32, ptr @VarInit, align 4
+  %add = add nsw i32 %2, %1
+  ret i32 %add
+}
+
+; RELOC:      File: {{.*}}aix-tls-le-xcoff-reloc.ll.tmp.o
+; RELOC-NEXT: Format: aix5coff64-rs6000
+; RELOC-NEXT: Arch: powerpc64
+; RELOC-NEXT: AddressSize: 64bit
+; RELOC-NEXT: Relocations [
+; RELOC:       Virtual Address: 0x2
+; RELOC-NEXT:       Symbol: IThreadLocalVarUninit (17)
+; RELOC-NEXT:       IsSigned: No
+; RELOC-NEXT:       FixupBitValue: 0
+; RELOC-NEXT:       Length: 16
+; RELOC-NEXT:       Type: R_TOC (0x3)
+; RELOC-NEXT:     }
+; RELOC:       Virtual Address: 0x12
+; RELOC-NEXT:       Symbol: ThreadLocalVarInit (19)
+; RELOC-NEXT:       IsSigned: No
+; RELOC-NEXT:       FixupBitValue: 0
+; RELOC-NEXT:       Length: 16
+; RELOC-NEXT:       Type: R_TOC (0x3)
+; RELOC-NEXT:     }
+; RELOC:       Virtual Address: 0x68
+; RELOC-NEXT:       Symbol: IThreadLocalVarUninit (27)
+; RELOC-NEXT:       IsSigned: No
+; RELOC-NEXT:       FixupBitValue: 0
+; RELOC-NEXT:       Length: 64
+; RELOC-NEXT:       Type: R_TLS_LE (0x23)
+; RELOC-NEXT:     }
+; RELOC:       Virtual Address: 0x70
+; RELOC-NEXT:       Symbol: ThreadLocalVarInit (25)
+; RELOC-NEXT:       IsSigned: No
+; RELOC-NEXT:       FixupBitValue: 0
+; RELOC-NEXT:       Length: 64
+; RELOC-NEXT:       Type: R_TLS_LE (0x23)
+; RELOC-NEXT:     }
+
+; SYM:      File: {{.*}}aix-tls-le-xcoff-reloc.ll.tmp.o
+; SYM-NEXT: Format: aix5coff64-rs6000
+; SYM-NEXT: Arch: powerpc64
+; SYM-NEXT: AddressSize: 64bit
+; SYM-NEXT: Symbols [
+; SYM:     Index: 17
+; SYM-NEXT:     Name: IThreadLocalVarUninit
+; SYM-NEXT:     Value (RelocatableAddress): 0x68
+; SYM-NEXT:     Section: .data
+; SYM-NEXT:     Type: 0x0
+; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT:     NumberOfAuxEntries: 1
+; SYM-NEXT:     CSECT Auxiliary Entry {
+; SYM-NEXT:       Index: 18
+; SYM-NEXT:       SectionLen: 8
+; SYM-NEXT:       ParameterHashIndex: 0x0
+; SYM-NEXT:       TypeChkSectNum: 0x0
+; SYM-NEXT:       SymbolAlignmentLog2: 3
+; SYM-NEXT:       SymbolType: XTY_SD (0x1)
+; SYM-NEXT:       StorageMappingClass: XMC_TC (0x3)
+; SYM-NEXT:       Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:     }
+; SYM-NEXT:   }
+; SYM:     Index: 19
+; SYM-NEXT:     Name: ThreadLocalVarInit
+; SYM-NEXT:     Value (RelocatableAddress): 0x70
+; SYM-NEXT:     Section: .data
+; SYM-NEXT:     Type: 0x0
+; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT:     NumberOfAuxEntries: 1
+; SYM-NEXT:     CSECT Auxiliary Entry {
+; SYM-NEXT:       Index: 20
+; SYM-NEXT:       SectionLen: 8
+; SYM-NEXT:       ParameterHashIndex: 0x0
+; SYM-NEXT:       TypeChkSectNum: 0x0
+; SYM-NEXT:       SymbolAlignmentLog2: 3
+; SYM-NEXT:       SymbolType: XTY_SD (0x1)
+; SYM-NEXT:       StorageMappingClass: XMC_TC (0x3)
+; SYM-NEXT:       Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:     }
+; SYM-NEXT:   }
+; SYM:     Index: 25
+; SYM-NEXT:     Name: ThreadLocalVarInit
+; SYM-NEXT:     Value (RelocatableAddress): 0x0
+; SYM-NEXT:     Section: .tdata
+; SYM-NEXT:     Type: 0x0
+; SYM-NEXT:     StorageClass: C_EXT (0x2)
+; SYM-NEXT:     NumberOfAuxEntries: 1
+; SYM-NEXT:     CSECT Auxiliary Entry {
+; SYM-NEXT:       Index: 26
+; SYM-NEXT:       ContainingCsectSymbolIndex: 23
+; SYM-NEXT:       ParameterHashIndex: 0x0
+; SYM-NEXT:       TypeChkSectNum: 0x0
+; SYM-NEXT:       SymbolAlignmentLog2: 0
+; SYM-NEXT:       SymbolType: XTY_LD (0x2)
+; SYM-NEXT:       StorageMappingClass: XMC_TL (0x14)
+; SYM-NEXT:       Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:     }
+; SYM-NEXT:   }
+; SYM:     Index: 27
+; SYM-NEXT:     Name: IThreadLocalVarUninit
+; SYM-NEXT:     Value (RelocatableAddress): 0x4
+; SYM-NEXT:     Section: .tbss
+; SYM-NEXT:     Type: 0x0
+; SYM-NEXT:     StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT:     NumberOfAuxEntries: 1
+; SYM-NEXT:     CSECT Auxiliary Entry {
+; SYM-NEXT:       Index: 28
+; SYM-NEXT:       SectionLen: 4
+; SYM-NEXT:       ParameterHashIndex: 0x0
+; SYM-NEXT:       TypeChkSectNum: 0x0
+; SYM-NEXT:       SymbolAlignmentLog2: 2
+; SYM-NEXT:       SymbolType: XTY_CM (0x3)
+; SYM-NEXT:       StorageMappingClass: XMC_UL (0x15)
+; SYM-NEXT:       Auxiliary Type: AUX_CSECT (0xFB)
+; SYM-NEXT:     }
+; SYM-NEXT:   }
+
+; DIS:      {{.*}}aix-tls-le-xcoff-reloc.ll.tmp.o:	file format aix5coff64-rs6000
+; DIS:      Disassembly of section .text:
+; DIS:      0000000000000000 (idx: 3) .storeITLUninit:
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               ld 4, 0(2)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC        (idx: 17) IThreadLocalVarUninit[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               add 4, 13, 4
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               stw 3, 0(4)
+; DIS-NEXT:                                      blr
+; DIS:      0000000000000010 (idx: 5) .loadTLInit:
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               ld 3, 8(2)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC        (idx: 19) ThreadLocalVarInit[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               ld 4, 16(2)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC        (idx: 21) VarInit[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               add 3, 13, 3
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 4, 0(4)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 0(3)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               add 3, 4, 3
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               extsw 3, 3
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               blr
+
+; DIS:      Disassembly of section .data:
+; DIS:      0000000000000030 (idx: 9) VarInit:
+; DIS-NEXT:       30: 00 00 00 57
+; DIS:      0000000000000038 (idx: 11) storeITLUninit[DS]:
+; DIS-NEXT:       38: 00 00 00 00
+; DIS-NEXT: 0000000000000038:  R_POS        (idx: 3) .storeITLUninit
+; DIS-NEXT:       3c: 00 00 00 00
+; DIS-NEXT:       40: 00 00 00 00
+; DIS-NEXT: 0000000000000040:  R_POS        (idx: 15) TOC[TC0]
+; DIS-NEXT:       44: 00 00 00 68
+; DIS:      0000000000000050 (idx: 13) loadTLInit[DS]:
+; DIS-NEXT:       50: 00 00 00 00
+; DIS-NEXT: 0000000000000050:  R_POS        (idx: 5) .loadTLInit
+; DIS-NEXT:       54: 00 00 00 10
+; DIS-NEXT:       58: 00 00 00 00
+; DIS-NEXT: 0000000000000058:  R_POS        (idx: 15) TOC[TC0]
+; DIS-NEXT:       5c: 00 00 00 68
+; DIS:      0000000000000068 (idx: 17) IThreadLocalVarUninit[TC]:
+; DIS-NEXT:       68: 00 00 00 00
+; DIS-NEXT: 0000000000000068:  R_TLS_LE     (idx: 27) IThreadLocalVarUninit[UL]
+; DIS:      0000000000000070 (idx: 19) ThreadLocalVarInit[TC]:
+; DIS-NEXT:       70: 00 00 00 00
+; DIS-NEXT: 0000000000000070:  R_TLS_LE     (idx: 25) ThreadLocalVarInit
+; DIS:      0000000000000078 (idx: 21) VarInit[TC]:
+; DIS-NEXT:       78: 00 00 00 00
+; DIS-NEXT: 0000000000000078:  R_POS        (idx: 9) VarInit
+
+; DIS:      Disassembly of section .tdata:
+; DIS:      0000000000000000 (idx: 25) ThreadLocalVarInit:
+; DIS-NEXT:        0: 00 00 00 01
+
+; DIS:      Disassembly of section .tbss:
+; DIS:      0000000000000004 (idx: 27) IThreadLocalVarUninit[UL]:
+; DIS-NEXT: ...
+


        


More information about the llvm-commits mailing list