[llvm] f5ae075 - [AIX][TLS] Generate 32-bit local-exec access code sequence
Amy Kwan via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 20 09:59:06 PDT 2023
Author: Amy Kwan
Date: 2023-06-20T11:57:38-05:00
New Revision: f5ae07504846ec967ee8ac51198f17e76632b01b
URL: https://github.com/llvm/llvm-project/commit/f5ae07504846ec967ee8ac51198f17e76632b01b
DIFF: https://github.com/llvm/llvm-project/commit/f5ae07504846ec967ee8ac51198f17e76632b01b.diff
LOG: [AIX][TLS] Generate 32-bit local-exec access code sequence
This patch adds support for the TLS local-exec access model on AIX to allow
for the ability to generate the 32-bit (specifically, non-optimized) code sequence.
This work is a follow up of D149722.
The particular code sequence that is generated for this access model is as follows:
```
.tc var[TC],var[TL]@le. // variable offset, with the le relocation specifier
bla .__get_tpointer() // get the thread pointer, modifies r3
lwz reg1, var[TC](2) // load the variable offset
add reg2, r3, reg1 // add the variable offset to the retrieved thread pointer
```
Differential Revision: https://reviews.llvm.org/D152669
Added:
llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large32.ll
llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc32.ll
Modified:
llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.h
llvm/lib/Target/PowerPC/PPCInstrInfo.td
llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll
llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll
llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll
llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 2a192e6555912..c3c52a3fb2e4f 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -196,6 +196,7 @@ class PPCAsmPrinter : public AsmPrinter {
void LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI);
void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI);
void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
+ void EmitAIXTlsCallHelper(const MachineInstr *MI);
bool runOnMachineFunction(MachineFunction &MF) override {
Subtarget = &MF.getSubtarget<PPCSubtarget>();
bool Changed = AsmPrinter::runOnMachineFunction(MF);
@@ -611,13 +612,26 @@ void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) {
/// This helper function creates the TlsGetAddr MCSymbol for AIX. We will
/// create the csect and use the qual-name symbol instead of creating just the
/// external symbol.
-static MCSymbol *createMCSymbolForTlsGetAddr(MCContext &Ctx) {
+static MCSymbol *createMCSymbolForTlsGetAddr(MCContext &Ctx, unsigned MIOpc) {
+ StringRef SymName =
+ MIOpc == PPC::GETtlsTpointer32AIX ? ".__get_tpointer" : ".__tls_get_addr";
return Ctx
- .getXCOFFSection(".__tls_get_addr", SectionKind::getText(),
+ .getXCOFFSection(SymName, SectionKind::getText(),
XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER))
->getQualNameSymbol();
}
+void PPCAsmPrinter::EmitAIXTlsCallHelper(const MachineInstr *MI) {
+ assert(Subtarget->isAIXABI() &&
+ "Only expecting to emit calls to get the thread pointer on AIX!");
+
+ MCSymbol *TlsCall = createMCSymbolForTlsGetAddr(OutContext, MI->getOpcode());
+ const MCExpr *TlsRef =
+ MCSymbolRefExpr::create(TlsCall, MCSymbolRefExpr::VK_None, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BLA).addExpr(TlsRef));
+ return;
+}
+
/// EmitTlsCall -- Given a GETtls[ld]ADDR[32] instruction, print a
/// call to __tls_get_addr to the current output stream.
void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
@@ -652,10 +666,7 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
assert(MI->getOperand(2).isReg() &&
MI->getOperand(2).getReg() == VarOffsetReg &&
"GETtls[ld]ADDR[32] must read GPR4");
- MCSymbol *TlsGetAddr = createMCSymbolForTlsGetAddr(OutContext);
- const MCExpr *TlsRef = MCSymbolRefExpr::create(
- TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
- EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BLA).addExpr(TlsRef));
+ EmitAIXTlsCallHelper(MI);
return;
}
@@ -1357,6 +1368,12 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSGD);
return;
}
+ case PPC::GETtlsTpointer32AIX: {
+ // Transform: %r3 = GETtlsTpointer32AIX
+ // Into: BLA .__get_tpointer()
+ EmitAIXTlsCallHelper(MI);
+ return;
+ }
case PPC::ADDIStlsldHA: {
// Transform: %xd = ADDIStlsldHA %x2, @sym
// Into: %xd = ADDIS8 %x2, sym@got@tlsld@ha
@@ -2776,11 +2793,13 @@ void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) {
MMI->hasDebugInfo());
break;
}
+ case PPC::GETtlsTpointer32AIX:
case PPC::GETtlsADDR64AIX:
case PPC::GETtlsADDR32AIX: {
- // The reference to .__tls_get_addr is unknown to the assembler
- // so we need to emit an external symbol reference.
- MCSymbol *TlsGetAddr = createMCSymbolForTlsGetAddr(OutContext);
+ // A reference to .__tls_get_addr/.__get_tpointer is unknown to the
+ // assembler so we need to emit an external symbol reference.
+ MCSymbol *TlsGetAddr =
+ createMCSymbolForTlsGetAddr(OutContext, MI->getOpcode());
ExtSymSDNodeSymbols.insert(TlsGetAddr);
break;
}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index f4eb89dda19ce..599e5762a19a8 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1738,6 +1738,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
+ case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER";
case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
@@ -3330,22 +3331,28 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
if (Model == TLSModel::LocalExec) {
- if (Is64Bit) {
+ SDValue VariableOffsetTGA =
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
+ SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
+ SDValue TLSReg;
+ if (Is64Bit)
// For local-exec on AIX (64-bit), the sequence that is generated involves
// a load of the variable offset (from the TOC), followed by an add of the
// loaded variable offset to R13 (the thread pointer).
// This code sequence looks like:
// ld reg1,var[TC](2)
// add reg2, reg1, r13 // r13 contains the thread pointer
- SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
- SDValue VariableOffsetTGA =
- DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
- SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
- return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
- } else {
- report_fatal_error("On AIX, the local-exec TLS model is only supported "
- "on PPC64 for now.");
- }
+ TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
+ else
+ // For local-exec on AIX (32-bit), the sequence that is generated involves
+ // loading the variable offset from the TOC, generating a call to
+ // .__get_tpointer to get the thread pointer (which will be in R3), and
+ // adding the two together:
+ // lwz reg1,var[TC](2)
+ // bla .__get_tpointer
+ // add reg2, reg1, r3
+ TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
+ return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
}
// The Local-Exec and General-Dynamic TLS models are currently the only
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 2e2514a5e34a4..e6ebc68008fb7 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -356,6 +356,11 @@ namespace llvm {
/// ADDIS_TLSGD_L_ADDR until after register assignment.
GET_TLS_ADDR,
+ /// %x3 = GET_TPOINTER - Used for the local-exec TLS model on 32-bit AIX,
+ /// produces a call to .__get_tpointer to retrieve the thread pointer
+ /// At the end of the call, the thread pointer is found in R3.
+ GET_TPOINTER,
+
/// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that
/// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following
/// register assignment.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index a8c27d0cf6a5a..25b2f9b1942fa 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -213,6 +213,7 @@ def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
+def PPCgetTpointer : SDNode<"PPCISD::GET_TPOINTER", SDTIntLeaf, []>;
def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR",
SDTypeProfile<1, 3, [
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
@@ -3142,6 +3143,24 @@ def GETtlsADDR32AIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$offset, gprc
"GETtlsADDR32AIX",
[(set i32:$rD,
(PPCgetTlsAddr i32:$offset, i32:$handle))]>;
+
+// For local-exec accesses on 32-bit AIX, a call to .__get_tpointer is
+// generated to retrieve the thread pointer. GETtlsTpointer32AIX clobbers both
+// R3 and the LR (link register).
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [R3,LR] in
+def GETtlsTpointer32AIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins),
+ "GETtlsTpointer32AIX",
+ [(set i32:$rD, (PPCgetTpointer))]>;
+
+// The following pattern matches local-exec TLS accesses on 32-bit AIX.
+// PPCaddTls is used in local-exec accesses in order to:
+// - Get the address of a variable (add the variable offset to the thread
+// pointer, retrieved by calling .__get_tpointer).
+// - Create an opportunity to optimize the user of the loaded address.
+def : Pat<(PPCaddTls i32:$in, i32:$addr),
+ (ADD4TLS $in, $addr)>;
+
// Combined op for ADDItlsgdL32 and GETtlsADDR32, late expanded. R3 and LR
// are true defines while the rest of the Defs are clobbers.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 59e8f3ff84a4b..9518d5347065c 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -56,13 +56,16 @@ namespace {
I != IE;) {
MachineInstr &MI = *I;
IsPCREL = isPCREL(MI);
+ // There are a number of slight differences in code generation
+ // when we call .__get_tpointer (32-bit AIX TLS).
+ bool IsTLSTPRelMI = MI.getOpcode() == PPC::GETtlsTpointer32AIX;
if (MI.getOpcode() != PPC::ADDItlsgdLADDR &&
MI.getOpcode() != PPC::ADDItlsldLADDR &&
MI.getOpcode() != PPC::ADDItlsgdLADDR32 &&
MI.getOpcode() != PPC::ADDItlsldLADDR32 &&
MI.getOpcode() != PPC::TLSGDAIX &&
- MI.getOpcode() != PPC::TLSGDAIX8 && !IsPCREL) {
+ MI.getOpcode() != PPC::TLSGDAIX8 && !IsTLSTPRelMI && !IsPCREL) {
// Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP
// as scheduling fences, we skip creating fences if we already
// have existing ADJCALLSTACKDOWN/UP to avoid nesting,
@@ -82,7 +85,7 @@ namespace {
Register InReg = PPC::NoRegister;
Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
Register GPR4 = Is64Bit ? PPC::X4 : PPC::R4;
- if (!IsPCREL)
+ if (!IsPCREL && !IsTLSTPRelMI)
InReg = MI.getOperand(1).getReg();
DebugLoc DL = MI.getDebugLoc();
@@ -116,6 +119,12 @@ namespace {
// set Opc2 here.
Opc2 = PPC::GETtlsADDR32AIX;
break;
+ case PPC::GETtlsTpointer32AIX:
+ // GETtlsTpointer32AIX is expanded to a call to GET_TPOINTER on AIX
+ // 32-bit mode within PPCAsmPrinter. This instruction does not need
+ // to change, so Opc2 is set to the same instruction opcode.
+ Opc2 = PPC::GETtlsTpointer32AIX;
+ break;
case PPC::PADDI8pc:
assert(IsPCREL && "Expecting General/Local Dynamic PCRel");
Opc1 = PPC::PADDI8pc;
@@ -138,11 +147,17 @@ namespace {
if (IsAIX) {
// The variable offset and region handle are copied in r4 and r3. The
// copies are followed by GETtlsADDR32AIX/GETtlsADDR64AIX.
- BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
- .addReg(MI.getOperand(1).getReg());
- BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
- .addReg(MI.getOperand(2).getReg());
- BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3).addReg(GPR4);
+ if (!IsTLSTPRelMI) {
+ BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
+ .addReg(MI.getOperand(1).getReg());
+ BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
+ .addReg(MI.getOperand(2).getReg());
+ BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3).addReg(GPR4);
+ } else
+ // The opcode of GETtlsTpointer32AIX does not change, because later
+ // this instruction will be expanded into a call to .__get_tpointer,
+ // which will return the thread pointer into r3.
+ BuildMI(MBB, I, DL, TII->get(Opc2), GPR3);
} else {
MachineInstr *Addi;
if (IsPCREL) {
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll
index 2e0697d347226..44905418a4d52 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-double.ll
@@ -5,6 +5,12 @@
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \
; RUN: | FileCheck %s --check-prefix=LARGE64
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN: -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s \
+; RUN: --check-prefix=SMALL32
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN: -mtriple powerpc-ibm-aix-xcoff --code-model=large < %s \
+; RUN: | FileCheck %s --check-prefix=LARGE32
@ThreadLocalVarInit = thread_local(localexec) global double 0x4021947AE147AE14, align 8
@VarInit = global double 8.787000e+01, align 8
@@ -28,6 +34,35 @@ define void @storeITLUninit(double noundef %x) {
; LARGE64-NEXT: add r3, r13, r3
; LARGE64-NEXT: stfd f1, 0(r3)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: storeITLUninit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: stfd f1, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: storeITLUninit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C0@u(r2)
+; LARGE32-NEXT: lwz r4, L..C0@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: stfd f1, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit)
store double %x, ptr %0, align 8
@@ -49,6 +84,35 @@ define void @storeITLInit(double noundef %x) {
; LARGE64-NEXT: add r3, r13, r3
; LARGE64-NEXT: stfd f1, 0(r3)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: storeITLInit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: stfd f1, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: storeITLInit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C1@u(r2)
+; LARGE32-NEXT: lwz r4, L..C1@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: stfd f1, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
store double %x, ptr %0, align 8
@@ -70,6 +134,35 @@ define void @storeTLUninit(double noundef %x) {
; LARGE64-NEXT: add r3, r13, r3
; LARGE64-NEXT: stfd f1, 0(r3)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: storeTLUninit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: stfd f1, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: storeTLUninit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C2@u(r2)
+; LARGE32-NEXT: lwz r4, L..C2@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: stfd f1, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit)
store double %x, ptr %0, align 8
@@ -91,6 +184,35 @@ define void @storeTLInit(double noundef %x) {
; LARGE64-NEXT: add r3, r13, r3
; LARGE64-NEXT: stfd f1, 0(r3)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: storeTLInit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: stfd f1, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: storeTLInit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C3@u(r2)
+; LARGE32-NEXT: lwz r4, L..C3@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: stfd f1, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
store double %x, ptr %0, align 8
@@ -112,6 +234,35 @@ define double @loadITLUninit() {
; LARGE64-NEXT: add r3, r13, r3
; LARGE64-NEXT: lfd f1, 0(r3)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadITLUninit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lfd f1, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadITLUninit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C0@u(r2)
+; LARGE32-NEXT: lwz r4, L..C0@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lfd f1, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit)
%1 = load double, ptr %0, align 8
@@ -140,6 +291,42 @@ define double @loadITLUninit2() {
; LARGE64-NEXT: lfd f0, 0(r3)
; LARGE64-NEXT: xsadddp f1, f0, f1
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadITLUninit2:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
+; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lfd f1, 0(r5)
+; SMALL32-NEXT: lfd f0, 0(r3)
+; SMALL32-NEXT: xsadddp f1, f0, f1
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadITLUninit2:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C0@u(r2)
+; LARGE32-NEXT: lwz r4, L..C0@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lfd f0, 0(r3)
+; LARGE32-NEXT: addis r3, L..C4@u(r2)
+; LARGE32-NEXT: lwz r3, L..C4@l(r3)
+; LARGE32-NEXT: lfd f1, 0(r3)
+; LARGE32-NEXT: xsadddp f1, f0, f1
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit)
%1 = load double, ptr %0, align 8
@@ -163,6 +350,35 @@ define double @loadITLInit() {
; LARGE64-NEXT: add r3, r13, r3
; LARGE64-NEXT: lfd f1, 0(r3)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadITLInit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lfd f1, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadITLInit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C1@u(r2)
+; LARGE32-NEXT: lwz r4, L..C1@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lfd f1, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
%1 = load double, ptr %0, align 8
@@ -191,6 +407,42 @@ define double @loadITLInit2() {
; LARGE64-NEXT: lfd f0, 0(r3)
; LARGE64-NEXT: xsadddp f1, f0, f1
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadITLInit2:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
+; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lfd f1, 0(r5)
+; SMALL32-NEXT: lfd f0, 0(r3)
+; SMALL32-NEXT: xsadddp f1, f0, f1
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadITLInit2:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C1@u(r2)
+; LARGE32-NEXT: lwz r4, L..C1@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lfd f0, 0(r3)
+; LARGE32-NEXT: addis r3, L..C4@u(r2)
+; LARGE32-NEXT: lwz r3, L..C4@l(r3)
+; LARGE32-NEXT: lfd f1, 0(r3)
+; LARGE32-NEXT: xsadddp f1, f0, f1
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
%1 = load double, ptr %0, align 8
@@ -214,6 +466,35 @@ define double @loadTLUninit() {
; LARGE64-NEXT: add r3, r13, r3
; LARGE64-NEXT: lfd f1, 0(r3)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadTLUninit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lfd f1, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadTLUninit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C2@u(r2)
+; LARGE32-NEXT: lwz r4, L..C2@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lfd f1, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit)
%1 = load double, ptr %0, align 8
@@ -242,6 +523,42 @@ define double @loadTLUninit2() {
; LARGE64-NEXT: lfd f0, 0(r3)
; LARGE64-NEXT: xsadddp f1, f0, f1
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadTLUninit2:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
+; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lfd f1, 0(r5)
+; SMALL32-NEXT: lfd f0, 0(r3)
+; SMALL32-NEXT: xsadddp f1, f0, f1
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadTLUninit2:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C2@u(r2)
+; LARGE32-NEXT: lwz r4, L..C2@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lfd f0, 0(r3)
+; LARGE32-NEXT: addis r3, L..C4@u(r2)
+; LARGE32-NEXT: lwz r3, L..C4@l(r3)
+; LARGE32-NEXT: lfd f1, 0(r3)
+; LARGE32-NEXT: xsadddp f1, f0, f1
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit)
%1 = load double, ptr %0, align 8
@@ -265,6 +582,35 @@ define double @loadTLInit() {
; LARGE64-NEXT: add r3, r13, r3
; LARGE64-NEXT: lfd f1, 0(r3)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadTLInit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lfd f1, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadTLInit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C3@u(r2)
+; LARGE32-NEXT: lwz r4, L..C3@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lfd f1, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
%1 = load double, ptr %0, align 8
@@ -293,6 +639,42 @@ define double @loadTLInit2() {
; LARGE64-NEXT: lfd f0, 0(r3)
; LARGE64-NEXT: xsadddp f1, f0, f1
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadTLInit2:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
+; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lfd f1, 0(r5)
+; SMALL32-NEXT: lfd f0, 0(r3)
+; SMALL32-NEXT: xsadddp f1, f0, f1
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadTLInit2:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C3@u(r2)
+; LARGE32-NEXT: lwz r4, L..C3@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lfd f0, 0(r3)
+; LARGE32-NEXT: addis r3, L..C4@u(r2)
+; LARGE32-NEXT: lwz r3, L..C4@l(r3)
+; LARGE32-NEXT: lfd f1, 0(r3)
+; LARGE32-NEXT: xsadddp f1, f0, f1
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
%1 = load double, ptr %0, align 8
@@ -326,3 +708,27 @@ entry:
; LARGE64-NEXT: .tc ThreadLocalVarInit[TE],ThreadLocalVarInit[TL]@le
; LARGE64-LABEL: L..C4:
; LARGE64-NEXT: .tc VarInit[TE],VarInit[RW]
+
+; SMALL32-LABEL: .toc
+; SMALL32-LABEL: L..C0:
+; SMALL32-NEXT: .tc IThreadLocalVarUninit[TC],IThreadLocalVarUninit[UL]@le
+; SMALL32-LABEL: L..C1:
+; SMALL32-NEXT: .tc IThreadLocalVarInit[TC],IThreadLocalVarInit[TL]@le
+; SMALL32-LABEL: L..C2:
+; SMALL32-NEXT: .tc ThreadLocalVarUninit[TC],ThreadLocalVarUninit[TL]@le
+; SMALL32-LABEL: L..C3:
+; SMALL32-NEXT: .tc ThreadLocalVarInit[TC],ThreadLocalVarInit[TL]@le
+; SMALL32-LABEL: L..C4:
+; SMALL32-NEXT: .tc VarInit[TC],VarInit[RW]
+
+; LARGE32-LABEL: .toc
+; LARGE32-LABEL: L..C0:
+; LARGE32-NEXT: .tc IThreadLocalVarUninit[TE],IThreadLocalVarUninit[UL]@le
+; LARGE32-LABEL: L..C1:
+; LARGE32-NEXT: .tc IThreadLocalVarInit[TE],IThreadLocalVarInit[TL]@le
+; LARGE32-LABEL: L..C2:
+; LARGE32-NEXT: .tc ThreadLocalVarUninit[TE],ThreadLocalVarUninit[TL]@le
+; LARGE32-LABEL: L..C3:
+; LARGE32-NEXT: .tc ThreadLocalVarInit[TE],ThreadLocalVarInit[TL]@le
+; LARGE32-LABEL: L..C4:
+; LARGE32-NEXT: .tc VarInit[TE],VarInit[RW]
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll
index 2b93616bfe8bd..427262c7952bb 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll
@@ -5,6 +5,12 @@
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \
; RUN: | FileCheck %s --check-prefix=LARGE64
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN: -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s \
+; RUN: --check-prefix=SMALL32
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN: -mtriple powerpc-ibm-aix-xcoff --code-model=large < %s \
+; RUN: | FileCheck %s --check-prefix=LARGE32
@ThreadLocalVarInit = thread_local(localexec) global float 0x401D333340000000, align 4
@VarInit = global float 0x4021666660000000, align 4
@@ -28,6 +34,35 @@ define void @storeITLUninit(float noundef %x) {
; LARGE64-NEXT: add r3, r13, r3
; LARGE64-NEXT: stfs f1, 0(r3)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: storeITLUninit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: stfs f1, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: storeITLUninit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C0@u(r2)
+; LARGE32-NEXT: lwz r4, L..C0@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: stfs f1, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit)
store float %x, ptr %0, align 4
@@ -49,6 +84,35 @@ define void @storeITLInit(float noundef %x) {
; LARGE64-NEXT: add r3, r13, r3
; LARGE64-NEXT: stfs f1, 0(r3)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: storeITLInit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: stfs f1, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: storeITLInit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C1@u(r2)
+; LARGE32-NEXT: lwz r4, L..C1@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: stfs f1, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
store float %x, ptr %0, align 4
@@ -70,6 +134,35 @@ define void @storeTLUninit(float noundef %x) {
; LARGE64-NEXT: add r3, r13, r3
; LARGE64-NEXT: stfs f1, 0(r3)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: storeTLUninit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: stfs f1, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: storeTLUninit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C2@u(r2)
+; LARGE32-NEXT: lwz r4, L..C2@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: stfs f1, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit)
store float %x, ptr %0, align 4
@@ -91,6 +184,35 @@ define void @storeTLInit(float noundef %x) {
; LARGE64-NEXT: add r3, r13, r3
; LARGE64-NEXT: stfs f1, 0(r3)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: storeTLInit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: stfs f1, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: storeTLInit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C3@u(r2)
+; LARGE32-NEXT: lwz r4, L..C3@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: stfs f1, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
store float %x, ptr %0, align 4
@@ -112,6 +234,35 @@ define float @loadITLUninit() {
; LARGE64-NEXT: add r3, r13, r3
; LARGE64-NEXT: lfs f1, 0(r3)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadITLUninit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lfs f1, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadITLUninit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C0@u(r2)
+; LARGE32-NEXT: lwz r4, L..C0@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lfs f1, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit)
%1 = load float, ptr %0, align 4
@@ -140,6 +291,42 @@ define float @loadITLUninit2() {
; LARGE64-NEXT: lfs f0, 0(r3)
; LARGE64-NEXT: fadds f1, f0, f1
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadITLUninit2:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
+; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lfs f1, 0(r5)
+; SMALL32-NEXT: lfs f0, 0(r3)
+; SMALL32-NEXT: fadds f1, f0, f1
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadITLUninit2:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C0@u(r2)
+; LARGE32-NEXT: lwz r4, L..C0@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lfs f0, 0(r3)
+; LARGE32-NEXT: addis r3, L..C4@u(r2)
+; LARGE32-NEXT: lwz r3, L..C4@l(r3)
+; LARGE32-NEXT: lfs f1, 0(r3)
+; LARGE32-NEXT: fadds f1, f0, f1
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit)
%1 = load float, ptr %0, align 4
@@ -163,6 +350,35 @@ define float @loadITLInit() {
; LARGE64-NEXT: add r3, r13, r3
; LARGE64-NEXT: lfs f1, 0(r3)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadITLInit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lfs f1, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadITLInit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C1@u(r2)
+; LARGE32-NEXT: lwz r4, L..C1@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lfs f1, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
%1 = load float, ptr %0, align 4
@@ -191,6 +407,42 @@ define float @loadITLInit2() {
; LARGE64-NEXT: lfs f0, 0(r3)
; LARGE64-NEXT: fadds f1, f0, f1
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadITLInit2:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
+; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lfs f1, 0(r5)
+; SMALL32-NEXT: lfs f0, 0(r3)
+; SMALL32-NEXT: fadds f1, f0, f1
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadITLInit2:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C1@u(r2)
+; LARGE32-NEXT: lwz r4, L..C1@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lfs f0, 0(r3)
+; LARGE32-NEXT: addis r3, L..C4@u(r2)
+; LARGE32-NEXT: lwz r3, L..C4@l(r3)
+; LARGE32-NEXT: lfs f1, 0(r3)
+; LARGE32-NEXT: fadds f1, f0, f1
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
%1 = load float, ptr %0, align 4
@@ -214,6 +466,35 @@ define float @loadTLUninit() {
; LARGE64-NEXT: add r3, r13, r3
; LARGE64-NEXT: lfs f1, 0(r3)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadTLUninit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lfs f1, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadTLUninit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C2@u(r2)
+; LARGE32-NEXT: lwz r4, L..C2@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lfs f1, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit)
%1 = load float, ptr %0, align 4
@@ -242,6 +523,42 @@ define float @loadTLUninit2() {
; LARGE64-NEXT: lfs f0, 0(r3)
; LARGE64-NEXT: fadds f1, f0, f1
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadTLUninit2:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
+; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lfs f1, 0(r5)
+; SMALL32-NEXT: lfs f0, 0(r3)
+; SMALL32-NEXT: fadds f1, f0, f1
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadTLUninit2:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C2@u(r2)
+; LARGE32-NEXT: lwz r4, L..C2@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lfs f0, 0(r3)
+; LARGE32-NEXT: addis r3, L..C4@u(r2)
+; LARGE32-NEXT: lwz r3, L..C4@l(r3)
+; LARGE32-NEXT: lfs f1, 0(r3)
+; LARGE32-NEXT: fadds f1, f0, f1
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit)
%1 = load float, ptr %0, align 4
@@ -265,6 +582,35 @@ define float @loadTLInit() {
; LARGE64-NEXT: add r3, r13, r3
; LARGE64-NEXT: lfs f1, 0(r3)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadTLInit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lfs f1, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadTLInit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C3@u(r2)
+; LARGE32-NEXT: lwz r4, L..C3@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lfs f1, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
%1 = load float, ptr %0, align 4
@@ -293,6 +639,42 @@ define float @loadTLInit2() {
; LARGE64-NEXT: lfs f0, 0(r3)
; LARGE64-NEXT: fadds f1, f0, f1
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadTLInit2:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
+; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lfs f1, 0(r5)
+; SMALL32-NEXT: lfs f0, 0(r3)
+; SMALL32-NEXT: fadds f1, f0, f1
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadTLInit2:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C3@u(r2)
+; LARGE32-NEXT: lwz r4, L..C3@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lfs f0, 0(r3)
+; LARGE32-NEXT: addis r3, L..C4@u(r2)
+; LARGE32-NEXT: lwz r3, L..C4@l(r3)
+; LARGE32-NEXT: lfs f1, 0(r3)
+; LARGE32-NEXT: fadds f1, f0, f1
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
%1 = load float, ptr %0, align 4
@@ -326,3 +708,27 @@ entry:
; LARGE64-NEXT: .tc ThreadLocalVarInit[TE],ThreadLocalVarInit[TL]@le
; LARGE64-LABEL: L..C4:
; LARGE64-NEXT: .tc VarInit[TE],VarInit[RW]
+
+; SMALL32-LABEL: .toc
+; SMALL32-LABEL: L..C0:
+; SMALL32-NEXT: .tc IThreadLocalVarUninit[TC],IThreadLocalVarUninit[UL]@le
+; SMALL32-LABEL: L..C1:
+; SMALL32-NEXT: .tc IThreadLocalVarInit[TC],IThreadLocalVarInit[TL]@le
+; SMALL32-LABEL: L..C2:
+; SMALL32-NEXT: .tc ThreadLocalVarUninit[TC],ThreadLocalVarUninit[TL]@le
+; SMALL32-LABEL: L..C3:
+; SMALL32-NEXT: .tc ThreadLocalVarInit[TC],ThreadLocalVarInit[TL]@le
+; SMALL32-LABEL: L..C4:
+; SMALL32-NEXT: .tc VarInit[TC],VarInit[RW]
+
+; LARGE32-LABEL: .toc
+; LARGE32-LABEL: L..C0:
+; LARGE32-NEXT: .tc IThreadLocalVarUninit[TE],IThreadLocalVarUninit[UL]@le
+; LARGE32-LABEL: L..C1:
+; LARGE32-NEXT: .tc IThreadLocalVarInit[TE],IThreadLocalVarInit[TL]@le
+; LARGE32-LABEL: L..C2:
+; LARGE32-NEXT: .tc ThreadLocalVarUninit[TE],ThreadLocalVarUninit[TL]@le
+; LARGE32-LABEL: L..C3:
+; LARGE32-NEXT: .tc ThreadLocalVarInit[TE],ThreadLocalVarInit[TL]@le
+; LARGE32-LABEL: L..C4:
+; LARGE32-NEXT: .tc VarInit[TE],VarInit[RW]
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll
index 1df848481da4b..01aa56a9f7178 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-int.ll
@@ -5,6 +5,12 @@
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \
; RUN: | FileCheck %s --check-prefix=LARGE64
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN: -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s \
+; RUN: --check-prefix=SMALL32
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN: -mtriple powerpc-ibm-aix-xcoff --code-model=large < %s \
+; RUN: | FileCheck %s --check-prefix=LARGE32
@ThreadLocalVarInit = thread_local(localexec) global i32 1, align 4
@VarInit = global i32 87, align 4
@@ -28,6 +34,37 @@ define void @storeITLUninit(i32 noundef signext %x) {
; LARGE64-NEXT: add r4, r13, r4
; LARGE64-NEXT: stw r3, 0(r4)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: storeITLUninit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r5, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
+; SMALL32-NEXT: mr r4, r3
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r5
+; SMALL32-NEXT: stw r4, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: storeITLUninit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: mr r4, r3
+; LARGE32-NEXT: addis r3, L..C0@u(r2)
+; LARGE32-NEXT: lwz r5, L..C0@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r5
+; LARGE32-NEXT: stw r4, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit)
store i32 %x, ptr %0, align 4
@@ -49,6 +86,37 @@ define void @storeITLInit(i32 noundef signext %x) {
; LARGE64-NEXT: add r4, r13, r4
; LARGE64-NEXT: stw r3, 0(r4)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: storeITLInit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r5, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
+; SMALL32-NEXT: mr r4, r3
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r5
+; SMALL32-NEXT: stw r4, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: storeITLInit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: mr r4, r3
+; LARGE32-NEXT: addis r3, L..C1@u(r2)
+; LARGE32-NEXT: lwz r5, L..C1@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r5
+; LARGE32-NEXT: stw r4, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
store i32 %x, ptr %0, align 4
@@ -70,6 +138,37 @@ define void @storeTLUninit(i32 noundef signext %x) {
; LARGE64-NEXT: add r4, r13, r4
; LARGE64-NEXT: stw r3, 0(r4)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: storeTLUninit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r5, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
+; SMALL32-NEXT: mr r4, r3
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r5
+; SMALL32-NEXT: stw r4, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: storeTLUninit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: mr r4, r3
+; LARGE32-NEXT: addis r3, L..C2@u(r2)
+; LARGE32-NEXT: lwz r5, L..C2@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r5
+; LARGE32-NEXT: stw r4, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit)
store i32 %x, ptr %0, align 4
@@ -91,6 +190,37 @@ define void @storeTLInit(i32 noundef signext %x) {
; LARGE64-NEXT: add r4, r13, r4
; LARGE64-NEXT: stw r3, 0(r4)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: storeTLInit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r5, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
+; SMALL32-NEXT: mr r4, r3
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r5
+; SMALL32-NEXT: stw r4, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: storeTLInit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: mr r4, r3
+; LARGE32-NEXT: addis r3, L..C3@u(r2)
+; LARGE32-NEXT: lwz r5, L..C3@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r5
+; LARGE32-NEXT: stw r4, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
store i32 %x, ptr %0, align 4
@@ -112,6 +242,35 @@ define signext i32 @loadITLUninit() {
; LARGE64-NEXT: add r3, r13, r3
; LARGE64-NEXT: lwa r3, 0(r3)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadITLUninit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lwz r3, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadITLUninit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C0@u(r2)
+; LARGE32-NEXT: lwz r4, L..C0@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lwz r3, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit)
%1 = load i32, ptr %0, align 4
@@ -142,6 +301,42 @@ define signext i32 @loadITLUninit2() {
; LARGE64-NEXT: add r3, r4, r3
; LARGE64-NEXT: extsw r3, r3
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadITLUninit2:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
+; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lwz r4, 0(r5)
+; SMALL32-NEXT: lwz r3, 0(r3)
+; SMALL32-NEXT: add r3, r4, r3
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadITLUninit2:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C0@u(r2)
+; LARGE32-NEXT: lwz r4, L..C0@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lwz r3, 0(r3)
+; LARGE32-NEXT: addis r4, L..C4@u(r2)
+; LARGE32-NEXT: lwz r4, L..C4@l(r4)
+; LARGE32-NEXT: lwz r4, 0(r4)
+; LARGE32-NEXT: add r3, r4, r3
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit)
%1 = load i32, ptr %0, align 4
@@ -165,6 +360,35 @@ define signext i32 @loadITLInit() {
; LARGE64-NEXT: add r3, r13, r3
; LARGE64-NEXT: lwa r3, 0(r3)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadITLInit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lwz r3, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadITLInit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C1@u(r2)
+; LARGE32-NEXT: lwz r4, L..C1@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lwz r3, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
%1 = load i32, ptr %0, align 4
@@ -195,6 +419,42 @@ define signext i32 @loadITLInit2() {
; LARGE64-NEXT: add r3, r4, r3
; LARGE64-NEXT: extsw r3, r3
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadITLInit2:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
+; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lwz r4, 0(r5)
+; SMALL32-NEXT: lwz r3, 0(r3)
+; SMALL32-NEXT: add r3, r4, r3
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadITLInit2:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C1@u(r2)
+; LARGE32-NEXT: lwz r4, L..C1@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lwz r3, 0(r3)
+; LARGE32-NEXT: addis r4, L..C4@u(r2)
+; LARGE32-NEXT: lwz r4, L..C4@l(r4)
+; LARGE32-NEXT: lwz r4, 0(r4)
+; LARGE32-NEXT: add r3, r4, r3
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
%1 = load i32, ptr %0, align 4
@@ -218,6 +478,35 @@ define signext i32 @loadTLUninit() {
; LARGE64-NEXT: add r3, r13, r3
; LARGE64-NEXT: lwa r3, 0(r3)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadTLUninit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lwz r3, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadTLUninit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C2@u(r2)
+; LARGE32-NEXT: lwz r4, L..C2@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lwz r3, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit)
%1 = load i32, ptr %0, align 4
@@ -248,6 +537,42 @@ define signext i32 @loadTLUninit2() {
; LARGE64-NEXT: add r3, r4, r3
; LARGE64-NEXT: extsw r3, r3
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadTLUninit2:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
+; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lwz r4, 0(r5)
+; SMALL32-NEXT: lwz r3, 0(r3)
+; SMALL32-NEXT: add r3, r4, r3
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadTLUninit2:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C2@u(r2)
+; LARGE32-NEXT: lwz r4, L..C2@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lwz r3, 0(r3)
+; LARGE32-NEXT: addis r4, L..C4@u(r2)
+; LARGE32-NEXT: lwz r4, L..C4@l(r4)
+; LARGE32-NEXT: lwz r4, 0(r4)
+; LARGE32-NEXT: add r3, r4, r3
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit)
%1 = load i32, ptr %0, align 4
@@ -271,6 +596,35 @@ define signext i32 @loadTLInit() {
; LARGE64-NEXT: add r3, r13, r3
; LARGE64-NEXT: lwa r3, 0(r3)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadTLInit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lwz r3, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadTLInit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C3@u(r2)
+; LARGE32-NEXT: lwz r4, L..C3@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lwz r3, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
%1 = load i32, ptr %0, align 4
@@ -301,6 +655,42 @@ define signext i32 @loadTLInit2() {
; LARGE64-NEXT: add r3, r4, r3
; LARGE64-NEXT: extsw r3, r3
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadTLInit2:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
+; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lwz r4, 0(r5)
+; SMALL32-NEXT: lwz r3, 0(r3)
+; SMALL32-NEXT: add r3, r4, r3
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadTLInit2:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C3@u(r2)
+; LARGE32-NEXT: lwz r4, L..C3@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lwz r3, 0(r3)
+; LARGE32-NEXT: addis r4, L..C4@u(r2)
+; LARGE32-NEXT: lwz r4, L..C4@l(r4)
+; LARGE32-NEXT: lwz r4, 0(r4)
+; LARGE32-NEXT: add r3, r4, r3
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
%1 = load i32, ptr %0, align 4
@@ -334,3 +724,27 @@ entry:
; LARGE64-NEXT: .tc ThreadLocalVarInit[TE],ThreadLocalVarInit[TL]@le
; LARGE64-LABEL: L..C4:
; LARGE64-NEXT: .tc VarInit[TE],VarInit[RW]
+
+; SMALL32-LABEL: .toc
+; SMALL32-LABEL: L..C0:
+; SMALL32-NEXT: .tc IThreadLocalVarUninit[TC],IThreadLocalVarUninit[UL]@le
+; SMALL32-LABEL: L..C1:
+; SMALL32-NEXT: .tc IThreadLocalVarInit[TC],IThreadLocalVarInit[TL]@le
+; SMALL32-LABEL: L..C2:
+; SMALL32-NEXT: .tc ThreadLocalVarUninit[TC],ThreadLocalVarUninit[TL]@le
+; SMALL32-LABEL: L..C3:
+; SMALL32-NEXT: .tc ThreadLocalVarInit[TC],ThreadLocalVarInit[TL]@le
+; SMALL32-LABEL: L..C4:
+; SMALL32-NEXT: .tc VarInit[TC],VarInit[RW]
+
+; LARGE32-LABEL: .toc
+; LARGE32-LABEL: L..C0:
+; LARGE32-NEXT: .tc IThreadLocalVarUninit[TE],IThreadLocalVarUninit[UL]@le
+; LARGE32-LABEL: L..C1:
+; LARGE32-NEXT: .tc IThreadLocalVarInit[TE],IThreadLocalVarInit[TL]@le
+; LARGE32-LABEL: L..C2:
+; LARGE32-NEXT: .tc ThreadLocalVarUninit[TE],ThreadLocalVarUninit[TL]@le
+; LARGE32-LABEL: L..C3:
+; LARGE32-NEXT: .tc ThreadLocalVarInit[TE],ThreadLocalVarInit[TL]@le
+; LARGE32-LABEL: L..C4:
+; LARGE32-NEXT: .tc VarInit[TE],VarInit[RW]
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll
index 8ef680909f91b..4661b574b714b 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll
@@ -5,6 +5,12 @@
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \
; RUN: | FileCheck %s --check-prefix=LARGE64
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN: -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s \
+; RUN: --check-prefix=SMALL32
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN: -mtriple powerpc-ibm-aix-xcoff --code-model=large < %s \
+; RUN: | FileCheck %s --check-prefix=LARGE32
@ThreadLocalVarInit = thread_local(localexec) global i64 1, align 8
@VarInit = global i64 87, align 8
@@ -28,6 +34,39 @@ define void @storeITLUninit(i64 noundef %x) {
; LARGE64-NEXT: add r4, r13, r4
; LARGE64-NEXT: std r3, 0(r4)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: storeITLUninit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r6, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
+; SMALL32-NEXT: mr r5, r3
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r6
+; SMALL32-NEXT: stw r4, 4(r3)
+; SMALL32-NEXT: stw r5, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: storeITLUninit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: mr r5, r3
+; LARGE32-NEXT: addis r3, L..C0@u(r2)
+; LARGE32-NEXT: lwz r6, L..C0@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r6
+; LARGE32-NEXT: stw r4, 4(r3)
+; LARGE32-NEXT: stw r5, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit)
store i64 %x, ptr %0, align 8
@@ -49,6 +88,39 @@ define void @storeITLInit(i64 noundef %x) {
; LARGE64-NEXT: add r4, r13, r4
; LARGE64-NEXT: std r3, 0(r4)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: storeITLInit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r6, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
+; SMALL32-NEXT: mr r5, r3
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r6
+; SMALL32-NEXT: stw r4, 4(r3)
+; SMALL32-NEXT: stw r5, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: storeITLInit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: mr r5, r3
+; LARGE32-NEXT: addis r3, L..C1@u(r2)
+; LARGE32-NEXT: lwz r6, L..C1@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r6
+; LARGE32-NEXT: stw r4, 4(r3)
+; LARGE32-NEXT: stw r5, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
store i64 %x, ptr %0, align 8
@@ -70,6 +142,39 @@ define void @storeTLUninit(i64 noundef %x) {
; LARGE64-NEXT: add r4, r13, r4
; LARGE64-NEXT: std r3, 0(r4)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: storeTLUninit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r6, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
+; SMALL32-NEXT: mr r5, r3
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r6
+; SMALL32-NEXT: stw r4, 4(r3)
+; SMALL32-NEXT: stw r5, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: storeTLUninit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: mr r5, r3
+; LARGE32-NEXT: addis r3, L..C2@u(r2)
+; LARGE32-NEXT: lwz r6, L..C2@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r6
+; LARGE32-NEXT: stw r4, 4(r3)
+; LARGE32-NEXT: stw r5, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit)
store i64 %x, ptr %0, align 8
@@ -91,6 +196,39 @@ define void @storeTLInit(i64 noundef %x) {
; LARGE64-NEXT: add r4, r13, r4
; LARGE64-NEXT: std r3, 0(r4)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: storeTLInit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r6, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
+; SMALL32-NEXT: mr r5, r3
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r6
+; SMALL32-NEXT: stw r4, 4(r3)
+; SMALL32-NEXT: stw r5, 0(r3)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: storeTLInit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: mr r5, r3
+; LARGE32-NEXT: addis r3, L..C3@u(r2)
+; LARGE32-NEXT: lwz r6, L..C3@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r6
+; LARGE32-NEXT: stw r4, 4(r3)
+; LARGE32-NEXT: stw r5, 0(r3)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
store i64 %x, ptr %0, align 8
@@ -112,6 +250,37 @@ define i64 @loadITLUninit() {
; LARGE64-NEXT: add r3, r13, r3
; LARGE64-NEXT: ld r3, 0(r3)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadITLUninit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r4, r3, r4
+; SMALL32-NEXT: lwz r3, 0(r4)
+; SMALL32-NEXT: lwz r4, 4(r4)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadITLUninit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C0@u(r2)
+; LARGE32-NEXT: lwz r4, L..C0@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r4, r3, r4
+; LARGE32-NEXT: lwz r3, 0(r4)
+; LARGE32-NEXT: lwz r4, 4(r4)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit)
%1 = load i64, ptr %0, align 8
@@ -140,6 +309,48 @@ define i64 @loadITLUninit2() {
; LARGE64-NEXT: ld r3, 0(r3)
; LARGE64-NEXT: add r3, r4, r3
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadITLUninit2:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
+; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lwz r6, 4(r5)
+; SMALL32-NEXT: lwz r5, 0(r5)
+; SMALL32-NEXT: lwz r4, 4(r3)
+; SMALL32-NEXT: lwz r3, 0(r3)
+; SMALL32-NEXT: addc r4, r6, r4
+; SMALL32-NEXT: adde r3, r5, r3
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadITLUninit2:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C0@u(r2)
+; LARGE32-NEXT: lwz r4, L..C0@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lwz r4, 4(r3)
+; LARGE32-NEXT: lwz r3, 0(r3)
+; LARGE32-NEXT: addis r5, L..C4@u(r2)
+; LARGE32-NEXT: lwz r5, L..C4@l(r5)
+; LARGE32-NEXT: lwz r6, 4(r5)
+; LARGE32-NEXT: lwz r5, 0(r5)
+; LARGE32-NEXT: addc r4, r6, r4
+; LARGE32-NEXT: adde r3, r5, r3
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit)
%1 = load i64, ptr %0, align 8
@@ -163,6 +374,37 @@ define i64 @loadITLInit() {
; LARGE64-NEXT: add r3, r13, r3
; LARGE64-NEXT: ld r3, 0(r3)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadITLInit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r4, r3, r4
+; SMALL32-NEXT: lwz r3, 0(r4)
+; SMALL32-NEXT: lwz r4, 4(r4)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadITLInit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C1@u(r2)
+; LARGE32-NEXT: lwz r4, L..C1@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r4, r3, r4
+; LARGE32-NEXT: lwz r3, 0(r4)
+; LARGE32-NEXT: lwz r4, 4(r4)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
%1 = load i64, ptr %0, align 8
@@ -191,6 +433,48 @@ define i64 @loadITLInit2() {
; LARGE64-NEXT: ld r3, 0(r3)
; LARGE64-NEXT: add r3, r4, r3
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadITLInit2:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
+; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lwz r6, 4(r5)
+; SMALL32-NEXT: lwz r5, 0(r5)
+; SMALL32-NEXT: lwz r4, 4(r3)
+; SMALL32-NEXT: lwz r3, 0(r3)
+; SMALL32-NEXT: addc r4, r6, r4
+; SMALL32-NEXT: adde r3, r5, r3
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadITLInit2:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C1@u(r2)
+; LARGE32-NEXT: lwz r4, L..C1@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lwz r4, 4(r3)
+; LARGE32-NEXT: lwz r3, 0(r3)
+; LARGE32-NEXT: addis r5, L..C4@u(r2)
+; LARGE32-NEXT: lwz r5, L..C4@l(r5)
+; LARGE32-NEXT: lwz r6, 4(r5)
+; LARGE32-NEXT: lwz r5, 0(r5)
+; LARGE32-NEXT: addc r4, r6, r4
+; LARGE32-NEXT: adde r3, r5, r3
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarInit)
%1 = load i64, ptr %0, align 8
@@ -214,6 +498,37 @@ define i64 @loadTLUninit() {
; LARGE64-NEXT: add r3, r13, r3
; LARGE64-NEXT: ld r3, 0(r3)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadTLUninit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r4, r3, r4
+; SMALL32-NEXT: lwz r3, 0(r4)
+; SMALL32-NEXT: lwz r4, 4(r4)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadTLUninit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C2@u(r2)
+; LARGE32-NEXT: lwz r4, L..C2@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r4, r3, r4
+; LARGE32-NEXT: lwz r3, 0(r4)
+; LARGE32-NEXT: lwz r4, 4(r4)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit)
%1 = load i64, ptr %0, align 8
@@ -242,6 +557,48 @@ define i64 @loadTLUninit2() {
; LARGE64-NEXT: ld r3, 0(r3)
; LARGE64-NEXT: add r3, r4, r3
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadTLUninit2:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
+; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lwz r6, 4(r5)
+; SMALL32-NEXT: lwz r5, 0(r5)
+; SMALL32-NEXT: lwz r4, 4(r3)
+; SMALL32-NEXT: lwz r3, 0(r3)
+; SMALL32-NEXT: addc r4, r6, r4
+; SMALL32-NEXT: adde r3, r5, r3
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadTLUninit2:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C2@u(r2)
+; LARGE32-NEXT: lwz r4, L..C2@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lwz r4, 4(r3)
+; LARGE32-NEXT: lwz r3, 0(r3)
+; LARGE32-NEXT: addis r5, L..C4@u(r2)
+; LARGE32-NEXT: lwz r5, L..C4@l(r5)
+; LARGE32-NEXT: lwz r6, 4(r5)
+; LARGE32-NEXT: lwz r5, 0(r5)
+; LARGE32-NEXT: addc r4, r6, r4
+; LARGE32-NEXT: adde r3, r5, r3
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarUninit)
%1 = load i64, ptr %0, align 8
@@ -265,6 +622,37 @@ define i64 @loadTLInit() {
; LARGE64-NEXT: add r3, r13, r3
; LARGE64-NEXT: ld r3, 0(r3)
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadTLInit:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r4, r3, r4
+; SMALL32-NEXT: lwz r3, 0(r4)
+; SMALL32-NEXT: lwz r4, 4(r4)
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadTLInit:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C3@u(r2)
+; LARGE32-NEXT: lwz r4, L..C3@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r4, r3, r4
+; LARGE32-NEXT: lwz r3, 0(r4)
+; LARGE32-NEXT: lwz r4, 4(r4)
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
%1 = load i64, ptr %0, align 8
@@ -293,6 +681,48 @@ define i64 @loadTLInit2() {
; LARGE64-NEXT: ld r3, 0(r3)
; LARGE64-NEXT: add r3, r4, r3
; LARGE64-NEXT: blr
+;
+; SMALL32-LABEL: loadTLInit2:
+; SMALL32: # %bb.0: # %entry
+; SMALL32-NEXT: mflr r0
+; SMALL32-NEXT: stwu r1, -32(r1)
+; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
+; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit
+; SMALL32-NEXT: bla .__get_tpointer[PR]
+; SMALL32-NEXT: stw r0, 40(r1)
+; SMALL32-NEXT: add r3, r3, r4
+; SMALL32-NEXT: lwz r6, 4(r5)
+; SMALL32-NEXT: lwz r5, 0(r5)
+; SMALL32-NEXT: lwz r4, 4(r3)
+; SMALL32-NEXT: lwz r3, 0(r3)
+; SMALL32-NEXT: addc r4, r6, r4
+; SMALL32-NEXT: adde r3, r5, r3
+; SMALL32-NEXT: addi r1, r1, 32
+; SMALL32-NEXT: lwz r0, 8(r1)
+; SMALL32-NEXT: mtlr r0
+; SMALL32-NEXT: blr
+;
+; LARGE32-LABEL: loadTLInit2:
+; LARGE32: # %bb.0: # %entry
+; LARGE32-NEXT: mflr r0
+; LARGE32-NEXT: stwu r1, -32(r1)
+; LARGE32-NEXT: stw r0, 40(r1)
+; LARGE32-NEXT: addis r3, L..C3@u(r2)
+; LARGE32-NEXT: lwz r4, L..C3@l(r3)
+; LARGE32-NEXT: bla .__get_tpointer[PR]
+; LARGE32-NEXT: add r3, r3, r4
+; LARGE32-NEXT: lwz r4, 4(r3)
+; LARGE32-NEXT: lwz r3, 0(r3)
+; LARGE32-NEXT: addis r5, L..C4@u(r2)
+; LARGE32-NEXT: lwz r5, L..C4@l(r5)
+; LARGE32-NEXT: lwz r6, 4(r5)
+; LARGE32-NEXT: lwz r5, 0(r5)
+; LARGE32-NEXT: addc r4, r6, r4
+; LARGE32-NEXT: adde r3, r5, r3
+; LARGE32-NEXT: addi r1, r1, 32
+; LARGE32-NEXT: lwz r0, 8(r1)
+; LARGE32-NEXT: mtlr r0
+; LARGE32-NEXT: blr
entry:
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
%1 = load i64, ptr %0, align 8
@@ -326,3 +756,27 @@ entry:
; LARGE64-NEXT: .tc ThreadLocalVarInit[TE],ThreadLocalVarInit[TL]@le
; LARGE64-LABEL: L..C4:
; LARGE64-NEXT: .tc VarInit[TE],VarInit[RW]
+
+; SMALL32-LABEL: .toc
+; SMALL32-LABEL: L..C0:
+; SMALL32-NEXT: .tc IThreadLocalVarUninit[TC],IThreadLocalVarUninit[UL]@le
+; SMALL32-LABEL: L..C1:
+; SMALL32-NEXT: .tc IThreadLocalVarInit[TC],IThreadLocalVarInit[TL]@le
+; SMALL32-LABEL: L..C2:
+; SMALL32-NEXT: .tc ThreadLocalVarUninit[TC],ThreadLocalVarUninit[TL]@le
+; SMALL32-LABEL: L..C3:
+; SMALL32-NEXT: .tc ThreadLocalVarInit[TC],ThreadLocalVarInit[TL]@le
+; SMALL32-LABEL: L..C4:
+; SMALL32-NEXT: .tc VarInit[TC],VarInit[RW]
+
+; LARGE32-LABEL: .toc
+; LARGE32-LABEL: L..C0:
+; LARGE32-NEXT: .tc IThreadLocalVarUninit[TE],IThreadLocalVarUninit[UL]@le
+; LARGE32-LABEL: L..C1:
+; LARGE32-NEXT: .tc IThreadLocalVarInit[TE],IThreadLocalVarInit[TL]@le
+; LARGE32-LABEL: L..C2:
+; LARGE32-NEXT: .tc ThreadLocalVarUninit[TE],ThreadLocalVarUninit[TL]@le
+; LARGE32-LABEL: L..C3:
+; LARGE32-NEXT: .tc ThreadLocalVarInit[TE],ThreadLocalVarInit[TL]@le
+; LARGE32-LABEL: L..C4:
+; LARGE32-NEXT: .tc VarInit[TE],VarInit[RW]
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large32.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large32.ll
new file mode 100644
index 0000000000000..261ee7e71ce1d
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large32.ll
@@ -0,0 +1,272 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \
+; RUN: -xcoff-traceback-table=false --code-model=large -filetype=obj -o %t.o < %s
+; RUN: llvm-readobj --relocs --expand-relocs %t.o | FileCheck --check-prefix=RELOC %s
+; RUN: llvm-readobj --syms %t.o | FileCheck --check-prefix=SYM %s
+; RUN: llvm-objdump -D -r --symbol-description %t.o | FileCheck --check-prefix=DIS %s
+
+@ThreadLocalVarInit = thread_local(localexec) global i64 1, align 8
+@VarInit = global i64 87, align 8
+@IThreadLocalVarUninit = internal thread_local(localexec) global i64 0, align 8
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
+
+define void @storeITLUninit(i64 noundef %x) {
+entry:
+ %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @IThreadLocalVarUninit)
+ store i64 %x, ptr %0, align 8
+ ret void
+}
+
+define i64 @loadTLInit() {
+entry:
+ %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @ThreadLocalVarInit)
+ %1 = load i64, ptr %0, align 8
+ %2 = load i64, ptr @VarInit, align 8
+ %add = add nsw i64 %2, %1
+ ret i64 %add
+}
+
+; RELOC: File: {{.*}}aix-tls-le-xcoff-reloc-large32.ll.tmp.o
+; RELOC-NEXT: Format: aixcoff-rs6000
+; RELOC-NEXT: Arch: powerpc
+; RELOC-NEXT: AddressSize: 32bit
+; RELOC-NEXT: Relocations [
+; RELOC: Virtual Address: 0x12
+; RELOC-NEXT: Symbol: IThreadLocalVarUninit (17)
+; RELOC-NEXT: IsSigned: No
+; RELOC-NEXT: FixupBitValue: 0
+; RELOC-NEXT: Length: 16
+; RELOC-NEXT: Type: R_TOCU (0x30)
+; RELOC-NEXT: }
+; RELOC: Virtual Address: 0x16
+; RELOC-NEXT: Symbol: IThreadLocalVarUninit (17)
+; RELOC-NEXT: IsSigned: No
+; RELOC-NEXT: FixupBitValue: 0
+; RELOC-NEXT: Length: 16
+; RELOC-NEXT: Type: R_TOCL (0x31)
+; RELOC-NEXT: }
+; RELOC: Virtual Address: 0x18
+; RELOC-NEXT: Symbol: .__get_tpointer (1)
+; RELOC-NEXT: IsSigned: No
+; RELOC-NEXT: FixupBitValue: 0
+; RELOC-NEXT: Length: 26
+; RELOC-NEXT: Type: R_RBA (0x18)
+; RELOC-NEXT: }
+; RELOC: Virtual Address: 0x4E
+; RELOC-NEXT: Symbol: ThreadLocalVarInit (19)
+; RELOC-NEXT: IsSigned: No
+; RELOC-NEXT: FixupBitValue: 0
+; RELOC-NEXT: Length: 16
+; RELOC-NEXT: Type: R_TOCU (0x30)
+; RELOC-NEXT: }
+; RELOC: Virtual Address: 0x52
+; RELOC-NEXT: Symbol: ThreadLocalVarInit (19)
+; RELOC-NEXT: IsSigned: No
+; RELOC-NEXT: FixupBitValue: 0
+; RELOC-NEXT: Length: 16
+; RELOC-NEXT: Type: R_TOCL (0x31)
+; RELOC-NEXT: }
+; RELOC: Virtual Address: 0x54
+; RELOC-NEXT: Symbol: .__get_tpointer (1)
+; RELOC-NEXT: IsSigned: No
+; RELOC-NEXT: FixupBitValue: 0
+; RELOC-NEXT: Length: 26
+; RELOC-NEXT: Type: R_RBA (0x18)
+; RELOC-NEXT: }
+; RELOC: Virtual Address: 0xB0
+; RELOC-NEXT: Symbol: IThreadLocalVarUninit (25)
+; RELOC-NEXT: IsSigned: No
+; RELOC-NEXT: FixupBitValue: 0
+; RELOC-NEXT: Length: 32
+; RELOC-NEXT: Type: R_TLS_LE (0x23)
+; RELOC-NEXT: }
+; RELOC: Relocation {
+; RELOC-NEXT: Virtual Address: 0xB4
+; RELOC-NEXT: Symbol: ThreadLocalVarInit (23)
+; RELOC-NEXT: IsSigned: No
+; RELOC-NEXT: FixupBitValue: 0
+; RELOC-NEXT: Length: 32
+; RELOC-NEXT: Type: R_TLS_LE (0x23)
+; RELOC-NEXT: }
+
+; SYM: File: {{.*}}aix-tls-le-xcoff-reloc-large32.ll.tmp.o
+; SYM-NEXT: Format: aixcoff-rs6000
+; SYM-NEXT: Arch: powerpc
+; SYM-NEXT: AddressSize: 32bit
+; SYM-NEXT: Symbols [
+; SYM: Index: 1
+; SYM-NEXT: Name: .__get_tpointer
+; SYM-NEXT: Value (RelocatableAddress): 0x0
+; SYM-NEXT: Section: N_UNDEF
+; SYM-NEXT: Type: 0x0
+; SYM-NEXT: StorageClass: C_EXT (0x2)
+; SYM-NEXT: NumberOfAuxEntries: 1
+; SYM-NEXT: CSECT Auxiliary Entry {
+; SYM-NEXT: Index: 2
+; SYM-NEXT: SectionLen: 0
+; SYM-NEXT: ParameterHashIndex: 0x0
+; SYM-NEXT: TypeChkSectNum: 0x0
+; SYM-NEXT: SymbolAlignmentLog2: 0
+; SYM-NEXT: SymbolType: XTY_ER (0x0)
+; SYM-NEXT: StorageMappingClass: XMC_PR (0x0)
+; SYM-NEXT: StabInfoIndex: 0x0
+; SYM-NEXT: StabSectNum: 0x0
+; SYM-NEXT: }
+; SYM-NEXT: }
+; SYM: Index: 17
+; SYM-NEXT: Name: IThreadLocalVarUninit
+; SYM-NEXT: Value (RelocatableAddress): 0xB0
+; SYM-NEXT: Section: .data
+; SYM-NEXT: Type: 0x0
+; SYM-NEXT: StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT: NumberOfAuxEntries: 1
+; SYM-NEXT: CSECT Auxiliary Entry {
+; SYM-NEXT: Index: 18
+; SYM-NEXT: SectionLen: 4
+; SYM-NEXT: ParameterHashIndex: 0x0
+; SYM-NEXT: TypeChkSectNum: 0x0
+; SYM-NEXT: SymbolAlignmentLog2: 2
+; SYM-NEXT: SymbolType: XTY_SD (0x1)
+; SYM-NEXT: StorageMappingClass: XMC_TE (0x16)
+; SYM-NEXT: StabInfoIndex: 0x0
+; SYM-NEXT: StabSectNum: 0x0
+; SYM-NEXT: }
+; SYM-NEXT: }
+; SYM: Index: 19
+; SYM-NEXT: Name: ThreadLocalVarInit
+; SYM-NEXT: Value (RelocatableAddress): 0xB4
+; SYM-NEXT: Section: .data
+; SYM-NEXT: Type: 0x0
+; SYM-NEXT: StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT: NumberOfAuxEntries: 1
+; SYM-NEXT: CSECT Auxiliary Entry {
+; SYM-NEXT: Index: 20
+; SYM-NEXT: SectionLen: 4
+; SYM-NEXT: ParameterHashIndex: 0x0
+; SYM-NEXT: TypeChkSectNum: 0x0
+; SYM-NEXT: SymbolAlignmentLog2: 2
+; SYM-NEXT: SymbolType: XTY_SD (0x1)
+; SYM-NEXT: StorageMappingClass: XMC_TE (0x16)
+; SYM-NEXT: StabInfoIndex: 0x0
+; SYM-NEXT: StabSectNum: 0x0
+; SYM-NEXT: }
+; SYM-NEXT: }
+; SYM: Index: 23
+; SYM-NEXT: Name: ThreadLocalVarInit
+; SYM-NEXT: Value (RelocatableAddress): 0x0
+; SYM-NEXT: Section: .tdata
+; SYM-NEXT: Type: 0x0
+; SYM-NEXT: StorageClass: C_EXT (0x2)
+; SYM-NEXT: NumberOfAuxEntries: 1
+; SYM-NEXT: CSECT Auxiliary Entry {
+; SYM-NEXT: Index: 24
+; SYM-NEXT: SectionLen: 8
+; SYM-NEXT: ParameterHashIndex: 0x0
+; SYM-NEXT: TypeChkSectNum: 0x0
+; SYM-NEXT: SymbolAlignmentLog2: 3
+; SYM-NEXT: SymbolType: XTY_SD (0x1)
+; SYM-NEXT: StorageMappingClass: XMC_TL (0x14)
+; SYM-NEXT: StabInfoIndex: 0x0
+; SYM-NEXT: StabSectNum: 0x0
+; SYM-NEXT: }
+; SYM-NEXT: }
+; SYM: Index: 25
+; SYM-NEXT: Name: IThreadLocalVarUninit
+; SYM-NEXT: Value (RelocatableAddress): 0x8
+; SYM-NEXT: Section: .tbss
+; SYM-NEXT: Type: 0x0
+; SYM-NEXT: StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT: NumberOfAuxEntries: 1
+; SYM-NEXT: CSECT Auxiliary Entry {
+; SYM-NEXT: Index: 26
+; SYM-NEXT: SectionLen: 8
+; SYM-NEXT: ParameterHashIndex: 0x0
+; SYM-NEXT: TypeChkSectNum: 0x0
+; SYM-NEXT: SymbolAlignmentLog2: 3
+; SYM-NEXT: SymbolType: XTY_CM (0x3)
+; SYM-NEXT: StorageMappingClass: XMC_UL (0x15)
+; SYM-NEXT: StabInfoIndex: 0x0
+; SYM-NEXT: StabSectNum: 0x0
+; SYM-NEXT: }
+; SYM-NEXT: }
+
+; DIS: {{.*}}aix-tls-le-xcoff-reloc-large32.ll.tmp.o: file format aixcoff-rs6000
+; DIS: Disassembly of section .text:
+; DIS: 00000000 (idx: 5) .storeITLUninit:
+; DIS-NEXT: mflr 0
+; DIS-NEXT: stwu 1, -32(1)
+; DIS-NEXT: stw 0, 40(1)
+; DIS-NEXT: mr 5, 3
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addis 3, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 17) IThreadLocalVarUninit[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 6, 0(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 17) IThreadLocalVarUninit[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} bla 0
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1) .__get_tpointer[PR]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 3, 6
+; DIS-NEXT: stw 4, 4(3)
+; DIS-NEXT: stw 5, 0(3)
+; DIS-NEXT: addi 1, 1, 32
+; DIS-NEXT: lwz 0, 8(1)
+; DIS-NEXT: mtlr 0
+; DIS-NEXT: blr
+; DIS: 00000040 (idx: 7) .loadTLInit:
+; DIS-NEXT: mflr 0
+; DIS-NEXT: stwu 1, -32(1)
+; DIS-NEXT: stw 0, 40(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addis 3, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 19) ThreadLocalVarInit[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 4, 4(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 19) ThreadLocalVarInit[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} bla 0
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1) .__get_tpointer[PR]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 3, 4
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 4, 4(3)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 3, 0(3)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addis 5, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 21) VarInit[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 5, 8(5)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 21) VarInit[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 6, 4(5)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 5, 0(5)
+; DIS-NEXT: addc 4, 6, 4
+; DIS-NEXT: adde 3, 5, 3
+; DIS-NEXT: addi 1, 1, 32
+; DIS-NEXT: lwz 0, 8(1)
+; DIS-NEXT: mtlr 0
+; DIS-NEXT: blr
+
+; DIS: Disassembly of section .data:
+; DIS: 00000090 (idx: 9) VarInit[RW]:
+; DIS-NEXT: 90: 00 00 00 00
+; DIS-NEXT: 94: 00 00 00 57
+; DIS: 00000098 (idx: 11) storeITLUninit[DS]:
+; DIS-NEXT: 98: 00 00 00 00
+; DIS-NEXT: 00000098: R_POS (idx: 5) .storeITLUninit
+; DIS-NEXT: 9c: 00 00 00 b0
+; DIS-NEXT: 0000009c: R_POS (idx: 15) TOC[TC0]
+; DIS-NEXT: a0: 00 00 00 00
+; DIS: 000000a4 (idx: 13) loadTLInit[DS]:
+; DIS-NEXT: a4: 00 00 00 40
+; DIS-NEXT: 000000a4: R_POS (idx: 7) .loadTLInit
+; DIS-NEXT: a8: 00 00 00 b0
+; DIS-NEXT: 000000a8: R_POS (idx: 15) TOC[TC0]
+; DIS-NEXT: ac: 00 00 00 00
+; DIS: 000000b0 (idx: 17) IThreadLocalVarUninit[TE]:
+; DIS-NEXT: b0: 00 00 00 00
+; DIS-NEXT: 000000b0: R_TLS_LE (idx: 25) IThreadLocalVarUninit[UL]
+; DIS: 000000b4 (idx: 19) ThreadLocalVarInit[TE]:
+; DIS-NEXT: b4: 00 00 00 00
+; DIS-NEXT: 000000b4: R_TLS_LE (idx: 23) ThreadLocalVarInit[TL]
+; DIS: 000000b8 (idx: 21) VarInit[TE]:
+; DIS-NEXT: b8: 00 00 00 90
+; DIS-NEXT: 000000b8: R_POS (idx: 9) VarInit[RW]
+
+; DIS: Disassembly of section .tdata:
+; DIS: 00000000 (idx: 23) ThreadLocalVarInit[TL]:
+; DIS-NEXT: 0: 00 00 00 00
+; DIS-NEXT: 4: 00 00 00 01
+
+; DIS: Disassembly of section .tbss:
+; DIS: 00000008 (idx: 25) IThreadLocalVarUninit[UL]:
+; DIS-NEXT: ...
+
diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc32.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc32.ll
new file mode 100644
index 0000000000000..d3dbbf4400491
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc32.ll
@@ -0,0 +1,245 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \
+; RUN: -xcoff-traceback-table=false -data-sections=false -filetype=obj -o %t.o < %s
+; RUN: llvm-readobj --relocs --expand-relocs %t.o | FileCheck --check-prefix=RELOC %s
+; RUN: llvm-readobj --syms %t.o | FileCheck --check-prefix=SYM %s
+; RUN: llvm-objdump -D -r --symbol-description %t.o | FileCheck --check-prefix=DIS %s
+
+@ThreadLocalVarInit = thread_local(localexec) global i32 1, align 4
+@VarInit = global i32 87, align 4
+@IThreadLocalVarUninit = internal thread_local(localexec) global i32 0, align 4
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
+
+define void @storeITLUninit(i32 noundef signext %x) {
+entry:
+ %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit)
+ store i32 %x, ptr %0, align 4
+ ret void
+}
+
+define signext i32 @loadTLInit() {
+entry:
+ %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
+ %1 = load i32, ptr %0, align 4
+ %2 = load i32, ptr @VarInit, align 4
+ %add = add nsw i32 %2, %1
+ ret i32 %add
+}
+
+; RELOC: File: {{.*}}aix-tls-le-xcoff-reloc32.ll.tmp.o
+; RELOC-NEXT: Format: aixcoff-rs6000
+; RELOC-NEXT: Arch: powerpc
+; RELOC-NEXT: AddressSize: 32bit
+; RELOC-NEXT: Relocations [
+; RELOC: Virtual Address: 0xA
+; RELOC-NEXT: Symbol: IThreadLocalVarUninit (19)
+; RELOC-NEXT: IsSigned: No
+; RELOC-NEXT: FixupBitValue: 0
+; RELOC-NEXT: Length: 16
+; RELOC-NEXT: Type: R_TOC (0x3)
+; RELOC-NEXT: }
+; RELOC: Virtual Address: 0x10
+; RELOC-NEXT: Symbol: .__get_tpointer (1)
+; RELOC-NEXT: IsSigned: No
+; RELOC-NEXT: FixupBitValue: 0
+; RELOC-NEXT: Length: 26
+; RELOC-NEXT: Type: R_RBA (0x18)
+; RELOC-NEXT: }
+; RELOC: Virtual Address: 0x3A
+; RELOC-NEXT: Symbol: ThreadLocalVarInit (21)
+; RELOC-NEXT: IsSigned: No
+; RELOC-NEXT: FixupBitValue: 0
+; RELOC-NEXT: Length: 16
+; RELOC-NEXT: Type: R_TOC (0x3)
+; RELOC-NEXT: }
+; RELOC: Virtual Address: 0x40
+; RELOC-NEXT: Symbol: .__get_tpointer (1)
+; RELOC-NEXT: IsSigned: No
+; RELOC-NEXT: FixupBitValue: 0
+; RELOC-NEXT: Length: 26
+; RELOC-NEXT: Type: R_RBA (0x18)
+; RELOC-NEXT: }
+; RELOC: Virtual Address: 0x84
+; RELOC-NEXT: Symbol: IThreadLocalVarUninit (29)
+; RELOC-NEXT: IsSigned: No
+; RELOC-NEXT: FixupBitValue: 0
+; RELOC-NEXT: Length: 32
+; RELOC-NEXT: Type: R_TLS_LE (0x23)
+; RELOC-NEXT: }
+; RELOC: Virtual Address: 0x88
+; RELOC-NEXT: Symbol: ThreadLocalVarInit (27)
+; RELOC-NEXT: IsSigned: No
+; RELOC-NEXT: FixupBitValue: 0
+; RELOC-NEXT: Length: 32
+; RELOC-NEXT: Type: R_TLS_LE (0x23)
+; RELOC-NEXT: }
+
+; SYM: File: {{.*}}aix-tls-le-xcoff-reloc32.ll.tmp.o
+; SYM-NEXT: Format: aixcoff-rs6000
+; SYM-NEXT: Arch: powerpc
+; SYM-NEXT: AddressSize: 32bit
+; SYM-NEXT: Symbols [
+; SYM: Index: 1
+; SYM-NEXT: Name: .__get_tpointer
+; SYM-NEXT: Value (RelocatableAddress): 0x0
+; SYM-NEXT: Section: N_UNDEF
+; SYM-NEXT: Type: 0x0
+; SYM-NEXT: StorageClass: C_EXT (0x2)
+; SYM-NEXT: NumberOfAuxEntries: 1
+; SYM-NEXT: CSECT Auxiliary Entry {
+; SYM-NEXT: Index: 2
+; SYM-NEXT: SectionLen: 0
+; SYM-NEXT: ParameterHashIndex: 0x0
+; SYM-NEXT: TypeChkSectNum: 0x0
+; SYM-NEXT: SymbolAlignmentLog2: 0
+; SYM-NEXT: SymbolType: XTY_ER (0x0)
+; SYM-NEXT: StorageMappingClass: XMC_PR (0x0)
+; SYM-NEXT: StabInfoIndex: 0x0
+; SYM-NEXT: StabSectNum: 0x0
+; SYM-NEXT: }
+; SYM-NEXT: }
+; SYM: Index: 19
+; SYM-NEXT: Name: IThreadLocalVarUninit
+; SYM-NEXT: Value (RelocatableAddress): 0x84
+; SYM-NEXT: Section: .data
+; SYM-NEXT: Type: 0x0
+; SYM-NEXT: StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT: NumberOfAuxEntries: 1
+; SYM-NEXT: CSECT Auxiliary Entry {
+; SYM-NEXT: Index: 20
+; SYM-NEXT: SectionLen: 4
+; SYM-NEXT: ParameterHashIndex: 0x0
+; SYM-NEXT: TypeChkSectNum: 0x0
+; SYM-NEXT: SymbolAlignmentLog2: 2
+; SYM-NEXT: SymbolType: XTY_SD (0x1)
+; SYM-NEXT: StorageMappingClass: XMC_TC (0x3)
+; SYM-NEXT: StabInfoIndex: 0x0
+; SYM-NEXT: StabSectNum: 0x0
+; SYM-NEXT: }
+; SYM-NEXT: }
+; SYM: Index: 21
+; SYM-NEXT: Name: ThreadLocalVarInit
+; SYM-NEXT: Value (RelocatableAddress): 0x88
+; SYM-NEXT: Section: .data
+; SYM-NEXT: Type: 0x0
+; SYM-NEXT: StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT: NumberOfAuxEntries: 1
+; SYM-NEXT: CSECT Auxiliary Entry {
+; SYM-NEXT: Index: 22
+; SYM-NEXT: SectionLen: 4
+; SYM-NEXT: ParameterHashIndex: 0x0
+; SYM-NEXT: TypeChkSectNum: 0x0
+; SYM-NEXT: SymbolAlignmentLog2: 2
+; SYM-NEXT: SymbolType: XTY_SD (0x1)
+; SYM-NEXT: StorageMappingClass: XMC_TC (0x3)
+; SYM-NEXT: StabInfoIndex: 0x0
+; SYM-NEXT: StabSectNum: 0x0
+; SYM-NEXT: }
+; SYM-NEXT: }
+; SYM: Index: 27
+; SYM-NEXT: Name: ThreadLocalVarInit
+; SYM-NEXT: Value (RelocatableAddress): 0x0
+; SYM-NEXT: Section: .tdata
+; SYM-NEXT: Type: 0x0
+; SYM-NEXT: StorageClass: C_EXT (0x2)
+; SYM-NEXT: NumberOfAuxEntries: 1
+; SYM-NEXT: CSECT Auxiliary Entry {
+; SYM-NEXT: Index: 28
+; SYM-NEXT: ContainingCsectSymbolIndex: 25
+; SYM-NEXT: ParameterHashIndex: 0x0
+; SYM-NEXT: TypeChkSectNum: 0x0
+; SYM-NEXT: SymbolAlignmentLog2: 0
+; SYM-NEXT: SymbolType: XTY_LD (0x2)
+; SYM-NEXT: StorageMappingClass: XMC_TL (0x14)
+; SYM-NEXT: StabInfoIndex: 0x0
+; SYM-NEXT: StabSectNum: 0x0
+; SYM-NEXT: }
+; SYM-NEXT: }
+; SYM: Index: 29
+; SYM-NEXT: Name: IThreadLocalVarUninit
+; SYM-NEXT: Value (RelocatableAddress): 0x4
+; SYM-NEXT: Section: .tbss
+; SYM-NEXT: Type: 0x0
+; SYM-NEXT: StorageClass: C_HIDEXT (0x6B)
+; SYM-NEXT: NumberOfAuxEntries: 1
+; SYM-NEXT: CSECT Auxiliary Entry {
+; SYM-NEXT: Index: 30
+; SYM-NEXT: SectionLen: 4
+; SYM-NEXT: ParameterHashIndex: 0x0
+; SYM-NEXT: TypeChkSectNum: 0x0
+; SYM-NEXT: SymbolAlignmentLog2: 2
+; SYM-NEXT: SymbolType: XTY_CM (0x3)
+; SYM-NEXT: StorageMappingClass: XMC_UL (0x15)
+; SYM-NEXT: StabInfoIndex: 0x0
+; SYM-NEXT: StabSectNum: 0x0
+; SYM-NEXT: }
+; SYM-NEXT: }
+
+; DIS: {{.*}}aix-tls-le-xcoff-reloc32.ll.tmp.o: file format aixcoff-rs6000
+; DIS: Disassembly of section .text:
+; DIS: 00000000 (idx: 5) .storeITLUninit:
+; DIS-NEXT: mflr 0
+; DIS-NEXT: stwu 1, -32(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 5, 0(2)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 19) IThreadLocalVarUninit[TC]
+; DIS-NEXT: mr 4, 3
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} bla 0
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1) .__get_tpointer[PR]
+; DIS-NEXT: stw 0, 40(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 3, 5
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 4, 0(3)
+; DIS-NEXT: addi 1, 1, 32
+; DIS-NEXT: lwz 0, 8(1)
+; DIS-NEXT: mtlr 0
+; DIS-NEXT: blr
+; DIS: 00000030 (idx: 7) .loadTLInit:
+; DIS-NEXT: mflr 0
+; DIS-NEXT: stwu 1, -32(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 4, 4(2)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 21) ThreadLocalVarInit[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 5, 8(2)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 23) VarInit[TC]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} bla 0
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1) .__get_tpointer[PR]
+; DIS-NEXT: stw 0, 40(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 3, 4
+; DIS-NEXT: lwz 4, 0(5)
+; DIS-NEXT: lwz 3, 0(3)
+; DIS-NEXT: add 3, 4, 3
+; DIS-NEXT: addi 1, 1, 32
+; DIS-NEXT: lwz 0, 8(1)
+; DIS-NEXT: mtlr 0
+; DIS-NEXT: blr
+
+; DIS: Disassembly of section .data:
+; DIS: 00000068 (idx: 11) VarInit:
+; DIS-NEXT: 68: 00 00 00 57
+; DIS: 0000006c (idx: 13) storeITLUninit[DS]:
+; DIS-NEXT: 6c: 00 00 00 00
+; DIS-NEXT: 0000006c: R_POS (idx: 5) .storeITLUninit
+; DIS-NEXT: 70: 00 00 00 84
+; DIS-NEXT: 00000070: R_POS (idx: 17) TOC[TC0]
+; DIS-NEXT: 74: 00 00 00 00
+; DIS: 00000078 (idx: 15) loadTLInit[DS]:
+; DIS-NEXT: 78: 00 00 00 30
+; DIS-NEXT: 00000078: R_POS (idx: 7) .loadTLInit
+; DIS-NEXT: 7c: 00 00 00 84
+; DIS-NEXT: 0000007c: R_POS (idx: 17) TOC[TC0]
+; DIS-NEXT: 80: 00 00 00 00
+; DIS: 00000084 (idx: 19) IThreadLocalVarUninit[TC]:
+; DIS-NEXT: 84: 00 00 00 00
+; DIS-NEXT: 00000084: R_TLS_LE (idx: 29) IThreadLocalVarUninit[UL]
+; DIS: 00000088 (idx: 21) ThreadLocalVarInit[TC]:
+; DIS-NEXT: 88: 00 00 00 00
+; DIS-NEXT: 00000088: R_TLS_LE (idx: 27) ThreadLocalVarInit
+; DIS: 0000008c (idx: 23) VarInit[TC]:
+; DIS-NEXT: 8c: 00 00 00 68
+; DIS-NEXT: 0000008c: R_POS (idx: 11) VarInit
+
+; DIS: Disassembly of section .tdata:
+; DIS: 00000000 (idx: 27) ThreadLocalVarInit:
+; DIS-NEXT: 0: 00 00 00 01
+
+; DIS: Disassembly of section .tbss:
+; DIS: 00000004 (idx: 29) IThreadLocalVarUninit[UL]:
+; DIS-NEXT: ...
+
More information about the llvm-commits
mailing list