[llvm] [AIX][TLS] Produce a faster local-exec access sequence for the "aix-small-tls" global variable attribute (PR #83053)
Amy Kwan via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 27 13:55:01 PDT 2024
https://github.com/amy-kwan updated https://github.com/llvm/llvm-project/pull/83053
>From d100b524c8860341b5cebbc45646e1774f34e217 Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1 at ibm.com>
Date: Sat, 24 Feb 2024 12:55:26 -0600
Subject: [PATCH 1/7] [AIX][TLS] Produce a faster local-exec access sequence
for the "aix-small-tls" global variable attribute
Similar to 3f46e5453d9310b15d974e876f6132e3cf50c4b1, this patch allows the
backend to produce a faster access sequence for the local-exec TLS model,
where loading from the TOC can be avoided, for local-exec TLS variables that
are annotated with the "aix-small-tls" attribute.
The expectation is for local-exec TLS variables to be set with this attribute
through PGO. Furthermore, the optimized access sequence is generated for
local-exec TLS variables annotated with "aix-small-tls" only if they are less
than ~32KB in size.
---
llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 34 ++-
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 21 +-
.../aix-small-tls-globalvarattr-funcattr.ll | 197 ++++++++++++++
.../aix-small-tls-globalvarattr-loadaddr.ll | 251 ++++++++++++++++++
.../aix-small-tls-globalvarattr-targetattr.ll | 104 ++++++++
5 files changed, 593 insertions(+), 14 deletions(-)
create mode 100644 llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll
create mode 100644 llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-loadaddr.ll
create mode 100644 llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index dfea9e7709240c..7e65255830633f 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -7558,6 +7558,22 @@ static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0));
}
+// Check if an SDValue has the 'aix-small-tls' global variable attribute.
+static bool hasAIXSmallTLSAttr(SDValue Val) {
+ GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val);
+ if (!GA)
+ return false;
+
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal());
+ if (!GV)
+ return false;
+
+ if (!GV->hasAttribute("aix-small-tls"))
+ return false;
+
+ return true;
+}
+
// Is an ADDI eligible for folding for non-TOC-based local-exec accesses?
static bool isEligibleToFoldADDIForLocalExecAccesses(SelectionDAG *DAG,
SDValue ADDIToFold) {
@@ -7567,20 +7583,25 @@ static bool isEligibleToFoldADDIForLocalExecAccesses(SelectionDAG *DAG,
(ADDIToFold.getMachineOpcode() != PPC::ADDI8))
return false;
+ // Folding is only allowed for the AIX small-local-exec TLS target attribute
+ // or when the 'aix-small-tls' global variable attribute is present.
+ const PPCSubtarget &Subtarget =
+ DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
+ SDValue TLSVarNode = ADDIToFold.getOperand(1);
+ if (!(Subtarget.hasAIXSmallLocalExecTLS() || hasAIXSmallTLSAttr(TLSVarNode)))
+ return false;
+
// The first operand of the ADDIToFold should be the thread pointer.
// This transformation is only performed if the first operand of the
// addi is the thread pointer.
SDValue TPRegNode = ADDIToFold.getOperand(0);
RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(TPRegNode.getNode());
- const PPCSubtarget &Subtarget =
- DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
if (!TPReg || (TPReg->getReg() != Subtarget.getThreadPointerRegister()))
return false;
// The second operand of the ADDIToFold should be the global TLS address
// (the local-exec TLS variable). We only perform the folding if the TLS
// variable is the second operand.
- SDValue TLSVarNode = ADDIToFold.getOperand(1);
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
if (!GA)
return false;
@@ -7649,7 +7670,6 @@ static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
void PPCDAGToDAGISel::PeepholePPC64() {
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
- bool HasAIXSmallLocalExecTLS = Subtarget->hasAIXSmallLocalExecTLS();
while (Position != CurDAG->allnodes_begin()) {
SDNode *N = &*--Position;
@@ -7661,8 +7681,7 @@ void PPCDAGToDAGISel::PeepholePPC64() {
reduceVSXSwap(N, CurDAG);
// This optimization is performed for non-TOC-based local-exec accesses.
- if (HasAIXSmallLocalExecTLS)
- foldADDIForLocalExecAccesses(N, CurDAG);
+ foldADDIForLocalExecAccesses(N, CurDAG);
unsigned FirstOp;
unsigned StorageOpcode = N->getMachineOpcode();
@@ -7821,8 +7840,7 @@ void PPCDAGToDAGISel::PeepholePPC64() {
ImmOpnd.getValueType());
} else if (Offset != 0) {
// This optimization is performed for non-TOC-based local-exec accesses.
- if (HasAIXSmallLocalExecTLS &&
- isEligibleToFoldADDIForLocalExecAccesses(CurDAG, Base)) {
+ if (isEligibleToFoldADDIForLocalExecAccesses(CurDAG, Base)) {
// Add the non-zero offset information into the load or store
// instruction to be used for non-TOC-based local-exec accesses.
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index cce0efad39c75b..e85bdf9b707b88 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3368,6 +3368,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
EVT PtrVT = getPointerTy(DAG.getDataLayout());
bool Is64Bit = Subtarget.isPPC64();
bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
+ bool HasAIXSmallTLSGlobalAttr = false;
TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
@@ -3376,6 +3377,11 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
SDValue TLSReg;
+
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->hasAttribute("aix-small-tls"))
+ HasAIXSmallTLSGlobalAttr = true;
+
if (Is64Bit) {
// For local-exec and initial-exec on AIX (64-bit), the sequence generated
// involves a load of the variable offset (from the TOC), followed by an
@@ -3385,14 +3391,16 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
// add reg2, reg1, r13 // r13 contains the thread pointer
TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
- // With the -maix-small-local-exec-tls option, produce a faster access
- // sequence for local-exec TLS variables where the offset from the TLS
- // base is encoded as an immediate operand.
+ // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
+ // global variable attribute, produce a faster access sequence for
+ // local-exec TLS variables where the offset from the TLS base is encoded
+ // as an immediate operand.
//
// We only utilize the faster local-exec access sequence when the TLS
// variable has a size within the policy limit. We treat types that are
// not sized or are empty as being over the policy size limit.
- if (HasAIXSmallLocalExecTLS && IsTLSLocalExecModel) {
+ if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
+ IsTLSLocalExecModel) {
Type *GVType = GV->getValueType();
if (GVType->isSized() && !GVType->isEmptyTy() &&
GV->getParent()->getDataLayout().getTypeAllocSize(GVType) <=
@@ -3410,8 +3418,9 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
// We do not implement the 32-bit version of the faster access sequence
- // for local-exec that is controlled by -maix-small-local-exec-tls.
- if (HasAIXSmallLocalExecTLS)
+ // for local-exec that is controlled by the -maix-small-local-exec-tls
+ // option, or the "aix-small-tls" global variable attribute.
+ if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
report_fatal_error("The small-local-exec TLS access sequence is "
"currently only supported on AIX (64-bit mode).");
}
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll
new file mode 100644
index 00000000000000..55e486876e3373
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll
@@ -0,0 +1,197 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s \
+; RUN: | FileCheck %s --check-prefix=CHECK-SMALLCM64
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN: < %s | FileCheck %s --check-prefix=CHECK-LARGECM64
+
+@mySmallLocalExecTLS6 = external thread_local(localexec) global [60 x i64], align 8
+@mySmallLocalExecTLS2 = external thread_local(localexec) global [3000 x i64], align 8 #0
+@MyTLSGDVar = thread_local global [800 x i64] zeroinitializer, align 8
+@mySmallLocalExecTLS3 = internal thread_local(localexec) global [3000 x i64] zeroinitializer, align 8
+@mySmallLocalExecTLS4 = internal thread_local(localexec) global [3000 x i64] zeroinitializer, align 8 #0
+@mySmallLocalExecTLS5 = thread_local(localexec) global [3000 x i64] zeroinitializer, align 8 #0
+@mySmallLocalExecTLS = thread_local(localexec) local_unnamed_addr global [7800 x i64] zeroinitializer, align 8 #0
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
+
+; All accesses use a "faster" local-exec sequence directly off the thread pointer.
+define i64 @StoreLargeAccess1() #1 {
+; CHECK-SMALLCM64-LABEL: StoreLargeAccess1:
+; CHECK-SMALLCM64: # %bb.0: # %entry
+; CHECK-SMALLCM64-NEXT: mflr r0
+; CHECK-SMALLCM64-NEXT: stdu r1, -48(r1)
+; CHECK-SMALLCM64-NEXT: li r3, 212
+; CHECK-SMALLCM64-NEXT: li r4, 203
+; CHECK-SMALLCM64-NEXT: std r0, 64(r1)
+; CHECK-SMALLCM64-NEXT: std r3, mySmallLocalExecTLS6[UL]@le+424(r13)
+; CHECK-SMALLCM64-NEXT: std r4, mySmallLocalExecTLS2[UL]@le+1200(r13)
+; CHECK-SMALLCM64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tlsgdm) @MyTLSGDVar
+; CHECK-SMALLCM64-NEXT: ld r4, L..C1(r2) # target-flags(ppc-tlsgd) @MyTLSGDVar
+; CHECK-SMALLCM64-NEXT: bla .__tls_get_addr[PR]
+; CHECK-SMALLCM64-NEXT: li r4, 44
+; CHECK-SMALLCM64-NEXT: std r4, 440(r3)
+; CHECK-SMALLCM64-NEXT: li r3, 6
+; CHECK-SMALLCM64-NEXT: li r4, 100
+; CHECK-SMALLCM64-NEXT: std r3, mySmallLocalExecTLS3[UL]@le+2000(r13)
+; CHECK-SMALLCM64-NEXT: li r3, 882
+; CHECK-SMALLCM64-NEXT: std r4, (mySmallLocalExecTLS4[UL]@le+6800)-65536(r13)
+; CHECK-SMALLCM64-NEXT: std r3, (mySmallLocalExecTLS5[TL]@le+8400)-65536(r13)
+; CHECK-SMALLCM64-NEXT: li r3, 1191
+; CHECK-SMALLCM64-NEXT: addi r1, r1, 48
+; CHECK-SMALLCM64-NEXT: ld r0, 16(r1)
+; CHECK-SMALLCM64-NEXT: mtlr r0
+; CHECK-SMALLCM64-NEXT: blr
+;
+; CHECK-LARGECM64-LABEL: StoreLargeAccess1:
+; CHECK-LARGECM64: # %bb.0: # %entry
+; CHECK-LARGECM64-NEXT: mflr r0
+; CHECK-LARGECM64-NEXT: stdu r1, -48(r1)
+; CHECK-LARGECM64-NEXT: li r3, 212
+; CHECK-LARGECM64-NEXT: std r0, 64(r1)
+; CHECK-LARGECM64-NEXT: addis r4, L..C0@u(r2)
+; CHECK-LARGECM64-NEXT: ld r4, L..C0@l(r4)
+; CHECK-LARGECM64-NEXT: std r3, mySmallLocalExecTLS6[UL]@le+424(r13)
+; CHECK-LARGECM64-NEXT: li r3, 203
+; CHECK-LARGECM64-NEXT: std r3, mySmallLocalExecTLS2[UL]@le+1200(r13)
+; CHECK-LARGECM64-NEXT: addis r3, L..C1@u(r2)
+; CHECK-LARGECM64-NEXT: ld r3, L..C1@l(r3)
+; CHECK-LARGECM64-NEXT: bla .__tls_get_addr[PR]
+; CHECK-LARGECM64-NEXT: li r4, 44
+; CHECK-LARGECM64-NEXT: std r4, 440(r3)
+; CHECK-LARGECM64-NEXT: li r3, 6
+; CHECK-LARGECM64-NEXT: li r4, 100
+; CHECK-LARGECM64-NEXT: std r3, mySmallLocalExecTLS3[UL]@le+2000(r13)
+; CHECK-LARGECM64-NEXT: li r3, 882
+; CHECK-LARGECM64-NEXT: std r4, (mySmallLocalExecTLS4[UL]@le+6800)-65536(r13)
+; CHECK-LARGECM64-NEXT: std r3, (mySmallLocalExecTLS5[TL]@le+8400)-65536(r13)
+; CHECK-LARGECM64-NEXT: li r3, 1191
+; CHECK-LARGECM64-NEXT: addi r1, r1, 48
+; CHECK-LARGECM64-NEXT: ld r0, 16(r1)
+; CHECK-LARGECM64-NEXT: mtlr r0
+; CHECK-LARGECM64-NEXT: blr
+entry:
+ %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS6)
+ %arrayidx = getelementptr inbounds [60 x i64], ptr %0, i64 0, i64 53
+ store i64 212, ptr %arrayidx, align 8
+ %1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS2)
+ %arrayidx1 = getelementptr inbounds [3000 x i64], ptr %1, i64 0, i64 150
+ store i64 203, ptr %arrayidx1, align 8
+ %2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @MyTLSGDVar)
+ %arrayidx2 = getelementptr inbounds [800 x i64], ptr %2, i64 0, i64 55
+ store i64 44, ptr %arrayidx2, align 8
+ %3 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS3)
+ %arrayidx3 = getelementptr inbounds [3000 x i64], ptr %3, i64 0, i64 250
+ store i64 6, ptr %arrayidx3, align 8
+ %4 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS4)
+ %arrayidx4 = getelementptr inbounds [3000 x i64], ptr %4, i64 0, i64 850
+ store i64 100, ptr %arrayidx4, align 8
+ %5 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS5)
+ %arrayidx5 = getelementptr inbounds [3000 x i64], ptr %5, i64 0, i64 1050
+ store i64 882, ptr %arrayidx5, align 8
+ %6 = load i64, ptr %arrayidx1, align 8
+ %7 = load i64, ptr %arrayidx3, align 8
+ %8 = load i64, ptr %arrayidx4, align 8
+ %add = add i64 %6, 882
+ %add9 = add i64 %add, %7
+ %add11 = add i64 %add9, %8
+ ret i64 %add11
+}
+
+; Since this function does not have the 'aix-small-local-exec-tls' attribute,
+; only some local-exec variables should have the small-local-exec TLS access
+; sequence (as opposed to all of them).
+define i64 @StoreLargeAccess2() {
+; CHECK-SMALLCM64-LABEL: StoreLargeAccess2:
+; CHECK-SMALLCM64: # %bb.0: # %entry
+; CHECK-SMALLCM64-NEXT: mflr r0
+; CHECK-SMALLCM64-NEXT: stdu r1, -48(r1)
+; CHECK-SMALLCM64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @mySmallLocalExecTLS6
+; CHECK-SMALLCM64-NEXT: li r4, 212
+; CHECK-SMALLCM64-NEXT: std r0, 64(r1)
+; CHECK-SMALLCM64-NEXT: add r3, r13, r3
+; CHECK-SMALLCM64-NEXT: std r4, 424(r3)
+; CHECK-SMALLCM64-NEXT: ld r4, L..C1(r2) # target-flags(ppc-tlsgd) @MyTLSGDVar
+; CHECK-SMALLCM64-NEXT: li r3, 203
+; CHECK-SMALLCM64-NEXT: std r3, mySmallLocalExecTLS2[UL]@le+1200(r13)
+; CHECK-SMALLCM64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tlsgdm) @MyTLSGDVar
+; CHECK-SMALLCM64-NEXT: bla .__tls_get_addr[PR]
+; CHECK-SMALLCM64-NEXT: li r4, 44
+; CHECK-SMALLCM64-NEXT: std r4, 440(r3)
+; CHECK-SMALLCM64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @mySmallLocalExecTLS3
+; CHECK-SMALLCM64-NEXT: li r4, 6
+; CHECK-SMALLCM64-NEXT: add r3, r13, r3
+; CHECK-SMALLCM64-NEXT: std r4, 2000(r3)
+; CHECK-SMALLCM64-NEXT: li r3, 100
+; CHECK-SMALLCM64-NEXT: li r4, 882
+; CHECK-SMALLCM64-NEXT: std r3, mySmallLocalExecTLS4[UL]@le+6800(r13)
+; CHECK-SMALLCM64-NEXT: std r4, mySmallLocalExecTLS5[TL]@le+8400(r13)
+; CHECK-SMALLCM64-NEXT: li r3, 1191
+; CHECK-SMALLCM64-NEXT: addi r1, r1, 48
+; CHECK-SMALLCM64-NEXT: ld r0, 16(r1)
+; CHECK-SMALLCM64-NEXT: mtlr r0
+; CHECK-SMALLCM64-NEXT: blr
+;
+; CHECK-LARGECM64-LABEL: StoreLargeAccess2:
+; CHECK-LARGECM64: # %bb.0: # %entry
+; CHECK-LARGECM64-NEXT: mflr r0
+; CHECK-LARGECM64-NEXT: stdu r1, -48(r1)
+; CHECK-LARGECM64-NEXT: addis r3, L..C2@u(r2)
+; CHECK-LARGECM64-NEXT: li r4, 212
+; CHECK-LARGECM64-NEXT: std r0, 64(r1)
+; CHECK-LARGECM64-NEXT: ld r3, L..C2@l(r3)
+; CHECK-LARGECM64-NEXT: add r3, r13, r3
+; CHECK-LARGECM64-NEXT: std r4, 424(r3)
+; CHECK-LARGECM64-NEXT: li r3, 203
+; CHECK-LARGECM64-NEXT: addis r4, L..C0@u(r2)
+; CHECK-LARGECM64-NEXT: ld r4, L..C0@l(r4)
+; CHECK-LARGECM64-NEXT: std r3, mySmallLocalExecTLS2[UL]@le+1200(r13)
+; CHECK-LARGECM64-NEXT: addis r3, L..C1@u(r2)
+; CHECK-LARGECM64-NEXT: ld r3, L..C1@l(r3)
+; CHECK-LARGECM64-NEXT: bla .__tls_get_addr[PR]
+; CHECK-LARGECM64-NEXT: li r4, 44
+; CHECK-LARGECM64-NEXT: std r4, 440(r3)
+; CHECK-LARGECM64-NEXT: addis r3, L..C3@u(r2)
+; CHECK-LARGECM64-NEXT: li r4, 6
+; CHECK-LARGECM64-NEXT: ld r3, L..C3@l(r3)
+; CHECK-LARGECM64-NEXT: add r3, r13, r3
+; CHECK-LARGECM64-NEXT: std r4, 2000(r3)
+; CHECK-LARGECM64-NEXT: li r3, 100
+; CHECK-LARGECM64-NEXT: li r4, 882
+; CHECK-LARGECM64-NEXT: std r3, mySmallLocalExecTLS4[UL]@le+6800(r13)
+; CHECK-LARGECM64-NEXT: std r4, mySmallLocalExecTLS5[TL]@le+8400(r13)
+; CHECK-LARGECM64-NEXT: li r3, 1191
+; CHECK-LARGECM64-NEXT: addi r1, r1, 48
+; CHECK-LARGECM64-NEXT: ld r0, 16(r1)
+; CHECK-LARGECM64-NEXT: mtlr r0
+; CHECK-LARGECM64-NEXT: blr
+entry:
+ %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS6)
+ %arrayidx = getelementptr inbounds [60 x i64], ptr %0, i64 0, i64 53
+ store i64 212, ptr %arrayidx, align 8
+ %1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS2)
+ %arrayidx1 = getelementptr inbounds [3000 x i64], ptr %1, i64 0, i64 150
+ store i64 203, ptr %arrayidx1, align 8
+ %2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @MyTLSGDVar)
+ %arrayidx2 = getelementptr inbounds [800 x i64], ptr %2, i64 0, i64 55
+ store i64 44, ptr %arrayidx2, align 8
+ %3 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS3)
+ %arrayidx3 = getelementptr inbounds [3000 x i64], ptr %3, i64 0, i64 250
+ store i64 6, ptr %arrayidx3, align 8
+ %4 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS4)
+ %arrayidx4 = getelementptr inbounds [3000 x i64], ptr %4, i64 0, i64 850
+ store i64 100, ptr %arrayidx4, align 8
+ %5 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS5)
+ %arrayidx5 = getelementptr inbounds [3000 x i64], ptr %5, i64 0, i64 1050
+ store i64 882, ptr %arrayidx5, align 8
+ %6 = load i64, ptr %arrayidx1, align 8
+ %7 = load i64, ptr %arrayidx3, align 8
+ %8 = load i64, ptr %arrayidx4, align 8
+ %add = add i64 %6, 882
+ %add9 = add i64 %add, %7
+ %add11 = add i64 %add9, %8
+ ret i64 %add11
+}
+
+attributes #0 = { "aix-small-tls" }
+attributes #1 = { "target-features"="+aix-small-local-exec-tls" }
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-loadaddr.ll b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-loadaddr.ll
new file mode 100644
index 00000000000000..db4266958daff1
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-loadaddr.ll
@@ -0,0 +1,251 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s \
+; RUN: | FileCheck %s --check-prefix=SMALLCM64
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN: < %s | FileCheck %s --check-prefix=LARGECM64
+
+; Test that the 'aix-small-tls' global variable attribute generates the
+; optimized small-local-exec TLS sequence. Global variables without this
+; attribute should still generate a TOC-based local-exec access sequence.
+
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
+
+@a = thread_local(localexec) global [87 x i8] zeroinitializer, align 1 #0
+@a_noattr = thread_local(localexec) global [87 x i8] zeroinitializer, align 1
+@b = thread_local(localexec) global [87 x i16] zeroinitializer, align 2 #0
+@b_noattr = thread_local(localexec) global [87 x i16] zeroinitializer, align 2
+@c = thread_local(localexec) global [87 x i32] zeroinitializer, align 4 #0
+@c_noattr = thread_local(localexec) global [87 x i32] zeroinitializer, align 4
+@d = thread_local(localexec) global [87 x i64] zeroinitializer, align 8 #0
+@d_noattr = thread_local(localexec) global [87 x i64] zeroinitializer, align 8 #0
+
+@e = thread_local(localexec) global [87 x double] zeroinitializer, align 8 #0
+@e_noattr = thread_local(localexec) global [87 x double] zeroinitializer, align 8
+@f = thread_local(localexec) global [87 x float] zeroinitializer, align 4 #0
+@f_noattr = thread_local(localexec) global [87 x float] zeroinitializer, align 4
+
+define nonnull ptr @AddrTest1() local_unnamed_addr {
+; SMALLCM64-LABEL: AddrTest1:
+; SMALLCM64: # %bb.0: # %entry
+; SMALLCM64-NEXT: addi r3, r13, a[TL]@le+1
+; SMALLCM64-NEXT: blr
+;
+; LARGECM64-LABEL: AddrTest1:
+; LARGECM64: # %bb.0: # %entry
+; LARGECM64-NEXT: addi r3, r13, a[TL]@le+1
+; LARGECM64-NEXT: blr
+entry:
+ %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @a)
+ %arrayidx = getelementptr inbounds [87 x i8], ptr %0, i64 0, i64 1
+ ret ptr %arrayidx
+}
+
+define nonnull ptr @AddrTest1_NoAttr() local_unnamed_addr {
+; SMALLCM64-LABEL: AddrTest1_NoAttr:
+; SMALLCM64: # %bb.0: # %entry
+; SMALLCM64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @a_noattr
+; SMALLCM64-NEXT: add r3, r13, r3
+; SMALLCM64-NEXT: addi r3, r3, 1
+; SMALLCM64-NEXT: blr
+;
+; LARGECM64-LABEL: AddrTest1_NoAttr:
+; LARGECM64: # %bb.0: # %entry
+; LARGECM64-NEXT: addis r3, L..C0@u(r2)
+; LARGECM64-NEXT: ld r3, L..C0@l(r3)
+; LARGECM64-NEXT: add r3, r13, r3
+; LARGECM64-NEXT: addi r3, r3, 1
+; LARGECM64-NEXT: blr
+entry:
+ %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @a_noattr)
+ %arrayidx = getelementptr inbounds [87 x i8], ptr %0, i64 0, i64 1
+ ret ptr %arrayidx
+}
+
+define nonnull ptr @AddrTest2() local_unnamed_addr {
+; SMALLCM64-LABEL: AddrTest2:
+; SMALLCM64: # %bb.0: # %entry
+; SMALLCM64-NEXT: addi r3, r13, b[TL]@le+4
+; SMALLCM64-NEXT: blr
+;
+; LARGECM64-LABEL: AddrTest2:
+; LARGECM64: # %bb.0: # %entry
+; LARGECM64-NEXT: addi r3, r13, b[TL]@le+4
+; LARGECM64-NEXT: blr
+entry:
+ %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @b)
+ %arrayidx = getelementptr inbounds [87 x i16], ptr %0, i64 0, i64 2
+ ret ptr %arrayidx
+}
+
+define nonnull ptr @AddrTest2_NoAttr() local_unnamed_addr {
+; SMALLCM64-LABEL: AddrTest2_NoAttr:
+; SMALLCM64: # %bb.0: # %entry
+; SMALLCM64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @b_noattr
+; SMALLCM64-NEXT: add r3, r13, r3
+; SMALLCM64-NEXT: addi r3, r3, 4
+; SMALLCM64-NEXT: blr
+;
+; LARGECM64-LABEL: AddrTest2_NoAttr:
+; LARGECM64: # %bb.0: # %entry
+; LARGECM64-NEXT: addis r3, L..C1@u(r2)
+; LARGECM64-NEXT: ld r3, L..C1@l(r3)
+; LARGECM64-NEXT: add r3, r13, r3
+; LARGECM64-NEXT: addi r3, r3, 4
+; LARGECM64-NEXT: blr
+entry:
+ %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @b_noattr)
+ %arrayidx = getelementptr inbounds [87 x i16], ptr %0, i64 0, i64 2
+ ret ptr %arrayidx
+}
+
+define nonnull ptr @AddrTest3() local_unnamed_addr {
+; SMALLCM64-LABEL: AddrTest3:
+; SMALLCM64: # %bb.0: # %entry
+; SMALLCM64-NEXT: addi r3, r13, c[TL]@le+12
+; SMALLCM64-NEXT: blr
+;
+; LARGECM64-LABEL: AddrTest3:
+; LARGECM64: # %bb.0: # %entry
+; LARGECM64-NEXT: addi r3, r13, c[TL]@le+12
+; LARGECM64-NEXT: blr
+entry:
+ %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @c)
+ %arrayidx = getelementptr inbounds [87 x i32], ptr %0, i64 0, i64 3
+ ret ptr %arrayidx
+}
+
+define nonnull ptr @AddrTest3_NoAttr() local_unnamed_addr {
+; SMALLCM64-LABEL: AddrTest3_NoAttr:
+; SMALLCM64: # %bb.0: # %entry
+; SMALLCM64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @c_noattr
+; SMALLCM64-NEXT: add r3, r13, r3
+; SMALLCM64-NEXT: addi r3, r3, 12
+; SMALLCM64-NEXT: blr
+;
+; LARGECM64-LABEL: AddrTest3_NoAttr:
+; LARGECM64: # %bb.0: # %entry
+; LARGECM64-NEXT: addis r3, L..C2@u(r2)
+; LARGECM64-NEXT: ld r3, L..C2@l(r3)
+; LARGECM64-NEXT: add r3, r13, r3
+; LARGECM64-NEXT: addi r3, r3, 12
+; LARGECM64-NEXT: blr
+entry:
+ %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @c_noattr)
+ %arrayidx = getelementptr inbounds [87 x i32], ptr %0, i64 0, i64 3
+ ret ptr %arrayidx
+}
+
+define nonnull ptr @AddrTest4() local_unnamed_addr {
+; SMALLCM64-LABEL: AddrTest4:
+; SMALLCM64: # %bb.0: # %entry
+; SMALLCM64-NEXT: addi r3, r13, c[TL]@le+56
+; SMALLCM64-NEXT: blr
+;
+; LARGECM64-LABEL: AddrTest4:
+; LARGECM64: # %bb.0: # %entry
+; LARGECM64-NEXT: addi r3, r13, c[TL]@le+56
+; LARGECM64-NEXT: blr
+entry:
+ %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @c)
+ %arrayidx = getelementptr inbounds [87 x i64], ptr %0, i64 0, i64 7
+ ret ptr %arrayidx
+}
+
+define nonnull ptr @AddrTest4_NoAttr() local_unnamed_addr {
+; SMALLCM64-LABEL: AddrTest4_NoAttr:
+; SMALLCM64: # %bb.0: # %entry
+; SMALLCM64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @c_noattr
+; SMALLCM64-NEXT: add r3, r13, r3
+; SMALLCM64-NEXT: addi r3, r3, 56
+; SMALLCM64-NEXT: blr
+;
+; LARGECM64-LABEL: AddrTest4_NoAttr:
+; LARGECM64: # %bb.0: # %entry
+; LARGECM64-NEXT: addis r3, L..C2@u(r2)
+; LARGECM64-NEXT: ld r3, L..C2@l(r3)
+; LARGECM64-NEXT: add r3, r13, r3
+; LARGECM64-NEXT: addi r3, r3, 56
+; LARGECM64-NEXT: blr
+entry:
+ %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @c_noattr)
+ %arrayidx = getelementptr inbounds [87 x i64], ptr %0, i64 0, i64 7
+ ret ptr %arrayidx
+}
+
+define nonnull ptr @AddrTest5() local_unnamed_addr {
+; SMALLCM64-LABEL: AddrTest5:
+; SMALLCM64: # %bb.0: # %entry
+; SMALLCM64-NEXT: addi r3, r13, e[TL]@le+48
+; SMALLCM64-NEXT: blr
+;
+; LARGECM64-LABEL: AddrTest5:
+; LARGECM64: # %bb.0: # %entry
+; LARGECM64-NEXT: addi r3, r13, e[TL]@le+48
+; LARGECM64-NEXT: blr
+entry:
+ %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @e)
+ %arrayidx = getelementptr inbounds [87 x double], ptr %0, i64 0, i64 6
+ ret ptr %arrayidx
+}
+
+define nonnull ptr @AddrTest5_NoAttr() local_unnamed_addr {
+; SMALLCM64-LABEL: AddrTest5_NoAttr:
+; SMALLCM64: # %bb.0: # %entry
+; SMALLCM64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @e_noattr
+; SMALLCM64-NEXT: add r3, r13, r3
+; SMALLCM64-NEXT: addi r3, r3, 48
+; SMALLCM64-NEXT: blr
+;
+; LARGECM64-LABEL: AddrTest5_NoAttr:
+; LARGECM64: # %bb.0: # %entry
+; LARGECM64-NEXT: addis r3, L..C3@u(r2)
+; LARGECM64-NEXT: ld r3, L..C3@l(r3)
+; LARGECM64-NEXT: add r3, r13, r3
+; LARGECM64-NEXT: addi r3, r3, 48
+; LARGECM64-NEXT: blr
+entry:
+ %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @e_noattr)
+ %arrayidx = getelementptr inbounds [87 x double], ptr %0, i64 0, i64 6
+ ret ptr %arrayidx
+}
+
+define nonnull ptr @AddrTest6() local_unnamed_addr {
+; SMALLCM64-LABEL: AddrTest6:
+; SMALLCM64: # %bb.0: # %entry
+; SMALLCM64-NEXT: addi r3, r13, f[TL]@le+16
+; SMALLCM64-NEXT: blr
+;
+; LARGECM64-LABEL: AddrTest6:
+; LARGECM64: # %bb.0: # %entry
+; LARGECM64-NEXT: addi r3, r13, f[TL]@le+16
+; LARGECM64-NEXT: blr
+entry:
+ %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @f)
+ %arrayidx = getelementptr inbounds [87 x float], ptr %0, i64 0, i64 4
+ ret ptr %arrayidx
+}
+
+define nonnull ptr @AddrTest6_NoAttr() local_unnamed_addr {
+; SMALLCM64-LABEL: AddrTest6_NoAttr:
+; SMALLCM64: # %bb.0: # %entry
+; SMALLCM64-NEXT: ld r3, L..C4(r2) # target-flags(ppc-tprel) @f_noattr
+; SMALLCM64-NEXT: add r3, r13, r3
+; SMALLCM64-NEXT: addi r3, r3, 16
+; SMALLCM64-NEXT: blr
+;
+; LARGECM64-LABEL: AddrTest6_NoAttr:
+; LARGECM64: # %bb.0: # %entry
+; LARGECM64-NEXT: addis r3, L..C4@u(r2)
+; LARGECM64-NEXT: ld r3, L..C4@l(r3)
+; LARGECM64-NEXT: add r3, r13, r3
+; LARGECM64-NEXT: addi r3, r3, 16
+; LARGECM64-NEXT: blr
+entry:
+ %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @f_noattr)
+ %arrayidx = getelementptr inbounds [87 x float], ptr %0, i64 0, i64 4
+ ret ptr %arrayidx
+}
+
+attributes #0 = { "aix-small-tls" }
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll
new file mode 100644
index 00000000000000..8f617eac66ef6c
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN: -mtriple powerpc64-ibm-aix-xcoff -mattr=+aix-small-local-exec-tls < %s \
+; RUN: | FileCheck %s --check-prefix=SMALL-LOCAL-EXEC-SMALLCM64
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN: -mattr=+aix-small-local-exec-tls < %s | FileCheck %s \
+; RUN: --check-prefix=SMALL-LOCAL-EXEC-LARGECM64
+
+@mySmallLocalExecTLS6 = external thread_local(localexec) global [60 x i64], align 8
+@mySmallLocalExecTLS2 = external thread_local(localexec) global [3000 x i64], align 8 #0
+@MyTLSGDVar = thread_local global [800 x i64] zeroinitializer, align 8
+@mySmallLocalExecTLS3 = internal thread_local(localexec) global [3000 x i64] zeroinitializer, align 8
+@mySmallLocalExecTLS4 = internal thread_local(localexec) global [3000 x i64] zeroinitializer, align 8 #0
+@mySmallLocalExecTLS5 = thread_local(localexec) global [3000 x i64] zeroinitializer, align 8 #0
+@mySmallLocalExecTLS = thread_local(localexec) local_unnamed_addr global [7800 x i64] zeroinitializer, align 8 #0
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
+
+; Although some global variables are annotated with 'aix-small-tls', because the
+; aix-small-local-exec-tls target attribute is turned on, all accesses will use
+; a "faster" local-exec sequence directly off the thread pointer.
+define i64 @StoreLargeAccess1() {
+; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: StoreLargeAccess1:
+; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: mflr r0
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stdu r1, -48(r1)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 212
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 203
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r0, 64(r1)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r3, mySmallLocalExecTLS6[UL]@le+424(r13)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r4, mySmallLocalExecTLS2[UL]@le+1200(r13)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tlsgdm) @MyTLSGDVar
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r4, L..C1(r2) # target-flags(ppc-tlsgd) @MyTLSGDVar
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: bla .__tls_get_addr[PR]
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 44
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r4, 440(r3)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 6
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 100
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r3, mySmallLocalExecTLS3[UL]@le+2000(r13)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 882
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r4, (mySmallLocalExecTLS4[UL]@le+6800)-65536(r13)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r3, (mySmallLocalExecTLS5[TL]@le+8400)-65536(r13)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 1191
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r1, r1, 48
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r0, 16(r1)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: mtlr r0
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
+;
+; SMALL-LOCAL-EXEC-LARGECM64-LABEL: StoreLargeAccess1:
+; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: mflr r0
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stdu r1, -48(r1)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 212
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r0, 64(r1)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r4, L..C0@u(r2)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r4, L..C0@l(r4)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r3, mySmallLocalExecTLS6[UL]@le+424(r13)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 203
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r3, mySmallLocalExecTLS2[UL]@le+1200(r13)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C1@u(r2)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C1@l(r3)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: bla .__tls_get_addr[PR]
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 44
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r4, 440(r3)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 6
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 100
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r3, mySmallLocalExecTLS3[UL]@le+2000(r13)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 882
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r4, (mySmallLocalExecTLS4[UL]@le+6800)-65536(r13)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r3, (mySmallLocalExecTLS5[TL]@le+8400)-65536(r13)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 1191
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r1, r1, 48
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r0, 16(r1)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: mtlr r0
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
+entry:
+ %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS6)
+ %arrayidx = getelementptr inbounds [60 x i64], ptr %0, i64 0, i64 53
+ store i64 212, ptr %arrayidx, align 8
+ %1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS2)
+ %arrayidx1 = getelementptr inbounds [3000 x i64], ptr %1, i64 0, i64 150
+ store i64 203, ptr %arrayidx1, align 8
+ %2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @MyTLSGDVar)
+ %arrayidx2 = getelementptr inbounds [800 x i64], ptr %2, i64 0, i64 55
+ store i64 44, ptr %arrayidx2, align 8
+ %3 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS3)
+ %arrayidx3 = getelementptr inbounds [3000 x i64], ptr %3, i64 0, i64 250
+ store i64 6, ptr %arrayidx3, align 8
+ %4 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS4)
+ %arrayidx4 = getelementptr inbounds [3000 x i64], ptr %4, i64 0, i64 850
+ store i64 100, ptr %arrayidx4, align 8
+ %5 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS5)
+ %arrayidx5 = getelementptr inbounds [3000 x i64], ptr %5, i64 0, i64 1050
+ store i64 882, ptr %arrayidx5, align 8
+ %6 = load i64, ptr %arrayidx1, align 8
+ %7 = load i64, ptr %arrayidx3, align 8
+ %8 = load i64, ptr %arrayidx4, align 8
+ %add = add i64 %6, 882
+ %add9 = add i64 %add, %7
+ %add11 = add i64 %add9, %8
+ ret i64 %add11
+}
+
+attributes #0 = { "aix-small-tls" }
>From 558fc0e289d1c99b1f244e27d7c107b6e4348ca8 Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1 at ibm.com>
Date: Wed, 20 Mar 2024 08:07:54 -0500
Subject: [PATCH 2/7] Address review comments: move variables around and
simplify function to check for attribute
---
llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 16 +++++-----------
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 4 ++--
2 files changed, 7 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 7e65255830633f..af82b6cdb1809e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -7560,18 +7560,12 @@ static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
// Check if an SDValue has the 'aix-small-tls' global variable attribute.
static bool hasAIXSmallTLSAttr(SDValue Val) {
- GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val);
- if (!GA)
- return false;
-
- const GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal());
- if (!GV)
- return false;
-
- if (!GV->hasAttribute("aix-small-tls"))
- return false;
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val))
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal()))
+ if (GV->hasAttribute("aix-small-tls"))
+ return true;
- return true;
+ return false;
}
// Is an ADDI eligible for folding for non-TOC-based local-exec accesses?
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index e85bdf9b707b88..7436b202fba0d9 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3367,12 +3367,12 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
bool Is64Bit = Subtarget.isPPC64();
- bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
- bool HasAIXSmallTLSGlobalAttr = false;
TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
+ bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
+ bool HasAIXSmallTLSGlobalAttr = false;
SDValue VariableOffsetTGA =
DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
>From f4baa8c240d4e96f0758a8f5d8953887765819b3 Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1 at ibm.com>
Date: Thu, 21 Mar 2024 00:26:43 -0500
Subject: [PATCH 3/7] Removed unused variable
---
.../test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll | 1 -
.../CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll | 1 -
2 files changed, 2 deletions(-)
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll
index 55e486876e3373..f9dab93356a254 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll
@@ -12,7 +12,6 @@
@mySmallLocalExecTLS3 = internal thread_local(localexec) global [3000 x i64] zeroinitializer, align 8
@mySmallLocalExecTLS4 = internal thread_local(localexec) global [3000 x i64] zeroinitializer, align 8 #0
@mySmallLocalExecTLS5 = thread_local(localexec) global [3000 x i64] zeroinitializer, align 8 #0
-@mySmallLocalExecTLS = thread_local(localexec) local_unnamed_addr global [7800 x i64] zeroinitializer, align 8 #0
declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
; All accesses use a "faster" local-exec sequence directly off the thread pointer.
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll
index 8f617eac66ef6c..539707fa4d5b1b 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll
@@ -13,7 +13,6 @@
@mySmallLocalExecTLS3 = internal thread_local(localexec) global [3000 x i64] zeroinitializer, align 8
@mySmallLocalExecTLS4 = internal thread_local(localexec) global [3000 x i64] zeroinitializer, align 8 #0
@mySmallLocalExecTLS5 = thread_local(localexec) global [3000 x i64] zeroinitializer, align 8 #0
-@mySmallLocalExecTLS = thread_local(localexec) local_unnamed_addr global [7800 x i64] zeroinitializer, align 8 #0
declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
; Although some global variables are annotated with 'aix-small-tls', because the
>From eed24288fd9bb2b2b4b375c11033fdc099bc6838 Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1 at ibm.com>
Date: Fri, 22 Mar 2024 10:00:30 -0500
Subject: [PATCH 4/7] Remove extra variables in tests
---
.../aix-small-tls-globalvarattr-funcattr.ll | 235 ++++++------------
.../aix-small-tls-globalvarattr-targetattr.ll | 117 +++------
2 files changed, 120 insertions(+), 232 deletions(-)
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll
index f9dab93356a254..6523921682db4d 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll
@@ -6,95 +6,59 @@
; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
; RUN: < %s | FileCheck %s --check-prefix=CHECK-LARGECM64
-@mySmallLocalExecTLS6 = external thread_local(localexec) global [60 x i64], align 8
-@mySmallLocalExecTLS2 = external thread_local(localexec) global [3000 x i64], align 8 #0
-@MyTLSGDVar = thread_local global [800 x i64] zeroinitializer, align 8
-@mySmallLocalExecTLS3 = internal thread_local(localexec) global [3000 x i64] zeroinitializer, align 8
-@mySmallLocalExecTLS4 = internal thread_local(localexec) global [3000 x i64] zeroinitializer, align 8 #0
-@mySmallLocalExecTLS5 = thread_local(localexec) global [3000 x i64] zeroinitializer, align 8 #0
+@mySmallTLS = thread_local(localexec) global [7800 x i64] zeroinitializer, align 8 #0
+@mySmallTLS2 = thread_local(localexec) global [3000 x i64] zeroinitializer, align 8 #0
+@mySmallTLS3 = thread_local(localexec) global [3000 x i64] zeroinitializer, align 8
declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
-; All accesses use a "faster" local-exec sequence directly off the thread pointer.
+; All accesses use a "faster" local-exec sequence directly off the thread pointer,
+; except for mySmallTLS, as this variable is over the 32KB size limit.
define i64 @StoreLargeAccess1() #1 {
; CHECK-SMALLCM64-LABEL: StoreLargeAccess1:
; CHECK-SMALLCM64: # %bb.0: # %entry
-; CHECK-SMALLCM64-NEXT: mflr r0
-; CHECK-SMALLCM64-NEXT: stdu r1, -48(r1)
-; CHECK-SMALLCM64-NEXT: li r3, 212
-; CHECK-SMALLCM64-NEXT: li r4, 203
-; CHECK-SMALLCM64-NEXT: std r0, 64(r1)
-; CHECK-SMALLCM64-NEXT: std r3, mySmallLocalExecTLS6[UL]@le+424(r13)
-; CHECK-SMALLCM64-NEXT: std r4, mySmallLocalExecTLS2[UL]@le+1200(r13)
-; CHECK-SMALLCM64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tlsgdm) @MyTLSGDVar
-; CHECK-SMALLCM64-NEXT: ld r4, L..C1(r2) # target-flags(ppc-tlsgd) @MyTLSGDVar
-; CHECK-SMALLCM64-NEXT: bla .__tls_get_addr[PR]
-; CHECK-SMALLCM64-NEXT: li r4, 44
-; CHECK-SMALLCM64-NEXT: std r4, 440(r3)
-; CHECK-SMALLCM64-NEXT: li r3, 6
-; CHECK-SMALLCM64-NEXT: li r4, 100
-; CHECK-SMALLCM64-NEXT: std r3, mySmallLocalExecTLS3[UL]@le+2000(r13)
-; CHECK-SMALLCM64-NEXT: li r3, 882
-; CHECK-SMALLCM64-NEXT: std r4, (mySmallLocalExecTLS4[UL]@le+6800)-65536(r13)
-; CHECK-SMALLCM64-NEXT: std r3, (mySmallLocalExecTLS5[TL]@le+8400)-65536(r13)
-; CHECK-SMALLCM64-NEXT: li r3, 1191
-; CHECK-SMALLCM64-NEXT: addi r1, r1, 48
-; CHECK-SMALLCM64-NEXT: ld r0, 16(r1)
-; CHECK-SMALLCM64-NEXT: mtlr r0
+; CHECK-SMALLCM64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @mySmallTLS
+; CHECK-SMALLCM64-NEXT: li r4, 0
+; CHECK-SMALLCM64-NEXT: li r5, 23
+; CHECK-SMALLCM64-NEXT: ori r4, r4, 53328
+; CHECK-SMALLCM64-NEXT: add r3, r13, r3
+; CHECK-SMALLCM64-NEXT: stdx r5, r3, r4
+; CHECK-SMALLCM64-NEXT: li r3, 55
+; CHECK-SMALLCM64-NEXT: li r4, 64
+; CHECK-SMALLCM64-NEXT: std r3, (mySmallTLS2[TL]@le+696)-65536(r13)
+; CHECK-SMALLCM64-NEXT: li r3, 142
+; CHECK-SMALLCM64-NEXT: std r4, (mySmallTLS3[TL]@le+20000)-131072(r13)
; CHECK-SMALLCM64-NEXT: blr
;
; CHECK-LARGECM64-LABEL: StoreLargeAccess1:
; CHECK-LARGECM64: # %bb.0: # %entry
-; CHECK-LARGECM64-NEXT: mflr r0
-; CHECK-LARGECM64-NEXT: stdu r1, -48(r1)
-; CHECK-LARGECM64-NEXT: li r3, 212
-; CHECK-LARGECM64-NEXT: std r0, 64(r1)
-; CHECK-LARGECM64-NEXT: addis r4, L..C0@u(r2)
-; CHECK-LARGECM64-NEXT: ld r4, L..C0@l(r4)
-; CHECK-LARGECM64-NEXT: std r3, mySmallLocalExecTLS6[UL]@le+424(r13)
-; CHECK-LARGECM64-NEXT: li r3, 203
-; CHECK-LARGECM64-NEXT: std r3, mySmallLocalExecTLS2[UL]@le+1200(r13)
-; CHECK-LARGECM64-NEXT: addis r3, L..C1@u(r2)
-; CHECK-LARGECM64-NEXT: ld r3, L..C1@l(r3)
-; CHECK-LARGECM64-NEXT: bla .__tls_get_addr[PR]
-; CHECK-LARGECM64-NEXT: li r4, 44
-; CHECK-LARGECM64-NEXT: std r4, 440(r3)
-; CHECK-LARGECM64-NEXT: li r3, 6
-; CHECK-LARGECM64-NEXT: li r4, 100
-; CHECK-LARGECM64-NEXT: std r3, mySmallLocalExecTLS3[UL]@le+2000(r13)
-; CHECK-LARGECM64-NEXT: li r3, 882
-; CHECK-LARGECM64-NEXT: std r4, (mySmallLocalExecTLS4[UL]@le+6800)-65536(r13)
-; CHECK-LARGECM64-NEXT: std r3, (mySmallLocalExecTLS5[TL]@le+8400)-65536(r13)
-; CHECK-LARGECM64-NEXT: li r3, 1191
-; CHECK-LARGECM64-NEXT: addi r1, r1, 48
-; CHECK-LARGECM64-NEXT: ld r0, 16(r1)
-; CHECK-LARGECM64-NEXT: mtlr r0
+; CHECK-LARGECM64-NEXT: addis r3, L..C0@u(r2)
+; CHECK-LARGECM64-NEXT: li r4, 0
+; CHECK-LARGECM64-NEXT: li r5, 23
+; CHECK-LARGECM64-NEXT: ld r3, L..C0@l(r3)
+; CHECK-LARGECM64-NEXT: ori r4, r4, 53328
+; CHECK-LARGECM64-NEXT: add r3, r13, r3
+; CHECK-LARGECM64-NEXT: stdx r5, r3, r4
+; CHECK-LARGECM64-NEXT: li r3, 55
+; CHECK-LARGECM64-NEXT: li r4, 64
+; CHECK-LARGECM64-NEXT: std r3, (mySmallTLS2[TL]@le+696)-65536(r13)
+; CHECK-LARGECM64-NEXT: li r3, 142
+; CHECK-LARGECM64-NEXT: std r4, (mySmallTLS3[TL]@le+20000)-131072(r13)
; CHECK-LARGECM64-NEXT: blr
entry:
- %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS6)
- %arrayidx = getelementptr inbounds [60 x i64], ptr %0, i64 0, i64 53
- store i64 212, ptr %arrayidx, align 8
- %1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS2)
- %arrayidx1 = getelementptr inbounds [3000 x i64], ptr %1, i64 0, i64 150
- store i64 203, ptr %arrayidx1, align 8
- %2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @MyTLSGDVar)
- %arrayidx2 = getelementptr inbounds [800 x i64], ptr %2, i64 0, i64 55
- store i64 44, ptr %arrayidx2, align 8
- %3 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS3)
- %arrayidx3 = getelementptr inbounds [3000 x i64], ptr %3, i64 0, i64 250
- store i64 6, ptr %arrayidx3, align 8
- %4 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS4)
- %arrayidx4 = getelementptr inbounds [3000 x i64], ptr %4, i64 0, i64 850
- store i64 100, ptr %arrayidx4, align 8
- %5 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS5)
- %arrayidx5 = getelementptr inbounds [3000 x i64], ptr %5, i64 0, i64 1050
- store i64 882, ptr %arrayidx5, align 8
- %6 = load i64, ptr %arrayidx1, align 8
- %7 = load i64, ptr %arrayidx3, align 8
- %8 = load i64, ptr %arrayidx4, align 8
- %add = add i64 %6, 882
- %add9 = add i64 %add, %7
- %add11 = add i64 %add9, %8
- ret i64 %add11
+ %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS)
+ %arrayidx = getelementptr inbounds i8, ptr %0, i32 53328
+ store i64 23, ptr %arrayidx, align 8
+ %1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS2)
+ %arrayidx1 = getelementptr inbounds i8, ptr %1, i32 696
+ store i64 55, ptr %arrayidx1, align 8
+ %2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS3)
+ %arrayidx2 = getelementptr inbounds i8, ptr %2, i32 20000
+ store i64 64, ptr %arrayidx2, align 8
+ %3 = load i64, ptr %arrayidx, align 8
+ %4 = load i64, ptr %arrayidx1, align 8
+ %add = add i64 %3, 64
+ %add6 = add i64 %add, %4
+ ret i64 %add6
}
; Since this function does not have the 'aix-small-local-exec-tls` attribute,
@@ -103,93 +67,54 @@ entry:
define i64 @StoreLargeAccess2() {
; CHECK-SMALLCM64-LABEL: StoreLargeAccess2:
; CHECK-SMALLCM64: # %bb.0: # %entry
-; CHECK-SMALLCM64-NEXT: mflr r0
-; CHECK-SMALLCM64-NEXT: stdu r1, -48(r1)
-; CHECK-SMALLCM64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @mySmallLocalExecTLS6
-; CHECK-SMALLCM64-NEXT: li r4, 212
-; CHECK-SMALLCM64-NEXT: std r0, 64(r1)
-; CHECK-SMALLCM64-NEXT: add r3, r13, r3
-; CHECK-SMALLCM64-NEXT: std r4, 424(r3)
-; CHECK-SMALLCM64-NEXT: ld r4, L..C1(r2) # target-flags(ppc-tlsgd) @MyTLSGDVar
-; CHECK-SMALLCM64-NEXT: li r3, 203
-; CHECK-SMALLCM64-NEXT: std r3, mySmallLocalExecTLS2[UL]@le+1200(r13)
-; CHECK-SMALLCM64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tlsgdm) @MyTLSGDVar
-; CHECK-SMALLCM64-NEXT: bla .__tls_get_addr[PR]
-; CHECK-SMALLCM64-NEXT: li r4, 44
-; CHECK-SMALLCM64-NEXT: std r4, 440(r3)
-; CHECK-SMALLCM64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @mySmallLocalExecTLS3
-; CHECK-SMALLCM64-NEXT: li r4, 6
-; CHECK-SMALLCM64-NEXT: add r3, r13, r3
-; CHECK-SMALLCM64-NEXT: std r4, 2000(r3)
-; CHECK-SMALLCM64-NEXT: li r3, 100
-; CHECK-SMALLCM64-NEXT: li r4, 882
-; CHECK-SMALLCM64-NEXT: std r3, mySmallLocalExecTLS4[UL]@le+6800(r13)
-; CHECK-SMALLCM64-NEXT: std r4, mySmallLocalExecTLS5[TL]@le+8400(r13)
-; CHECK-SMALLCM64-NEXT: li r3, 1191
-; CHECK-SMALLCM64-NEXT: addi r1, r1, 48
-; CHECK-SMALLCM64-NEXT: ld r0, 16(r1)
-; CHECK-SMALLCM64-NEXT: mtlr r0
+; CHECK-SMALLCM64-NEXT: ld r5, L..C0(r2) # target-flags(ppc-tprel) @mySmallTLS
+; CHECK-SMALLCM64-NEXT: li r3, 0
+; CHECK-SMALLCM64-NEXT: li r4, 23
+; CHECK-SMALLCM64-NEXT: ori r3, r3, 53328
+; CHECK-SMALLCM64-NEXT: add r5, r13, r5
+; CHECK-SMALLCM64-NEXT: stdx r4, r5, r3
+; CHECK-SMALLCM64-NEXT: ld r5, L..C1(r2) # target-flags(ppc-tprel) @mySmallTLS3
+; CHECK-SMALLCM64-NEXT: li r3, 55
+; CHECK-SMALLCM64-NEXT: li r4, 64
+; CHECK-SMALLCM64-NEXT: std r3, mySmallTLS2[TL]@le+696(r13)
+; CHECK-SMALLCM64-NEXT: li r3, 142
+; CHECK-SMALLCM64-NEXT: add r5, r13, r5
+; CHECK-SMALLCM64-NEXT: std r4, 20000(r5)
; CHECK-SMALLCM64-NEXT: blr
;
; CHECK-LARGECM64-LABEL: StoreLargeAccess2:
; CHECK-LARGECM64: # %bb.0: # %entry
-; CHECK-LARGECM64-NEXT: mflr r0
-; CHECK-LARGECM64-NEXT: stdu r1, -48(r1)
-; CHECK-LARGECM64-NEXT: addis r3, L..C2@u(r2)
-; CHECK-LARGECM64-NEXT: li r4, 212
-; CHECK-LARGECM64-NEXT: std r0, 64(r1)
-; CHECK-LARGECM64-NEXT: ld r3, L..C2@l(r3)
+; CHECK-LARGECM64-NEXT: addis r3, L..C0@u(r2)
+; CHECK-LARGECM64-NEXT: li r4, 0
+; CHECK-LARGECM64-NEXT: li r5, 23
+; CHECK-LARGECM64-NEXT: ld r3, L..C0@l(r3)
+; CHECK-LARGECM64-NEXT: ori r4, r4, 53328
; CHECK-LARGECM64-NEXT: add r3, r13, r3
-; CHECK-LARGECM64-NEXT: std r4, 424(r3)
-; CHECK-LARGECM64-NEXT: li r3, 203
-; CHECK-LARGECM64-NEXT: addis r4, L..C0@u(r2)
-; CHECK-LARGECM64-NEXT: ld r4, L..C0@l(r4)
-; CHECK-LARGECM64-NEXT: std r3, mySmallLocalExecTLS2[UL]@le+1200(r13)
+; CHECK-LARGECM64-NEXT: stdx r5, r3, r4
; CHECK-LARGECM64-NEXT: addis r3, L..C1@u(r2)
+; CHECK-LARGECM64-NEXT: li r4, 55
+; CHECK-LARGECM64-NEXT: li r5, 64
; CHECK-LARGECM64-NEXT: ld r3, L..C1@l(r3)
-; CHECK-LARGECM64-NEXT: bla .__tls_get_addr[PR]
-; CHECK-LARGECM64-NEXT: li r4, 44
-; CHECK-LARGECM64-NEXT: std r4, 440(r3)
-; CHECK-LARGECM64-NEXT: addis r3, L..C3@u(r2)
-; CHECK-LARGECM64-NEXT: li r4, 6
-; CHECK-LARGECM64-NEXT: ld r3, L..C3@l(r3)
+; CHECK-LARGECM64-NEXT: std r4, mySmallTLS2[TL]@le+696(r13)
; CHECK-LARGECM64-NEXT: add r3, r13, r3
-; CHECK-LARGECM64-NEXT: std r4, 2000(r3)
-; CHECK-LARGECM64-NEXT: li r3, 100
-; CHECK-LARGECM64-NEXT: li r4, 882
-; CHECK-LARGECM64-NEXT: std r3, mySmallLocalExecTLS4[UL]@le+6800(r13)
-; CHECK-LARGECM64-NEXT: std r4, mySmallLocalExecTLS5[TL]@le+8400(r13)
-; CHECK-LARGECM64-NEXT: li r3, 1191
-; CHECK-LARGECM64-NEXT: addi r1, r1, 48
-; CHECK-LARGECM64-NEXT: ld r0, 16(r1)
-; CHECK-LARGECM64-NEXT: mtlr r0
+; CHECK-LARGECM64-NEXT: std r5, 20000(r3)
+; CHECK-LARGECM64-NEXT: li r3, 142
; CHECK-LARGECM64-NEXT: blr
entry:
- %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS6)
- %arrayidx = getelementptr inbounds [60 x i64], ptr %0, i64 0, i64 53
- store i64 212, ptr %arrayidx, align 8
- %1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS2)
- %arrayidx1 = getelementptr inbounds [3000 x i64], ptr %1, i64 0, i64 150
- store i64 203, ptr %arrayidx1, align 8
- %2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @MyTLSGDVar)
- %arrayidx2 = getelementptr inbounds [800 x i64], ptr %2, i64 0, i64 55
- store i64 44, ptr %arrayidx2, align 8
- %3 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS3)
- %arrayidx3 = getelementptr inbounds [3000 x i64], ptr %3, i64 0, i64 250
- store i64 6, ptr %arrayidx3, align 8
- %4 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS4)
- %arrayidx4 = getelementptr inbounds [3000 x i64], ptr %4, i64 0, i64 850
- store i64 100, ptr %arrayidx4, align 8
- %5 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS5)
- %arrayidx5 = getelementptr inbounds [3000 x i64], ptr %5, i64 0, i64 1050
- store i64 882, ptr %arrayidx5, align 8
- %6 = load i64, ptr %arrayidx1, align 8
- %7 = load i64, ptr %arrayidx3, align 8
- %8 = load i64, ptr %arrayidx4, align 8
- %add = add i64 %6, 882
- %add9 = add i64 %add, %7
- %add11 = add i64 %add9, %8
- ret i64 %add11
+ %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS)
+ %arrayidx = getelementptr inbounds i8, ptr %0, i32 53328
+ store i64 23, ptr %arrayidx, align 8
+ %1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS2)
+ %arrayidx1 = getelementptr inbounds i8, ptr %1, i32 696
+ store i64 55, ptr %arrayidx1, align 8
+ %2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS3)
+ %arrayidx2 = getelementptr inbounds i8, ptr %2, i32 20000
+ store i64 64, ptr %arrayidx2, align 8
+ %3 = load i64, ptr %arrayidx, align 8
+ %4 = load i64, ptr %arrayidx1, align 8
+ %add = add i64 %3, 64
+ %add6 = add i64 %add, %4
+ ret i64 %add6
}
attributes #0 = { "aix-small-tls" }
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll
index 539707fa4d5b1b..f8061bedda147c 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll
@@ -7,12 +7,9 @@
; RUN: -mattr=+aix-small-local-exec-tls < %s | FileCheck %s \
; RUN: --check-prefix=SMALL-LOCAL-EXEC-LARGECM64
-@mySmallLocalExecTLS6 = external thread_local(localexec) global [60 x i64], align 8
-@mySmallLocalExecTLS2 = external thread_local(localexec) global [3000 x i64], align 8 #0
-@MyTLSGDVar = thread_local global [800 x i64] zeroinitializer, align 8
-@mySmallLocalExecTLS3 = internal thread_local(localexec) global [3000 x i64] zeroinitializer, align 8
-@mySmallLocalExecTLS4 = internal thread_local(localexec) global [3000 x i64] zeroinitializer, align 8 #0
-@mySmallLocalExecTLS5 = thread_local(localexec) global [3000 x i64] zeroinitializer, align 8 #0
+@mySmallTLS = thread_local(localexec) global [7800 x i64] zeroinitializer, align 8 #0
+@mySmallTLS2 = thread_local(localexec) global [3000 x i64] zeroinitializer, align 8 #0
+@mySmallTLS3 = thread_local(localexec) global [3000 x i64] zeroinitializer, align 8
declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
; Although some global variables are annotated with 'aix-small-tls', because the
@@ -21,83 +18,49 @@ declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
define i64 @StoreLargeAccess1() {
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: StoreLargeAccess1:
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: mflr r0
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stdu r1, -48(r1)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 212
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 203
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r0, 64(r1)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r3, mySmallLocalExecTLS6[UL]@le+424(r13)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r4, mySmallLocalExecTLS2[UL]@le+1200(r13)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tlsgdm) @MyTLSGDVar
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r4, L..C1(r2) # target-flags(ppc-tlsgd) @MyTLSGDVar
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: bla .__tls_get_addr[PR]
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 44
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r4, 440(r3)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 6
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 100
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r3, mySmallLocalExecTLS3[UL]@le+2000(r13)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 882
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r4, (mySmallLocalExecTLS4[UL]@le+6800)-65536(r13)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r3, (mySmallLocalExecTLS5[TL]@le+8400)-65536(r13)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 1191
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r1, r1, 48
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r0, 16(r1)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: mtlr r0
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @mySmallTLS
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 0
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r5, 23
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ori r4, r4, 53328
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r3, r13, r3
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stdx r5, r3, r4
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 55
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 64
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r3, (mySmallTLS2[TL]@le+696)-65536(r13)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 142
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r4, (mySmallTLS3[TL]@le+20000)-131072(r13)
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
;
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: StoreLargeAccess1:
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: mflr r0
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stdu r1, -48(r1)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 212
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r0, 64(r1)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r4, L..C0@u(r2)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r4, L..C0@l(r4)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r3, mySmallLocalExecTLS6[UL]@le+424(r13)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 203
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r3, mySmallLocalExecTLS2[UL]@le+1200(r13)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C1@u(r2)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C1@l(r3)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: bla .__tls_get_addr[PR]
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 44
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r4, 440(r3)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 6
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 100
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r3, mySmallLocalExecTLS3[UL]@le+2000(r13)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 882
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r4, (mySmallLocalExecTLS4[UL]@le+6800)-65536(r13)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r3, (mySmallLocalExecTLS5[TL]@le+8400)-65536(r13)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 1191
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r1, r1, 48
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r0, 16(r1)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: mtlr r0
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 0
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r5, 23
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ori r4, r4, 53328
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r3, r13, r3
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stdx r5, r3, r4
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 55
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 64
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r3, (mySmallTLS2[TL]@le+696)-65536(r13)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 142
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r4, (mySmallTLS3[TL]@le+20000)-131072(r13)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
entry:
- %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS6)
- %arrayidx = getelementptr inbounds [60 x i64], ptr %0, i64 0, i64 53
- store i64 212, ptr %arrayidx, align 8
- %1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS2)
- %arrayidx1 = getelementptr inbounds [3000 x i64], ptr %1, i64 0, i64 150
- store i64 203, ptr %arrayidx1, align 8
- %2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @MyTLSGDVar)
- %arrayidx2 = getelementptr inbounds [800 x i64], ptr %2, i64 0, i64 55
- store i64 44, ptr %arrayidx2, align 8
- %3 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS3)
- %arrayidx3 = getelementptr inbounds [3000 x i64], ptr %3, i64 0, i64 250
- store i64 6, ptr %arrayidx3, align 8
- %4 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS4)
- %arrayidx4 = getelementptr inbounds [3000 x i64], ptr %4, i64 0, i64 850
- store i64 100, ptr %arrayidx4, align 8
- %5 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS5)
- %arrayidx5 = getelementptr inbounds [3000 x i64], ptr %5, i64 0, i64 1050
- store i64 882, ptr %arrayidx5, align 8
- %6 = load i64, ptr %arrayidx1, align 8
- %7 = load i64, ptr %arrayidx3, align 8
- %8 = load i64, ptr %arrayidx4, align 8
- %add = add i64 %6, 882
- %add9 = add i64 %add, %7
- %add11 = add i64 %add9, %8
- ret i64 %add11
+ %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS)
+ %arrayidx = getelementptr inbounds i8, ptr %0, i32 53328
+ store i64 23, ptr %arrayidx, align 8
+ %1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS2)
+ %arrayidx1 = getelementptr inbounds i8, ptr %1, i32 696
+ store i64 55, ptr %arrayidx1, align 8
+ %2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS3)
+ %arrayidx2 = getelementptr inbounds i8, ptr %2, i32 20000
+ store i64 64, ptr %arrayidx2, align 8
+ %3 = load i64, ptr %arrayidx, align 8
+ %4 = load i64, ptr %arrayidx1, align 8
+ %add = add i64 %3, 64
+ %add6 = add i64 %add, %4
+ ret i64 %add6
}
attributes #0 = { "aix-small-tls" }
>From cfe59bd05722f00aef64eba5327a06723c34702d Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1 at ibm.com>
Date: Fri, 22 Mar 2024 11:16:28 -0500
Subject: [PATCH 5/7] Update variable names in test cases
---
.../aix-small-tls-globalvarattr-funcattr.ll | 44 ++++++++---------
.../aix-small-tls-globalvarattr-loadaddr.ll | 48 +++++++++----------
.../aix-small-tls-globalvarattr-targetattr.ll | 22 ++++-----
3 files changed, 57 insertions(+), 57 deletions(-)
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll
index 6523921682db4d..fba221ea30a87b 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll
@@ -45,20 +45,20 @@ define i64 @StoreLargeAccess1() #1 {
; CHECK-LARGECM64-NEXT: std r4, (mySmallTLS3[TL]@le+20000)-131072(r13)
; CHECK-LARGECM64-NEXT: blr
entry:
- %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS)
- %arrayidx = getelementptr inbounds i8, ptr %0, i32 53328
+ %tls0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS)
+ %arrayidx = getelementptr inbounds i8, ptr %tls0, i32 53328
store i64 23, ptr %arrayidx, align 8
- %1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS2)
- %arrayidx1 = getelementptr inbounds i8, ptr %1, i32 696
+ %tls1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS2)
+ %arrayidx1 = getelementptr inbounds i8, ptr %tls1, i32 696
store i64 55, ptr %arrayidx1, align 8
- %2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS3)
- %arrayidx2 = getelementptr inbounds i8, ptr %2, i32 20000
+ %tls2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS3)
+ %arrayidx2 = getelementptr inbounds i8, ptr %tls2, i32 20000
store i64 64, ptr %arrayidx2, align 8
- %3 = load i64, ptr %arrayidx, align 8
- %4 = load i64, ptr %arrayidx1, align 8
- %add = add i64 %3, 64
- %add6 = add i64 %add, %4
- ret i64 %add6
+ %load1 = load i64, ptr %arrayidx, align 8
+ %load2 = load i64, ptr %arrayidx1, align 8
+ %add1 = add i64 %load1, 64
+ %add2 = add i64 %add1, %load2
+ ret i64 %add2
}
; Since this function does not have the 'aix-small-local-exec-tls` attribute,
@@ -101,20 +101,20 @@ define i64 @StoreLargeAccess2() {
; CHECK-LARGECM64-NEXT: li r3, 142
; CHECK-LARGECM64-NEXT: blr
entry:
- %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS)
- %arrayidx = getelementptr inbounds i8, ptr %0, i32 53328
+ %tls0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS)
+ %arrayidx = getelementptr inbounds i8, ptr %tls0, i32 53328
store i64 23, ptr %arrayidx, align 8
- %1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS2)
- %arrayidx1 = getelementptr inbounds i8, ptr %1, i32 696
+ %tls1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS2)
+ %arrayidx1 = getelementptr inbounds i8, ptr %tls1, i32 696
store i64 55, ptr %arrayidx1, align 8
- %2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS3)
- %arrayidx2 = getelementptr inbounds i8, ptr %2, i32 20000
+ %tls2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS3)
+ %arrayidx2 = getelementptr inbounds i8, ptr %tls2, i32 20000
store i64 64, ptr %arrayidx2, align 8
- %3 = load i64, ptr %arrayidx, align 8
- %4 = load i64, ptr %arrayidx1, align 8
- %add = add i64 %3, 64
- %add6 = add i64 %add, %4
- ret i64 %add6
+ %load1 = load i64, ptr %arrayidx, align 8
+ %load2 = load i64, ptr %arrayidx1, align 8
+ %add1 = add i64 %load1, 64
+ %add2 = add i64 %add1, %load2
+ ret i64 %add2
}
attributes #0 = { "aix-small-tls" }
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-loadaddr.ll b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-loadaddr.ll
index db4266958daff1..03dbe3c4c52479 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-loadaddr.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-loadaddr.ll
@@ -37,8 +37,8 @@ define nonnull ptr @AddrTest1() local_unnamed_addr {
; LARGECM64-NEXT: addi r3, r13, a[TL]@le+1
; LARGECM64-NEXT: blr
entry:
- %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @a)
- %arrayidx = getelementptr inbounds [87 x i8], ptr %0, i64 0, i64 1
+ %tls0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @a)
+ %arrayidx = getelementptr inbounds [87 x i8], ptr %tls0, i64 0, i64 1
ret ptr %arrayidx
}
@@ -58,8 +58,8 @@ define nonnull ptr @AddrTest1_NoAttr() local_unnamed_addr {
; LARGECM64-NEXT: addi r3, r3, 1
; LARGECM64-NEXT: blr
entry:
- %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @a_noattr)
- %arrayidx = getelementptr inbounds [87 x i8], ptr %0, i64 0, i64 1
+ %tls0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @a_noattr)
+ %arrayidx = getelementptr inbounds [87 x i8], ptr %tls0, i64 0, i64 1
ret ptr %arrayidx
}
@@ -74,8 +74,8 @@ define nonnull ptr @AddrTest2() local_unnamed_addr {
; LARGECM64-NEXT: addi r3, r13, b[TL]@le+4
; LARGECM64-NEXT: blr
entry:
- %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @b)
- %arrayidx = getelementptr inbounds [87 x i16], ptr %0, i64 0, i64 2
+ %tls0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @b)
+ %arrayidx = getelementptr inbounds [87 x i16], ptr %tls0, i64 0, i64 2
ret ptr %arrayidx
}
@@ -95,8 +95,8 @@ define nonnull ptr @AddrTest2_NoAttr() local_unnamed_addr {
; LARGECM64-NEXT: addi r3, r3, 4
; LARGECM64-NEXT: blr
entry:
- %0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @b_noattr)
- %arrayidx = getelementptr inbounds [87 x i16], ptr %0, i64 0, i64 2
+ %tls0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @b_noattr)
+ %arrayidx = getelementptr inbounds [87 x i16], ptr %tls0, i64 0, i64 2
ret ptr %arrayidx
}
@@ -111,8 +111,8 @@ define nonnull ptr @AddrTest3() local_unnamed_addr {
; LARGECM64-NEXT: addi r3, r13, c[TL]@le+12
; LARGECM64-NEXT: blr
entry:
- %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @c)
- %arrayidx = getelementptr inbounds [87 x i32], ptr %0, i64 0, i64 3
+ %tls0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @c)
+ %arrayidx = getelementptr inbounds [87 x i32], ptr %tls0, i64 0, i64 3
ret ptr %arrayidx
}
@@ -132,8 +132,8 @@ define nonnull ptr @AddrTest3_NoAttr() local_unnamed_addr {
; LARGECM64-NEXT: addi r3, r3, 12
; LARGECM64-NEXT: blr
entry:
- %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @c_noattr)
- %arrayidx = getelementptr inbounds [87 x i32], ptr %0, i64 0, i64 3
+ %tls0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @c_noattr)
+ %arrayidx = getelementptr inbounds [87 x i32], ptr %tls0, i64 0, i64 3
ret ptr %arrayidx
}
@@ -148,8 +148,8 @@ define nonnull ptr @AddrTest4() local_unnamed_addr {
; LARGECM64-NEXT: addi r3, r13, c[TL]@le+56
; LARGECM64-NEXT: blr
entry:
- %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @c)
- %arrayidx = getelementptr inbounds [87 x i64], ptr %0, i64 0, i64 7
+ %tls0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @c)
+ %arrayidx = getelementptr inbounds [87 x i64], ptr %tls0, i64 0, i64 7
ret ptr %arrayidx
}
@@ -169,8 +169,8 @@ define nonnull ptr @AddrTest4_NoAttr() local_unnamed_addr {
; LARGECM64-NEXT: addi r3, r3, 56
; LARGECM64-NEXT: blr
entry:
- %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @c_noattr)
- %arrayidx = getelementptr inbounds [87 x i64], ptr %0, i64 0, i64 7
+ %tls0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @c_noattr)
+ %arrayidx = getelementptr inbounds [87 x i64], ptr %tls0, i64 0, i64 7
ret ptr %arrayidx
}
@@ -185,8 +185,8 @@ define nonnull ptr @AddrTest5() local_unnamed_addr {
; LARGECM64-NEXT: addi r3, r13, e[TL]@le+48
; LARGECM64-NEXT: blr
entry:
- %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @e)
- %arrayidx = getelementptr inbounds [87 x double], ptr %0, i64 0, i64 6
+ %tls0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @e)
+ %arrayidx = getelementptr inbounds [87 x double], ptr %tls0, i64 0, i64 6
ret ptr %arrayidx
}
@@ -206,8 +206,8 @@ define nonnull ptr @AddrTest5_NoAttr() local_unnamed_addr {
; LARGECM64-NEXT: addi r3, r3, 48
; LARGECM64-NEXT: blr
entry:
- %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @e_noattr)
- %arrayidx = getelementptr inbounds [87 x double], ptr %0, i64 0, i64 6
+ %tls0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @e_noattr)
+ %arrayidx = getelementptr inbounds [87 x double], ptr %tls0, i64 0, i64 6
ret ptr %arrayidx
}
@@ -222,8 +222,8 @@ define nonnull ptr @AddrTest6() local_unnamed_addr {
; LARGECM64-NEXT: addi r3, r13, f[TL]@le+16
; LARGECM64-NEXT: blr
entry:
- %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @f)
- %arrayidx = getelementptr inbounds [87 x float], ptr %0, i64 0, i64 4
+ %tls0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @f)
+ %arrayidx = getelementptr inbounds [87 x float], ptr %tls0, i64 0, i64 4
ret ptr %arrayidx
}
@@ -243,8 +243,8 @@ define nonnull ptr @AddrTest6_NoAttr() local_unnamed_addr {
; LARGECM64-NEXT: addi r3, r3, 16
; LARGECM64-NEXT: blr
entry:
- %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @f_noattr)
- %arrayidx = getelementptr inbounds [87 x float], ptr %0, i64 0, i64 4
+ %tls0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @f_noattr)
+ %arrayidx = getelementptr inbounds [87 x float], ptr %tls0, i64 0, i64 4
ret ptr %arrayidx
}
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll
index f8061bedda147c..70ae7f974d4be5 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll
@@ -47,20 +47,20 @@ define i64 @StoreLargeAccess1() {
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r4, (mySmallTLS3[TL]@le+20000)-131072(r13)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
entry:
- %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS)
- %arrayidx = getelementptr inbounds i8, ptr %0, i32 53328
+ %tls0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS)
+ %arrayidx = getelementptr inbounds i8, ptr %tls0, i32 53328
store i64 23, ptr %arrayidx, align 8
- %1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS2)
- %arrayidx1 = getelementptr inbounds i8, ptr %1, i32 696
+ %tls1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS2)
+ %arrayidx1 = getelementptr inbounds i8, ptr %tls1, i32 696
store i64 55, ptr %arrayidx1, align 8
- %2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS3)
- %arrayidx2 = getelementptr inbounds i8, ptr %2, i32 20000
+ %tls2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS3)
+ %arrayidx2 = getelementptr inbounds i8, ptr %tls2, i32 20000
store i64 64, ptr %arrayidx2, align 8
- %3 = load i64, ptr %arrayidx, align 8
- %4 = load i64, ptr %arrayidx1, align 8
- %add = add i64 %3, 64
- %add6 = add i64 %add, %4
- ret i64 %add6
+ %load1 = load i64, ptr %arrayidx, align 8
+ %load2 = load i64, ptr %arrayidx1, align 8
+ %add1 = add i64 %load1, 64
+ %add2 = add i64 %add1, %load2
+ ret i64 %add2
}
attributes #0 = { "aix-small-tls" }
>From 626a8547525edb6da159fe3d9ea6426be77ae1dc Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1 at ibm.com>
Date: Fri, 22 Mar 2024 15:59:40 -0500
Subject: [PATCH 6/7] Further simplify test cases
---
.../aix-small-tls-globalvarattr-funcattr.ll | 68 +++++------
.../aix-small-tls-globalvarattr-loadaddr.ll | 106 +++++++-----------
.../aix-small-tls-globalvarattr-targetattr.ll | 43 +++----
3 files changed, 79 insertions(+), 138 deletions(-)
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll
index fba221ea30a87b..38b35dc6c81cf8 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll
@@ -1,10 +1,9 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s \
-; RUN: | FileCheck %s --check-prefix=CHECK-SMALLCM64
+; RUN: | FileCheck %s --check-prefixes=COMMONCM,CHECK-SMALLCM64
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
-; RUN: < %s | FileCheck %s --check-prefix=CHECK-LARGECM64
+; RUN: < %s | FileCheck %s --check-prefixes=COMMONCM,CHECK-LARGECM64
@mySmallTLS = thread_local(localexec) global [7800 x i64] zeroinitializer, align 8 #0
@mySmallTLS2 = thread_local(localexec) global [3000 x i64] zeroinitializer, align 8 #0
@@ -14,36 +13,24 @@ declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
; All accesses use a "faster" local-exec sequence directly off the thread pointer,
; except for mySmallTLS, as this variable is over the 32KB size limit.
define i64 @StoreLargeAccess1() #1 {
-; CHECK-SMALLCM64-LABEL: StoreLargeAccess1:
-; CHECK-SMALLCM64: # %bb.0: # %entry
-; CHECK-SMALLCM64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @mySmallTLS
-; CHECK-SMALLCM64-NEXT: li r4, 0
-; CHECK-SMALLCM64-NEXT: li r5, 23
-; CHECK-SMALLCM64-NEXT: ori r4, r4, 53328
-; CHECK-SMALLCM64-NEXT: add r3, r13, r3
-; CHECK-SMALLCM64-NEXT: stdx r5, r3, r4
-; CHECK-SMALLCM64-NEXT: li r3, 55
-; CHECK-SMALLCM64-NEXT: li r4, 64
-; CHECK-SMALLCM64-NEXT: std r3, (mySmallTLS2[TL]@le+696)-65536(r13)
-; CHECK-SMALLCM64-NEXT: li r3, 142
-; CHECK-SMALLCM64-NEXT: std r4, (mySmallTLS3[TL]@le+20000)-131072(r13)
-; CHECK-SMALLCM64-NEXT: blr
-;
-; CHECK-LARGECM64-LABEL: StoreLargeAccess1:
-; CHECK-LARGECM64: # %bb.0: # %entry
-; CHECK-LARGECM64-NEXT: addis r3, L..C0@u(r2)
-; CHECK-LARGECM64-NEXT: li r4, 0
-; CHECK-LARGECM64-NEXT: li r5, 23
-; CHECK-LARGECM64-NEXT: ld r3, L..C0@l(r3)
-; CHECK-LARGECM64-NEXT: ori r4, r4, 53328
-; CHECK-LARGECM64-NEXT: add r3, r13, r3
-; CHECK-LARGECM64-NEXT: stdx r5, r3, r4
-; CHECK-LARGECM64-NEXT: li r3, 55
-; CHECK-LARGECM64-NEXT: li r4, 64
-; CHECK-LARGECM64-NEXT: std r3, (mySmallTLS2[TL]@le+696)-65536(r13)
-; CHECK-LARGECM64-NEXT: li r3, 142
-; CHECK-LARGECM64-NEXT: std r4, (mySmallTLS3[TL]@le+20000)-131072(r13)
-; CHECK-LARGECM64-NEXT: blr
+; COMMONCM-LABEL: StoreLargeAccess1:
+; COMMONCM-NEXT: # %bb.0: # %entry
+; CHECK-SMALLCM64: ld r3, L..C0(r2) # target-flags(ppc-tprel) @mySmallTLS
+; CHECK-SMALLCM64-NEXT: li r4, 0
+; CHECK-SMALLCM64-NEXT: li r5, 23
+; CHECK-LARGECM64: addis r3, L..C0@u(r2)
+; CHECK-LARGECM64-NEXT: li r4, 0
+; CHECK-LARGECM64-NEXT: li r5, 23
+; CHECK-LARGECM64-NEXT: ld r3, L..C0@l(r3)
+; COMMONCM: ori r4, r4, 53328
+; COMMONCM-NEXT: add r3, r13, r3
+; COMMONCM-NEXT: stdx r5, r3, r4
+; COMMONCM-NEXT: li r3, 55
+; COMMONCM-NEXT: li r4, 64
+; COMMONCM-NEXT: std r3, (mySmallTLS2[TL]@le+696)-65536(r13)
+; COMMONCM-NEXT: li r3, 142
+; COMMONCM-NEXT: std r4, (mySmallTLS3[TL]@le+20000)-131072(r13)
+; COMMONCM-NEXT: blr
entry:
%tls0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS)
%arrayidx = getelementptr inbounds i8, ptr %tls0, i32 53328
@@ -65,9 +52,9 @@ entry:
; only some local-exec variables should have the small-local-exec TLS access
; sequence (as opposed to all of them).
define i64 @StoreLargeAccess2() {
-; CHECK-SMALLCM64-LABEL: StoreLargeAccess2:
-; CHECK-SMALLCM64: # %bb.0: # %entry
-; CHECK-SMALLCM64-NEXT: ld r5, L..C0(r2) # target-flags(ppc-tprel) @mySmallTLS
+; COMMONCM-LABEL: StoreLargeAccess2:
+; COMMONCM-NEXT: # %bb.0: # %entry
+; CHECK-SMALLCM64: ld r5, L..C0(r2) # target-flags(ppc-tprel) @mySmallTLS
; CHECK-SMALLCM64-NEXT: li r3, 0
; CHECK-SMALLCM64-NEXT: li r4, 23
; CHECK-SMALLCM64-NEXT: ori r3, r3, 53328
@@ -80,11 +67,7 @@ define i64 @StoreLargeAccess2() {
; CHECK-SMALLCM64-NEXT: li r3, 142
; CHECK-SMALLCM64-NEXT: add r5, r13, r5
; CHECK-SMALLCM64-NEXT: std r4, 20000(r5)
-; CHECK-SMALLCM64-NEXT: blr
-;
-; CHECK-LARGECM64-LABEL: StoreLargeAccess2:
-; CHECK-LARGECM64: # %bb.0: # %entry
-; CHECK-LARGECM64-NEXT: addis r3, L..C0@u(r2)
+; CHECK-LARGECM64: addis r3, L..C0@u(r2)
; CHECK-LARGECM64-NEXT: li r4, 0
; CHECK-LARGECM64-NEXT: li r5, 23
; CHECK-LARGECM64-NEXT: ld r3, L..C0@l(r3)
@@ -99,7 +82,8 @@ define i64 @StoreLargeAccess2() {
; CHECK-LARGECM64-NEXT: add r3, r13, r3
; CHECK-LARGECM64-NEXT: std r5, 20000(r3)
; CHECK-LARGECM64-NEXT: li r3, 142
-; CHECK-LARGECM64-NEXT: blr
+; COMMONCM-NEXT: blr
+;
entry:
%tls0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS)
%arrayidx = getelementptr inbounds i8, ptr %tls0, i32 53328
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-loadaddr.ll b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-loadaddr.ll
index 03dbe3c4c52479..237c2d2e473cf2 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-loadaddr.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-loadaddr.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s \
-; RUN: | FileCheck %s --check-prefix=SMALLCM64
+; RUN: | FileCheck %s --check-prefixes=COMMONCM,SMALLCM64
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
-; RUN: < %s | FileCheck %s --check-prefix=LARGECM64
+; RUN: < %s | FileCheck %s --check-prefixes=COMMONCM,LARGECM64
; Test that the 'aix-small-tls' global variable attribute generates the
; optimized small-local-exec TLS sequence. Global variables without this
@@ -26,23 +26,18 @@ declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
@f = thread_local(localexec) global [87 x float] zeroinitializer, align 4 #0
@f_noattr = thread_local(localexec) global [87 x float] zeroinitializer, align 4
-define nonnull ptr @AddrTest1() local_unnamed_addr {
-; SMALLCM64-LABEL: AddrTest1:
-; SMALLCM64: # %bb.0: # %entry
-; SMALLCM64-NEXT: addi r3, r13, a[TL]@le+1
-; SMALLCM64-NEXT: blr
-;
-; LARGECM64-LABEL: AddrTest1:
-; LARGECM64: # %bb.0: # %entry
-; LARGECM64-NEXT: addi r3, r13, a[TL]@le+1
-; LARGECM64-NEXT: blr
+define nonnull ptr @AddrTest1() {
+; COMMONCM-LABEL: AddrTest1:
+; COMMONCM: # %bb.0: # %entry
+; COMMONCM-NEXT: addi r3, r13, a[TL]@le+1
+; COMMONCM-NEXT: blr
entry:
%tls0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @a)
%arrayidx = getelementptr inbounds [87 x i8], ptr %tls0, i64 0, i64 1
ret ptr %arrayidx
}
-define nonnull ptr @AddrTest1_NoAttr() local_unnamed_addr {
+define nonnull ptr @AddrTest1_NoAttr() {
; SMALLCM64-LABEL: AddrTest1_NoAttr:
; SMALLCM64: # %bb.0: # %entry
; SMALLCM64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @a_noattr
@@ -63,23 +58,18 @@ entry:
ret ptr %arrayidx
}
-define nonnull ptr @AddrTest2() local_unnamed_addr {
-; SMALLCM64-LABEL: AddrTest2:
-; SMALLCM64: # %bb.0: # %entry
-; SMALLCM64-NEXT: addi r3, r13, b[TL]@le+4
-; SMALLCM64-NEXT: blr
-;
-; LARGECM64-LABEL: AddrTest2:
-; LARGECM64: # %bb.0: # %entry
-; LARGECM64-NEXT: addi r3, r13, b[TL]@le+4
-; LARGECM64-NEXT: blr
+define nonnull ptr @AddrTest2() {
+; COMMONCM-LABEL: AddrTest2:
+; COMMONCM: # %bb.0: # %entry
+; COMMONCM-NEXT: addi r3, r13, b[TL]@le+4
+; COMMONCM-NEXT: blr
entry:
%tls0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @b)
%arrayidx = getelementptr inbounds [87 x i16], ptr %tls0, i64 0, i64 2
ret ptr %arrayidx
}
-define nonnull ptr @AddrTest2_NoAttr() local_unnamed_addr {
+define nonnull ptr @AddrTest2_NoAttr() {
; SMALLCM64-LABEL: AddrTest2_NoAttr:
; SMALLCM64: # %bb.0: # %entry
; SMALLCM64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tprel) @b_noattr
@@ -100,23 +90,18 @@ entry:
ret ptr %arrayidx
}
-define nonnull ptr @AddrTest3() local_unnamed_addr {
-; SMALLCM64-LABEL: AddrTest3:
-; SMALLCM64: # %bb.0: # %entry
-; SMALLCM64-NEXT: addi r3, r13, c[TL]@le+12
-; SMALLCM64-NEXT: blr
-;
-; LARGECM64-LABEL: AddrTest3:
-; LARGECM64: # %bb.0: # %entry
-; LARGECM64-NEXT: addi r3, r13, c[TL]@le+12
-; LARGECM64-NEXT: blr
+define nonnull ptr @AddrTest3() {
+; COMMONCM-LABEL: AddrTest3:
+; COMMONCM: # %bb.0: # %entry
+; COMMONCM-NEXT: addi r3, r13, c[TL]@le+12
+; COMMONCM-NEXT: blr
entry:
%tls0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @c)
%arrayidx = getelementptr inbounds [87 x i32], ptr %tls0, i64 0, i64 3
ret ptr %arrayidx
}
-define nonnull ptr @AddrTest3_NoAttr() local_unnamed_addr {
+define nonnull ptr @AddrTest3_NoAttr() {
; SMALLCM64-LABEL: AddrTest3_NoAttr:
; SMALLCM64: # %bb.0: # %entry
; SMALLCM64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @c_noattr
@@ -137,23 +122,18 @@ entry:
ret ptr %arrayidx
}
-define nonnull ptr @AddrTest4() local_unnamed_addr {
-; SMALLCM64-LABEL: AddrTest4:
-; SMALLCM64: # %bb.0: # %entry
-; SMALLCM64-NEXT: addi r3, r13, c[TL]@le+56
-; SMALLCM64-NEXT: blr
-;
-; LARGECM64-LABEL: AddrTest4:
-; LARGECM64: # %bb.0: # %entry
-; LARGECM64-NEXT: addi r3, r13, c[TL]@le+56
-; LARGECM64-NEXT: blr
+define nonnull ptr @AddrTest4() {
+; COMMONCM-LABEL: AddrTest4:
+; COMMONCM: # %bb.0: # %entry
+; COMMONCM-NEXT: addi r3, r13, c[TL]@le+56
+; COMMONCM-NEXT: blr
entry:
%tls0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @c)
%arrayidx = getelementptr inbounds [87 x i64], ptr %tls0, i64 0, i64 7
ret ptr %arrayidx
}
-define nonnull ptr @AddrTest4_NoAttr() local_unnamed_addr {
+define nonnull ptr @AddrTest4_NoAttr() {
; SMALLCM64-LABEL: AddrTest4_NoAttr:
; SMALLCM64: # %bb.0: # %entry
; SMALLCM64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tprel) @c_noattr
@@ -174,23 +154,18 @@ entry:
ret ptr %arrayidx
}
-define nonnull ptr @AddrTest5() local_unnamed_addr {
-; SMALLCM64-LABEL: AddrTest5:
-; SMALLCM64: # %bb.0: # %entry
-; SMALLCM64-NEXT: addi r3, r13, e[TL]@le+48
-; SMALLCM64-NEXT: blr
-;
-; LARGECM64-LABEL: AddrTest5:
-; LARGECM64: # %bb.0: # %entry
-; LARGECM64-NEXT: addi r3, r13, e[TL]@le+48
-; LARGECM64-NEXT: blr
+define nonnull ptr @AddrTest5() {
+; COMMONCM-LABEL: AddrTest5:
+; COMMONCM: # %bb.0: # %entry
+; COMMONCM-NEXT: addi r3, r13, e[TL]@le+48
+; COMMONCM-NEXT: blr
entry:
%tls0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @e)
%arrayidx = getelementptr inbounds [87 x double], ptr %tls0, i64 0, i64 6
ret ptr %arrayidx
}
-define nonnull ptr @AddrTest5_NoAttr() local_unnamed_addr {
+define nonnull ptr @AddrTest5_NoAttr() {
; SMALLCM64-LABEL: AddrTest5_NoAttr:
; SMALLCM64: # %bb.0: # %entry
; SMALLCM64-NEXT: ld r3, L..C3(r2) # target-flags(ppc-tprel) @e_noattr
@@ -211,23 +186,18 @@ entry:
ret ptr %arrayidx
}
-define nonnull ptr @AddrTest6() local_unnamed_addr {
-; SMALLCM64-LABEL: AddrTest6:
-; SMALLCM64: # %bb.0: # %entry
-; SMALLCM64-NEXT: addi r3, r13, f[TL]@le+16
-; SMALLCM64-NEXT: blr
-;
-; LARGECM64-LABEL: AddrTest6:
-; LARGECM64: # %bb.0: # %entry
-; LARGECM64-NEXT: addi r3, r13, f[TL]@le+16
-; LARGECM64-NEXT: blr
+define nonnull ptr @AddrTest6() {
+; COMMONCM-LABEL: AddrTest6:
+; COMMONCM: # %bb.0: # %entry
+; COMMONCM-NEXT: addi r3, r13, f[TL]@le+16
+; COMMONCM-NEXT: blr
entry:
%tls0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @f)
%arrayidx = getelementptr inbounds [87 x float], ptr %tls0, i64 0, i64 4
ret ptr %arrayidx
}
-define nonnull ptr @AddrTest6_NoAttr() local_unnamed_addr {
+define nonnull ptr @AddrTest6_NoAttr() {
; SMALLCM64-LABEL: AddrTest6_NoAttr:
; SMALLCM64: # %bb.0: # %entry
; SMALLCM64-NEXT: ld r3, L..C4(r2) # target-flags(ppc-tprel) @f_noattr
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll
index 70ae7f974d4be5..1e4a3b9bcc47c0 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll
@@ -1,11 +1,10 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
; RUN: -mtriple powerpc64-ibm-aix-xcoff -mattr=+aix-small-local-exec-tls < %s \
-; RUN: | FileCheck %s --check-prefix=SMALL-LOCAL-EXEC-SMALLCM64
+; RUN: | FileCheck %s --check-prefixes=COMMONCM,SMALL-LOCAL-EXEC-SMALLCM64
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
; RUN: -mattr=+aix-small-local-exec-tls < %s | FileCheck %s \
-; RUN: --check-prefix=SMALL-LOCAL-EXEC-LARGECM64
+; RUN: --check-prefixes=COMMONCM,SMALL-LOCAL-EXEC-LARGECM64
@mySmallTLS = thread_local(localexec) global [7800 x i64] zeroinitializer, align 8 #0
@mySmallTLS2 = thread_local(localexec) global [3000 x i64] zeroinitializer, align 8 #0
@@ -16,36 +15,24 @@ declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
; aix-small-local-exec-tls target attribute is turned on, all accesses will use
; a "faster" local-exec sequence directly off the thread pointer.
define i64 @StoreLargeAccess1() {
-; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: StoreLargeAccess1:
-; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @mySmallTLS
+; COMMONCM-LABEL: StoreLargeAccess1:
+; COMMONCM-NEXT: # %bb.0: # %entry
+; SMALL-LOCAL-EXEC-SMALLCM64: ld r3, L..C0(r2) # target-flags(ppc-tprel) @mySmallTLS
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 0
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r5, 23
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ori r4, r4, 53328
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r3, r13, r3
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stdx r5, r3, r4
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 55
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 64
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r3, (mySmallTLS2[TL]@le+696)-65536(r13)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 142
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r4, (mySmallTLS3[TL]@le+20000)-131072(r13)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
-;
-; SMALL-LOCAL-EXEC-LARGECM64-LABEL: StoreLargeAccess1:
-; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2)
+; SMALL-LOCAL-EXEC-LARGECM64: addis r3, L..C0@u(r2)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 0
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r5, 23
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ori r4, r4, 53328
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r3, r13, r3
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stdx r5, r3, r4
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 55
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 64
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r3, (mySmallTLS2[TL]@le+696)-65536(r13)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 142
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r4, (mySmallTLS3[TL]@le+20000)-131072(r13)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
+; COMMONCM: ori r4, r4, 53328
+; COMMONCM-NEXT: add r3, r13, r3
+; COMMONCM-NEXT: stdx r5, r3, r4
+; COMMONCM-NEXT: li r3, 55
+; COMMONCM-NEXT: li r4, 64
+; COMMONCM-NEXT: std r3, (mySmallTLS2[TL]@le+696)-65536(r13)
+; COMMONCM-NEXT: li r3, 142
+; COMMONCM-NEXT: std r4, (mySmallTLS3[TL]@le+20000)-131072(r13)
+; COMMONCM-NEXT: blr
entry:
%tls0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS)
%arrayidx = getelementptr inbounds i8, ptr %tls0, i32 53328
>From bff96975cca1f017dbf0d43e7514181fc11a13ed Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1 at ibm.com>
Date: Tue, 26 Mar 2024 15:22:55 -0500
Subject: [PATCH 7/7] Set -aix-small-local-exec-tls on
llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-loadaddr.ll
---
.../PowerPC/aix-small-tls-globalvarattr-loadaddr.ll | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-loadaddr.ll b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-loadaddr.ll
index 237c2d2e473cf2..c8537fba6a3cff 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-loadaddr.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-loadaddr.ll
@@ -1,10 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
-; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s \
-; RUN: | FileCheck %s --check-prefixes=COMMONCM,SMALLCM64
+; RUN: -mtriple powerpc64-ibm-aix-xcoff -mattr=-aix-small-local-exec-tls \
+; RUN: < %s | FileCheck %s --check-prefixes=COMMONCM,SMALLCM64
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
-; RUN: < %s | FileCheck %s --check-prefixes=COMMONCM,LARGECM64
+; RUN: -mattr=-aix-small-local-exec-tls < %s | \
+; RUN: FileCheck %s --check-prefixes=COMMONCM,LARGECM64
; Test that the 'aix-small-tls' global variable attribute generates the
; optimized small-local-exec TLS sequence. Global variables without this
More information about the llvm-commits
mailing list