[llvm] [AIX][TLS] Optimize the small local-exec access sequence for non-zero offsets (PR #71485)
Amy Kwan via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 30 21:49:29 PST 2023
https://github.com/amy-kwan updated https://github.com/llvm/llvm-project/pull/71485
>From eac1c19a52b089c1f4175403b4c9bbfba0e839a1 Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1 at ibm.com>
Date: Mon, 6 Nov 2023 22:19:58 -0600
Subject: [PATCH 1/7] [AIX][TLS] Optimize the -maix-small-local-exec-tls
local-exec access sequence for non-zero offsets
This patch utilizes the -maix-small-local-exec-tls option to produce a faster,
non-TOC-based access sequence for the local-exec TLS model, specifically for
when the offsets from the TLS variable are non-zero.
In particular, this patch produces either a single:
- addi/la with a displacement off of R13 plus a non-zero offset for
when an address is calculated, or
- load or store off of R13 plus a non-zero offset for when an address is
calculated and used for further access
where R13 is the thread pointer.
In order to produce a single addi or load/store off of the thread pointer with
a non-zero offset, this patch also adds the necessary support in the assembly
printer when printing these instructions.
Specifically:
- The non-zero offset is added to the TLS variable address when the address of
the TLS variable + its offset is less than 32KB.
- Otherwise, when the address of the TLS variable + its offset is 32KB or
greater, the offset that is written out is the non-zero offset adjusted by a
multiple of 64KB, so that the final displacement wraps back into range.
This handling in the assembly printer is necessary to ensure that the TLS
address + the non-zero offset is between [-32768, 32768), so that the total
displacement can fit within the addi/load/store instructions.
---
llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 168 +++++++++++++-
llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 69 +++++-
.../PowerPC/aix-small-local-exec-tls-char.ll | 6 +-
.../aix-small-local-exec-tls-double.ll | 6 +-
.../PowerPC/aix-small-local-exec-tls-float.ll | 6 +-
.../PowerPC/aix-small-local-exec-tls-int.ll | 6 +-
.../aix-small-local-exec-tls-largeaccess.ll | 211 ++++++++----------
.../aix-small-local-exec-tls-largeaccess2.ll | 160 +++++++++++++
.../PowerPC/aix-small-local-exec-tls-short.ll | 6 +-
9 files changed, 495 insertions(+), 143 deletions(-)
create mode 100644 llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-largeaccess2.ll
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 4f15ba497d84c45..d2e30adad1f9034 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -66,6 +66,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Threading.h"
@@ -155,6 +156,11 @@ class PPCAsmPrinter : public AsmPrinter {
TOC;
const PPCSubtarget *Subtarget = nullptr;
+ // Keep track of the number of TLS variables and their corresponding
+ // addresses, which is then used for the assembly printing of
+ // non-TOC-based local-exec variables.
+ MapVector<const GlobalValue *, uint64_t> TLSVarsToAddressMapping;
+
public:
explicit PPCAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
@@ -199,6 +205,8 @@ class PPCAsmPrinter : public AsmPrinter {
void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI);
void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
void EmitAIXTlsCallHelper(const MachineInstr *MI);
+ const MCExpr *getAdjustedLocalExecExpr(const MachineOperand &MO,
+ int64_t Offset);
bool runOnMachineFunction(MachineFunction &MF) override {
Subtarget = &MF.getSubtarget<PPCSubtarget>();
bool Changed = AsmPrinter::runOnMachineFunction(MF);
@@ -1514,13 +1522,41 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
case PPC::LWA: {
// Verify alignment is legal, so we don't create relocations
// that can't be supported.
- unsigned OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1;
+ unsigned OpNum;
+ if (Subtarget->hasAIXSmallLocalExecTLS())
+ OpNum = 1;
+ else
+ OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1;
const MachineOperand &MO = MI->getOperand(OpNum);
if (MO.isGlobal()) {
const DataLayout &DL = MO.getGlobal()->getParent()->getDataLayout();
if (MO.getGlobal()->getPointerAlignment(DL) < 4)
llvm_unreachable("Global must be word-aligned for LD, STD, LWA!");
+
+ // A faster non-TOC-based local-exec sequence is represented by
+ // `lwa`/`ld`/`std` directly loading or storing off of the thread
+ // pointer and with an immediate operand having the MO_TPREL_FLAG.
+ // Such instructions do not otherwise arise.
+ if ((MO.getTargetFlags() & PPCII::MO_TPREL_FLAG) != 0) {
+ assert(Subtarget->hasAIXSmallLocalExecTLS() &&
+ "lwa/ld/std with thread-pointer only expected with "
+ "local-exec small TLS");
+ int64_t Offset = MO.getOffset();
+ // Non-zero offsets for lwa/ld/std require special handling and are
+ // handled here.
+ if (!Offset)
+ break;
+
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
+ if (Offset) {
+ const MCExpr *Expr = getAdjustedLocalExecExpr(MO, Offset);
+ TmpInst.getOperand(1) = MCOperand::createExpr(Expr);
+ }
+ EmitToStreamer(*OutStreamer, TmpInst);
+ return;
+ }
}
+
// Now process the instruction normally.
break;
}
@@ -1534,17 +1570,55 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::EnforceIEIO));
return;
}
+ case PPC::LBZ:
+ case PPC::LBZ8:
+ case PPC::LHA:
+ case PPC::LHA8:
+ case PPC::LHZ:
+ case PPC::LHZ8:
+ case PPC::LWZ:
+ case PPC::LWZ8:
+ case PPC::STB:
+ case PPC::STB8:
+ case PPC::STH:
+ case PPC::STH8:
+ case PPC::STW:
+ case PPC::STW8:
+ case PPC::LFS:
+ case PPC::STFS:
+ case PPC::LFD:
+ case PPC::STFD:
case PPC::ADDI8: {
- // The faster non-TOC-based local-exec sequence is represented by `addi`
- // with an immediate operand having the MO_TPREL_FLAG. Such an instruction
- // does not otherwise arise.
- const MachineOperand &MO = MI->getOperand(2);
+ // A faster non-TOC-based local-exec sequence is represented by `addi`
+ // or a load/store instruction (that directly loads or stores off of the
+ // thread pointer) with an immediate operand having the MO_TPREL_FLAG.
+ // Such instructions do not otherwise arise.
+ bool IsMIADDI8 = MI->getOpcode() == PPC::ADDI8;
+ unsigned OpNum = IsMIADDI8 ? 2 : 1;
+ const MachineOperand &MO = MI->getOperand(OpNum);
if ((MO.getTargetFlags() & PPCII::MO_TPREL_FLAG) != 0) {
assert(
Subtarget->hasAIXSmallLocalExecTLS() &&
- "addi with thread-pointer only expected with local-exec small TLS");
+ "addi, or load/stores with thread-pointer only expected with "
+ "local-exec small TLS");
+
+ int64_t Offset = MO.getOffset();
+ // Non-zero offsets for loads/stores require special handling and are
+ // handled here. For `addi`, all offsets are handled here.
+ if (!Offset && !IsMIADDI8)
+ break;
+
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
- TmpInst.setOpcode(PPC::LA8);
+
+ if (Offset) {
+ const MCExpr *Expr = getAdjustedLocalExecExpr(MO, Offset);
+ TmpInst.getOperand(OpNum) = MCOperand::createExpr(Expr);
+ }
+
+ // Change the opcode to load address if the original opcode is an `addi`.
+ if (IsMIADDI8)
+ TmpInst.setOpcode(PPC::LA8);
+
EmitToStreamer(*OutStreamer, TmpInst);
return;
}
@@ -1556,6 +1630,69 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, TmpInst);
}
+// For non-TOC-based local-exec variables that have a non-zero offset,
+// we need to create a new MCExpr that adds the non-zero offset to the address
+// of the local-exec variable that will be used in either an addi, load or
+// store. However, the final displacement for these instructions must be
+// between [-32768, 32768), so if the TLS address + its non-zero offset is
+// greater than 32KB, a new MCExpr is produced to accommodate this situation.
+const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
+ int64_t Offset) {
+ assert(MO.isGlobal() && "Only expecting a global MachineOperand here!");
+ const GlobalValue *GValue = MO.getGlobal();
+ TLSModel::Model Model = TM.getTLSModel(GValue);
+ assert(Model == TLSModel::LocalExec &&
+ "Only local-exec accesses are handled!");
+ MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_PPC_AIX_TLSLE;
+
+ const MCExpr *Expr = MCSymbolRefExpr::create(getSymbol(GValue), RefKind,
+ OutContext);
+
+ bool IsGlobalADeclaration = GValue->isDeclarationForLinker();
+ // Find the GlobalVariable that corresponds to the particular TLS variable
+ // in the TLS variable to address mapping. All TLS variables should exist
+ // within this map, with the exception of TLS variables marked as extern.
+ const auto TLSVarsMapEntryIter = TLSVarsToAddressMapping.find(GValue);
+ if (TLSVarsMapEntryIter == TLSVarsToAddressMapping.end())
+ assert(IsGlobalADeclaration &&
+ "Only expecting to find extern TLS variables not present in the TLS "
+ "variables to address map!");
+
+ unsigned TLSVarAddress = TLSVarsMapEntryIter->second;
+ ptrdiff_t FinalAddress = (TLSVarAddress + Offset);
+ // If the address of the TLS variable + the offset is less than 32KB,
+ // or if the TLS variable is extern, we simply produce an MCExpr to add the
+ // non-zero offset to the TLS variable address.
+ // For when TLS variables are extern, this is safe to do because we can
+ // assume that the address of extern TLS variables are zero.
+ if ((FinalAddress < 32768) || IsGlobalADeclaration)
+ Expr = MCBinaryExpr::createAdd(Expr,
+ MCConstantExpr::create(Offset, OutContext),
+ OutContext);
+ else {
+ // Handle the written offset for cases where:
+ // address of the TLS variable + the offset is greater than 32KB.
+
+ // Get the address in the range of 0 to 64KB.
+ FinalAddress = FinalAddress & 0xFFFF;
+ // If the highest bit in the calculated address is set, subtract
+ // additional 64KB to ensure that the final address fits within
+ // [-32768,32768).
+ if (FinalAddress & 0x8000)
+ FinalAddress = FinalAddress - 0x10000;
+ assert((FinalAddress < 32768) || (FinalAddress >= -32768) &&
+ "Expecting the final address for local-exec TLS variables to be "
+ "between [-32768,32768)!");
+ // Get the offset that is actually written out in assembly by adding back
+ // the original address of the TLS variable.
+ ptrdiff_t WrittenOffset = FinalAddress - TLSVarAddress;
+ Expr = MCBinaryExpr::createAdd(
+ Expr, MCConstantExpr::create(WrittenOffset, OutContext), OutContext);
+ }
+
+ return Expr;
+}
+
void PPCLinuxAsmPrinter::emitGNUAttributes(Module &M) {
// Emit float ABI into GNU attribute
Metadata *MD = M.getModuleFlag("float-abi");
@@ -2758,6 +2895,23 @@ bool PPCAIXAsmPrinter::doInitialization(Module &M) {
Csect->ensureMinAlignment(GOAlign);
};
+ // For all TLS variables, calculate their corresponding addresses and store
+ // them into TLSVarsToAddressMapping, which will be used to determine whether
+ // or not local-exec TLS variables require special assembly printing.
+ // This address calculation follows the same method seen within
+ // assignAddressesAndIndices() in XCOFFObjectWriter.cpp.
+ uint64_t Address = 0;
+ uint64_t TLSVarAddress = 0;
+ auto DL = M.getDataLayout();
+ for (const auto &G : M.globals()) {
+ if (G.isThreadLocal() && !G.isDeclaration()) {
+ TLSVarAddress = alignTo(Address, getGVAlignment(&G, DL));
+ unsigned GVSize = DL.getTypeAllocSize(G.getValueType());
+ Address = TLSVarAddress + GVSize;
+ TLSVarsToAddressMapping[&G] = TLSVarAddress;
+ }
+ }
+
// We need to know, up front, the alignment of csects for the assembly path,
// because once a .csect directive gets emitted, we could not change the
// alignment value on it.
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index b57d185bb638b8c..1c44320177e61d9 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -7567,8 +7567,64 @@ static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0));
}
+// For non-TOC-based local-exec access where an addi is feeding into another
+// addi, fold this sequence into a single addi if possible.
+static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
+ const PPCSubtarget &Subtarget =
+ DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
+ // This optimization is only performed for non-TOC-based local-exec accesses.
+ if (!Subtarget.hasAIXSmallLocalExecTLS())
+ return;
+
+ if (N->getMachineOpcode() != PPC::ADDI8)
+ return;
+
+ // InitialADDI is the addi feeding into N (also an addi), and the addi that
+ // we want optimized out.
+ SDValue InitialADDI = N->getOperand(0);
+ if (!InitialADDI.isMachineOpcode())
+ return;
+ if (InitialADDI.getMachineOpcode() != PPC::ADDI8)
+ return;
+
+ // The first operand of the InitialADDI will be the thread pointer.
+ // This transformation is only performed if the first operand of the
+ // addi is the thread pointer.
+ SDValue TPRegNode = InitialADDI.getOperand(0);
+ RegisterSDNode *TPReg =
+ dyn_cast_or_null<RegisterSDNode>(TPRegNode.getNode());
+ if (!TPReg)
+ return;
+ if (TPReg->getReg() != Subtarget.getThreadPointerRegister())
+ return;
+
+ // The second operand of the InitialADDI will be a TargetGlobalTLSAddress,
+ // (the local-exec TLS variable). We only perform the folding if the TLS
+ // variable is the second operand.
+ SDValue TLSVarNode = InitialADDI.getOperand(1);
+ GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
+ if (!GA)
+ return;
+
+ unsigned TargetFlags = GA->getTargetFlags();
+ if ((TargetFlags & PPCII::MO_TPREL_FLAG) == 0)
+ return;
+ // The second operand of the addi that we want to preserve will be an
+ // immediate. We add this immediate together with the address of the TLS
+ // variable found in InitialADDI in order to preserve the correct TLS address
+ // information during assembly printing.
+ int Offset = N->getConstantOperandVal(1);
+ TLSVarNode = DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA), MVT::i64,
+ Offset, TargetFlags);
+
+ (void)DAG->UpdateNodeOperands(N, TPRegNode, TLSVarNode);
+ if (InitialADDI.getNode()->use_empty())
+ DAG->RemoveDeadNode(InitialADDI.getNode());
+}
+
void PPCDAGToDAGISel::PeepholePPC64() {
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
+ bool HasAIXSmallLocalExecTLS = Subtarget->hasAIXSmallLocalExecTLS();
while (Position != CurDAG->allnodes_begin()) {
SDNode *N = &*--Position;
@@ -7579,6 +7635,8 @@ void PPCDAGToDAGISel::PeepholePPC64() {
if (isVSXSwap(SDValue(N, 0)))
reduceVSXSwap(N, CurDAG);
+ foldADDIForLocalExecAccesses(N, CurDAG);
+
unsigned FirstOp;
unsigned StorageOpcode = N->getMachineOpcode();
bool RequiresMod4Offset = false;
@@ -7735,7 +7793,16 @@ void PPCDAGToDAGISel::PeepholePPC64() {
ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
ImmOpnd.getValueType());
} else if (Offset != 0) {
- continue;
+ if (!HasAIXSmallLocalExecTLS)
+ continue;
+ // Add the non-zero offset information into the load or store
+ // instruction to be used for non-TOC-based local-exec accesses.
+ GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd);
+ if (!GA)
+ continue;
+ ImmOpnd = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
+ MVT::i64, Offset,
+ GA->getTargetFlags());
}
}
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-char.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-char.ll
index 6c05fb38ee16d83..c938b9485c25732 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-char.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-char.ll
@@ -16,14 +16,12 @@ declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: AddrTest1:
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, c[TL]@le(r13)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r3, 1
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, c[TL]@le+1(r13)
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
;
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: AddrTest1:
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, c[TL]@le(r13)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r3, r3, 1
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, c[TL]@le+1(r13)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
entry:
%0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @c)
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-double.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-double.ll
index 5cf359f68f8bd11..02d794fec75cc98 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-double.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-double.ll
@@ -16,14 +16,12 @@ declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: AddrTest1:
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, f[TL]@le(r13)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r3, 48
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, f[TL]@le+48(r13)
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
;
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: AddrTest1:
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, f[TL]@le(r13)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r3, r3, 48
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, f[TL]@le+48(r13)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
entry:
%0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @f)
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-float.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-float.ll
index 1fc014edaf2bb5f..a1f6f4f974bd818 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-float.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-float.ll
@@ -16,14 +16,12 @@ declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: AddrTest1:
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, e[TL]@le(r13)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r3, 16
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, e[TL]@le+16(r13)
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
;
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: AddrTest1:
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, e[TL]@le(r13)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r3, r3, 16
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, e[TL]@le+16(r13)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
entry:
%0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @e)
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-int.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-int.ll
index 40adf27d7ee39b3..c74abe93c18bf30 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-int.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-int.ll
@@ -18,14 +18,12 @@ declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: AddrTest1:
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, a[TL]@le(r13)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r3, 12
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, a[TL]@le+12(r13)
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
;
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: AddrTest1:
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, a[TL]@le(r13)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r3, r3, 12
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, a[TL]@le+12(r13)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
entry:
%0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @a)
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-largeaccess.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-largeaccess.ll
index 55c69839515c439..3aa3ecc9f2b0d10 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-largeaccess.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-largeaccess.ll
@@ -25,43 +25,33 @@ declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
define signext i32 @StoreArrays1() {
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: StoreArrays1:
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, mySmallLocalExecTLSv1[TL]@le(r13)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 1
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r5, 4
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r4, mySmallLocalExecTLSv1[TL]@le(r13)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r4, mySmallLocalExecTLS2[TL]@le(r13)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r5, 24(r3)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 1
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 4
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, mySmallLocalExecTLSv1[TL]@le(r13)
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 2
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, 320(r4)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, mySmallLocalExecTLS3[TL]@le(r13)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 3
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r4, 324(r3)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, mySmallLocalExecTLS4[TL]@le(r13)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 88
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r5, 328(r3)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, mySmallLocalExecTLS5[TL]@le(r13)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r4, 332(r3)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r4, mySmallLocalExecTLSv1[TL]@le+24(r13)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, mySmallLocalExecTLS2[TL]@le-65216(r13)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 3
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, mySmallLocalExecTLS3[TL]@le-65212(r13)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 88
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r4, mySmallLocalExecTLS4[TL]@le-65208(r13)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, mySmallLocalExecTLS5[TL]@le-65204(r13)
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 102
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
;
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: StoreArrays1:
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, mySmallLocalExecTLSv1[TL]@le(r13)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 1
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r5, 4
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r4, mySmallLocalExecTLSv1[TL]@le(r13)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r4, mySmallLocalExecTLS2[TL]@le(r13)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r5, 24(r3)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 1
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 4
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, mySmallLocalExecTLSv1[TL]@le(r13)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 2
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, 320(r4)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, mySmallLocalExecTLS3[TL]@le(r13)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 3
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r4, 324(r3)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, mySmallLocalExecTLS4[TL]@le(r13)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 88
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r5, 328(r3)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, mySmallLocalExecTLS5[TL]@le(r13)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r4, 332(r3)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r4, mySmallLocalExecTLSv1[TL]@le+24(r13)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, mySmallLocalExecTLS2[TL]@le-65216(r13)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 3
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, mySmallLocalExecTLS3[TL]@le-65212(r13)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 88
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r4, mySmallLocalExecTLS4[TL]@le-65208(r13)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, mySmallLocalExecTLS5[TL]@le-65204(r13)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 102
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
entry:
@@ -98,46 +88,38 @@ entry:
define signext i32 @StoreArrays2() {
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: StoreArrays2:
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tprel) @mySmallLocalExecTLSv2
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 1
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r5, 4
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r3, r13, r3
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r4, 0(r3)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r4, mySmallLocalExecTLS2[TL]@le(r13)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r5, 24(r3)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 2
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, 320(r4)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, mySmallLocalExecTLS3[TL]@le(r13)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r4, L..C0(r2) # target-flags(ppc-tprel) @mySmallLocalExecTLSv2
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 1
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r4, r13, r4
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, 0(r4)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 4
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, 24(r4)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 2
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r4, mySmallLocalExecTLS2[TL]@le-65216(r13)
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 3
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r4, 324(r3)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, mySmallLocalExecTLS4[TL]@le(r13)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r4, mySmallLocalExecTLS5[TL]@le(r13)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r5, 328(r3)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 88
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, 332(r4)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r4, mySmallLocalExecTLS3[TL]@le-65212(r13)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 88
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, mySmallLocalExecTLS4[TL]@le-65208(r13)
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 102
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r4, mySmallLocalExecTLS5[TL]@le-65204(r13)
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
;
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: StoreArrays2:
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r4, L..C0@u(r2)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 1
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r5, 4
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r4, L..C0@l(r4)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r4, r13, r4
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, 0(r4)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, mySmallLocalExecTLS2[TL]@le(r13)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r5, 24(r4)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 2
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r4, 320(r3)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, mySmallLocalExecTLS3[TL]@le(r13)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 3
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r4, 324(r3)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, mySmallLocalExecTLS4[TL]@le(r13)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r4, mySmallLocalExecTLS5[TL]@le(r13)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r5, 328(r3)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 1
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r3, r13, r3
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r4, 0(r3)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 4
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r4, 24(r3)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 2
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, mySmallLocalExecTLS2[TL]@le-65216(r13)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 3
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, mySmallLocalExecTLS3[TL]@le-65212(r13)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 88
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, 332(r4)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r4, mySmallLocalExecTLS4[TL]@le-65208(r13)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, mySmallLocalExecTLS5[TL]@le-65204(r13)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 102
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
entry:
@@ -173,77 +155,76 @@ entry:
; DIS: {{.*}}aix-small-local-exec-tls-largeaccess.ll.tmp.o: file format aix5coff64-rs6000
; DIS: Disassembly of section .text:
; DIS: 0000000000000000 (idx: 3) .StoreArrays1:
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 3, 13, 0
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 3, 1
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 4, 4
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 3, 0(13)
; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 15) mySmallLocalExecTLSv1[TL]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 4, 1
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 5, 4
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 4, 0(13)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 3, 2
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 4, 24(13)
; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 15) mySmallLocalExecTLSv1[TL]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 4, 13, 32748
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 3, -32468(13)
; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 17) mySmallLocalExecTLS2[TL]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 5, 24(3)
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 3, 2
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 3, 320(4)
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 3, 13, -16788
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 3, 3
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 3, -16464(13)
; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 19) mySmallLocalExecTLS3[TL]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 4, 3
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 4, 324(3)
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 3, 13, -788
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 3, 88
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 4, -460(13)
; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 21) mySmallLocalExecTLS4[TL]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 4, 88
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 5, 328(3)
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 3, 13, 15212
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 3, 15544(13)
; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 23) mySmallLocalExecTLS5[TL]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 4, 332(3)
; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 3, 102
; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} blr
-; DIS: 0000000000000050 (idx: 5) .StoreArrays2:
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addis 4, 2, 0
+; DIS: 0000000000000040 (idx: 5) .StoreArrays2:
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addis 3, 2, 0
; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 13) mySmallLocalExecTLSv2[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 3, 1
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 5, 4
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 4, 0(4)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 4, 1
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 3, 0(3)
; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 13) mySmallLocalExecTLSv2[TE]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 4, 13, 4
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 3, 0(4)
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 3, 13, 32748
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 13, 3
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 4, 0(3)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 4, 4
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 4, 24(3)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 3, 2
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 3, -32468(13)
; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 17) mySmallLocalExecTLS2[TL]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 5, 24(4)
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 4, 2
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 4, 320(3)
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 3, 13, -16788
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 3, 3
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 3, -16464(13)
; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 19) mySmallLocalExecTLS3[TL]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 4, 3
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 4, 324(3)
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 3, 13, -788
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 3, 88
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 4, -460(13)
; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 21) mySmallLocalExecTLS4[TL]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 4, 13, 15212
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 3, 15544(13)
; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 23) mySmallLocalExecTLS5[TL]
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 5, 328(3)
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 3, 88
-; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stw 3, 332(4)
; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 3, 102
; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} blr
; DIS: Disassembly of section .data:
-; DIS: 00000000000000a0 (idx: 7) StoreArrays1[DS]:
+; DIS: 0000000000000080 (idx: 7) StoreArrays1[DS]:
+; DIS-NEXT: 80: 00 00 00 00
+; DIS-NEXT: 0000000000000080: R_POS (idx: 3) .StoreArrays1
+; DIS-NEXT: 84: 00 00 00 00
+; DIS-NEXT: 88: 00 00 00 00
+; DIS-NEXT: 0000000000000088: R_POS (idx: 11) TOC[TC0]
+; DIS-NEXT: 8c: 00 00 00 b0
+
+; DIS: 0000000000000098 (idx: 9) StoreArrays2[DS]:
+; DIS-NEXT: 98: 00 00 00 00
+; DIS-NEXT: 0000000000000098: R_POS (idx: 5) .StoreArrays2
+; DIS-NEXT: 9c: 00 00 00 40
; DIS-NEXT: a0: 00 00 00 00
-; DIS-NEXT: 00000000000000a0: R_POS (idx: 3) .StoreArrays1
-; DIS-NEXT: a4: 00 00 00 00
-; DIS-NEXT: a8: 00 00 00 00
-; DIS-NEXT: 00000000000000a8: R_POS (idx: 11) TOC[TC0]
-; DIS-NEXT: ac: 00 00 00 d0
+; DIS-NEXT: 00000000000000a0: R_POS (idx: 11) TOC[TC0]
+; DIS-NEXT: a4: 00 00 00 b0
-; DIS: 00000000000000b8 (idx: 9) StoreArrays2[DS]:
-; DIS-NEXT: b8: 00 00 00 00
-; DIS-NEXT: 00000000000000b8: R_POS (idx: 5) .StoreArrays2
-; DIS-NEXT: bc: 00 00 00 50
-; DIS-NEXT: c0: 00 00 00 00
-; DIS-NEXT: 00000000000000c0: R_POS (idx: 11) TOC[TC0]
-; DIS-NEXT: c4: 00 00 00 d0
+; DIS: 00000000000000b0 (idx: 13) mySmallLocalExecTLSv2[TE]:
+; DIS-NEXT: b0: 00 00 00 00
+; DIS-NEXT: 00000000000000b0: R_TLS_LE (idx: 25) mySmallLocalExecTLSv2[TL]
+; DIS-NEXT: b4: 00 01 79 ec
-; DIS: 00000000000000d0 (idx: 13) mySmallLocalExecTLSv2[TE]:
-; DIS-NEXT: d0: 00 00 00 00
-; DIS-NEXT: 00000000000000d0: R_TLS_LE (idx: 25) mySmallLocalExecTLSv2[TL]
-; DIS-NEXT: d4: 00 01 79 ec
+; DIS: Disassembly of section .tdata:
+; DIS: 0000000000000000 (idx: 15) mySmallLocalExecTLSv1[TL]:
+; DIS: 0000000000007fec (idx: 17) mySmallLocalExecTLS2[TL]:
+; DIS: 000000000000be6c (idx: 19) mySmallLocalExecTLS3[TL]:
+; DIS: 000000000000fcec (idx: 21) mySmallLocalExecTLS4[TL]:
+; DIS: 0000000000013b6c (idx: 23) mySmallLocalExecTLS5[TL]:
+; DIS: 00000000000179ec (idx: 25) mySmallLocalExecTLSv2[TL]:
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-largeaccess2.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-largeaccess2.ll
new file mode 100644
index 000000000000000..c87b7acb6211c61
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-largeaccess2.ll
@@ -0,0 +1,160 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN: -mtriple powerpc64-ibm-aix-xcoff -mattr=+aix-small-local-exec-tls < %s \
+; RUN: | FileCheck %s --check-prefix=SMALL-LOCAL-EXEC-SMALLCM64
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN: -mattr=+aix-small-local-exec-tls < %s | FileCheck %s \
+; RUN: --check-prefix=SMALL-LOCAL-EXEC-LARGECM64
+
+; Test disassembly of object.
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+aix-small-local-exec-tls \
+; RUN: -mtriple powerpc64-ibm-aix-xcoff -xcoff-traceback-table=false \
+; RUN: --code-model=large -filetype=obj -o %t.o < %s
+; RUN: llvm-objdump -D -r --symbol-description %t.o | FileCheck --check-prefix=DIS %s
+
+ at mySmallLocalExecTLS6 = external thread_local(localexec) global [60 x i64], align 8
+ at mySmallLocalExecTLS2 = thread_local(localexec) global [3000 x i64] zeroinitializer, align 8
+ at MyTLSGDVar = thread_local global [800 x i64] zeroinitializer, align 8
+ at mySmallLocalExecTLS3 = thread_local(localexec) global [3000 x i64] zeroinitializer, align 8
+ at mySmallLocalExecTLS4 = thread_local(localexec) global [3000 x i64] zeroinitializer, align 8
+ at mySmallLocalExecTLS5 = thread_local(localexec) global [3000 x i64] zeroinitializer, align 8
+ at mySmallLocalExecTLS = thread_local(localexec) local_unnamed_addr global [7800 x i64] zeroinitializer, align 8
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
+
+; All local-exec accesses use a "faster" sequence directly off the thread pointer; the general-dynamic MyTLSGDVar access still calls .__tls_get_addr.
+define i64 @StoreLargeAccess1() {
+; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: StoreLargeAccess1:
+; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: mflr r0
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stdu r1, -48(r1)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 212
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 203
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r0, 64(r1)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r3, mySmallLocalExecTLS6[UL]@le+424(r13)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r4, mySmallLocalExecTLS2[TL]@le+1200(r13)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-lo) @MyTLSGDVar
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r4, L..C1(r2) # target-flags(ppc-tlsgd) @MyTLSGDVar
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: bla .__tls_get_addr[PR]
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 44
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r4, 440(r3)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 6
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 100
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r3, mySmallLocalExecTLS3[TL]@le+2000(r13)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 882
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r4, mySmallLocalExecTLS4[TL]@le-58736(r13)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r3, mySmallLocalExecTLS5[TL]@le-57136(r13)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 1191
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r1, r1, 48
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r0, 16(r1)
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: mtlr r0
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
+;
+; SMALL-LOCAL-EXEC-LARGECM64-LABEL: StoreLargeAccess1:
+; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: mflr r0
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stdu r1, -48(r1)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 212
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r0, 64(r1)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r4, L..C0 at u(r2)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r4, L..C0 at l(r4)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r3, mySmallLocalExecTLS6[UL]@le+424(r13)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 203
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r3, mySmallLocalExecTLS2[TL]@le+1200(r13)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C1 at u(r2)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C1 at l(r3)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: bla .__tls_get_addr[PR]
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 44
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r4, 440(r3)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 6
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 100
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r3, mySmallLocalExecTLS3[TL]@le+2000(r13)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 882
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r4, mySmallLocalExecTLS4[TL]@le-58736(r13)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r3, mySmallLocalExecTLS5[TL]@le-57136(r13)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 1191
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r1, r1, 48
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r0, 16(r1)
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: mtlr r0
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
+entry:
+ %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS6)
+ %arrayidx = getelementptr inbounds [60 x i64], ptr %0, i64 0, i64 53
+ store i64 212, ptr %arrayidx, align 8
+ %1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS2)
+ %arrayidx1 = getelementptr inbounds [3000 x i64], ptr %1, i64 0, i64 150
+ store i64 203, ptr %arrayidx1, align 8
+ %2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @MyTLSGDVar)
+ %arrayidx2 = getelementptr inbounds [800 x i64], ptr %2, i64 0, i64 55
+ store i64 44, ptr %arrayidx2, align 8
+ %3 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS3)
+ %arrayidx3 = getelementptr inbounds [3000 x i64], ptr %3, i64 0, i64 250
+ store i64 6, ptr %arrayidx3, align 8
+ %4 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS4)
+ %arrayidx4 = getelementptr inbounds [3000 x i64], ptr %4, i64 0, i64 850
+ store i64 100, ptr %arrayidx4, align 8
+ %5 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS5)
+ %arrayidx5 = getelementptr inbounds [3000 x i64], ptr %5, i64 0, i64 1050
+ store i64 882, ptr %arrayidx5, align 8
+ %6 = load i64, ptr %arrayidx1, align 8
+ %7 = load i64, ptr %arrayidx3, align 8
+ %8 = load i64, ptr %arrayidx4, align 8
+ %add = add i64 %6, 882
+ %add9 = add i64 %add, %7
+ %add11 = add i64 %add9, %8
+ ret i64 %add11
+}
+
+; DIS: 0000000000000000 (idx: 7) .StoreLargeAccess1:
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} mflr 0
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} stdu 1, -48(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 3, 212
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} std 0, 64(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addis 4, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 13) MyTLSGDVar[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 4, 0(4)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 13) MyTLSGDVar[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} std 3, 424(13)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 1) mySmallLocalExecTLS6[UL]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 3, 203
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} std 3, 1200(13)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 17) mySmallLocalExecTLS2[TL]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addis 3, 2, 0
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 15) .MyTLSGDVar[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 3, 8(3)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: 15) .MyTLSGDVar[TE]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} bla 0
+; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 3) .__tls_get_addr[PR]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 4, 44
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} std 4, 440(3)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 3, 6
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 4, 100
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} std 3, 32400(13)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 21) mySmallLocalExecTLS3[TL]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 3, 882
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} std 4, -4336(13)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 23) mySmallLocalExecTLS4[TL]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} std 3, 21264(13)
+; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TLS_LE (idx: 25) mySmallLocalExecTLS5[TL]
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} li 3, 1191
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addi 1, 1, 48
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} ld 0, 16(1)
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} mtlr 0
+; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} blr
+
+; DIS: Disassembly of section .data:
+; DIS: 0000000000000068 (idx: 9) StoreLargeAccess1[DS]:
+; DIS-NEXT: 68: 00 00 00 00
+; DIS-NEXT: 0000000000000068: R_POS (idx: 7) .StoreLargeAccess1
+; DIS-NEXT: 6c: 00 00 00 00
+; DIS-NEXT: 70: 00 00 00 00
+; DIS-NEXT: 0000000000000070: R_POS (idx: 11) TOC[TC0]
+; DIS-NEXT: 74: 00 00 00 80
+
+; DIS: Disassembly of section .tdata:
+; DIS: 0000000000000000 (idx: 17) mySmallLocalExecTLS2[TL]:
+; DIS: 0000000000005dc0 (idx: 19) MyTLSGDVar[TL]:
+; DIS: 00000000000076c0 (idx: 21) mySmallLocalExecTLS3[TL]:
+; DIS: 000000000000d480 (idx: 23) mySmallLocalExecTLS4[TL]:
+; DIS: 0000000000013240 (idx: 25) mySmallLocalExecTLS5[TL]:
+; DIS: 0000000000019000 (idx: 27) mySmallLocalExecTLS[TL]:
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-short.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-short.ll
index bf1b7fab308149c..b172c2985e69534 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-short.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-short.ll
@@ -16,14 +16,12 @@ declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: AddrTest1:
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, b[TL]@le(r13)
-; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r3, 4
+; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, b[TL]@le+4(r13)
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
;
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: AddrTest1:
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, b[TL]@le(r13)
-; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r3, r3, 4
+; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, b[TL]@le+4(r13)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
entry:
%0 = tail call align 2 ptr @llvm.threadlocal.address.p0(ptr align 2 @b)
>From 397887ca9fe449f648e9a7cfb2936eba0322515a Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1 at ibm.com>
Date: Mon, 6 Nov 2023 22:36:01 -0600
Subject: [PATCH 2/7] Apply formatting changes
---
llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 25 ++++++++++-----------
llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 8 +++----
2 files changed, 15 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index d2e30adad1f9034..02a2d4d884080ce 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -68,8 +68,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Process.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Threading.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -1597,10 +1597,9 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
unsigned OpNum = IsMIADDI8 ? 2 : 1;
const MachineOperand &MO = MI->getOperand(OpNum);
if ((MO.getTargetFlags() & PPCII::MO_TPREL_FLAG) != 0) {
- assert(
- Subtarget->hasAIXSmallLocalExecTLS() &&
- "addi, or load/stores with thread-pointer only expected with "
- "local-exec small TLS");
+ assert(Subtarget->hasAIXSmallLocalExecTLS() &&
+ "addi, or load/stores with thread-pointer only expected with "
+ "local-exec small TLS");
int64_t Offset = MO.getOffset();
// Non-zero offsets for loads/stores require special handling and are
@@ -1645,8 +1644,8 @@ const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
"Only local-exec accesses are handled!");
MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_PPC_AIX_TLSLE;
- const MCExpr *Expr = MCSymbolRefExpr::create(getSymbol(GValue), RefKind,
- OutContext);
+ const MCExpr *Expr =
+ MCSymbolRefExpr::create(getSymbol(GValue), RefKind, OutContext);
bool IsGlobalADeclaration = GValue->isDeclarationForLinker();
// Find the GlobalVariable that corresponds to the particular TLS variable
@@ -1666,9 +1665,8 @@ const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
// For when TLS variables are extern, this is safe to do because we can
// assume that the address of extern TLS variables are zero.
if ((FinalAddress < 32768) || IsGlobalADeclaration)
- Expr = MCBinaryExpr::createAdd(Expr,
- MCConstantExpr::create(Offset, OutContext),
- OutContext);
+ Expr = MCBinaryExpr::createAdd(
+ Expr, MCConstantExpr::create(Offset, OutContext), OutContext);
else {
// Handle the written offset for cases where:
// address of the TLS variable + the offset is greater than 32KB.
@@ -1680,9 +1678,10 @@ const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
// [-32768,32768).
if (FinalAddress & 0x8000)
FinalAddress = FinalAddress - 0x10000;
- assert((FinalAddress < 32768) || (FinalAddress >= -32768) &&
- "Expecting the final address for local-exec TLS variables to be "
- "between [-32768,32768)!");
+ assert((FinalAddress < 32768) ||
+ (FinalAddress >= -32768) &&
+ "Expecting the final address for local-exec TLS variables to be "
+ "between [-32768,32768)!");
// Get the offset that is actually written out in assembly by adding back
// the original address of the TLS variable.
ptrdiff_t WrittenOffset = FinalAddress - TLSVarAddress;
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 1c44320177e61d9..32e0015e0d5a291 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -7591,8 +7591,7 @@ static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
// This transformation is only performed if the first operand of the
// addi is the thread pointer.
SDValue TPRegNode = InitialADDI.getOperand(0);
- RegisterSDNode *TPReg =
- dyn_cast_or_null<RegisterSDNode>(TPRegNode.getNode());
+ RegisterSDNode *TPReg = dyn_cast_or_null<RegisterSDNode>(TPRegNode.getNode());
if (!TPReg)
return;
if (TPReg->getReg() != Subtarget.getThreadPointerRegister())
@@ -7800,9 +7799,8 @@ void PPCDAGToDAGISel::PeepholePPC64() {
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd);
if (!GA)
continue;
- ImmOpnd = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
- MVT::i64, Offset,
- GA->getTargetFlags());
+ ImmOpnd = CurDAG->getTargetGlobalAddress(
+ GA->getGlobal(), SDLoc(GA), MVT::i64, Offset, GA->getTargetFlags());
}
}
>From 54b586550fe510e6651bfd11198d0fdd4b8c4c22 Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1 at ibm.com>
Date: Wed, 15 Nov 2023 23:27:01 -0600
Subject: [PATCH 3/7] Address review comments: fix comments, simplify offset
generation, remove unnecessary breaks
---
llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 81 +++++++++++------------
1 file changed, 37 insertions(+), 44 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 02a2d4d884080ce..c80b4315b653ebc 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -775,6 +775,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
const bool IsPPC64 = Subtarget->isPPC64();
const bool IsAIX = Subtarget->isAIXABI();
+ const bool HasAIXSmallLocalExecTLS = Subtarget->hasAIXSmallLocalExecTLS();
const Module *M = MF->getFunction().getParent();
PICLevel::Level PL = M->getPICLevel();
@@ -1522,11 +1523,15 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
case PPC::LWA: {
// Verify alignment is legal, so we don't create relocations
// that can't be supported.
- unsigned OpNum;
- if (Subtarget->hasAIXSmallLocalExecTLS())
- OpNum = 1;
- else
- OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1;
+ unsigned OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1;
+ // For non-TOC-based local-exec TLS accesses with non-zero offsets, the
+ // machine operand (which is a TargetGlobalTLSAddress) is expected to be
+ // the same operand for both loads and stores.
+ for (const MachineOperand &TempMO : MI->operands()) {
+ if (((TempMO.getTargetFlags() & PPCII::MO_TPREL_FLAG) != 0) &&
+ TempMO.getOperandNo() == 1)
+ OpNum = 1;
+ }
const MachineOperand &MO = MI->getOperand(OpNum);
if (MO.isGlobal()) {
const DataLayout &DL = MO.getGlobal()->getParent()->getDataLayout();
@@ -1538,20 +1543,14 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
// pointer and with an immediate operand having the MO_TPREL_FLAG.
// Such instructions do not otherwise arise.
if ((MO.getTargetFlags() & PPCII::MO_TPREL_FLAG) != 0) {
- assert(Subtarget->hasAIXSmallLocalExecTLS() &&
+ assert(HasAIXSmallLocalExecTLS &&
"lwa/ld/std with thread-pointer only expected with "
"local-exec small TLS");
int64_t Offset = MO.getOffset();
- // Non-zero offsets for lwa/ld/std require special handling and are
- // handled here.
- if (!Offset)
- break;
-
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
- if (Offset) {
- const MCExpr *Expr = getAdjustedLocalExecExpr(MO, Offset);
- TmpInst.getOperand(1) = MCOperand::createExpr(Expr);
- }
+ const MCExpr *Expr = getAdjustedLocalExecExpr(MO, Offset);
+ if (Expr)
+ TmpInst.getOperand(OpNum) = MCOperand::createExpr(Expr);
EmitToStreamer(*OutStreamer, TmpInst);
return;
}
@@ -1597,22 +1596,15 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
unsigned OpNum = IsMIADDI8 ? 2 : 1;
const MachineOperand &MO = MI->getOperand(OpNum);
if ((MO.getTargetFlags() & PPCII::MO_TPREL_FLAG) != 0) {
- assert(Subtarget->hasAIXSmallLocalExecTLS() &&
+ assert(HasAIXSmallLocalExecTLS &&
"addi, or load/stores with thread-pointer only expected with "
"local-exec small TLS");
-
int64_t Offset = MO.getOffset();
- // Non-zero offsets for loads/stores require special handling and are
- // handled here. For `addi`, all offsets are handled here.
- if (!Offset && !IsMIADDI8)
- break;
-
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
- if (Offset) {
- const MCExpr *Expr = getAdjustedLocalExecExpr(MO, Offset);
+ const MCExpr *Expr = getAdjustedLocalExecExpr(MO, Offset);
+ if (Expr)
TmpInst.getOperand(OpNum) = MCOperand::createExpr(Expr);
- }
// Change the opcode to load address if the original opcode is an `addi`.
if (IsMIADDI8)
@@ -1633,7 +1625,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
// we need to create a new MCExpr that adds the non-zero offset to the address
// of the local-exec variable that will be used in either an addi, load or
// store. However, the final displacement for these instructions must be
-// between [-32768, 32768), so if the TLS address + it's non-zero offset is
+// between [-32768, 32768), so if the TLS address + its non-zero offset is
// greater than 32KB, a new MCExpr is produced to accommodate this situation.
const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
int64_t Offset) {
@@ -1644,6 +1636,10 @@ const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
"Only local-exec accesses are handled!");
MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_PPC_AIX_TLSLE;
+ // Non-zero offsets (for loads, stores or `addi`) require additional handling.
+ // When the offset is zero, there is no need to create an adjusted MCExpr.
+ if (!Offset)
+ return nullptr;
const MCExpr *Expr =
MCSymbolRefExpr::create(getSymbol(GValue), RefKind, OutContext);
@@ -1657,7 +1653,8 @@ const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
"Only expecting to find extern TLS variables not present in the TLS "
"variables to address map!");
- unsigned TLSVarAddress = TLSVarsMapEntryIter->second;
+ unsigned TLSVarAddress =
+ IsGlobalADeclaration ? 0 : TLSVarsMapEntryIter->second;
ptrdiff_t FinalAddress = (TLSVarAddress + Offset);
// If the address of the TLS variable + the offset is less than 32KB,
// or if the TLS variable is extern, we simply produce an MCExpr to add the
@@ -1669,24 +1666,20 @@ const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
Expr, MCConstantExpr::create(Offset, OutContext), OutContext);
else {
// Handle the written offset for cases where:
- // address of the TLS variable + the offset is greater than 32KB.
-
- // Get the address in the range of 0 to 64KB.
- FinalAddress = FinalAddress & 0xFFFF;
- // If the highest bit in the calculated address is set, subtract
- // additional 64KB to ensure that the final address fits within
- // [-32768,32768).
- if (FinalAddress & 0x8000)
- FinalAddress = FinalAddress - 0x10000;
- assert((FinalAddress < 32768) ||
- (FinalAddress >= -32768) &&
- "Expecting the final address for local-exec TLS variables to be "
- "between [-32768,32768)!");
- // Get the offset that is actually written out in assembly by adding back
- // the original address of the TLS variable.
- ptrdiff_t WrittenOffset = FinalAddress - TLSVarAddress;
+ // TLS variable address + Offset > 32KB.
+
+ // The assembly that is printed is actually:
+ // TLSVar[storageMappingClass]@le + Offset - Delta
+ // where Delta is a multiple of 64KB: ((FinalAddress + 32768) & ~0xFFFF).
+ ptrdiff_t OffsetDelta = Offset - ((FinalAddress + 32768) & ~0xFFFF);
+ // Check that the total instruction displacement fits within [-32768,32768).
+ ptrdiff_t InstDisp = TLSVarAddress + OffsetDelta;
+ assert((InstDisp < 32768) &&
+ (InstDisp >= -32768) &&
+ "Expecting the instruction displacement for local-exec TLS "
+ "variables to be between [-32768, 32768)!");
Expr = MCBinaryExpr::createAdd(
- Expr, MCConstantExpr::create(WrittenOffset, OutContext), OutContext);
+ Expr, MCConstantExpr::create(OffsetDelta, OutContext), OutContext);
}
return Expr;
>From aefc68b3a1394fa0d5e7dc7cbac8c62bcb148d1b Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1 at ibm.com>
Date: Wed, 22 Nov 2023 09:30:45 -0600
Subject: [PATCH 4/7] Update comments and combine conditions
---
llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 2 +-
llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 20 ++++++++++----------
2 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index c80b4315b653ebc..0cfb42efe158006 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1669,7 +1669,7 @@ const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
// TLS variable address + Offset > 32KB.
// The assembly that is printed is actually:
- // TLSVar[storageMappingClass]@le + Offset - Delta
+ // TLSVar at le + Offset - Delta
// where Delta is a multiple of 64KB: ((FinalAddress + 32768) & ~0xFFFF).
ptrdiff_t OffsetDelta = Offset - ((FinalAddress + 32768) & ~0xFFFF);
// Check that the total instruction displacement fits within [-32768,32768).
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 32e0015e0d5a291..d7c8c4d679c6ac4 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -7582,22 +7582,19 @@ static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
// InitialADDI is the addi feeding into N (also an addi), and the addi that
// we want optimized out.
SDValue InitialADDI = N->getOperand(0);
- if (!InitialADDI.isMachineOpcode())
- return;
- if (InitialADDI.getMachineOpcode() != PPC::ADDI8)
+ if (!InitialADDI.isMachineOpcode() ||
+ (InitialADDI.getMachineOpcode() != PPC::ADDI8))
return;
- // The first operand of the InitialADDI will be the thread pointer.
+ // The first operand of the InitialADDI should be the thread pointer.
// This transformation is only performed if the first operand of the
// addi is the thread pointer.
SDValue TPRegNode = InitialADDI.getOperand(0);
RegisterSDNode *TPReg = dyn_cast_or_null<RegisterSDNode>(TPRegNode.getNode());
- if (!TPReg)
- return;
- if (TPReg->getReg() != Subtarget.getThreadPointerRegister())
+ if (!TPReg || (TPReg->getReg() != Subtarget.getThreadPointerRegister()))
return;
- // The second operand of the InitialADDI will be a TargetGlobalTLSAddress,
+ // The second operand of the InitialADDI should be the global TLS address
// (the local-exec TLS variable). We only perform the folding if the TLS
// variable is the second operand.
SDValue TLSVarNode = InitialADDI.getOperand(1);
@@ -7605,12 +7602,15 @@ static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
if (!GA)
return;
+ // The local-exec TLS variable should only have the MO_TPREL_FLAG target flag,
+ // so this optimization is not performed otherwise if the flag is not set.
unsigned TargetFlags = GA->getTargetFlags();
if ((TargetFlags & PPCII::MO_TPREL_FLAG) == 0)
return;
+
// The second operand of the addi that we want to preserve will be an
- // immediate. We add this immediate together with the address of the TLS
- // variable found in InitialADDI in order to preserve the correct TLS address
+ // immediate. We add this immediate, together with the address of the TLS
+ // variable found in InitialADDI, in order to preserve the correct TLS address
// information during assembly printing.
int Offset = N->getConstantOperandVal(1);
TLSVarNode = DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA), MVT::i64,
>From db2d93d1b16e197c4037442eaea5c23722917f4d Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1 at ibm.com>
Date: Wed, 22 Nov 2023 12:33:12 -0600
Subject: [PATCH 5/7] Remove unnecessary whitespace
---
llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 0cfb42efe158006..543594638d8298a 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1555,7 +1555,6 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
return;
}
}
-
// Now process the instruction normally.
break;
}
>From da681a78d2a5e86509f6d29a1b64c809fe480413 Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1 at ibm.com>
Date: Wed, 29 Nov 2023 11:20:12 -0600
Subject: [PATCH 6/7] Address review comments by pulling out checks for if addi
is eligible for folding
---
llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 6 +-
llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 84 ++++++++++++++-------
2 files changed, 58 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 543594638d8298a..35e17f0ad3bbb3f 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1539,12 +1539,12 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
llvm_unreachable("Global must be word-aligned for LD, STD, LWA!");
// A faster non-TOC-based local-exec sequence is represented by
- // `lwa`/`ld`/`std` directingly loading or storing off of the thread
- // pointer and with an immediate operand having the MO_TPREL_FLAG.
+ // directly loading or storing off of the thread pointer and with
+ // an immediate operand having the MO_TPREL_FLAG.
// Such instructions do not otherwise arise.
if ((MO.getTargetFlags() & PPCII::MO_TPREL_FLAG) != 0) {
assert(HasAIXSmallLocalExecTLS &&
- "lwa/ld/std with thread-pointer only expected with "
+ "loads/stores with thread-pointer only expected with "
"local-exec small TLS");
int64_t Offset = MO.getOffset();
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index d7c8c4d679c6ac4..e0204a8f6ecf6a7 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -7567,51 +7567,76 @@ static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0));
}
-// For non-TOC-based local-exec access where an addi is feeding into another
-// addi, fold this sequence into a single addi if possible.
-static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
+// Is an ADDI eligible for folding for non-TOC-based local-exec accesses?
+static bool isEligibleToFoldADDIForLocalExecAccesses(SDNode *N,
+ SelectionDAG *DAG,
+ SDValue ADDIToFold) {
const PPCSubtarget &Subtarget =
DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
// This optimization is only performed for non-TOC-based local-exec accesses.
if (!Subtarget.hasAIXSmallLocalExecTLS())
- return;
-
- if (N->getMachineOpcode() != PPC::ADDI8)
- return;
+ return false;
- // InitialADDI is the addi feeding into N (also an addi), and the addi that
- // we want optimized out.
- SDValue InitialADDI = N->getOperand(0);
- if (!InitialADDI.isMachineOpcode() ||
- (InitialADDI.getMachineOpcode() != PPC::ADDI8))
- return;
+ // Check if ADDIToFold (the ADDI that we want to fold into local-exec
+ // accesses), is truly an ADDI.
+ if (!ADDIToFold.isMachineOpcode() ||
+ (ADDIToFold.getMachineOpcode() != PPC::ADDI8))
+ return false;
- // The first operand of the InitialADDI should be the thread pointer.
+ // The first operand of the ADDIToFold should be the thread pointer.
// This transformation is only performed if the first operand of the
// addi is the thread pointer.
- SDValue TPRegNode = InitialADDI.getOperand(0);
+ SDValue TPRegNode = ADDIToFold.getOperand(0);
RegisterSDNode *TPReg = dyn_cast_or_null<RegisterSDNode>(TPRegNode.getNode());
if (!TPReg || (TPReg->getReg() != Subtarget.getThreadPointerRegister()))
- return;
+ return false;
- // The second operand of the InitialADDI should be the global TLS address
+ // The second operand of the ADDIToFold should be the global TLS address
// (the local-exec TLS variable). We only perform the folding if the TLS
// variable is the second operand.
- SDValue TLSVarNode = InitialADDI.getOperand(1);
+ SDValue TLSVarNode = ADDIToFold.getOperand(1);
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
if (!GA)
- return;
+ return false;
// The local-exec TLS variable should only have the MO_TPREL_FLAG target flag,
// so this optimization is not performed otherwise if the flag is not set.
unsigned TargetFlags = GA->getTargetFlags();
if ((TargetFlags & PPCII::MO_TPREL_FLAG) == 0)
+ return false;
+
+ // If all conditions are satisfied, the ADDI is valid for folding.
+ return true;
+}
+
+// For non-TOC-based local-exec access where an addi is feeding into another
+// addi, fold this sequence into a single addi if possible.
+static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
+ if (N->getMachineOpcode() != PPC::ADDI8)
+ return;
+
+ // InitialADDI is the addi feeding into N (also an addi), and the addi that
+ // we want optimized out.
+ SDValue InitialADDI = N->getOperand(0);
+
+ if (!isEligibleToFoldADDIForLocalExecAccesses(N, DAG, InitialADDI))
return;
+ // At this point, InitialADDI can be folded into a non-TOC-based local-exec
+ // access. The first operand of InitialADDI should be the thread pointer.
+ SDValue TPRegNode = InitialADDI.getOperand(0);
+
+ // The second operand of the InitialADDI should be the global TLS address
+ // (the local-exec TLS variable), with the MO_TPREL_FLAG target flag.
+ SDValue TLSVarNode = InitialADDI.getOperand(1);
+ GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
+ unsigned TargetFlags = GA->getTargetFlags();
+
// The second operand of the addi that we want to preserve will be an
// immediate. We add this immediate, together with the address of the TLS
// variable found in InitialADDI, in order to preserve the correct TLS address
- // information during assembly printing.
+ // information during assembly printing. The offset is likely to be non-zero
+ // when we end up in this case.
int Offset = N->getConstantOperandVal(1);
TLSVarNode = DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA), MVT::i64,
Offset, TargetFlags);
@@ -7623,7 +7648,6 @@ static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
void PPCDAGToDAGISel::PeepholePPC64() {
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
- bool HasAIXSmallLocalExecTLS = Subtarget->hasAIXSmallLocalExecTLS();
while (Position != CurDAG->allnodes_begin()) {
SDNode *N = &*--Position;
@@ -7792,15 +7816,17 @@ void PPCDAGToDAGISel::PeepholePPC64() {
ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
ImmOpnd.getValueType());
} else if (Offset != 0) {
- if (!HasAIXSmallLocalExecTLS)
- continue;
- // Add the non-zero offset information into the load or store
- // instruction to be used for non-TOC-based local-exec accesses.
- GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd);
- if (!GA)
+ if (isEligibleToFoldADDIForLocalExecAccesses(N, CurDAG, Base)) {
+ // Add the non-zero offset information into the load or store
+ // instruction to be used for non-TOC-based local-exec accesses.
+ GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd);
+ if (!GA)
+ continue;
+ ImmOpnd = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
+ MVT::i64, Offset,
+ GA->getTargetFlags());
+ } else
continue;
- ImmOpnd = CurDAG->getTargetGlobalAddress(
- GA->getGlobal(), SDLoc(GA), MVT::i64, Offset, GA->getTargetFlags());
}
}
>From 789629a5ad687daf8cc11ca30cda7ee13510b1db Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1 at ibm.com>
Date: Wed, 29 Nov 2023 15:05:36 -0600
Subject: [PATCH 7/7] Remove comment and add asserts
---
llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 2 --
llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 15 ++++++++++++++-
2 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 35e17f0ad3bbb3f..b7b871d971ee425 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -2889,8 +2889,6 @@ bool PPCAIXAsmPrinter::doInitialization(Module &M) {
// For all TLS variables, calculate their corresponding addresses and store
// them into TLSVarsToAddressMapping, which will be used to determine whether
// or not local-exec TLS variables require special assembly printing.
- // This address calculation follows the same method seen within
- // assignAddressesAndIndices() in XCOFFObjectWriter.cpp.
uint64_t Address = 0;
uint64_t TLSVarAddress = 0;
auto DL = M.getDataLayout();
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index e0204a8f6ecf6a7..10ab073ee14c932 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -7623,8 +7623,21 @@ static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
return;
// At this point, InitialADDI can be folded into a non-TOC-based local-exec
- // access. The first operand of InitialADDI should be the thread pointer.
+ // access. The first operand of InitialADDI should be the thread pointer,
+ // which has been checked in isEligibleToFoldADDIForLocalExecAccesses().
SDValue TPRegNode = InitialADDI.getOperand(0);
+ RegisterSDNode *TPReg = dyn_cast_or_null<RegisterSDNode>(TPRegNode.getNode());
+ assert((TPReg && (TPReg->getReg() == Subtarget.getThreadPointerRegister())) &&
+ "Expecting the first operand to be a thread pointer for folding addi "
+ "in local-exec accesses!");
+
+ // The second operand of the ADDIToFold should be the global TLS address
+ // (the local-exec TLS variable), which has been checked in
+ // isEligibleToFoldADDIForLocalExecAccesses().
+ SDValue TLSVarNode = ADDIToFold.getOperand(1);
+ GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
+ assert(GA && "Expecting a valid GlobalAddressSDNode when folding addi into "
+ "local-exec accesses!");
// The second operand of the InitialADDI should be the global TLS address
// (the local-exec TLS variable), with the MO_TPREL_FLAG target flag.
More information about the llvm-commits mailing list