[llvm] [PowerPC][AIX] Refactor existing logic to handle non-zero offsets for aix-small-local-dynamic-tls (PR #89182)
Felix via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 18 01:02:00 PDT 2024
https://github.com/orcguru created https://github.com/llvm/llvm-project/pull/89182
To enable the optimized small local-dynamic access sequence for non-zero offsets, this patch refactors the existing logic from commit 2a50921553798d2db52ca6330c89f0f8a5bc2215.
>From 243671fa84da8cc1a57542fb36d1d64a2266afcd Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Thu, 18 Apr 2024 03:29:38 -0400
Subject: [PATCH 1/2] Refactor existing logic to handle non-zero offsets for
small local-dynamic TLS
---
llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 38 ++---
llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 89 ++++++-----
...aix-small-local-dynamic-tls-largeaccess.ll | 148 ++++++++----------
.../aix-small-local-dynamic-tls-types.ll | 6 +-
4 files changed, 126 insertions(+), 155 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 1c57b92057fff5..4f112c040dffdf 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -205,8 +205,8 @@ class PPCAsmPrinter : public AsmPrinter {
void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI);
void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
void EmitAIXTlsCallHelper(const MachineInstr *MI);
- const MCExpr *getAdjustedLocalExecExpr(const MachineOperand &MO,
- int64_t Offset);
+ const MCExpr *getAdjustedFasterLocalExpr(const MachineOperand &MO,
+ int64_t Offset);
bool runOnMachineFunction(MachineFunction &MF) override {
Subtarget = &MF.getSubtarget<PPCSubtarget>();
bool Changed = AsmPrinter::runOnMachineFunction(MF);
@@ -1628,7 +1628,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
Flag == PPCII::MO_TPREL_PCREL_FLAG || Flag == PPCII::MO_TLSLD_FLAG) {
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
- const MCExpr *Expr = getAdjustedLocalExecExpr(MO, MO.getOffset());
+ const MCExpr *Expr = getAdjustedFasterLocalExpr(MO, MO.getOffset());
if (Expr)
TmpInst.getOperand(OpNum) = MCOperand::createExpr(Expr);
@@ -1658,14 +1658,15 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, TmpInst);
}
-// For non-TOC-based local-exec variables that have a non-zero offset,
+// For non-TOC-based local-[exec|dynamic] variables that have a non-zero offset,
// we need to create a new MCExpr that adds the non-zero offset to the address
-// of the local-exec variable that will be used in either an addi, load or
-// store. However, the final displacement for these instructions must be
+// of the local-[exec|dynamic] variable that will be used in either an addi,
+// load or store. However, the final displacement for these instructions must be
// between [-32768, 32768), so if the TLS address + its non-zero offset is
// greater than 32KB, a new MCExpr is produced to accommodate this situation.
-const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
- int64_t Offset) {
+const MCExpr *
+PPCAsmPrinter::getAdjustedFasterLocalExpr(const MachineOperand &MO,
+ int64_t Offset) {
// Non-zero offsets (for loads, stores or `addi`) require additional handling.
// When the offset is zero, there is no need to create an adjusted MCExpr.
if (!Offset)
@@ -1673,13 +1674,9 @@ const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
assert(MO.isGlobal() && "Only expecting a global MachineOperand here!");
const GlobalValue *GValue = MO.getGlobal();
- // TODO: Handle the aix-small-local-dynamic-tls non-zero offset case.
TLSModel::Model Model = TM.getTLSModel(GValue);
- if (Model == TLSModel::LocalDynamic) {
- return nullptr;
- }
- assert(Model == TLSModel::LocalExec &&
- "Only local-exec accesses are handled!");
+ assert((Model == TLSModel::LocalExec || Model == TLSModel::LocalDynamic) &&
+ "Only local-[exec|dynamic] accesses are handled!");
bool IsGlobalADeclaration = GValue->isDeclarationForLinker();
// Find the GlobalVariable that corresponds to the particular TLS variable
@@ -1700,7 +1697,10 @@ const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
// For when TLS variables are extern, this is safe to do because we can
// assume that the address of extern TLS variables are zero.
const MCExpr *Expr = MCSymbolRefExpr::create(
- getSymbol(GValue), MCSymbolRefExpr::VK_PPC_AIX_TLSLE, OutContext);
+ getSymbol(GValue),
+ Model == TLSModel::LocalExec ? MCSymbolRefExpr::VK_PPC_AIX_TLSLE
+ : MCSymbolRefExpr::VK_PPC_AIX_TLSLD,
+ OutContext);
Expr = MCBinaryExpr::createAdd(
Expr, MCConstantExpr::create(Offset, OutContext), OutContext);
if (FinalAddress >= 32768) {
@@ -1713,10 +1713,10 @@ const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
ptrdiff_t Delta = ((FinalAddress + 32768) & ~0xFFFF);
// Check that the total instruction displacement fits within [-32768,32768).
[[maybe_unused]] ptrdiff_t InstDisp = TLSVarAddress + Offset - Delta;
- assert(((InstDisp < 32768) &&
- (InstDisp >= -32768)) &&
- "Expecting the instruction displacement for local-exec TLS "
- "variables to be between [-32768, 32768)!");
+ assert(
+ ((InstDisp < 32768) && (InstDisp >= -32768)) &&
+ "Expecting the instruction displacement for local-[exec|dynamic] TLS "
+ "variables to be between [-32768, 32768)!");
Expr = MCBinaryExpr::createAdd(
Expr, MCConstantExpr::create(-Delta, OutContext), OutContext);
}
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index af82b6cdb1809e..bee40fc416fcb8 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -7568,29 +7568,23 @@ static bool hasAIXSmallTLSAttr(SDValue Val) {
return false;
}
-// Is an ADDI eligible for folding for non-TOC-based local-exec accesses?
-static bool isEligibleToFoldADDIForLocalExecAccesses(SelectionDAG *DAG,
- SDValue ADDIToFold) {
+// Is an ADDI eligible for folding for non-TOC-based local-[exec|dynamic]
+// accesses?
+static bool isEligibleToFoldADDIForFasterLocalAccesses(SelectionDAG *DAG,
+ SDValue ADDIToFold) {
// Check if ADDIToFold (the ADDI that we want to fold into local-exec
// accesses), is truly an ADDI.
if (!ADDIToFold.isMachineOpcode() ||
(ADDIToFold.getMachineOpcode() != PPC::ADDI8))
return false;
- // Folding is only allowed for the AIX small-local-exec TLS target attribute
- // or when the 'aix-small-tls' global variable attribute is present.
+ // Folding is only allowed for the AIX small-local-[exec|dynamic] TLS target
+ // attribute or when the 'aix-small-tls' global variable attribute is present.
const PPCSubtarget &Subtarget =
DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
SDValue TLSVarNode = ADDIToFold.getOperand(1);
- if (!(Subtarget.hasAIXSmallLocalExecTLS() || hasAIXSmallTLSAttr(TLSVarNode)))
- return false;
-
- // The first operand of the ADDIToFold should be the thread pointer.
- // This transformation is only performed if the first operand of the
- // addi is the thread pointer.
- SDValue TPRegNode = ADDIToFold.getOperand(0);
- RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(TPRegNode.getNode());
- if (!TPReg || (TPReg->getReg() != Subtarget.getThreadPointerRegister()))
+ if (!(Subtarget.hasAIXSmallLocalDynamicTLS() ||
+ Subtarget.hasAIXSmallLocalExecTLS() || hasAIXSmallTLSAttr(TLSVarNode)))
return false;
// The second operand of the ADDIToFold should be the global TLS address
@@ -7600,24 +7594,36 @@ static bool isEligibleToFoldADDIForLocalExecAccesses(SelectionDAG *DAG,
if (!GA)
return false;
- // The local-exec TLS variable should only have the MO_TPREL_FLAG target flag,
- // so this optimization is not performed otherwise if the flag is not set.
+ if (DAG->getTarget().getTLSModel(GA->getGlobal()) == TLSModel::LocalExec) {
+ // The first operand of the ADDIToFold should be the thread pointer.
+ // This transformation is only performed if the first operand of the
+ // addi is the thread pointer.
+ SDValue TPRegNode = ADDIToFold.getOperand(0);
+ RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(TPRegNode.getNode());
+ if (!TPReg || (TPReg->getReg() != Subtarget.getThreadPointerRegister()))
+ return false;
+ }
+
+ // The local-[exec|dynamic] TLS variable should only have the
+ // [MO_TPREL_FLAG|MO_TLSLD_FLAG] target flags, so this optimization is not
+ // performed otherwise if the flag is not set.
unsigned TargetFlags = GA->getTargetFlags();
- if (TargetFlags != PPCII::MO_TPREL_FLAG)
+ if (!(TargetFlags == PPCII::MO_TPREL_FLAG ||
+ TargetFlags == PPCII::MO_TLSLD_FLAG))
return false;
// If all conditions are satisfied, the ADDI is valid for folding.
return true;
}
-// For non-TOC-based local-exec access where an addi is feeding into another
-// addi, fold this sequence into a single addi if possible.
-// Before this optimization, the sequence appears as:
-// addi rN, r13, sym@le
+// For non-TOC-based local-[exec|dynamic] access where an addi is feeding into
+// another addi, fold this sequence into a single addi if possible. Before this
+// optimization, the sequence appears as:
+// addi rN, r13, sym@[le|ld]
// addi rM, rN, imm
// After this optimization, we can fold the two addi into a single one:
-// addi rM, r13, sym@le + imm
-static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
+// addi rM, r13, sym@[le|ld] + imm
+static void foldADDIForFasterLocalAccesses(SDNode *N, SelectionDAG *DAG) {
if (N->getMachineOpcode() != PPC::ADDI8)
return;
@@ -7625,27 +7631,17 @@ static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
// we want optimized out.
SDValue InitialADDI = N->getOperand(0);
- if (!isEligibleToFoldADDIForLocalExecAccesses(DAG, InitialADDI))
+ if (!isEligibleToFoldADDIForFasterLocalAccesses(DAG, InitialADDI))
return;
- // At this point, InitialADDI can be folded into a non-TOC-based local-exec
- // access. The first operand of InitialADDI should be the thread pointer,
- // which has been checked in isEligibleToFoldADDIForLocalExecAccesses().
- SDValue TPRegNode = InitialADDI.getOperand(0);
- [[maybe_unused]] RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(TPRegNode.getNode());
- [[maybe_unused]] const PPCSubtarget &Subtarget =
- DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
- assert((TPReg && (TPReg->getReg() == Subtarget.getThreadPointerRegister())) &&
- "Expecting the first operand to be a thread pointer for folding addi "
- "in local-exec accesses!");
-
// The second operand of the InitialADDI should be the global TLS address
- // (the local-exec TLS variable), with the MO_TPREL_FLAG target flag.
- // This has been checked in isEligibleToFoldADDIForLocalExecAccesses().
+ // (the local-[exec|dynamic] TLS variable), with the
+ // [MO_TPREL_FLAG|MO_TLSLD_FLAG] target flag. This has been checked in
+ // isEligibleToFoldADDIForFasterLocalAccesses().
SDValue TLSVarNode = InitialADDI.getOperand(1);
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
assert(GA && "Expecting a valid GlobalAddressSDNode when folding addi into "
- "local-exec accesses!");
+ "local-[exec|dynamic] accesses!");
unsigned TargetFlags = GA->getTargetFlags();
// The second operand of the addi that we want to preserve will be an
@@ -7657,7 +7653,7 @@ static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
TLSVarNode = DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA), MVT::i64,
Offset, TargetFlags);
- (void)DAG->UpdateNodeOperands(N, TPRegNode, TLSVarNode);
+ (void)DAG->UpdateNodeOperands(N, InitialADDI.getOperand(0), TLSVarNode);
if (InitialADDI.getNode()->use_empty())
DAG->RemoveDeadNode(InitialADDI.getNode());
}
@@ -7674,8 +7670,9 @@ void PPCDAGToDAGISel::PeepholePPC64() {
if (isVSXSwap(SDValue(N, 0)))
reduceVSXSwap(N, CurDAG);
- // This optimization is performed for non-TOC-based local-exec accesses.
- foldADDIForLocalExecAccesses(N, CurDAG);
+ // This optimization is performed for non-TOC-based local-[exec|dynamic]
+ // accesses.
+ foldADDIForFasterLocalAccesses(N, CurDAG);
unsigned FirstOp;
unsigned StorageOpcode = N->getMachineOpcode();
@@ -7833,13 +7830,15 @@ void PPCDAGToDAGISel::PeepholePPC64() {
ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
ImmOpnd.getValueType());
} else if (Offset != 0) {
- // This optimization is performed for non-TOC-based local-exec accesses.
- if (isEligibleToFoldADDIForLocalExecAccesses(CurDAG, Base)) {
+ // This optimization is performed for non-TOC-based local-[exec|dynamic]
+ // accesses.
+ if (isEligibleToFoldADDIForFasterLocalAccesses(CurDAG, Base)) {
// Add the non-zero offset information into the load or store
- // instruction to be used for non-TOC-based local-exec accesses.
+ // instruction to be used for non-TOC-based local-[exec|dynamic]
+ // accesses.
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd);
assert(GA && "Expecting a valid GlobalAddressSDNode when folding "
- "addi into local-exec accesses!");
+ "addi into local-[exec|dynamic] accesses!");
ImmOpnd = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
MVT::i64, Offset,
GA->getTargetFlags());
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll
index 7db1048c258cd0..af2aa0fe33ff77 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll
@@ -39,23 +39,18 @@ define signext i32 @test1() {
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stdu r1, -48(r1)
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r6, 4
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r5, 1
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: la r4, ElementIntTLSv1[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r5, ElementIntTLSv1[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: la r5, ElementIntTLS2[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r6, 24(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r4, 1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r5, 4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r4, ElementIntTLSv1[TL]@ld(r3)
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r4, 2
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r4, 320(r5)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: la r4, ElementIntTLS3[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r5, 3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r5, 324(r4)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: la r4, ElementIntTLS4[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: la r3, ElementIntTLS5[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r6, 328(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r5, ElementIntTLSv1[TL]@ld+24(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r4, (ElementIntTLS2[TL]@ld+320)-65536(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r4, 3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r4, (ElementIntTLS3[TL]@ld+324)-65536(r3)
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r4, 88
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r4, 332(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r5, (ElementIntTLS4[TL]@ld+328)-65536(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r4, (ElementIntTLS5[TL]@ld+332)-65536(r3)
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r3, 102
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: addi r1, r1, 48
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: ld r0, 16(r1)
@@ -68,24 +63,19 @@ define signext i32 @test1() {
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stdu r1, -48(r1)
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: addis r3, L..C0@u(r2)
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r6, 4
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: ld r3, L..C0@l(r3)
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r5, 1
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: la r4, ElementIntTLSv1[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r5, ElementIntTLSv1[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: la r5, ElementIntTLS2[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r6, 24(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r4, 1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r5, 4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r4, ElementIntTLSv1[TL]@ld(r3)
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r4, 2
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r4, 320(r5)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: la r4, ElementIntTLS3[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r5, 3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r5, 324(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: la r4, ElementIntTLS4[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: la r3, ElementIntTLS5[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r6, 328(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r5, ElementIntTLSv1[TL]@ld+24(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r4, (ElementIntTLS2[TL]@ld+320)-65536(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r4, 3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r4, (ElementIntTLS3[TL]@ld+324)-65536(r3)
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r4, 88
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r4, 332(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r5, (ElementIntTLS4[TL]@ld+328)-65536(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r4, (ElementIntTLS5[TL]@ld+332)-65536(r3)
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r3, 102
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: addi r1, r1, 48
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: ld r0, 16(r1)
@@ -132,26 +122,21 @@ define i64 @test2() {
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: std r0, 64(r1)
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: bla .__tls_get_mod[PR]
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: mr r6, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: la r3, ElementLongTLS6[UL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r4, 212
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: std r4, 424(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: la r3, ElementLongTLS2[TL]@ld(r6)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r3, 212
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r4, 203
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: std r4, 1200(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: std r3, ElementLongTLS6[UL]@ld+424(r6)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: std r4, (ElementLongTLS2[TL]@ld+1200)-131072(r6)
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: ld r3, L..C1(r2) # target-flags(ppc-tlsgdm) @MyTLSGDVar
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: ld r4, L..C2(r2) # target-flags(ppc-tlsgd) @MyTLSGDVar
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: bla .__tls_get_addr[PR]
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r4, 44
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: std r4, 440(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: la r3, ElementLongTLS3[TL]@ld(r6)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r4, 6
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: std r4, 2000(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: la r3, ElementLongTLS4[TL]@ld(r6)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r3, 6
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r4, 100
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: std r4, 6800(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: la r3, ElementLongTLS5[TL]@ld(r6)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r4, 882
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: std r4, 8400(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: std r3, (ElementLongTLS3[TL]@ld+2000)-196608(r6)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r3, 882
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: std r4, (ElementLongTLS4[TL]@ld+6800)-196608(r6)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: std r3, (ElementLongTLS5[TL]@ld+8400)-196608(r6)
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r3, 1191
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: addi r1, r1, 48
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: ld r0, 16(r1)
@@ -166,29 +151,24 @@ define i64 @test2() {
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: std r0, 64(r1)
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: ld r3, L..C0@l(r3)
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r4, 212
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: addis r4, L..C1@u(r2)
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: mr r6, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: la r3, ElementLongTLS6[UL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: std r4, 424(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: la r3, ElementLongTLS2[TL]@ld(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r4, 203
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: std r4, 1200(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: addis r3, L..C1@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: addis r4, L..C2@u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: ld r3, L..C1@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: ld r4, L..C2@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r3, 212
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: ld r4, L..C1@l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: std r3, ElementLongTLS6[UL]@ld+424(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r3, 203
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: std r3, (ElementLongTLS2[TL]@ld+1200)-131072(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: addis r3, L..C2@u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: ld r3, L..C2@l(r3)
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: bla .__tls_get_addr[PR]
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r4, 44
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: std r4, 440(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: la r3, ElementLongTLS3[TL]@ld(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r4, 6
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: std r4, 2000(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: la r3, ElementLongTLS4[TL]@ld(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r3, 6
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r4, 100
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: std r4, 6800(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: la r3, ElementLongTLS5[TL]@ld(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r4, 882
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: std r4, 8400(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: std r3, (ElementLongTLS3[TL]@ld+2000)-196608(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r3, 882
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: std r4, (ElementLongTLS4[TL]@ld+6800)-196608(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: std r3, (ElementLongTLS5[TL]@ld+8400)-196608(r6)
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r3, 1191
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: addi r1, r1, 48
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: ld r0, 16(r1)
@@ -229,24 +209,21 @@ define signext i32 @test3() {
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: mflr r0
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stdu r1, -48(r1)
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: ld r8, L..C3(r2) # target-flags(ppc-tlsld) @ElementIntTLSv2
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: std r0, 64(r1)
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: ld r5, L..C3(r2) # target-flags(ppc-tlsld) @ElementIntTLSv2
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r4, 1
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: la r5, ElementIntTLS2[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: la r6, ElementIntTLS3[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: la r7, ElementIntTLS4[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: la r9, ElementIntTLS5[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stwux r4, r3, r8
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: mr r6, r3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stwux r4, r6, r5
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r4, 4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r4, 24(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r3, 2
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r3, 320(r5)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r3, 3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r3, 324(r6)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r3, 88
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r4, 328(r7)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r3, 332(r9)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r5, 2
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r4, 24(r6)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r6, 3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r5, (ElementIntTLS2[TL]@ld+320)-65536(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r5, 88
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r6, (ElementIntTLS3[TL]@ld+324)-65536(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r4, (ElementIntTLS4[TL]@ld+328)-65536(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r5, (ElementIntTLS5[TL]@ld+332)-65536(r3)
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r3, 102
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: addi r1, r1, 48
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: ld r0, 16(r1)
@@ -261,23 +238,20 @@ define signext i32 @test3() {
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: addis r6, L..C3@u(r2)
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: std r0, 64(r1)
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: ld r3, L..C0@l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: ld r6, L..C3@l(r6)
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: ld r5, L..C3@l(r6)
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r4, 1
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: la r5, ElementIntTLS2[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: la r7, ElementIntTLS3[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: la r8, ElementIntTLS4[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: la r9, ElementIntTLS5[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stwux r4, r3, r6
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: mr r6, r3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stwux r4, r6, r5
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r4, 4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r4, 24(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r3, 2
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r3, 320(r5)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r3, 3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r3, 324(r7)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r3, 88
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r4, 328(r8)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r3, 332(r9)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r5, 2
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r4, 24(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r6, 3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r5, (ElementIntTLS2[TL]@ld+320)-65536(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r5, 88
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r6, (ElementIntTLS3[TL]@ld+324)-65536(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r4, (ElementIntTLS4[TL]@ld+328)-65536(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r5, (ElementIntTLS5[TL]@ld+332)-65536(r3)
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r3, 102
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: addi r1, r1, 48
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: ld r0, 16(r1)
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll
index 161a58a90296ad..489260b4e0aeb9 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll
@@ -51,8 +51,7 @@ define nonnull ptr @AddrTest1() local_unnamed_addr {
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: std r0, 64(r1)
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: la r3, a[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: addi r3, r3, 12
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: la r3, a[TL]@ld+12(r3)
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: addi r1, r1, 48
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: ld r0, 16(r1)
; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: mtlr r0
@@ -66,8 +65,7 @@ define nonnull ptr @AddrTest1() local_unnamed_addr {
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: std r0, 64(r1)
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: ld r3, L..C0@l(r3)
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: la r3, a[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: addi r3, r3, 12
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: la r3, a[TL]@ld+12(r3)
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: addi r1, r1, 48
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: ld r0, 16(r1)
; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: mtlr r0
>From 20f9c06f908df1b596868967020aba27e000e146 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Thu, 18 Apr 2024 03:57:14 -0400
Subject: [PATCH 2/2] add missing check
---
llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 4f112c040dffdf..04337d9442d153 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1579,7 +1579,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
// machine operand (which is a TargetGlobalTLSAddress) is expected to be
// the same operand for both loads and stores.
for (const MachineOperand &TempMO : MI->operands()) {
- if (((TempMO.getTargetFlags() == PPCII::MO_TPREL_FLAG)) &&
+ if (((TempMO.getTargetFlags() == PPCII::MO_TPREL_FLAG ||
+ TempMO.getTargetFlags() == PPCII::MO_TLSLD_FLAG)) &&
TempMO.getOperandNo() == 1)
OpNum = 1;
}
@@ -1615,8 +1616,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
case PPC::ADDI8: {
// A faster non-TOC-based local-[exec|dynamic] sequence is represented by
// `addi` or a load/store instruction (that directly loads or stores off of
- // the thread pointer) with an immediate operand having the MO_TPREL_FLAG.
- // Such instructions do not otherwise arise.
+ // the thread pointer) with an immediate operand having the
+ // [MO_TPREL_FLAG|MO_TLSLD_FLAG]. Such instructions do not otherwise arise.
if (!HasAIXSmallLocalTLS)
break;
bool IsMIADDI8 = MI->getOpcode() == PPC::ADDI8;
More information about the llvm-commits
mailing list