[llvm] [PowerPC][AIX] Refactor existing logic to handle non-zero offsets for aix-small-local-dynamic-tls (PR #89182)

Felix via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 22 19:07:35 PDT 2024


https://github.com/orcguru updated https://github.com/llvm/llvm-project/pull/89182

>From 4335fe60c058ce6035c4c5216a6750c62fca4e3a Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Thu, 18 Apr 2024 03:29:38 -0400
Subject: [PATCH 1/3] Refactor existing logic to handle non-zero offsets for
 small local-dynamic TLS

---
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     | 38 +++----
 llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp   | 89 +++++++++--------
 ...aix-small-local-dynamic-tls-largeaccess.ll | 99 ++++++++-----------
 .../aix-small-local-dynamic-tls-types.ll      |  6 +-
 4 files changed, 105 insertions(+), 127 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 6e1002c45d81cb..2c7dca33061676 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -205,8 +205,8 @@ class PPCAsmPrinter : public AsmPrinter {
   void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI);
   void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
   void EmitAIXTlsCallHelper(const MachineInstr *MI);
-  const MCExpr *getAdjustedLocalExecExpr(const MachineOperand &MO,
-                                         int64_t Offset);
+  const MCExpr *getAdjustedFasterLocalExpr(const MachineOperand &MO,
+                                           int64_t Offset);
   bool runOnMachineFunction(MachineFunction &MF) override {
     Subtarget = &MF.getSubtarget<PPCSubtarget>();
     bool Changed = AsmPrinter::runOnMachineFunction(MF);
@@ -1647,7 +1647,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
         Flag == PPCII::MO_TPREL_PCREL_FLAG || Flag == PPCII::MO_TLSLD_FLAG) {
       LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
 
-      const MCExpr *Expr = getAdjustedLocalExecExpr(MO, MO.getOffset());
+      const MCExpr *Expr = getAdjustedFasterLocalExpr(MO, MO.getOffset());
       if (Expr)
         TmpInst.getOperand(OpNum) = MCOperand::createExpr(Expr);
 
@@ -1677,14 +1677,15 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
   EmitToStreamer(*OutStreamer, TmpInst);
 }
 
-// For non-TOC-based local-exec variables that have a non-zero offset,
+// For non-TOC-based local-[exec|dynamic] variables that have a non-zero offset,
 // we need to create a new MCExpr that adds the non-zero offset to the address
-// of the local-exec variable that will be used in either an addi, load or
-// store. However, the final displacement for these instructions must be
+// of the local-[exec|dynamic] variable that will be used in either an addi,
+// load or store. However, the final displacement for these instructions must be
 // between [-32768, 32768), so if the TLS address + its non-zero offset is
 // greater than 32KB, a new MCExpr is produced to accommodate this situation.
-const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
-                                                      int64_t Offset) {
+const MCExpr *
+PPCAsmPrinter::getAdjustedFasterLocalExpr(const MachineOperand &MO,
+                                          int64_t Offset) {
   // Non-zero offsets (for loads, stores or `addi`) require additional handling.
   // When the offset is zero, there is no need to create an adjusted MCExpr.
   if (!Offset)
@@ -1692,13 +1693,9 @@ const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
 
   assert(MO.isGlobal() && "Only expecting a global MachineOperand here!");
   const GlobalValue *GValue = MO.getGlobal();
-  // TODO: Handle the aix-small-local-dynamic-tls non-zero offset case.
   TLSModel::Model Model = TM.getTLSModel(GValue);
-  if (Model == TLSModel::LocalDynamic) {
-    return nullptr;
-  }
-  assert(Model == TLSModel::LocalExec &&
-         "Only local-exec accesses are handled!");
+  assert((Model == TLSModel::LocalExec || Model == TLSModel::LocalDynamic) &&
+         "Only local-[exec|dynamic] accesses are handled!");
 
   bool IsGlobalADeclaration = GValue->isDeclarationForLinker();
   // Find the GlobalVariable that corresponds to the particular TLS variable
@@ -1719,7 +1716,10 @@ const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
   // For when TLS variables are extern, this is safe to do because we can
   // assume that the address of extern TLS variables are zero.
   const MCExpr *Expr = MCSymbolRefExpr::create(
-      getSymbol(GValue), MCSymbolRefExpr::VK_PPC_AIX_TLSLE, OutContext);
+      getSymbol(GValue),
+      Model == TLSModel::LocalExec ? MCSymbolRefExpr::VK_PPC_AIX_TLSLE
+                                   : MCSymbolRefExpr::VK_PPC_AIX_TLSLD,
+      OutContext);
   Expr = MCBinaryExpr::createAdd(
       Expr, MCConstantExpr::create(Offset, OutContext), OutContext);
   if (FinalAddress >= 32768) {
@@ -1732,10 +1732,10 @@ const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
     ptrdiff_t Delta = ((FinalAddress + 32768) & ~0xFFFF);
     // Check that the total instruction displacement fits within [-32768,32768).
     [[maybe_unused]] ptrdiff_t InstDisp = TLSVarAddress + Offset - Delta;
-    assert(((InstDisp < 32768) &&
-            (InstDisp >= -32768)) &&
-               "Expecting the instruction displacement for local-exec TLS "
-               "variables to be between [-32768, 32768)!");
+    assert(
+        ((InstDisp < 32768) && (InstDisp >= -32768)) &&
+        "Expecting the instruction displacement for local-[exec|dynamic] TLS "
+        "variables to be between [-32768, 32768)!");
     Expr = MCBinaryExpr::createAdd(
         Expr, MCConstantExpr::create(-Delta, OutContext), OutContext);
   }
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 2f647daa4bcb57..2e504aff70b3d6 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -7587,29 +7587,23 @@ static bool hasAIXSmallTLSAttr(SDValue Val) {
   return false;
 }
 
-// Is an ADDI eligible for folding for non-TOC-based local-exec accesses?
-static bool isEligibleToFoldADDIForLocalExecAccesses(SelectionDAG *DAG,
-                                                     SDValue ADDIToFold) {
+// Is an ADDI eligible for folding for non-TOC-based local-[exec|dynamic]
+// accesses?
+static bool isEligibleToFoldADDIForFasterLocalAccesses(SelectionDAG *DAG,
+                                                       SDValue ADDIToFold) {
   // Check if ADDIToFold (the ADDI that we want to fold into local-exec
   // accesses), is truly an ADDI.
   if (!ADDIToFold.isMachineOpcode() ||
       (ADDIToFold.getMachineOpcode() != PPC::ADDI8))
     return false;
 
-  // Folding is only allowed for the AIX small-local-exec TLS target attribute
-  // or when the 'aix-small-tls' global variable attribute is present.
+  // Folding is only allowed for the AIX small-local-[exec|dynamic] TLS target
+  // attribute or when the 'aix-small-tls' global variable attribute is present.
   const PPCSubtarget &Subtarget =
       DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
   SDValue TLSVarNode = ADDIToFold.getOperand(1);
-  if (!(Subtarget.hasAIXSmallLocalExecTLS() || hasAIXSmallTLSAttr(TLSVarNode)))
-    return false;
-
-  // The first operand of the ADDIToFold should be the thread pointer.
-  // This transformation is only performed if the first operand of the
-  // addi is the thread pointer.
-  SDValue TPRegNode = ADDIToFold.getOperand(0);
-  RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(TPRegNode.getNode());
-  if (!TPReg || (TPReg->getReg() != Subtarget.getThreadPointerRegister()))
+  if (!(Subtarget.hasAIXSmallLocalDynamicTLS() ||
+        Subtarget.hasAIXSmallLocalExecTLS() || hasAIXSmallTLSAttr(TLSVarNode)))
     return false;
 
   // The second operand of the ADDIToFold should be the global TLS address
@@ -7619,24 +7613,36 @@ static bool isEligibleToFoldADDIForLocalExecAccesses(SelectionDAG *DAG,
   if (!GA)
     return false;
 
-  // The local-exec TLS variable should only have the MO_TPREL_FLAG target flag,
-  // so this optimization is not performed otherwise if the flag is not set.
+  if (DAG->getTarget().getTLSModel(GA->getGlobal()) == TLSModel::LocalExec) {
+    // The first operand of the ADDIToFold should be the thread pointer.
+    // This transformation is only performed if the first operand of the
+    // addi is the thread pointer.
+    SDValue TPRegNode = ADDIToFold.getOperand(0);
+    RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(TPRegNode.getNode());
+    if (!TPReg || (TPReg->getReg() != Subtarget.getThreadPointerRegister()))
+      return false;
+  }
+
+  // The local-[exec|dynamic] TLS variable should only have the
+  // [MO_TPREL_FLAG|MO_TLSLD_FLAG] target flags, so this optimization is not
+  // performed otherwise if the flag is not set.
   unsigned TargetFlags = GA->getTargetFlags();
-  if (TargetFlags != PPCII::MO_TPREL_FLAG)
+  if (!(TargetFlags == PPCII::MO_TPREL_FLAG ||
+        TargetFlags == PPCII::MO_TLSLD_FLAG))
     return false;
 
   // If all conditions are satisfied, the ADDI is valid for folding.
   return true;
 }
 
-// For non-TOC-based local-exec access where an addi is feeding into another
-// addi, fold this sequence into a single addi if possible.
-// Before this optimization, the sequence appears as:
-//    addi rN, r13, sym at le
+// For non-TOC-based local-[exec|dynamic] access where an addi is feeding into
+// another addi, fold this sequence into a single addi if possible. Before this
+// optimization, the sequence appears as:
+//    addi rN, r13, sym@[le|ld]
 //    addi rM, rN, imm
 // After this optimization, we can fold the two addi into a single one:
-//    addi rM, r13, sym at le + imm
-static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
+//    addi rM, r13, sym@[le|ld] + imm
+static void foldADDIForFasterLocalAccesses(SDNode *N, SelectionDAG *DAG) {
   if (N->getMachineOpcode() != PPC::ADDI8)
     return;
 
@@ -7644,27 +7650,17 @@ static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
   // we want optimized out.
   SDValue InitialADDI = N->getOperand(0);
 
-  if (!isEligibleToFoldADDIForLocalExecAccesses(DAG, InitialADDI))
+  if (!isEligibleToFoldADDIForFasterLocalAccesses(DAG, InitialADDI))
     return;
 
-  // At this point, InitialADDI can be folded into a non-TOC-based local-exec
-  // access. The first operand of InitialADDI should be the thread pointer,
-  // which has been checked in isEligibleToFoldADDIForLocalExecAccesses().
-  SDValue TPRegNode = InitialADDI.getOperand(0);
-  [[maybe_unused]] RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(TPRegNode.getNode());
-  [[maybe_unused]] const PPCSubtarget &Subtarget =
-      DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
-  assert((TPReg && (TPReg->getReg() == Subtarget.getThreadPointerRegister())) &&
-         "Expecting the first operand to be a thread pointer for folding addi "
-         "in local-exec accesses!");
-
   // The second operand of the InitialADDI should be the global TLS address
-  // (the local-exec TLS variable), with the MO_TPREL_FLAG target flag.
-  // This has been checked in isEligibleToFoldADDIForLocalExecAccesses().
+  // (the local-[exec|dynamic] TLS variable), with the
+  // [MO_TPREL_FLAG|MO_TLSLD_FLAG] target flag. This has been checked in
+  // isEligibleToFoldADDIForFasterLocalAccesses().
   SDValue TLSVarNode = InitialADDI.getOperand(1);
   GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
   assert(GA && "Expecting a valid GlobalAddressSDNode when folding addi into "
-               "local-exec accesses!");
+               "local-[exec|dynamic] accesses!");
   unsigned TargetFlags = GA->getTargetFlags();
 
   // The second operand of the addi that we want to preserve will be an
@@ -7676,7 +7672,7 @@ static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
   TLSVarNode = DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA), MVT::i64,
                                            Offset, TargetFlags);
 
-  (void)DAG->UpdateNodeOperands(N, TPRegNode, TLSVarNode);
+  (void)DAG->UpdateNodeOperands(N, InitialADDI.getOperand(0), TLSVarNode);
   if (InitialADDI.getNode()->use_empty())
     DAG->RemoveDeadNode(InitialADDI.getNode());
 }
@@ -7693,8 +7689,9 @@ void PPCDAGToDAGISel::PeepholePPC64() {
     if (isVSXSwap(SDValue(N, 0)))
       reduceVSXSwap(N, CurDAG);
 
-    // This optimization is performed for non-TOC-based local-exec accesses.
-    foldADDIForLocalExecAccesses(N, CurDAG);
+    // This optimization is performed for non-TOC-based local-[exec|dynamic]
+    // accesses.
+    foldADDIForFasterLocalAccesses(N, CurDAG);
 
     unsigned FirstOp;
     unsigned StorageOpcode = N->getMachineOpcode();
@@ -7852,13 +7849,15 @@ void PPCDAGToDAGISel::PeepholePPC64() {
         ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
                                             ImmOpnd.getValueType());
       } else if (Offset != 0) {
-        // This optimization is performed for non-TOC-based local-exec accesses.
-        if (isEligibleToFoldADDIForLocalExecAccesses(CurDAG, Base)) {
+        // This optimization is performed for non-TOC-based local-[exec|dynamic]
+        // accesses.
+        if (isEligibleToFoldADDIForFasterLocalAccesses(CurDAG, Base)) {
           // Add the non-zero offset information into the load or store
-          // instruction to be used for non-TOC-based local-exec accesses.
+          // instruction to be used for non-TOC-based local-[exec|dynamic]
+          // accesses.
           GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd);
           assert(GA && "Expecting a valid GlobalAddressSDNode when folding "
-                       "addi into local-exec accesses!");
+                       "addi into local-[exec|dynamic] accesses!");
           ImmOpnd = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
                                                    MVT::i64, Offset,
                                                    GA->getTargetFlags());
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll
index d3fa94779dd714..491ceccdddeded 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll
@@ -39,23 +39,18 @@ define signext i32 @test1() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r6, 4
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r5, 1
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r4, ElementIntTLSv1[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r5, ElementIntTLSv1[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r5, ElementIntTLS2[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r6, 24(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 1
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r5, 4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, ElementIntTLSv1[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 2
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 320(r5)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r4, ElementIntTLS3[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r5, 3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r5, 324(r4)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r4, ElementIntTLS4[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r3, ElementIntTLS5[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r6, 328(r4)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r5, ElementIntTLSv1[TL]@ld+24(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, (ElementIntTLS2[TL]@ld+320)-65536(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 3
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, (ElementIntTLS3[TL]@ld+324)-65536(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 88
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 332(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r5, (ElementIntTLS4[TL]@ld+328)-65536(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, (ElementIntTLS5[TL]@ld+332)-65536(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 102
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
@@ -68,24 +63,19 @@ define signext i32 @test1() {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C0 at u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r6, 4
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r5, 1
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r4, ElementIntTLSv1[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r5, ElementIntTLSv1[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r5, ElementIntTLS2[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r6, 24(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 1
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r5, 4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, ElementIntTLSv1[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 2
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, 320(r5)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r4, ElementIntTLS3[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r5, 3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r5, 324(r4)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r4, ElementIntTLS4[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r3, ElementIntTLS5[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r6, 328(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r5, ElementIntTLSv1[TL]@ld+24(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, (ElementIntTLS2[TL]@ld+320)-65536(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, (ElementIntTLS3[TL]@ld+324)-65536(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 88
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, 332(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r5, (ElementIntTLS4[TL]@ld+328)-65536(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, (ElementIntTLS5[TL]@ld+332)-65536(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 102
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
@@ -132,26 +122,21 @@ define i64 @test2() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r3, ElementLongTLS6[UL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 212
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, 424(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r3, ElementLongTLS2[TL]@ld(r6)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 212
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 203
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, 1200(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r3, ElementLongTLS6[UL]@ld+424(r6)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, (ElementLongTLS2[TL]@ld+1200)-131072(r6)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C1(r2) # target-flags(ppc-tlsgdm) @MyTLSGDVar
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsgd) @MyTLSGDVar
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 44
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, 440(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r3, ElementLongTLS3[TL]@ld(r6)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 6
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, 2000(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r3, ElementLongTLS4[TL]@ld(r6)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 6
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 100
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, 6800(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r3, ElementLongTLS5[TL]@ld(r6)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 882
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, 8400(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r3, (ElementLongTLS3[TL]@ld+2000)-196608(r6)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 882
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, (ElementLongTLS4[TL]@ld+6800)-196608(r6)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r3, (ElementLongTLS5[TL]@ld+8400)-196608(r6)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 1191
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
@@ -166,29 +151,24 @@ define i64 @test2() {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 212
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C1 at u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mr r6, r3
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r3, ElementLongTLS6[UL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 424(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r3, ElementLongTLS2[TL]@ld(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 203
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 1200(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C1 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r4, L..C2 at u(r2)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C1 at l(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C2 at l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 212
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r4, L..C1 at l(r4)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, ElementLongTLS6[UL]@ld+424(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 203
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, (ElementLongTLS2[TL]@ld+1200)-131072(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r3, L..C2 at u(r2)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C2 at l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 44
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 440(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r3, ElementLongTLS3[TL]@ld(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 6
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 2000(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r3, ElementLongTLS4[TL]@ld(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 6
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 100
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 6800(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r3, ElementLongTLS5[TL]@ld(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 882
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, 8400(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, (ElementLongTLS3[TL]@ld+2000)-196608(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 882
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r4, (ElementLongTLS4[TL]@ld+6800)-196608(r6)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r3, (ElementLongTLS5[TL]@ld+8400)-196608(r6)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 1191
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
@@ -232,6 +212,7 @@ define signext i32 @test3() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r6, 2
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r5, L..C3(r2) # target-flags(ppc-tlsld) @ElementIntTLSv2
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 1
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r5, ElementIntTLS2[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r7, ElementIntTLS3[TL]@ld(r3)
diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll
index 161a58a90296ad..489260b4e0aeb9 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-types.ll
@@ -51,8 +51,7 @@ define nonnull ptr @AddrTest1() local_unnamed_addr {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r3, a[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r3, r3, 12
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r3, a[TL]@ld+12(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mtlr r0
@@ -66,8 +65,7 @@ define nonnull ptr @AddrTest1() local_unnamed_addr {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    std r0, 64(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r3, a[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r3, r3, 12
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r3, a[TL]@ld+12(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    mtlr r0

>From 932d4a11a201629dc6e6b00072c0b57d8da4303f Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Thu, 18 Apr 2024 03:57:14 -0400
Subject: [PATCH 2/3] add missing check

---
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 2c7dca33061676..089dd18acad2d0 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1598,7 +1598,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     // machine operand (which is a TargetGlobalTLSAddress) is expected to be
     // the same operand for both loads and stores.
     for (const MachineOperand &TempMO : MI->operands()) {
-      if (((TempMO.getTargetFlags() == PPCII::MO_TPREL_FLAG)) &&
+      if (((TempMO.getTargetFlags() == PPCII::MO_TPREL_FLAG ||
+            TempMO.getTargetFlags() == PPCII::MO_TLSLD_FLAG)) &&
           TempMO.getOperandNo() == 1)
         OpNum = 1;
     }
@@ -1634,8 +1635,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
   case PPC::ADDI8: {
     // A faster non-TOC-based local-[exec|dynamic] sequence is represented by
     // `addi` or a load/store instruction (that directly loads or stores off of
-    // the thread pointer) with an immediate operand having the MO_TPREL_FLAG.
-    // Such instructions do not otherwise arise.
+    // the thread pointer) with an immediate operand having the
+    // [MO_TPREL_FLAG|MO_TLSLD_FLAG]. Such instructions do not otherwise arise.
     if (!HasAIXSmallLocalTLS)
       break;
     bool IsMIADDI8 = MI->getOpcode() == PPC::ADDI8;

>From 5873b663a02dbcfc2d6df95379da491151103364 Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH at ibm.com>
Date: Mon, 22 Apr 2024 22:07:16 -0400
Subject: [PATCH 3/3] [NFC] update test case after rebase

---
 ...aix-small-local-dynamic-tls-largeaccess.ll | 57 ++++++++-----------
 1 file changed, 24 insertions(+), 33 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll
index 491ceccdddeded..44d62124ac58df 100644
--- a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll
@@ -45,11 +45,11 @@ define signext i32 @test1() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, ElementIntTLSv1[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 2
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r5, ElementIntTLSv1[TL]@ld+24(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r5, (ElementIntTLS4[TL]@ld+328)-65536(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, (ElementIntTLS2[TL]@ld+320)-65536(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 3
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, (ElementIntTLS3[TL]@ld+324)-65536(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 88
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r5, (ElementIntTLS4[TL]@ld+328)-65536(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, (ElementIntTLS5[TL]@ld+332)-65536(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 102
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
@@ -70,11 +70,11 @@ define signext i32 @test1() {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, ElementIntTLSv1[TL]@ld(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 2
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r5, ElementIntTLSv1[TL]@ld+24(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r5, (ElementIntTLS4[TL]@ld+328)-65536(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, (ElementIntTLS2[TL]@ld+320)-65536(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 3
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, (ElementIntTLS3[TL]@ld+324)-65536(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 88
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r5, (ElementIntTLS4[TL]@ld+328)-65536(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, (ElementIntTLS5[TL]@ld+332)-65536(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 102
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
@@ -124,10 +124,10 @@ define i64 @test2() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    mr r6, r3
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 212
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 203
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r3, ElementLongTLS6[UL]@ld+424(r6)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, (ElementLongTLS2[TL]@ld+1200)-131072(r6)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C1(r2) # target-flags(ppc-tlsgdm) @MyTLSGDVar
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C2(r2) # target-flags(ppc-tlsgd) @MyTLSGDVar
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsgd) @MyTLSGDVar
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r3, ElementLongTLS6[UL]@ld+424(r6)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C2(r2) # target-flags(ppc-tlsgdm) @MyTLSGDVar
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 44
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r4, 440(r3)
@@ -210,24 +210,19 @@ define signext i32 @test3() {
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stdu r1, -48(r1)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsldm) @"_$TLSML"
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    std r0, 64(r1)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r6, 2
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r6, 3
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    bla .__tls_get_mod[PR]
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r5, L..C3(r2) # target-flags(ppc-tlsld) @ElementIntTLSv2
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r5, 2
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 1
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r5, ElementIntTLS2[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r7, ElementIntTLS3[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r6, 320(r5)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r5, 3
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r6, ElementIntTLS4[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r5, 324(r7)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r5, L..C3(r2) # target-flags(ppc-tlsld) @ElementIntTLSv2
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    la r7, ElementIntTLS5[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stwux r4, r3, r5
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r4, 4
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 24(r3)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 88
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r4, 328(r6)
-; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r3, 332(r7)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r6, (ElementIntTLS3[TL]@ld+324)-65536(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r6, L..C3(r2) # target-flags(ppc-tlsld) @ElementIntTLSv2
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r5, (ElementIntTLS2[TL]@ld+320)-65536(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r5, 88
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r5, (ElementIntTLS5[TL]@ld+332)-65536(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r5, 4
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r5, (ElementIntTLS4[TL]@ld+328)-65536(r3)
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stwux r4, r3, r6
+; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    stw r5, 24(r3)
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    li r3, 102
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT:    ld r0, 16(r1)
@@ -243,22 +238,18 @@ define signext i32 @test3() {
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addis r6, L..C3 at u(r2)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r3, L..C0 at l(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r6, L..C3 at l(r6)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r7, 3
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    bla .__tls_get_mod[PR]
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r5, 2
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r4, ElementIntTLS2[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r5, 320(r4)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 1
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r5, ElementIntTLS3[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r7, 324(r5)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r5, ElementIntTLS4[TL]@ld(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    la r7, ElementIntTLS5[TL]@ld(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r5, (ElementIntTLS2[TL]@ld+320)-65536(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r5, 3
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r5, (ElementIntTLS3[TL]@ld+324)-65536(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r5, 88
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r5, (ElementIntTLS5[TL]@ld+332)-65536(r3)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r5, 4
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r5, (ElementIntTLS4[TL]@ld+328)-65536(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stwux r4, r3, r6
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r4, 4
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, 24(r3)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 88
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r4, 328(r5)
-; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r3, 332(r7)
+; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    stw r5, 24(r3)
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    li r3, 102
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    addi r1, r1, 48
 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT:    ld r0, 16(r1)



More information about the llvm-commits mailing list