[llvm] 11b71ad - [PowerPC][TLS] Add additional TLS X-Form loads/store instructions

Amy Kwan via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 27 09:33:45 PDT 2023


Author: Amy Kwan
Date: 2023-06-27T11:33:38-05:00
New Revision: 11b71ade51e0d1f90f1c68a7552a11f7e85eace1

URL: https://github.com/llvm/llvm-project/commit/11b71ade51e0d1f90f1c68a7552a11f7e85eace1
DIFF: https://github.com/llvm/llvm-project/commit/11b71ade51e0d1f90f1c68a7552a11f7e85eace1.diff

LOG: [PowerPC][TLS] Add additional TLS X-Form loads/store instructions

This patch is a follow-up to D43315, and adds the following new load/store
TLS-specific instructions for integer and floating point scalar types:
```
LHAXTLS
LWAXTLS
LHAXTLS_32
LWAXTLS_32
LFSXTLS
LFDXTLS
STFSXTLS
STFDXTLS
```
These instructions can be used to optimize TLS sequences when D-Form
loads/stores follow an ADD_TLS instruction.

Duplicate versions of these instructions are also added within an isAsmParserOnly=1
block (similar to D47382) to allow llvm-mc to assemble these instructions.

Differential Revision: https://reviews.llvm.org/D153645

Added: 
    llvm/test/MC/PowerPC/ppc64-tls-relocs-double-01.s
    llvm/test/MC/PowerPC/ppc64-tls-relocs-float-01.s

Modified: 
    llvm/lib/Target/PowerPC/P10InstrResources.td
    llvm/lib/Target/PowerPC/P9InstrResources.td
    llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
    llvm/lib/Target/PowerPC/PPCInstr64Bit.td
    llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
    llvm/test/CodeGen/PowerPC/tls-pie-xform.ll
    llvm/test/MC/PowerPC/ppc64-tls-relocs-01.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td
index b96fdb00306d1..6fe23699e93e2 100644
--- a/llvm/lib/Target/PowerPC/P10InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -1296,11 +1296,9 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
     LD,  LDtoc,  LDtocBA,  LDtocCPT,  LDtocJTI,  LDtocL, SPILLTOVSR_LD,
     LDBRX,
      DFLOADf32, DFLOADf64, LFD,
-    LFDX,  XFLOADf32, XFLOADf64,
     LFIWAX, LIWAX,
     LFIWZX, LIWZX,
     LHA, LHA8,
-    LHAX, LHAX8,
     LHBRX, LHBRX8,
     LHZ, LHZ8,
     LVEBX,
@@ -1309,7 +1307,7 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
     LVX,
     LVXL,
     LWA, LWA_32,
-    LWAX, LWAX_32,
+    LWAX, LWAXTLS, LWAXTLS_, LWAXTLS_32, LWAX_32,
     LWBRX, LWBRX8,
     LWZ, LWZ8,  LWZtoc, LWZtocL,
     LXSD,
@@ -1340,6 +1338,8 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
     ICBT,
     LBZX, LBZX8, LBZXTLS, LBZXTLS_, LBZXTLS_32,
     LDX, LDXTLS, LDXTLS_, SPILLTOVSR_LDX,
+    LFDX, LFDXTLS, LFDXTLS_,  XFLOADf32, XFLOADf64,
+    LHAX, LHAX8, LHAXTLS, LHAXTLS_, LHAXTLS_32,
     LHZX, LHZX8, LHZXTLS, LHZXTLS_, LHZXTLS_32,
     LWZX, LWZX8, LWZXTLS, LWZXTLS_, LWZXTLS_32,
     LXVL,
@@ -1442,11 +1442,17 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read],
 def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C],
       (instrs
     LFS,
-    LFSX,
     LXSSP,
     LXSSPX
 )>;
 
+// 2-way crack instructions
+// 6 Cycles Load operations, and 4 Cycles ALU2 operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C],
+      (instrs
+    LFSX, LFSXTLS, LFSXTLS_
+)>;
+
 // 4-way crack instructions
 // 6 Cycles Load operations, 4 Cycles ALU2 operations, 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 1 input operands
 def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C, P10W_SX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY],
@@ -1823,12 +1829,10 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
      DFSTOREf32, DFSTOREf64, STFD,
     STFDU,
     STFDUX,
-    STFDX,
     STFIWX, STIWX,
     STFS,
     STFSU,
     STFSUX,
-    STFSX,
     STH, STH8,
     STHBRX,
     STHU, STHU8,
@@ -1867,6 +1871,8 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
     CP_COPY, CP_COPY8,
     STBX, STBX8, STBXTLS, STBXTLS_, STBXTLS_32,
     SPILLTOVSR_STX, STDX, STDXTLS, STDXTLS_,
+    STFDX, STFDXTLS, STFDXTLS_,
+    STFSX, STFSXTLS, STFSXTLS_,
     STHX, STHX8, STHXTLS, STHXTLS_, STHXTLS_32,
     STWX, STWX8, STWXTLS, STWXTLS_, STWXTLS_32,
     STXVL,

diff  --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td
index 62275722a9635..395999c7242af 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -765,6 +765,7 @@ def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
       (instrs
     LFIWZX,
     LFDX,
+    (instregex "LFDXTLS?(_)?$"),
     LFD
 )>;
 
@@ -815,9 +816,9 @@ def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
 def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
               DISP_1C, DISP_1C],
       (instrs
-    (instregex "LHA(X)?(8)?$"),
+    (instregex "LHA(X)?(TLS)?(8)?(_32)?(_)?$"),
     (instregex "CP_PASTE(8)?_rec$"),
-    (instregex "LWA(X)?(_32)?$"),
+    (instregex "LWA(X)?(TLS)?(_32)?(_)?$"),
     TCHECK
 )>;
 
@@ -850,6 +851,7 @@ def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
               DISP_3SLOTS_1C, DISP_3SLOTS_1C],
       (instrs
     LFSX,
+    (instregex "LFSXTLS?(_)?$"),
     LFS
 )>;
 
@@ -891,7 +893,7 @@ def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
 // all three dispatches for the superslice.
 def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
       (instrs
-    (instregex "STF(S|D|IWX|SX|DX)$"),
+    (instregex "STF(S|D|IWX|SX|DX|SXTLS|DXTLS|SXTLS_|DXTLS_)$"),
     (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
     (instregex "STW(8)?$"),
     (instregex "(D|X)FSTORE(f32|f64)$"),

diff  --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 6748d97345d48..96076219760dc 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -739,6 +739,14 @@ bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
       Opcode = PPC::STDXTLS;
       break;
     }
+    case MVT::f32: {
+      Opcode = PPC::STFSXTLS;
+      break;
+    }
+    case MVT::f64: {
+      Opcode = PPC::STFDXTLS;
+      break;
+    }
   }
   SDValue Chain = ST->getChain();
   SDVTList VTs = ST->getVTList();
@@ -763,6 +771,7 @@ bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
   SDLoc dl(LD);
   EVT MemVT = LD->getMemoryVT();
   EVT RegVT = LD->getValueType(0);
+  bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
   unsigned Opcode;
   switch (MemVT.getSimpleVT().SimpleTy) {
     default:
@@ -772,17 +781,31 @@ bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
       break;
     }
     case MVT::i16: {
-      Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
+      if (RegVT == MVT::i32)
+        Opcode = isSExt ? PPC::LHAXTLS_32 : PPC::LHZXTLS_32;
+      else
+        Opcode = isSExt ? PPC::LHAXTLS : PPC::LHZXTLS;
       break;
     }
     case MVT::i32: {
-      Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
+      if (RegVT == MVT::i32)
+        Opcode = isSExt ? PPC::LWAXTLS_32 : PPC::LWZXTLS_32;
+      else
+        Opcode = isSExt ? PPC::LWAXTLS : PPC::LWZXTLS;
       break;
     }
     case MVT::i64: {
       Opcode = PPC::LDXTLS;
       break;
     }
+    case MVT::f32: {
+      Opcode = PPC::LFSXTLS;
+      break;
+    }
+    case MVT::f64: {
+      Opcode = PPC::LFDXTLS;
+      break;
+    }
   }
   SDValue Chain = LD->getChain();
   SDVTList VTs = LD->getVTList();

diff  --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index f4f058aff237f..fd44efa1b3f4e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -724,18 +724,32 @@ def LBZXTLS : XForm_1<31,  87, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$R
                       "lbzx $RST, $RA, $RB", IIC_LdStLoad, []>;
 def LHZXTLS : XForm_1<31, 279, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
                       "lhzx $RST, $RA, $RB", IIC_LdStLoad, []>;
+def LHAXTLS : XForm_1<31, 343, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+                      "lhax $RST, $RA, $RB", IIC_LdStLoad, []>;
 def LWZXTLS : XForm_1<31,  23, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
                       "lwzx $RST, $RA, $RB", IIC_LdStLoad, []>;
+def LWAXTLS : XForm_1<31, 341, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+                      "lwax $RST, $RA, $RB", IIC_LdStLoad, []>;
 def LDXTLS  : XForm_1<31,  21, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
                       "ldx $RST, $RA, $RB", IIC_LdStLD, []>, isPPC64;
 def LBZXTLS_32 : XForm_1<31,  87, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
                          "lbzx $RST, $RA, $RB", IIC_LdStLoad, []>;
 def LHZXTLS_32 : XForm_1<31, 279, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
                          "lhzx $RST, $RA, $RB", IIC_LdStLoad, []>;
+def LHAXTLS_32 : XForm_1<31, 343, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+                         "lhax $RST, $RA, $RB", IIC_LdStLoad, []>;
 def LWZXTLS_32 : XForm_1<31,  23, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
                          "lwzx $RST, $RA, $RB", IIC_LdStLoad, []>;
+def LWAXTLS_32 : XForm_1<31, 341, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+                         "lwax $RST, $RA, $RB", IIC_LdStLoad, []>;
 
 }
+let mayLoad = 1, Predicates = [HasFPU] in {
+def LFSXTLS : XForm_25<31, 535, (outs f4rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+                       "lfsx $RST, $RA, $RB", IIC_LdStLFD, []>;
+def LFDXTLS : XForm_25<31, 599, (outs f8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+                       "lfdx $RST, $RA, $RB", IIC_LdStLFD, []>;
+}
 
 let mayStore = 1 in {
 def STBXTLS : XForm_8<31, 215, (outs), (ins g8rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
@@ -761,6 +775,14 @@ def STWXTLS_32 : XForm_8<31, 151, (outs), (ins gprc:$RST, ptr_rc_nor0:$RA, tlsre
                          PPC970_DGroup_Cracked;
 
 }
+let mayStore = 1, Predicates = [HasFPU] in {
+def STFSXTLS : XForm_8<31, 663, (outs), (ins f4rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
+                       "stfsx $RST, $RA, $RB", IIC_LdStSTFD, []>,
+                       PPC970_DGroup_Cracked;
+def STFDXTLS : XForm_8<31, 727, (outs), (ins f8rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
+                       "stfdx $RST, $RA, $RB", IIC_LdStSTFD, []>,
+                       PPC970_DGroup_Cracked;
+}
 
 let isCommutable = 1 in
 defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB),
@@ -834,12 +856,23 @@ def LBZXTLS_ : XForm_1<31,  87, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$
                       "lbzx $RST, $RA, $RB", IIC_LdStLoad, []>;
 def LHZXTLS_ : XForm_1<31, 279, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
                       "lhzx $RST, $RA, $RB", IIC_LdStLoad, []>;
+def LHAXTLS_ : XForm_1<31, 343, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+                       "lhax $RST, $RA, $RB", IIC_LdStLoad, []>;
 def LWZXTLS_ : XForm_1<31,  23, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
                       "lwzx $RST, $RA, $RB", IIC_LdStLoad, []>;
+def LWAXTLS_ : XForm_1<31, 341, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+                       "lwax $RST, $RA, $RB", IIC_LdStLoad, []>;
 def LDXTLS_  : XForm_1<31,  21, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
                       "ldx $RST, $RA, $RB", IIC_LdStLD, []>, isPPC64;
 }
 
+let mayLoad = 1, Predicates = [HasFPU] in {
+def LFSXTLS_ : XForm_25<31, 535, (outs f4rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+                        "lfsx $RST, $RA, $RB", IIC_LdStLFD, []>;
+def LFDXTLS_ : XForm_25<31, 599, (outs f8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+                        "lfdx $RST, $RA, $RB", IIC_LdStLFD, []>;
+}
+
 let mayStore = 1 in {
 def STBXTLS_ : XForm_8<31, 215, (outs), (ins g8rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
                       "stbx $RST, $RA, $RB", IIC_LdStStore, []>,
@@ -854,6 +887,15 @@ def STDXTLS_  : XForm_8<31, 149, (outs), (ins g8rc:$RST, ptr_rc_nor0:$RA, tlsreg
                        "stdx $RST, $RA, $RB", IIC_LdStSTD, []>, isPPC64,
                        PPC970_DGroup_Cracked;
 }
+
+let mayStore = 1, Predicates = [HasFPU] in {
+def STFSXTLS_ : XForm_8<31, 663, (outs), (ins f4rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
+                        "stfsx $RST, $RA, $RB", IIC_LdStSTFD, []>,
+                        PPC970_DGroup_Cracked;
+def STFDXTLS_ : XForm_8<31, 727, (outs), (ins f8rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
+                        "stfdx $RST, $RA, $RB", IIC_LdStSTFD, []>,
+                        PPC970_DGroup_Cracked;
+}
 }
 
 let isCommutable = 1 in {

diff  --git a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
index 20824e094661b..aae23265710ce 100644
--- a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
@@ -592,8 +592,8 @@ define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1)
 ;
 ; CHECK-32-P10-LABEL: testDouble1:
 ; CHECK-32-P10:       # %bb.0: # %entry
-; CHECK-32-P10-NEXT:    rlwinm 3, 5, 3, 28, 28
 ; CHECK-32-P10-NEXT:    addi 4, 1, -16
+; CHECK-32-P10-NEXT:    rlwinm 3, 5, 3, 28, 28
 ; CHECK-32-P10-NEXT:    stxv 34, -16(1)
 ; CHECK-32-P10-NEXT:    stfdx 1, 4, 3
 ; CHECK-32-P10-NEXT:    lxv 34, -16(1)
@@ -650,8 +650,8 @@ define <2 x double> @testDouble2(<2 x double> %a, ptr %b, i32 zeroext %idx1, i32
 ; CHECK-32-P10-LABEL: testDouble2:
 ; CHECK-32-P10:       # %bb.0: # %entry
 ; CHECK-32-P10-NEXT:    lfd 0, 0(3)
-; CHECK-32-P10-NEXT:    rlwinm 4, 4, 3, 28, 28
 ; CHECK-32-P10-NEXT:    addi 6, 1, -32
+; CHECK-32-P10-NEXT:    rlwinm 4, 4, 3, 28, 28
 ; CHECK-32-P10-NEXT:    stxv 34, -32(1)
 ; CHECK-32-P10-NEXT:    rlwinm 5, 5, 3, 28, 28
 ; CHECK-32-P10-NEXT:    stfdx 0, 6, 4
@@ -723,8 +723,8 @@ define <2 x double> @testDouble3(<2 x double> %a, ptr %b, i32 zeroext %idx1, i32
 ; CHECK-32-P10-LABEL: testDouble3:
 ; CHECK-32-P10:       # %bb.0: # %entry
 ; CHECK-32-P10-NEXT:    plfd 0, 65536(3), 0
-; CHECK-32-P10-NEXT:    rlwinm 4, 4, 3, 28, 28
 ; CHECK-32-P10-NEXT:    addi 6, 1, -32
+; CHECK-32-P10-NEXT:    rlwinm 4, 4, 3, 28, 28
 ; CHECK-32-P10-NEXT:    stxv 34, -32(1)
 ; CHECK-32-P10-NEXT:    rlwinm 5, 5, 3, 28, 28
 ; CHECK-32-P10-NEXT:    stfdx 0, 6, 4

diff  --git a/llvm/test/CodeGen/PowerPC/tls-pie-xform.ll b/llvm/test/CodeGen/PowerPC/tls-pie-xform.ll
index a6619a58b241f..e787bb4a682fe 100644
--- a/llvm/test/CodeGen/PowerPC/tls-pie-xform.ll
+++ b/llvm/test/CodeGen/PowerPC/tls-pie-xform.ll
@@ -5,6 +5,8 @@
 @var_short = external thread_local local_unnamed_addr global i16, align 2
 @var_int = external thread_local local_unnamed_addr global i32, align 4
 @var_long_long = external thread_local local_unnamed_addr global i64, align 8
+@var_float = external thread_local local_unnamed_addr global float, align 4
+@var_double = external thread_local local_unnamed_addr global double, align 8
 
 define dso_local zeroext i8 @test_char_one() {
 ; CHECK-LABEL: test_char_one:
@@ -53,6 +55,18 @@ define dso_local signext i16 @test_short_one() {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, var_short@got@tprel@ha
 ; CHECK-NEXT:    ld 3, var_short@got@tprel@l(3)
+; CHECK-NEXT:    lhax 3, 3, var_short@tls
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i16, ptr @var_short, align 2, !tbaa !7
+  ret i16 %0
+}
+
+define dso_local zeroext i16 @test_short_one_zeroext() {
+; CHECK-LABEL: test_short_one_zeroext:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis 3, 2, var_short@got@tprel@ha
+; CHECK-NEXT:    ld 3, var_short@got@tprel@l(3)
 ; CHECK-NEXT:    lhzx 3, 3, var_short@tls
 ; CHECK-NEXT:    blr
 entry:
@@ -95,6 +109,18 @@ define dso_local signext i32 @test_int_one() {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, var_int@got@tprel@ha
 ; CHECK-NEXT:    ld 3, var_int@got@tprel@l(3)
+; CHECK-NEXT:    lwax 3, 3, var_int@tls
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i32, ptr @var_int, align 4, !tbaa !9
+  ret i32 %0
+}
+
+define dso_local zeroext i32 @test_int_one_zeroext() {
+; CHECK-LABEL: test_int_one_zeroext:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis 3, 2, var_int@got@tprel@ha
+; CHECK-NEXT:    ld 3, var_int@got@tprel@l(3)
 ; CHECK-NEXT:    lwzx 3, 3, var_int@tls
 ; CHECK-NEXT:    blr
 entry:
@@ -172,6 +198,54 @@ entry:
   ret i64 %add
 }
 
+define float @test_float_one() {
+; CHECK-LABEL: test_float_one:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis 3, 2, var_float@got@tprel@ha
+; CHECK-NEXT:    ld 3, var_float@got@tprel@l(3)
+; CHECK-NEXT:    lfsx 1, 3, var_float@tls
+; CHECK-NEXT:    blr
+entry:
+  %0 = load float, ptr @var_float, align 4
+  ret float %0
+}
+
+define void @test_float_two(float %a) {
+; CHECK-LABEL: test_float_two:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis 3, 2, var_float@got@tprel@ha
+; CHECK-NEXT:    ld 3, var_float@got@tprel@l(3)
+; CHECK-NEXT:    stfsx 1, 3, var_float@tls
+; CHECK-NEXT:    blr
+entry:
+  store float %a, ptr @var_float, align 4
+  ret void
+}
+
+define double @test_double_one() {
+; CHECK-LABEL: test_double_one:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis 3, 2, var_double@got@tprel@ha
+; CHECK-NEXT:    ld 3, var_double@got@tprel@l(3)
+; CHECK-NEXT:    lfdx 1, 3, var_double@tls
+; CHECK-NEXT:    blr
+entry:
+  %0 = load double, ptr @var_double, align 8
+  ret double %0
+}
+
+define void @test_double_two(double %a) {
+; CHECK-LABEL: test_double_two:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis 3, 2, var_double@got@tprel@ha
+; CHECK-NEXT:    ld 3, var_double@got@tprel@l(3)
+; CHECK-NEXT:    stfdx 1, 3, var_double@tls
+; CHECK-NEXT:    blr
+entry:
+  store double %a, ptr @var_double, align 8
+  ret void
+}
+
 !llvm.module.flags = !{!0, !1, !2}
 
 !0 = !{i32 1, !"wchar_size", i32 4}

diff  --git a/llvm/test/MC/PowerPC/ppc64-tls-relocs-01.s b/llvm/test/MC/PowerPC/ppc64-tls-relocs-01.s
index e9e8466f86b17..987c4610c015e 100644
--- a/llvm/test/MC/PowerPC/ppc64-tls-relocs-01.s
+++ b/llvm/test/MC/PowerPC/ppc64-tls-relocs-01.s
@@ -7,7 +7,9 @@
         addis 3, 2, t@got@tprel@ha
         ld 3, t@got@tprel@l(3)
         lwzx 4, 3, t@tls
+        lwax 4, 3, t@tls
         lhzx 4, 3, t@tls
+        lhax 4, 3, t@tls
         lbzx 4, 3, t@tls
         ldx 4, 3, t@tls
         stbx 4, 3, t@tls
@@ -38,4 +40,6 @@ t:
 # CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TLS t 0x0
 # CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TLS t 0x0
 # CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TLS t 0x0
+# CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TLS t 0x0
+# CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TLS t 0x0
 # CHECK-NEXT:   }

diff  --git a/llvm/test/MC/PowerPC/ppc64-tls-relocs-double-01.s b/llvm/test/MC/PowerPC/ppc64-tls-relocs-double-01.s
new file mode 100644
index 0000000000000..7ffa426c86701
--- /dev/null
+++ b/llvm/test/MC/PowerPC/ppc64-tls-relocs-double-01.s
@@ -0,0 +1,30 @@
+# RUN: llvm-mc -triple=powerpc64-unknown-linux-gnu -filetype=obj %s | \
+# RUN: llvm-readobj -r - | FileCheck %s
+
+        .text
+        addis 3, 13, myDouble@tprel@ha
+        addi 3, 3, myDouble@tprel@l
+        addis 3, 2, myDouble@got@tprel@ha
+        ld 3, myDouble@got@tprel@l(3)
+        lfdx 4, 3, myDouble@tls
+        stfdx 4, 3, myDouble@tls
+        .type myDouble,@object
+        .section .tbss,"awT",@nobits
+        .globl myDouble
+        .align 2
+
+myDouble:
+	.quad	0
+	.size	myDouble, 8
+
+# Check for a pair of R_PPC64_TPREL16_HA / R_PPC64_TPREL16_LO relocs
+# against the thread-local symbol 'myDouble'.
+# CHECK:      Relocations [
+# CHECK:        Section ({{[0-9]+}}) .rela.text {
+# CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TPREL16_HA myDouble
+# CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TPREL16_LO myDouble
+# CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TPREL16_HA myDouble 0x0
+# CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TPREL16_LO_DS myDouble 0x0
+# CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TLS myDouble 0x0
+# CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TLS myDouble 0x0
+# CHECK-NEXT:   }

diff  --git a/llvm/test/MC/PowerPC/ppc64-tls-relocs-float-01.s b/llvm/test/MC/PowerPC/ppc64-tls-relocs-float-01.s
new file mode 100644
index 0000000000000..a6c6fd5815329
--- /dev/null
+++ b/llvm/test/MC/PowerPC/ppc64-tls-relocs-float-01.s
@@ -0,0 +1,30 @@
+# RUN: llvm-mc -triple=powerpc64-unknown-linux-gnu -filetype=obj %s | \
+# RUN: llvm-readobj -r - | FileCheck %s
+
+        .text
+        addis 3, 13, myFloat@tprel@ha
+        addi 3, 3, myFloat@tprel@l
+        addis 3, 2, myFloat@got@tprel@ha
+        ld 3, myFloat@got@tprel@l(3)
+        lfsx 4, 3, myFloat@tls
+        stfsx 4, 3, myFloat@tls
+        .type myFloat,@object
+        .section .tbss,"awT",@nobits
+        .globl myFloat
+        .align 2
+
+myFloat:
+	.long	0
+	.size	myFloat, 4
+
+# Check for a pair of R_PPC64_TPREL16_HA / R_PPC64_TPREL16_LO relocs
+# against the thread-local symbol 'myFloat'.
+# CHECK:      Relocations [
+# CHECK:        Section ({{[0-9]+}}) .rela.text {
+# CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TPREL16_HA myFloat
+# CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TPREL16_LO myFloat
+# CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TPREL16_HA myFloat 0x0
+# CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TPREL16_LO_DS myFloat 0x0
+# CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TLS myFloat 0x0
+# CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TLS myFloat 0x0
+# CHECK-NEXT:   }


        


More information about the llvm-commits mailing list