[llvm] 11b71ad - [PowerPC][TLS] Add additional TLS X-Form loads/store instructions
Amy Kwan via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 27 09:33:45 PDT 2023
Author: Amy Kwan
Date: 2023-06-27T11:33:38-05:00
New Revision: 11b71ade51e0d1f90f1c68a7552a11f7e85eace1
URL: https://github.com/llvm/llvm-project/commit/11b71ade51e0d1f90f1c68a7552a11f7e85eace1
DIFF: https://github.com/llvm/llvm-project/commit/11b71ade51e0d1f90f1c68a7552a11f7e85eace1.diff
LOG: [PowerPC][TLS] Add additional TLS X-Form loads/store instructions
This patch is a follow-up to D43315 and adds the following new load/store
TLS specific instructions for integer and floating point scalar types:
```
LHAXTLS
LWAXTLS
LHAXTLS_32
LWAXTLS_32
LFSXTLS
LFDXTLS
STFSXTLS
STFDXTLS
```
These instructions can be used to optimize TLS sequences when D-Form
loads/stores follow an ADD_TLS instruction.
Duplicate versions of these instructions are also added within an isAsmParserOnly=1
block (similar to D47382) to allow llvm-mc to assemble these instructions.
Differential Revision: https://reviews.llvm.org/D153645
Added:
llvm/test/MC/PowerPC/ppc64-tls-relocs-double-01.s
llvm/test/MC/PowerPC/ppc64-tls-relocs-float-01.s
Modified:
llvm/lib/Target/PowerPC/P10InstrResources.td
llvm/lib/Target/PowerPC/P9InstrResources.td
llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
llvm/lib/Target/PowerPC/PPCInstr64Bit.td
llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
llvm/test/CodeGen/PowerPC/tls-pie-xform.ll
llvm/test/MC/PowerPC/ppc64-tls-relocs-01.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td
index b96fdb00306d1..6fe23699e93e2 100644
--- a/llvm/lib/Target/PowerPC/P10InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -1296,11 +1296,9 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
LD, LDtoc, LDtocBA, LDtocCPT, LDtocJTI, LDtocL, SPILLTOVSR_LD,
LDBRX,
DFLOADf32, DFLOADf64, LFD,
- LFDX, XFLOADf32, XFLOADf64,
LFIWAX, LIWAX,
LFIWZX, LIWZX,
LHA, LHA8,
- LHAX, LHAX8,
LHBRX, LHBRX8,
LHZ, LHZ8,
LVEBX,
@@ -1309,7 +1307,7 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
LVX,
LVXL,
LWA, LWA_32,
- LWAX, LWAX_32,
+ LWAX, LWAXTLS, LWAXTLS_, LWAXTLS_32, LWAX_32,
LWBRX, LWBRX8,
LWZ, LWZ8, LWZtoc, LWZtocL,
LXSD,
@@ -1340,6 +1338,8 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
ICBT,
LBZX, LBZX8, LBZXTLS, LBZXTLS_, LBZXTLS_32,
LDX, LDXTLS, LDXTLS_, SPILLTOVSR_LDX,
+ LFDX, LFDXTLS, LFDXTLS_, XFLOADf32, XFLOADf64,
+ LHAX, LHAX8, LHAXTLS, LHAXTLS_, LHAXTLS_32,
LHZX, LHZX8, LHZXTLS, LHZXTLS_, LHZXTLS_32,
LWZX, LWZX8, LWZXTLS, LWZXTLS_, LWZXTLS_32,
LXVL,
@@ -1442,11 +1442,17 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read],
def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C],
(instrs
LFS,
- LFSX,
LXSSP,
LXSSPX
)>;
+// 2-way crack instructions
+// 6 Cycles Load operations, and 4 Cycles ALU2 operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C],
+ (instrs
+ LFSX, LFSXTLS, LFSXTLS_
+)>;
+
// 4-way crack instructions
// 6 Cycles Load operations, 4 Cycles ALU2 operations, 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 1 input operands
def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C, P10W_SX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY],
@@ -1823,12 +1829,10 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
DFSTOREf32, DFSTOREf64, STFD,
STFDU,
STFDUX,
- STFDX,
STFIWX, STIWX,
STFS,
STFSU,
STFSUX,
- STFSX,
STH, STH8,
STHBRX,
STHU, STHU8,
@@ -1867,6 +1871,8 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
CP_COPY, CP_COPY8,
STBX, STBX8, STBXTLS, STBXTLS_, STBXTLS_32,
SPILLTOVSR_STX, STDX, STDXTLS, STDXTLS_,
+ STFDX, STFDXTLS, STFDXTLS_,
+ STFSX, STFSXTLS, STFSXTLS_,
STHX, STHX8, STHXTLS, STHXTLS_, STHXTLS_32,
STWX, STWX8, STWXTLS, STWXTLS_, STWXTLS_32,
STXVL,
diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td
index 62275722a9635..395999c7242af 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -765,6 +765,7 @@ def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
(instrs
LFIWZX,
LFDX,
+ (instregex "LFDXTLS?(_)?$"),
LFD
)>;
@@ -815,9 +816,9 @@ def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
DISP_1C, DISP_1C],
(instrs
- (instregex "LHA(X)?(8)?$"),
+ (instregex "LHA(X)?(TLS)?(8)?(_32)?(_)?$"),
(instregex "CP_PASTE(8)?_rec$"),
- (instregex "LWA(X)?(_32)?$"),
+ (instregex "LWA(X)?(TLS)?(_32)?(_)?$"),
TCHECK
)>;
@@ -850,6 +851,7 @@ def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
DISP_3SLOTS_1C, DISP_3SLOTS_1C],
(instrs
LFSX,
+ (instregex "LFSXTLS?(_)?$"),
LFS
)>;
@@ -891,7 +893,7 @@ def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
// all three dispatches for the superslice.
def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
(instrs
- (instregex "STF(S|D|IWX|SX|DX)$"),
+ (instregex "STF(S|D|IWX|SX|DX|SXTLS|DXTLS|SXTLS_|DXTLS_)$"),
(instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
(instregex "STW(8)?$"),
(instregex "(D|X)FSTORE(f32|f64)$"),
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 6748d97345d48..96076219760dc 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -739,6 +739,14 @@ bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
Opcode = PPC::STDXTLS;
break;
}
+ case MVT::f32: {
+ Opcode = PPC::STFSXTLS;
+ break;
+ }
+ case MVT::f64: {
+ Opcode = PPC::STFDXTLS;
+ break;
+ }
}
SDValue Chain = ST->getChain();
SDVTList VTs = ST->getVTList();
@@ -763,6 +771,7 @@ bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
SDLoc dl(LD);
EVT MemVT = LD->getMemoryVT();
EVT RegVT = LD->getValueType(0);
+ bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
unsigned Opcode;
switch (MemVT.getSimpleVT().SimpleTy) {
default:
@@ -772,17 +781,31 @@ bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
break;
}
case MVT::i16: {
- Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
+ if (RegVT == MVT::i32)
+ Opcode = isSExt ? PPC::LHAXTLS_32 : PPC::LHZXTLS_32;
+ else
+ Opcode = isSExt ? PPC::LHAXTLS : PPC::LHZXTLS;
break;
}
case MVT::i32: {
- Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
+ if (RegVT == MVT::i32)
+ Opcode = isSExt ? PPC::LWAXTLS_32 : PPC::LWZXTLS_32;
+ else
+ Opcode = isSExt ? PPC::LWAXTLS : PPC::LWZXTLS;
break;
}
case MVT::i64: {
Opcode = PPC::LDXTLS;
break;
}
+ case MVT::f32: {
+ Opcode = PPC::LFSXTLS;
+ break;
+ }
+ case MVT::f64: {
+ Opcode = PPC::LFDXTLS;
+ break;
+ }
}
SDValue Chain = LD->getChain();
SDVTList VTs = LD->getVTList();
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index f4f058aff237f..fd44efa1b3f4e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -724,18 +724,32 @@ def LBZXTLS : XForm_1<31, 87, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$R
"lbzx $RST, $RA, $RB", IIC_LdStLoad, []>;
def LHZXTLS : XForm_1<31, 279, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"lhzx $RST, $RA, $RB", IIC_LdStLoad, []>;
+def LHAXTLS : XForm_1<31, 343, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lhax $RST, $RA, $RB", IIC_LdStLoad, []>;
def LWZXTLS : XForm_1<31, 23, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"lwzx $RST, $RA, $RB", IIC_LdStLoad, []>;
+def LWAXTLS : XForm_1<31, 341, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lwax $RST, $RA, $RB", IIC_LdStLoad, []>;
def LDXTLS : XForm_1<31, 21, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"ldx $RST, $RA, $RB", IIC_LdStLD, []>, isPPC64;
def LBZXTLS_32 : XForm_1<31, 87, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"lbzx $RST, $RA, $RB", IIC_LdStLoad, []>;
def LHZXTLS_32 : XForm_1<31, 279, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"lhzx $RST, $RA, $RB", IIC_LdStLoad, []>;
+def LHAXTLS_32 : XForm_1<31, 343, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lhax $RST, $RA, $RB", IIC_LdStLoad, []>;
def LWZXTLS_32 : XForm_1<31, 23, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"lwzx $RST, $RA, $RB", IIC_LdStLoad, []>;
+def LWAXTLS_32 : XForm_1<31, 341, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lwax $RST, $RA, $RB", IIC_LdStLoad, []>;
}
+let mayLoad = 1, Predicates = [HasFPU] in {
+def LFSXTLS : XForm_25<31, 535, (outs f4rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lfsx $RST, $RA, $RB", IIC_LdStLFD, []>;
+def LFDXTLS : XForm_25<31, 599, (outs f8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lfdx $RST, $RA, $RB", IIC_LdStLFD, []>;
+}
let mayStore = 1 in {
def STBXTLS : XForm_8<31, 215, (outs), (ins g8rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
@@ -761,6 +775,14 @@ def STWXTLS_32 : XForm_8<31, 151, (outs), (ins gprc:$RST, ptr_rc_nor0:$RA, tlsre
PPC970_DGroup_Cracked;
}
+let mayStore = 1, Predicates = [HasFPU] in {
+def STFSXTLS : XForm_8<31, 663, (outs), (ins f4rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
+ "stfsx $RST, $RA, $RB", IIC_LdStSTFD, []>,
+ PPC970_DGroup_Cracked;
+def STFDXTLS : XForm_8<31, 727, (outs), (ins f8rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
+ "stfdx $RST, $RA, $RB", IIC_LdStSTFD, []>,
+ PPC970_DGroup_Cracked;
+}
let isCommutable = 1 in
defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB),
@@ -834,12 +856,23 @@ def LBZXTLS_ : XForm_1<31, 87, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$
"lbzx $RST, $RA, $RB", IIC_LdStLoad, []>;
def LHZXTLS_ : XForm_1<31, 279, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"lhzx $RST, $RA, $RB", IIC_LdStLoad, []>;
+def LHAXTLS_ : XForm_1<31, 343, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lhax $RST, $RA, $RB", IIC_LdStLoad, []>;
def LWZXTLS_ : XForm_1<31, 23, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"lwzx $RST, $RA, $RB", IIC_LdStLoad, []>;
+def LWAXTLS_ : XForm_1<31, 341, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lwax $RST, $RA, $RB", IIC_LdStLoad, []>;
def LDXTLS_ : XForm_1<31, 21, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
"ldx $RST, $RA, $RB", IIC_LdStLD, []>, isPPC64;
}
+let mayLoad = 1, Predicates = [HasFPU] in {
+def LFSXTLS_ : XForm_25<31, 535, (outs f4rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lfsx $RST, $RA, $RB", IIC_LdStLFD, []>;
+def LFDXTLS_ : XForm_25<31, 599, (outs f8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lfdx $RST, $RA, $RB", IIC_LdStLFD, []>;
+}
+
let mayStore = 1 in {
def STBXTLS_ : XForm_8<31, 215, (outs), (ins g8rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
"stbx $RST, $RA, $RB", IIC_LdStStore, []>,
@@ -854,6 +887,15 @@ def STDXTLS_ : XForm_8<31, 149, (outs), (ins g8rc:$RST, ptr_rc_nor0:$RA, tlsreg
"stdx $RST, $RA, $RB", IIC_LdStSTD, []>, isPPC64,
PPC970_DGroup_Cracked;
}
+
+let mayStore = 1, Predicates = [HasFPU] in {
+def STFSXTLS_ : XForm_8<31, 663, (outs), (ins f4rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
+ "stfsx $RST, $RA, $RB", IIC_LdStSTFD, []>,
+ PPC970_DGroup_Cracked;
+def STFDXTLS_ : XForm_8<31, 727, (outs), (ins f8rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
+ "stfdx $RST, $RA, $RB", IIC_LdStSTFD, []>,
+ PPC970_DGroup_Cracked;
+}
}
let isCommutable = 1 in {
diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
index 20824e094661b..aae23265710ce 100644
--- a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
@@ -592,8 +592,8 @@ define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1)
;
; CHECK-32-P10-LABEL: testDouble1:
; CHECK-32-P10: # %bb.0: # %entry
-; CHECK-32-P10-NEXT: rlwinm 3, 5, 3, 28, 28
; CHECK-32-P10-NEXT: addi 4, 1, -16
+; CHECK-32-P10-NEXT: rlwinm 3, 5, 3, 28, 28
; CHECK-32-P10-NEXT: stxv 34, -16(1)
; CHECK-32-P10-NEXT: stfdx 1, 4, 3
; CHECK-32-P10-NEXT: lxv 34, -16(1)
@@ -650,8 +650,8 @@ define <2 x double> @testDouble2(<2 x double> %a, ptr %b, i32 zeroext %idx1, i32
; CHECK-32-P10-LABEL: testDouble2:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: lfd 0, 0(3)
-; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28
; CHECK-32-P10-NEXT: addi 6, 1, -32
+; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28
; CHECK-32-P10-NEXT: stxv 34, -32(1)
; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28
; CHECK-32-P10-NEXT: stfdx 0, 6, 4
@@ -723,8 +723,8 @@ define <2 x double> @testDouble3(<2 x double> %a, ptr %b, i32 zeroext %idx1, i32
; CHECK-32-P10-LABEL: testDouble3:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: plfd 0, 65536(3), 0
-; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28
; CHECK-32-P10-NEXT: addi 6, 1, -32
+; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28
; CHECK-32-P10-NEXT: stxv 34, -32(1)
; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28
; CHECK-32-P10-NEXT: stfdx 0, 6, 4
diff --git a/llvm/test/CodeGen/PowerPC/tls-pie-xform.ll b/llvm/test/CodeGen/PowerPC/tls-pie-xform.ll
index a6619a58b241f..e787bb4a682fe 100644
--- a/llvm/test/CodeGen/PowerPC/tls-pie-xform.ll
+++ b/llvm/test/CodeGen/PowerPC/tls-pie-xform.ll
@@ -5,6 +5,8 @@
@var_short = external thread_local local_unnamed_addr global i16, align 2
@var_int = external thread_local local_unnamed_addr global i32, align 4
@var_long_long = external thread_local local_unnamed_addr global i64, align 8
+@var_float = external thread_local local_unnamed_addr global float, align 4
+@var_double = external thread_local local_unnamed_addr global double, align 8
define dso_local zeroext i8 @test_char_one() {
; CHECK-LABEL: test_char_one:
@@ -53,6 +55,18 @@ define dso_local signext i16 @test_short_one() {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis 3, 2, var_short at got@tprel at ha
; CHECK-NEXT: ld 3, var_short at got@tprel at l(3)
+; CHECK-NEXT: lhax 3, 3, var_short at tls
+; CHECK-NEXT: blr
+entry:
+ %0 = load i16, ptr @var_short, align 2, !tbaa !7
+ ret i16 %0
+}
+
+define dso_local zeroext i16 @test_short_one_zeroext() {
+; CHECK-LABEL: test_short_one_zeroext:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addis 3, 2, var_short at got@tprel at ha
+; CHECK-NEXT: ld 3, var_short at got@tprel at l(3)
; CHECK-NEXT: lhzx 3, 3, var_short at tls
; CHECK-NEXT: blr
entry:
@@ -95,6 +109,18 @@ define dso_local signext i32 @test_int_one() {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis 3, 2, var_int at got@tprel at ha
; CHECK-NEXT: ld 3, var_int at got@tprel at l(3)
+; CHECK-NEXT: lwax 3, 3, var_int at tls
+; CHECK-NEXT: blr
+entry:
+ %0 = load i32, ptr @var_int, align 4, !tbaa !9
+ ret i32 %0
+}
+
+define dso_local zeroext i32 @test_int_one_zeroext() {
+; CHECK-LABEL: test_int_one_zeroext:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addis 3, 2, var_int at got@tprel at ha
+; CHECK-NEXT: ld 3, var_int at got@tprel at l(3)
; CHECK-NEXT: lwzx 3, 3, var_int at tls
; CHECK-NEXT: blr
entry:
@@ -172,6 +198,54 @@ entry:
ret i64 %add
}
+define float @test_float_one() {
+; CHECK-LABEL: test_float_one:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addis 3, 2, var_float at got@tprel at ha
+; CHECK-NEXT: ld 3, var_float at got@tprel at l(3)
+; CHECK-NEXT: lfsx 1, 3, var_float at tls
+; CHECK-NEXT: blr
+entry:
+ %0 = load float, ptr @var_float, align 4
+ ret float %0
+}
+
+define void @test_float_two(float %a) {
+; CHECK-LABEL: test_float_two:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addis 3, 2, var_float at got@tprel at ha
+; CHECK-NEXT: ld 3, var_float at got@tprel at l(3)
+; CHECK-NEXT: stfsx 1, 3, var_float at tls
+; CHECK-NEXT: blr
+entry:
+ store float %a, ptr @var_float, align 4
+ ret void
+}
+
+define double @test_double_one() {
+; CHECK-LABEL: test_double_one:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addis 3, 2, var_double at got@tprel at ha
+; CHECK-NEXT: ld 3, var_double at got@tprel at l(3)
+; CHECK-NEXT: lfdx 1, 3, var_double at tls
+; CHECK-NEXT: blr
+entry:
+ %0 = load double, ptr @var_double, align 8
+ ret double %0
+}
+
+define void @test_double_two(double %a) {
+; CHECK-LABEL: test_double_two:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addis 3, 2, var_double at got@tprel at ha
+; CHECK-NEXT: ld 3, var_double at got@tprel at l(3)
+; CHECK-NEXT: stfdx 1, 3, var_double at tls
+; CHECK-NEXT: blr
+entry:
+ store double %a, ptr @var_double, align 8
+ ret void
+}
+
!llvm.module.flags = !{!0, !1, !2}
!0 = !{i32 1, !"wchar_size", i32 4}
diff --git a/llvm/test/MC/PowerPC/ppc64-tls-relocs-01.s b/llvm/test/MC/PowerPC/ppc64-tls-relocs-01.s
index e9e8466f86b17..987c4610c015e 100644
--- a/llvm/test/MC/PowerPC/ppc64-tls-relocs-01.s
+++ b/llvm/test/MC/PowerPC/ppc64-tls-relocs-01.s
@@ -7,7 +7,9 @@
addis 3, 2, t at got@tprel at ha
ld 3, t at got@tprel at l(3)
lwzx 4, 3, t at tls
+ lwax 4, 3, t at tls
lhzx 4, 3, t at tls
+ lhax 4, 3, t at tls
lbzx 4, 3, t at tls
ldx 4, 3, t at tls
stbx 4, 3, t at tls
@@ -38,4 +40,6 @@ t:
# CHECK-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TLS t 0x0
# CHECK-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TLS t 0x0
# CHECK-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TLS t 0x0
+# CHECK-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TLS t 0x0
+# CHECK-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TLS t 0x0
# CHECK-NEXT: }
diff --git a/llvm/test/MC/PowerPC/ppc64-tls-relocs-double-01.s b/llvm/test/MC/PowerPC/ppc64-tls-relocs-double-01.s
new file mode 100644
index 0000000000000..7ffa426c86701
--- /dev/null
+++ b/llvm/test/MC/PowerPC/ppc64-tls-relocs-double-01.s
@@ -0,0 +1,30 @@
+# RUN: llvm-mc -triple=powerpc64-unknown-linux-gnu -filetype=obj %s | \
+# RUN: llvm-readobj -r - | FileCheck %s
+
+ .text
+ addis 3, 13, myDouble at tprel@ha
+ addi 3, 3, myDouble at tprel@l
+ addis 3, 2, myDouble at got@tprel at ha
+ ld 3, myDouble at got@tprel at l(3)
+ lfdx 4, 3, myDouble at tls
+ stfdx 4, 3, myDouble at tls
+ .type myDouble, at object
+ .section .tbss,"awT", at nobits
+ .globl myDouble
+ .align 2
+
+myDouble:
+ .quad 0
+ .size myDouble, 8
+
+# Check for a pair of R_PPC64_TPREL16_HA / R_PPC64_TPREL16_LO relocs
+# against the thread-local symbol 'myDouble'.
+# CHECK: Relocations [
+# CHECK: Section ({{[0-9]+}}) .rela.text {
+# CHECK-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TPREL16_HA myDouble
+# CHECK-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TPREL16_LO myDouble
+# CHECK-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_GOT_TPREL16_HA myDouble 0x0
+# CHECK-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_GOT_TPREL16_LO_DS myDouble 0x0
+# CHECK-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TLS myDouble 0x0
+# CHECK-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TLS myDouble 0x0
+# CHECK-NEXT: }
diff --git a/llvm/test/MC/PowerPC/ppc64-tls-relocs-float-01.s b/llvm/test/MC/PowerPC/ppc64-tls-relocs-float-01.s
new file mode 100644
index 0000000000000..a6c6fd5815329
--- /dev/null
+++ b/llvm/test/MC/PowerPC/ppc64-tls-relocs-float-01.s
@@ -0,0 +1,30 @@
+# RUN: llvm-mc -triple=powerpc64-unknown-linux-gnu -filetype=obj %s | \
+# RUN: llvm-readobj -r - | FileCheck %s
+
+ .text
+ addis 3, 13, myFloat at tprel@ha
+ addi 3, 3, myFloat at tprel@l
+ addis 3, 2, myFloat at got@tprel at ha
+ ld 3, myFloat at got@tprel at l(3)
+ lfsx 4, 3, myFloat at tls
+ stfsx 4, 3, myFloat at tls
+ .type myFloat, at object
+ .section .tbss,"awT", at nobits
+ .globl myFloat
+ .align 2
+
+myFloat:
+ .long 0
+ .size myFloat, 4
+
+# Check for a pair of R_PPC64_TPREL16_HA / R_PPC64_TPREL16_LO relocs
+# against the thread-local symbol 'myFloat'.
+# CHECK: Relocations [
+# CHECK: Section ({{[0-9]+}}) .rela.text {
+# CHECK-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TPREL16_HA myFloat
+# CHECK-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TPREL16_LO myFloat
+# CHECK-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_GOT_TPREL16_HA myFloat 0x0
+# CHECK-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_GOT_TPREL16_LO_DS myFloat 0x0
+# CHECK-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TLS myFloat 0x0
+# CHECK-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TLS myFloat 0x0
+# CHECK-NEXT: }
More information about the llvm-commits
mailing list