[llvm] r327635 - [PowerPC] Optimize TLS initial-exec sequence to use X-Form loads/stores
Zaara Syeda via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 15 08:34:41 PDT 2018
Author: syzaara
Date: Thu Mar 15 08:34:41 2018
New Revision: 327635
URL: http://llvm.org/viewvc/llvm-project?rev=327635&view=rev
Log:
[PowerPC] Optimize TLS initial-exec sequence to use X-Form loads/stores
This patch adds new load/store instructions for integer scalar types
which can be used for X-Form when fed by add with an @tls relocation.
Differential Revision: https://reviews.llvm.org/D43315
Added:
llvm/trunk/test/CodeGen/PowerPC/tls-pie-xform.ll
Modified:
llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp?rev=327635&r1=327634&r2=327635&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp Thu Mar 15 08:34:41 2018
@@ -101,6 +101,11 @@ static cl::opt<bool> EnableBranchHint(
cl::desc("Enable static hinting of branches on ppc"),
cl::Hidden);
+static cl::opt<bool> EnableTLSOpt( // Hidden option checked in Select() before attempting the TLS X-form load/store rewrite.
+ "ppc-tls-opt", cl::init(true), // Initialised to true, so the rewrite is attempted unless the option is turned off.
+ cl::desc("Enable tls optimization peephole"),
+ cl::Hidden);
+
enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64,
ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32,
ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 };
@@ -199,6 +204,14 @@ namespace {
bool tryBitPermutation(SDNode *N);
bool tryIntCompareInGPR(SDNode *N);
+ // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
+ // an X-Form load instruction with the offset being a relocation coming from
+ // the PPCISD::ADD_TLS.
+ bool tryTLSXFormLoad(LoadSDNode *N);
+ // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
+ // an X-Form store instruction with the offset being a relocation coming from
+ // the PPCISD::ADD_TLS.
+ bool tryTLSXFormStore(StoreSDNode *N);
/// SelectCC - Select a comparison of the specified values with the
/// specified condition code, returning the CR# of the expression.
SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
@@ -582,6 +595,90 @@ bool PPCDAGToDAGISel::isRotateAndMask(SD
return false;
}
+bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) { // Try to select ST as an X-form TLS store; returns true only when ST was replaced.
+ SDValue Base = ST->getBasePtr();
+ if (Base.getOpcode() != PPCISD::ADD_TLS) // Only a store whose base pointer is itself a PPCISD::ADD_TLS node qualifies.
+ return false;
+ SDValue Offset = ST->getOffset();
+ if (!Offset.isUndef()) // Give up when the store node carries a defined offset operand.
+ return false;
+
+ SDLoc dl(ST);
+ EVT MemVT = ST->getMemoryVT(); // Memory type of the store; selects the byte/half/word/doubleword opcode below.
+ EVT RegVT = ST->getValue().getValueType(); // Type of the stored value; i32 selects the *_32 opcode variant.
+
+ unsigned Opcode;
+ switch (MemVT.getSimpleVT().SimpleTy) { // NOTE(review): presumably getSimpleVT() expects a simple MemVT - confirm extended types cannot reach here.
+ default: // Any memory type other than i8/i16/i32/i64: report no match.
+ return false;
+ case MVT::i8: {
+ Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
+ break;
+ }
+ case MVT::i16: {
+ Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
+ break;
+ }
+ case MVT::i32: {
+ Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
+ break;
+ }
+ case MVT::i64: { // No *_32 variant is chosen for a 64-bit memory type.
+ Opcode = PPC::STDXTLS;
+ break;
+ }
+ }
+ SDValue Chain = ST->getChain();
+ SDVTList VTs = ST->getVTList(); // Reuse the original store's result type list for the machine node.
+ SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1), // Stored value, then the two ADD_TLS operands (the ADD_TLS node itself is bypassed),
+ Chain}; // followed by the chain.
+ SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
+ transferMemOperands(ST, MN); // Presumably carries ST's memory-operand info over to MN - helper is defined outside this hunk.
+ ReplaceNode(ST, MN);
+ return true;
+}
+
+bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) { // Try to select LD as an X-form TLS load; returns true only when LD was replaced.
+ SDValue Base = LD->getBasePtr();
+ if (Base.getOpcode() != PPCISD::ADD_TLS) // Only a load whose base pointer is itself a PPCISD::ADD_TLS node qualifies.
+ return false;
+ SDValue Offset = LD->getOffset();
+ if (!Offset.isUndef()) // Give up when the load node carries a defined offset operand.
+ return false;
+
+ SDLoc dl(LD);
+ EVT MemVT = LD->getMemoryVT(); // Memory type of the load; selects the byte/half/word/doubleword opcode below.
+ EVT RegVT = LD->getValueType(0); // Type of the loaded result; i32 selects the *_32 opcode variant.
+ unsigned Opcode;
+ switch (MemVT.getSimpleVT().SimpleTy) { // NOTE(review): LD->getExtensionType() is never consulted and only lbzx/lhzx/lwzx/ldx forms are chosen - confirm sign-extending loads cannot reach here.
+ default: // Any memory type other than i8/i16/i32/i64: report no match.
+ return false;
+ case MVT::i8: {
+ Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
+ break;
+ }
+ case MVT::i16: {
+ Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
+ break;
+ }
+ case MVT::i32: {
+ Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
+ break;
+ }
+ case MVT::i64: { // No *_32 variant is chosen for a 64-bit memory type.
+ Opcode = PPC::LDXTLS;
+ break;
+ }
+ }
+ SDValue Chain = LD->getChain();
+ SDVTList VTs = LD->getVTList(); // Reuse the original load's result type list for the machine node.
+ SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain}; // The two ADD_TLS operands (the ADD_TLS node itself is bypassed), then the chain.
+ SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
+ transferMemOperands(LD, MN); // Presumably carries LD's memory-operand info over to MN - helper is defined outside this hunk.
+ ReplaceNode(LD, MN);
+ return true;
+}
+
/// Turn an or of two masked values into the rotate left word immediate then
/// mask insert (rlwimi) instruction.
bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
@@ -3949,14 +4046,28 @@ void PPCDAGToDAGISel::Select(SDNode *N)
}
}
+ case ISD::STORE: {
+ // Change TLS initial-exec D-form stores to X-form stores.
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ if (EnableTLSOpt && PPCSubTarget->isELFv2ABI() &&
+ ST->getAddressingMode() != ISD::PRE_INC)
+ if (tryTLSXFormStore(ST))
+ return;
+ break;
+ }
case ISD::LOAD: {
// Handle preincrement loads.
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT LoadedVT = LD->getMemoryVT();
// Normal loads are handled by code generated from the .td file.
- if (LD->getAddressingMode() != ISD::PRE_INC)
+ if (LD->getAddressingMode() != ISD::PRE_INC) {
+ // Change TLS initial-exec D-form loads to X-form loads.
+ if (EnableTLSOpt && PPCSubTarget->isELFv2ABI())
+ if (tryTLSXFormLoad(LD))
+ return;
break;
+ }
SDValue Offset = LD->getOffset();
if (Offset.getOpcode() == ISD::TargetConstant ||
Modified: llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td?rev=327635&r1=327634&r2=327635&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td Thu Mar 15 08:34:41 2018
@@ -499,7 +499,49 @@ defm ADD8 : XOForm_1r<31, 266, 0, (outs
def ADD8TLS : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc_nox0:$rA, tlsreg:$rB),
"add $rT, $rA, $rB", IIC_IntSimple,
[(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>;
-
+let mayLoad = 1 in { // X-form loads taking a tlsreg operand as $rB; every def has an empty pattern list ([]).
+def LBZXTLS : XForm_1<31, 87, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB), // g8rc destination variants come first.
+ "lbzx $rD, $rA, $rB", IIC_LdStLoad, []>;
+def LHZXTLS : XForm_1<31, 279, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
+ "lhzx $rD, $rA, $rB", IIC_LdStLoad, []>;
+def LWZXTLS : XForm_1<31, 23, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
+ "lwzx $rD, $rA, $rB", IIC_LdStLoad, []>;
+def LDXTLS : XForm_1<31, 21, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB), // Doubleword form; the only load here tagged isPPC64, and it has no _32 twin.
+ "ldx $rD, $rA, $rB", IIC_LdStLD, []>, isPPC64;
+def LBZXTLS_32 : XForm_1<31, 87, (outs gprc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB), // _32 variants: same opcode numbers and asm strings, but a gprc destination.
+ "lbzx $rD, $rA, $rB", IIC_LdStLoad, []>;
+def LHZXTLS_32 : XForm_1<31, 279, (outs gprc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
+ "lhzx $rD, $rA, $rB", IIC_LdStLoad, []>;
+def LWZXTLS_32 : XForm_1<31, 23, (outs gprc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
+ "lwzx $rD, $rA, $rB", IIC_LdStLoad, []>;
+
+}
+
+let mayStore = 1 in { // X-form stores taking a tlsreg operand as $rB; every def has an empty pattern list ([]).
+def STBXTLS : XForm_8<31, 215, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB), // g8rc source variants come first.
+ "stbx $rS, $rA, $rB", IIC_LdStStore, []>,
+ PPC970_DGroup_Cracked;
+def STHXTLS : XForm_8<31, 407, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
+ "sthx $rS, $rA, $rB", IIC_LdStStore, []>,
+ PPC970_DGroup_Cracked;
+def STWXTLS : XForm_8<31, 151, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
+ "stwx $rS, $rA, $rB", IIC_LdStStore, []>,
+ PPC970_DGroup_Cracked;
+def STDXTLS : XForm_8<31, 149, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB), // Doubleword form; the only store here tagged isPPC64, and it has no _32 twin.
+ "stdx $rS, $rA, $rB", IIC_LdStSTD, []>, isPPC64,
+ PPC970_DGroup_Cracked;
+def STBXTLS_32 : XForm_8<31, 215, (outs), (ins gprc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB), // _32 variants: same opcode numbers and asm strings, but a gprc source.
+ "stbx $rS, $rA, $rB", IIC_LdStStore, []>,
+ PPC970_DGroup_Cracked;
+def STHXTLS_32 : XForm_8<31, 407, (outs), (ins gprc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
+ "sthx $rS, $rA, $rB", IIC_LdStStore, []>,
+ PPC970_DGroup_Cracked;
+def STWXTLS_32 : XForm_8<31, 151, (outs), (ins gprc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
+ "stwx $rS, $rA, $rB", IIC_LdStStore, []>,
+ PPC970_DGroup_Cracked;
+
+}
+
let isCommutable = 1 in
defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"addc", "$rT, $rA, $rB", IIC_IntGeneral,
Added: llvm/trunk/test/CodeGen/PowerPC/tls-pie-xform.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/tls-pie-xform.ll?rev=327635&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/tls-pie-xform.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/tls-pie-xform.ll Thu Mar 15 08:34:41 2018
@@ -0,0 +1,169 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s -check-prefix=CHECK
+
+@var_char = external thread_local local_unnamed_addr global i8, align 1
+@var_short = external thread_local local_unnamed_addr global i16, align 2
+@var_int = external thread_local local_unnamed_addr global i32, align 4
+@var_long_long = external thread_local local_unnamed_addr global i64, align 8
+
+define dso_local zeroext i8 @test_char_one() {
+; CHECK-LABEL: test_char_one:
+; CHECK: # %bb.0: # %entry
+; CHECK: addis 3, 2, var_char@got@tprel@ha
+; CHECK-NEXT: ld 3, var_char@got@tprel@l(3)
+; CHECK-NEXT: lbzx 3, 3, var_char@tls
+entry:
+ %0 = load i8, i8* @var_char, align 1, !tbaa !4
+ ret i8 %0
+}
+
+define dso_local void @test_char_two(i32 signext %a) {
+; CHECK-LABEL: test_char_two:
+; CHECK: # %bb.0: # %entry
+; CHECK: addis 4, 2, var_char@got@tprel@ha
+; CHECK-NEXT: ld 4, var_char@got@tprel@l(4)
+; CHECK-NEXT: stbx 3, 4, var_char@tls
+entry:
+ %conv = trunc i32 %a to i8
+ store i8 %conv, i8* @var_char, align 1, !tbaa !4
+ ret void
+}
+
+define dso_local zeroext i8 @test_char_three(i8 zeroext %a) {
+; CHECK-LABEL: test_char_three:
+; CHECK: # %bb.0: # %entry
+; CHECK: addis 4, 2, var_char@got@tprel@ha
+; CHECK-NEXT: ld 4, var_char@got@tprel@l(4)
+; CHECK-NEXT: lbzx 5, 4, var_char@tls
+; CHECK: stbx {{[0-9]+}}, 4, var_char@tls
+entry:
+ %0 = load i8, i8* @var_char, align 1, !tbaa !4
+ %add = add i8 %0, %a
+ store i8 %add, i8* @var_char, align 1, !tbaa !4
+ ret i8 %add
+}
+
+define dso_local signext i16 @test_short_one() {
+; CHECK-LABEL: test_short_one:
+; CHECK: # %bb.0: # %entry
+; CHECK: addis 3, 2, var_short@got@tprel@ha
+; CHECK-NEXT: ld 3, var_short@got@tprel@l(3)
+; CHECK-NEXT: lhzx 3, 3, var_short@tls
+entry:
+ %0 = load i16, i16* @var_short, align 2, !tbaa !7
+ ret i16 %0
+}
+
+define dso_local void @test_short_two(i32 signext %a) {
+; CHECK-LABEL: test_short_two:
+; CHECK: # %bb.0: # %entry
+; CHECK: addis 4, 2, var_short@got@tprel@ha
+; CHECK-NEXT: ld 4, var_short@got@tprel@l(4)
+; CHECK-NEXT: sthx 3, 4, var_short@tls
+entry:
+ %conv = trunc i32 %a to i16
+ store i16 %conv, i16* @var_short, align 2, !tbaa !7
+ ret void
+}
+
+define dso_local signext i16 @test_short_three(i16 signext %a) {
+; CHECK-LABEL: test_short_three:
+; CHECK: # %bb.0: # %entry
+; CHECK: addis 4, 2, var_short@got@tprel@ha
+; CHECK-NEXT: ld 4, var_short@got@tprel@l(4)
+; CHECK-NEXT: lhzx 5, 4, var_short@tls
+; CHECK: sthx {{[0-9]+}}, 4, var_short@tls
+entry:
+ %0 = load i16, i16* @var_short, align 2, !tbaa !7
+ %add = add i16 %0, %a
+ store i16 %add, i16* @var_short, align 2, !tbaa !7
+ ret i16 %add
+}
+
+define dso_local signext i32 @test_int_one() {
+; CHECK-LABEL: test_int_one:
+; CHECK: # %bb.0: # %entry
+; CHECK: addis 3, 2, var_int@got@tprel@ha
+; CHECK-NEXT: ld 3, var_int@got@tprel@l(3)
+; CHECK-NEXT: lwzx 3, 3, var_int@tls
+entry:
+ %0 = load i32, i32* @var_int, align 4, !tbaa !9
+ ret i32 %0
+}
+
+define dso_local void @test_int_two(i32 signext %a) {
+; CHECK-LABEL: test_int_two:
+; CHECK: # %bb.0: # %entry
+; CHECK: addis 4, 2, var_int@got@tprel@ha
+; CHECK-NEXT: ld 4, var_int@got@tprel@l(4)
+; CHECK-NEXT: stwx 3, 4, var_int@tls
+entry:
+ store i32 %a, i32* @var_int, align 4, !tbaa !9
+ ret void
+}
+
+define dso_local signext i32 @test_int_three(i32 signext %a) {
+; CHECK-LABEL: test_int_three:
+; CHECK: # %bb.0: # %entry
+; CHECK: addis 4, 2, var_int@got@tprel@ha
+; CHECK-NEXT: ld 4, var_int@got@tprel@l(4)
+; CHECK-NEXT: lwzx 5, 4, var_int@tls
+; CHECK: stwx {{[0-9]+}}, 4, var_int@tls
+entry:
+ %0 = load i32, i32* @var_int, align 4, !tbaa !9
+ %add = add nsw i32 %0, %a
+ store i32 %add, i32* @var_int, align 4, !tbaa !9
+ ret i32 %add
+}
+
+define dso_local i64 @test_longlong_one() {
+; CHECK-LABEL: test_longlong_one:
+; CHECK: # %bb.0: # %entry
+; CHECK: addis 3, 2, var_long_long@got@tprel@ha
+; CHECK-NEXT: ld 3, var_long_long@got@tprel@l(3)
+; CHECK-NEXT: ldx 3, 3, var_long_long@tls
+entry:
+ %0 = load i64, i64* @var_long_long, align 8, !tbaa !11
+ ret i64 %0
+}
+
+define dso_local void @test_longlong_two(i32 signext %a) {
+; CHECK-LABEL: test_longlong_two:
+; CHECK: # %bb.0: # %entry
+; CHECK: addis 4, 2, var_long_long@got@tprel@ha
+; CHECK-NEXT: ld 4, var_long_long@got@tprel@l(4)
+; CHECK-NEXT: stdx 3, 4, var_long_long@tls
+entry:
+ %conv = sext i32 %a to i64
+ store i64 %conv, i64* @var_long_long, align 8, !tbaa !11
+ ret void
+}
+
+define dso_local i64 @test_longlong_three(i64 %a) {
+; CHECK-LABEL: test_longlong_three:
+; CHECK: # %bb.0: # %entry
+; CHECK: addis 4, 2, var_long_long@got@tprel@ha
+; CHECK-NEXT: ld 4, var_long_long@got@tprel@l(4)
+; CHECK-NEXT: ldx 5, 4, var_long_long@tls
+; CHECK: stdx {{[0-9]+}}, 4, var_long_long@tls
+entry:
+ %0 = load i64, i64* @var_long_long, align 8, !tbaa !11
+ %add = add nsw i64 %0, %a
+ store i64 %add, i64* @var_long_long, align 8, !tbaa !11
+ ret i64 %add
+}
+
+!llvm.module.flags = !{!0, !1, !2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"PIC Level", i32 1}
+!2 = !{i32 7, !"PIE Level", i32 1}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!8, !8, i64 0}
+!8 = !{!"short", !5, i64 0}
+!9 = !{!10, !10, i64 0}
+!10 = !{!"int", !5, i64 0}
+!11 = !{!12, !12, i64 0}
+!12 = !{!"long long", !5, i64 0}
More information about the llvm-commits
mailing list