[llvm] r327635 - [PowerPC] Optimize TLS initial-exec sequence to use X-Form loads/stores

Zaara Syeda via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 15 08:34:41 PDT 2018


Author: syzaara
Date: Thu Mar 15 08:34:41 2018
New Revision: 327635

URL: http://llvm.org/viewvc/llvm-project?rev=327635&view=rev
Log:
[PowerPC] Optimize TLS initial-exec sequence to use X-Form loads/stores

This patch adds new load/store instructions for integer scalar types
which can be used for X-Form when fed by add with an @tls relocation.

Differential Revision: https://reviews.llvm.org/D43315

Added:
    llvm/trunk/test/CodeGen/PowerPC/tls-pie-xform.ll
Modified:
    llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
    llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp?rev=327635&r1=327634&r2=327635&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp Thu Mar 15 08:34:41 2018
@@ -101,6 +101,11 @@ static cl::opt<bool> EnableBranchHint(
     cl::desc("Enable static hinting of branches on ppc"),
     cl::Hidden);
 
+// Hidden flag (default on) gating the TLS X-Form load/store peephole.
+static cl::opt<bool> EnableTLSOpt(
+    "ppc-tls-opt", cl::init(true), cl::Hidden,
+    cl::desc("Enable tls optimization peephole"));
+
 enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64,
   ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32,
   ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 };
@@ -199,6 +204,14 @@ namespace {
     bool tryBitPermutation(SDNode *N);
     bool tryIntCompareInGPR(SDNode *N);
 
+    // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
+    // an X-Form load instruction with the offset being a relocation coming from
+    // the PPCISD::ADD_TLS.
+    bool tryTLSXFormLoad(LoadSDNode *N);
+    // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
+    // an X-Form store instruction with the offset being a relocation coming from
+    // the PPCISD::ADD_TLS.
+    bool tryTLSXFormStore(StoreSDNode *N);
     /// SelectCC - Select a comparison of the specified values with the
     /// specified condition code, returning the CR# of the expression.
     SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
@@ -582,6 +595,90 @@ bool PPCDAGToDAGISel::isRotateAndMask(SD
   return false;
 }
 
+// Fold a store whose address is a PPCISD::ADD_TLS node into a single X-Form
+// *XTLS store machine node. Returns true after replacing ST with that node,
+// or false (leaving ST untouched) when the store does not qualify.
+bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
+  // Bail out unless the address is an ADD_TLS and the store carries no
+  // separate offset operand (i.e. its offset is undef).
+  SDValue Addr = ST->getBasePtr();
+  if (Addr.getOpcode() != PPCISD::ADD_TLS || !ST->getOffset().isUndef())
+    return false;
+
+  // The memory type fixes the store width; the type of the stored value
+  // picks between the gprc (_32) and g8rc forms of that opcode. Memory
+  // types other than i8/i16/i32/i64 are not handled.
+  const bool Is32BitReg = ST->getValue().getValueType() == MVT::i32;
+  unsigned Opc;
+  switch (ST->getMemoryVT().getSimpleVT().SimpleTy) {
+  default:
+    return false;
+  case MVT::i8:
+    Opc = Is32BitReg ? PPC::STBXTLS_32 : PPC::STBXTLS;
+    break;
+  case MVT::i16:
+    Opc = Is32BitReg ? PPC::STHXTLS_32 : PPC::STHXTLS;
+    break;
+  case MVT::i32:
+    Opc = Is32BitReg ? PPC::STWXTLS_32 : PPC::STWXTLS;
+    break;
+  case MVT::i64:
+    Opc = PPC::STDXTLS;
+    break;
+  }
+
+  // Operands follow the *XTLS definitions: $rS, $rA, $rB, then the chain.
+  // $rA and $rB are the two operands of the ADD_TLS address node.
+  SDLoc dl(ST);
+  SDValue Ops[] = {ST->getValue(), Addr.getOperand(0), Addr.getOperand(1),
+                   ST->getChain()};
+  SDNode *MN = CurDAG->getMachineNode(Opc, dl, ST->getVTList(), Ops);
+  transferMemOperands(ST, MN);
+  ReplaceNode(ST, MN);
+  return true;
+}
+
+// Fold an unindexed load whose address is a PPCISD::ADD_TLS into one X-Form
+// *XTLS load. Returns true after replacing LD, or false if LD is left alone.
+bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
+  SDValue Base = LD->getBasePtr();
+  if (Base.getOpcode() != PPCISD::ADD_TLS || !LD->getOffset().isUndef())
+    return false;
+  // Only zero-extending X-Form opcodes (lbzx/lhzx/lwzx) are picked below, so
+  // a sign-extending load must not be folded: doing so would silently drop
+  // the sign extension. Leave it to the ordinary selection patterns.
+  if (LD->getExtensionType() == ISD::SEXTLOAD)
+    return false;
+
+  // The result register width picks the gprc (_32) or g8rc opcode variant.
+  const bool Is32BitReg = LD->getValueType(0) == MVT::i32;
+  unsigned Opcode;
+  switch (LD->getMemoryVT().getSimpleVT().SimpleTy) {
+  default:
+    return false;
+  case MVT::i8:
+    Opcode = Is32BitReg ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
+    break;
+  case MVT::i16:
+    Opcode = Is32BitReg ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
+    break;
+  case MVT::i32:
+    Opcode = Is32BitReg ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
+    break;
+  case MVT::i64:
+    Opcode = PPC::LDXTLS;
+    break;
+  }
+
+  // Operands: ADD_TLS's two inputs ($rA, tlsreg $rB), then the chain.
+  SDLoc dl(LD);
+  SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), LD->getChain()};
+  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, LD->getVTList(), Ops);
+  transferMemOperands(LD, MN);
+  ReplaceNode(LD, MN);
+  return true;
+}
+
 /// Turn an or of two masked values into the rotate left word immediate then
 /// mask insert (rlwimi) instruction.
 bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
@@ -3949,14 +4046,28 @@ void PPCDAGToDAGISel::Select(SDNode *N)
     }
   }
 
+  case ISD::STORE: {
+    // Change TLS initial-exec D-form stores to X-form stores.
+    StoreSDNode *ST = cast<StoreSDNode>(N);
+    if (EnableTLSOpt && PPCSubTarget->isELFv2ABI() &&
+        ST->getAddressingMode() != ISD::PRE_INC)
+      if (tryTLSXFormStore(ST))
+        return;
+    break;
+  }
   case ISD::LOAD: {
     // Handle preincrement loads.
     LoadSDNode *LD = cast<LoadSDNode>(N);
     EVT LoadedVT = LD->getMemoryVT();
 
     // Normal loads are handled by code generated from the .td file.
-    if (LD->getAddressingMode() != ISD::PRE_INC)
+    if (LD->getAddressingMode() != ISD::PRE_INC) {
+      // Change TLS initial-exec D-form loads to X-form loads.
+      if (EnableTLSOpt && PPCSubTarget->isELFv2ABI())
+        if (tryTLSXFormLoad(LD))
+          return;
       break;
+    }
 
     SDValue Offset = LD->getOffset();
     if (Offset.getOpcode() == ISD::TargetConstant ||

Modified: llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td?rev=327635&r1=327634&r2=327635&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td Thu Mar 15 08:34:41 2018
@@ -499,7 +499,49 @@ defm ADD8  : XOForm_1r<31, 266, 0, (outs
 def ADD8TLS  : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc_nox0:$rA, tlsreg:$rB),
                         "add $rT, $rA, $rB", IIC_IntSimple,
                         [(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>;
-                     
+let mayLoad = 1 in { // X-Form loads taking a tlsreg $rB; no patterns, picked in C++.
+def LBZXTLS : XForm_1<31,  87, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
+                      "lbzx $rD, $rA, $rB", IIC_LdStLoad, []>;
+def LHZXTLS : XForm_1<31, 279, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
+                      "lhzx $rD, $rA, $rB", IIC_LdStLoad, []>;
+def LWZXTLS : XForm_1<31,  23, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
+                      "lwzx $rD, $rA, $rB", IIC_LdStLoad, []>;
+def LDXTLS  : XForm_1<31,  21, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
+                      "ldx $rD, $rA, $rB", IIC_LdStLD, []>, isPPC64;
+def LBZXTLS_32 : XForm_1<31,  87, (outs gprc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
+                         "lbzx $rD, $rA, $rB", IIC_LdStLoad, []>;
+def LHZXTLS_32 : XForm_1<31, 279, (outs gprc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
+                         "lhzx $rD, $rA, $rB", IIC_LdStLoad, []>;
+def LWZXTLS_32 : XForm_1<31,  23, (outs gprc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
+                         "lwzx $rD, $rA, $rB", IIC_LdStLoad, []>;
+
+} // end mayLoad: the _32 defs differ from their twins only in using gprc for $rD.
+
+let mayStore = 1 in { // X-Form stores taking a tlsreg $rB; no patterns, picked in C++.
+def STBXTLS : XForm_8<31, 215, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
+                      "stbx $rS, $rA, $rB", IIC_LdStStore, []>,
+                      PPC970_DGroup_Cracked;
+def STHXTLS : XForm_8<31, 407, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
+                      "sthx $rS, $rA, $rB", IIC_LdStStore, []>,
+                      PPC970_DGroup_Cracked;
+def STWXTLS : XForm_8<31, 151, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
+                      "stwx $rS, $rA, $rB", IIC_LdStStore, []>,
+                      PPC970_DGroup_Cracked;
+def STDXTLS  : XForm_8<31, 149, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
+                       "stdx $rS, $rA, $rB", IIC_LdStSTD, []>, isPPC64,
+                       PPC970_DGroup_Cracked;
+def STBXTLS_32 : XForm_8<31, 215, (outs), (ins gprc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
+                         "stbx $rS, $rA, $rB", IIC_LdStStore, []>,
+                         PPC970_DGroup_Cracked;
+def STHXTLS_32 : XForm_8<31, 407, (outs), (ins gprc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
+                         "sthx $rS, $rA, $rB", IIC_LdStStore, []>,
+                         PPC970_DGroup_Cracked;
+def STWXTLS_32 : XForm_8<31, 151, (outs), (ins gprc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
+                         "stwx $rS, $rA, $rB", IIC_LdStStore, []>,
+                         PPC970_DGroup_Cracked;
+
+} // end mayStore: the _32 defs differ from their twins only in using gprc for $rS.
+
 let isCommutable = 1 in
 defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
                         "addc", "$rT, $rA, $rB", IIC_IntGeneral,

Added: llvm/trunk/test/CodeGen/PowerPC/tls-pie-xform.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/tls-pie-xform.ll?rev=327635&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/tls-pie-xform.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/tls-pie-xform.ll Thu Mar 15 08:34:41 2018
@@ -0,0 +1,169 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s -check-prefix=CHECK
+
+@var_char = external thread_local local_unnamed_addr global i8, align 1
+@var_short = external thread_local local_unnamed_addr global i16, align 2
+@var_int = external thread_local local_unnamed_addr global i32, align 4
+@var_long_long = external thread_local local_unnamed_addr global i64, align 8
+
+define dso_local zeroext i8 @test_char_one() {
+; CHECK-LABEL: test_char_one:
+; CHECK:       # %bb.0: # %entry
+; CHECK:    addis 3, 2, var_char@got@tprel@ha
+; CHECK-NEXT:    ld 3, var_char@got@tprel@l(3)
+; CHECK-NEXT:    lbzx 3, 3, var_char@tls
+entry:
+  %0 = load i8, i8* @var_char, align 1, !tbaa !4
+  ret i8 %0
+}
+
+define dso_local void @test_char_two(i32 signext %a) {
+; CHECK-LABEL: test_char_two:
+; CHECK:       # %bb.0: # %entry
+; CHECK:    addis 4, 2, var_char@got@tprel@ha
+; CHECK-NEXT:    ld 4, var_char@got@tprel@l(4)
+; CHECK-NEXT:    stbx 3, 4, var_char@tls
+entry:
+  %conv = trunc i32 %a to i8
+  store i8 %conv, i8* @var_char, align 1, !tbaa !4
+  ret void
+}
+
+define dso_local zeroext i8 @test_char_three(i8 zeroext %a) {
+; CHECK-LABEL: test_char_three:
+; CHECK:       # %bb.0: # %entry
+; CHECK:    addis 4, 2, var_char@got@tprel@ha
+; CHECK-NEXT:    ld 4, var_char@got@tprel@l(4)
+; CHECK-NEXT:    lbzx 5, 4, var_char@tls
+; CHECK:    stbx {{[0-9]+}}, 4, var_char@tls
+entry:
+  %0 = load i8, i8* @var_char, align 1, !tbaa !4
+  %add = add i8 %0, %a
+  store i8 %add, i8* @var_char, align 1, !tbaa !4
+  ret i8 %add
+}
+
+define dso_local signext i16 @test_short_one() {
+; CHECK-LABEL: test_short_one:
+; CHECK:       # %bb.0: # %entry
+; CHECK:    addis 3, 2, var_short@got@tprel@ha
+; CHECK-NEXT:    ld 3, var_short@got@tprel@l(3)
+; CHECK-NEXT:    lhzx 3, 3, var_short@tls
+entry:
+  %0 = load i16, i16* @var_short, align 2, !tbaa !7
+  ret i16 %0
+}
+
+define dso_local void @test_short_two(i32 signext %a) {
+; CHECK-LABEL: test_short_two:
+; CHECK:       # %bb.0: # %entry
+; CHECK:    addis 4, 2, var_short@got@tprel@ha
+; CHECK-NEXT:    ld 4, var_short@got@tprel@l(4)
+; CHECK-NEXT:    sthx 3, 4, var_short@tls
+entry:
+  %conv = trunc i32 %a to i16
+  store i16 %conv, i16* @var_short, align 2, !tbaa !7
+  ret void
+}
+
+define dso_local signext i16 @test_short_three(i16 signext %a) {
+; CHECK-LABEL: test_short_three:
+; CHECK:       # %bb.0: # %entry
+; CHECK:    addis 4, 2, var_short@got@tprel@ha
+; CHECK-NEXT:    ld 4, var_short@got@tprel@l(4)
+; CHECK-NEXT:    lhzx 5, 4, var_short@tls
+; CHECK:    sthx {{[0-9]+}}, 4, var_short@tls
+entry:
+  %0 = load i16, i16* @var_short, align 2, !tbaa !7
+  %add = add i16 %0, %a
+  store i16 %add, i16* @var_short, align 2, !tbaa !7
+  ret i16 %add
+}
+
+define dso_local signext i32 @test_int_one() {
+; CHECK-LABEL: test_int_one:
+; CHECK:       # %bb.0: # %entry
+; CHECK:    addis 3, 2, var_int@got@tprel@ha
+; CHECK-NEXT:    ld 3, var_int@got@tprel@l(3)
+; CHECK-NEXT:    lwzx 3, 3, var_int@tls
+entry:
+  %0 = load i32, i32* @var_int, align 4, !tbaa !9
+  ret i32 %0
+}
+
+define dso_local void @test_int_two(i32 signext %a) {
+; CHECK-LABEL: test_int_two:
+; CHECK:       # %bb.0: # %entry
+; CHECK:    addis 4, 2, var_int@got@tprel@ha
+; CHECK-NEXT:    ld 4, var_int@got@tprel@l(4)
+; CHECK-NEXT:    stwx 3, 4, var_int@tls
+entry:
+  store i32 %a, i32* @var_int, align 4, !tbaa !9
+  ret void
+}
+
+define dso_local signext i32 @test_int_three(i32 signext %a) {
+; CHECK-LABEL: test_int_three:
+; CHECK:       # %bb.0: # %entry
+; CHECK:    addis 4, 2, var_int@got@tprel@ha
+; CHECK-NEXT:    ld 4, var_int@got@tprel@l(4)
+; CHECK-NEXT:    lwzx 5, 4, var_int@tls
+; CHECK:    stwx {{[0-9]+}}, 4, var_int@tls
+entry:
+  %0 = load i32, i32* @var_int, align 4, !tbaa !9
+  %add = add nsw i32 %0, %a
+  store i32 %add, i32* @var_int, align 4, !tbaa !9
+  ret i32 %add
+}
+
+define dso_local i64 @test_longlong_one() {
+; CHECK-LABEL: test_longlong_one:
+; CHECK:       # %bb.0: # %entry
+; CHECK:    addis 3, 2, var_long_long@got@tprel@ha
+; CHECK-NEXT:    ld 3, var_long_long@got@tprel@l(3)
+; CHECK-NEXT:    ldx 3, 3, var_long_long@tls
+entry:
+  %0 = load i64, i64* @var_long_long, align 8, !tbaa !11
+  ret i64 %0
+}
+
+define dso_local void @test_longlong_two(i32 signext %a) {
+; CHECK-LABEL: test_longlong_two:
+; CHECK:       # %bb.0: # %entry
+; CHECK:    addis 4, 2, var_long_long@got@tprel@ha
+; CHECK-NEXT:    ld 4, var_long_long@got@tprel@l(4)
+; CHECK-NEXT:    stdx 3, 4, var_long_long@tls
+entry:
+  %conv = sext i32 %a to i64
+  store i64 %conv, i64* @var_long_long, align 8, !tbaa !11
+  ret void
+}
+
+define dso_local i64 @test_longlong_three(i64 %a) {
+; CHECK-LABEL: test_longlong_three:
+; CHECK:       # %bb.0: # %entry
+; CHECK:    addis 4, 2, var_long_long@got@tprel@ha
+; CHECK-NEXT:    ld 4, var_long_long@got@tprel@l(4)
+; CHECK-NEXT:    ldx 5, 4, var_long_long@tls
+; CHECK:    stdx {{[0-9]+}}, 4, var_long_long@tls
+entry:
+  %0 = load i64, i64* @var_long_long, align 8, !tbaa !11
+  %add = add nsw i64 %0, %a
+  store i64 %add, i64* @var_long_long, align 8, !tbaa !11
+  ret i64 %add
+}
+
+!llvm.module.flags = !{!0, !1, !2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"PIC Level", i32 1}
+!2 = !{i32 7, !"PIE Level", i32 1}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!8, !8, i64 0}
+!8 = !{!"short", !5, i64 0}
+!9 = !{!10, !10, i64 0}
+!10 = !{!"int", !5, i64 0}
+!11 = !{!12, !12, i64 0}
+!12 = !{!"long long", !5, i64 0}




More information about the llvm-commits mailing list