[llvm] e021de0 - [PowerPC] Exploit paddi instruction on Power 10 for constant materialization

Stefan Pintilie via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 11 06:38:11 PST 2021


Author: Stefan Pintilie
Date: 2021-03-11T08:37:49-06:00
New Revision: e021de0aab221beaa03f73821ab41850e9c1bdb8

URL: https://github.com/llvm/llvm-project/commit/e021de0aab221beaa03f73821ab41850e9c1bdb8
DIFF: https://github.com/llvm/llvm-project/commit/e021de0aab221beaa03f73821ab41850e9c1bdb8.diff

LOG: [PowerPC] Exploit paddi instruction on Power 10 for constant materialization

Starting with Power 10 the instruction paddi is available to use.
The instruction allows for immediates that are 34 bits.

This patch adds exploitation of the paddi instruction to allow us
to materialize constants.

Reviewed By: lei, amyk

Differential Revision: https://reviews.llvm.org/D93300

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
    llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll
    llvm/test/CodeGen/PowerPC/p10-constants.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index d42cc364e974..3d080d3b9df7 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -1040,6 +1040,21 @@ static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
 // were selected.
 static SDNode *selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl,
                                         uint64_t Imm, unsigned &InstCnt) {
+  unsigned TZ = countTrailingZeros<uint64_t>(Imm);
+  unsigned LZ = countLeadingZeros<uint64_t>(Imm);
+  unsigned TO = countTrailingOnes<uint64_t>(Imm);
+  unsigned FO = countLeadingOnes<uint64_t>(Imm << LZ);
+  unsigned Hi32 = Hi_32(Imm);
+  unsigned Lo32 = Lo_32(Imm);
+
+  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
+    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
+  };
+
+  auto getI64Imm = [CurDAG, dl](uint64_t Imm) {
+    return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
+  };
+
   // Following patterns use 1 instruction to materialize Imm.
   InstCnt = 1;
 
@@ -1048,8 +1063,98 @@ static SDNode *selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl,
   if (isInt<34>(Imm))
     return cast<ConstantSDNode>(CurDAG->getConstant(Imm, dl, MVT::i64));
 
-  InstCnt = 0;
-  return nullptr;
+  // Require at least two instructions.
+  InstCnt = 2;
+  SDNode *Result = nullptr;
+  // Patterns : {zeros}{ones}{33-bit value}{zeros}
+  //            {zeros}{33-bit value}{zeros}
+  //            {zeros}{ones}{33-bit value}
+  //            {ones}{33-bit value}{zeros}
+  // We can take advantage of PLI's sign-extension semantics to generate leading
+  // ones, and then use RLDIC to mask off the ones on both sides after rotation.
+  if ((LZ + FO + TZ) > 30) {
+    APInt SignedInt34 = APInt(34, (Imm >> TZ) & 0x3ffffffff);
+    APInt Extended = SignedInt34.sext(64);
+    Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
+                                    getI64Imm(*Extended.getRawData()));
+    return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
+                                  getI32Imm(TZ), getI32Imm(LZ));
+  }
+  // Pattern : {zeros}{33-bit value}{ones}
+  // Shift right the Imm by (30 - LZ) bits to construct a negative 34 bit value,
+  // therefore we can take advantage of PLI's sign-extension semantics, and then
+  // mask them off after rotation.
+  //
+  // +--LZ--||-33-bit-||--TO--+     +-------------|--34-bit--+
+  // |00000001bbbbbbbbb1111111| ->  |00000000000001bbbbbbbbb1|
+  // +------------------------+     +------------------------+
+  // 63                      0      63                      0
+  //
+  // +----sext-----|--34-bit--+     +clear-|-----------------+
+  // |11111111111111bbbbbbbbb1| ->  |00000001bbbbbbbbb1111111|
+  // +------------------------+     +------------------------+
+  // 63                      0      63                      0
+  if ((LZ + TO) > 30) {
+    APInt SignedInt34 = APInt(34, (Imm >> (30 - LZ)) & 0x3ffffffff);
+    APInt Extended = SignedInt34.sext(64);
+    Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
+                                    getI64Imm(*Extended.getRawData()));
+    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
+                                  getI32Imm(30 - LZ), getI32Imm(LZ));
+  }
+  // Patterns : {zeros}{ones}{33-bit value}{ones}
+  //            {ones}{33-bit value}{ones}
+  // Similar to LI we can take advantage of PLI's sign-extension semantics to
+  // generate leading ones, and then use RLDICL to mask off the ones on the
+  // left side (if required) after rotation.
+  if ((LZ + FO + TO) > 30) {
+    APInt SignedInt34 = APInt(34, (Imm >> TO) & 0x3ffffffff);
+    APInt Extended = SignedInt34.sext(64);
+    Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
+                                    getI64Imm(*Extended.getRawData()));
+    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
+                                  getI32Imm(TO), getI32Imm(LZ));
+  }
+  // Patterns : {******}{31 zeros}{******}
+  //          : {******}{31 ones}{******}
+  // If Imm contains 31 consecutive zeros/ones then the remaining bit count
+  // is 33. Rotate Imm right to construct an int<34> value; we can use PLI for
+  // that value and then use RLDICL without a mask to rotate it back.
+  //
+  // +------|--ones--|------+     +---ones--||---33 bit--+
+  // |bbbbbb1111111111aaaaaa| ->  |1111111111aaaaaabbbbbb|
+  // +----------------------+     +----------------------+
+  // 63                    0      63                    0
+  for (unsigned Shift = 0; Shift < 63; ++Shift) {
+    uint64_t RotImm = (Imm >> Shift) | (Imm << (64 - Shift));
+    if (isInt<34>(RotImm)) {
+      Result =
+          CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(RotImm));
+      return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
+                                    SDValue(Result, 0), getI32Imm(Shift),
+                                    getI32Imm(0));
+    }
+  }
+
+  // Patterns : High word == Low word
+  // This is basically a splat of a 32 bit immediate.
+  if (Hi32 == Lo32) {
+    Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
+    SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
+                     getI32Imm(0)};
+    return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
+  }
+
+  InstCnt = 3;
+  // Catch-all
+  // This pattern can form any 64 bit immediate in 3 instructions.
+  SDNode *ResultHi =
+      CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
+  SDNode *ResultLo =
+      CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Lo32));
+  SDValue Ops[] = {SDValue(ResultLo, 0), SDValue(ResultHi, 0), getI32Imm(32),
+                   getI32Imm(0)};
+  return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
 }
 
 static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm,

diff  --git a/llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll b/llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll
index 5bdbaf25fb4b..0328882c1991 100644
--- a/llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll
+++ b/llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll
@@ -22,11 +22,9 @@ define internal void @loadFP(double* %d) #0 {
 ; CHECK-NEXT:    paddi r3, 0, .L.str at PCREL, 1
 ; CHECK-NEXT:    bl printf at notoc
 ; CHECK-NEXT:    ld r4, 104(r1)
-; CHECK-NEXT:    lis r3, 8201
-; CHECK-NEXT:    ori r3, r3, 64225
-; CHECK-NEXT:    rldic r3, r3, 33, 1
-; CHECK-NEXT:    oris r3, r3, 36700
-; CHECK-NEXT:    ori r3, r3, 10486
+; CHECK-NEXT:    pli r5, 1075049922
+; CHECK-NEXT:    pli r3, 2405181686
+; CHECK-NEXT:    rldimi r3, r5, 32, 0
 ; CHECK-NEXT:    std r3, 0(r4)
 ; CHECK-NEXT:    addi r1, r1, 112
 ; CHECK-NEXT:    ld r0, 16(r1)

diff  --git a/llvm/test/CodeGen/PowerPC/p10-constants.ll b/llvm/test/CodeGen/PowerPC/p10-constants.ll
index 3a266ff2d044..dd0619dad17f 100644
--- a/llvm/test/CodeGen/PowerPC/p10-constants.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-constants.ll
@@ -196,8 +196,7 @@ entry:
 
 define  i64 @t_34Bits() {
 ; CHECK-LABEL: t_34Bits:
-; CHECK:	lis r3, 25158
-; CHECK-NEXT:	ori r3, r3, 35535
+; CHECK:	pli r3, 1648790223
 ; CHECK-NEXT:	rldic r3, r3, 3, 30
 ; CHECK-NEXT:	blr
 ; CHECK32-LABEL: t_34Bits:
@@ -211,8 +210,7 @@ entry:
 
 define  i64 @t_35Bits() {
 ; CHECK-LABEL: t_35Bits:
-; CHECK:	lis r3, -442
-; CHECK-NEXT:	ori r3, r3, 35535
+; CHECK:	pli r3, 4266035919
 ; CHECK-NEXT:	rldic r3, r3, 3, 29
 ; CHECK-NEXT:	blr
 ; CHECK32-LABEL: t_35Bits:
@@ -224,6 +222,87 @@ entry:
   ret i64 34128287352
 }
 
+; Value rotated right by Shift can be expressed in 34 bits
+define  i64 @t_Shift() {
+; CHECK-LABEL: t_Shift:
+; CHECK:         pli r3, 8522759166
+; CHECK-NEXT:    rotldi r3, r3, 48
+; CHECK-NEXT:    blr
+
+entry:
+  ; 0xFBFE00000001FBFE
+  ret i64 18157950747604548606
+}
+
+; Leading Zeros + Following Ones + Trailing Zeros > 30
+define  i64 @t_LZFOTZ() {
+; CHECK-LABEL: t_LZFOTZ:
+; CHECK:         pli r3, -349233
+; CHECK-NEXT:    rldic r3, r3, 4, 12
+; CHECK-NEXT:    blr
+
+entry:
+  ; 0x000FFFFFFFAABCF0
+  ret i64 4503599621782768
+}
+
+; Leading Zeros + Trailing Ones > 30
+define  i64 @t_LZTO() {
+; CHECK-LABEL: t_LZTO:
+; CHECK:         pli r3, -2684406441
+; CHECK-NEXT:    rldicl r3, r3, 11, 19
+; CHECK-NEXT:    blr
+entry:
+  ; 0x00001AFFF9AABFFF
+  ret i64 29686707699711
+}
+
+; Leading Zeros + Trailing Ones + Following Ones > 30
+define  i64 @t_LZTOFO() {
+; CHECK-LABEL: t_LZTOFO:
+; CHECK:         pli r3, -5720033968
+; CHECK-NEXT:    rldicl r3, r3, 11, 12
+; CHECK-NEXT:    blr
+entry:
+  ; 0x000FF55879AA87FF
+  ret i64 4491884997806079
+}
+
+; Requires full expansion
+define  i64 @t_Full64Bits1() {
+; CHECK-LABEL: t_Full64Bits1:
+; CHECK:         pli r4, 2146500607
+; CHECK-NEXT:    pli r3, 4043305214
+; CHECK-NEXT:    rldimi r3, r4, 32, 0
+; CHECK-NEXT:    blr
+entry:
+  ; 0x7FF0FFFFF0FFF0FE
+  ret i64 9219149911952453886
+}
+
+; Requires full expansion
+define  i64 @t_Ful64Bits2() {
+; CHECK-LABEL: t_Ful64Bits2:
+; CHECK:         pli r4, 4042326015
+; CHECK-NEXT:    pli r3, 4043305214
+; CHECK-NEXT:    rldimi r3, r4, 32, 0
+; CHECK-NEXT:    blr
+entry:
+  ; 0xF0F0FFFFF0FFF0FE
+  ret i64 17361658038238310654
+}
+
+; A splat of 32 bits: 32 Bits Low == 32 Bits High
+define  i64 @t_Splat32Bits() {
+; CHECK-LABEL: t_Splat32Bits:
+; CHECK:         pli r3, 262916796
+; CHECK-NEXT:    rldimi r3, r3, 32, 0
+; CHECK-NEXT:    blr
+entry:
+  ; 0x0FABCABC0FABCABC
+  ret i64 1129219040652020412
+}
+
 ; The load immediates resulting from phi-nodes are needed to test whether
 ; li/lis is preferred to pli by the instruction selector.
 define dso_local void @t_phiNode() {


        


More information about the llvm-commits mailing list