[llvm] 4bd186c - [PowerPC] Exploit the rldicl + rldicl when and with mask

QingShan Zhang via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 16 22:27:07 PDT 2020


Author: QingShan Zhang
Date: 2020-04-17T05:24:00Z
New Revision: 4bd186c0ff76063de9de5507e1e512f7ab4372f1

URL: https://github.com/llvm/llvm-project/commit/4bd186c0ff76063de9de5507e1e512f7ab4372f1
DIFF: https://github.com/llvm/llvm-project/commit/4bd186c0ff76063de9de5507e1e512f7ab4372f1.diff

LOG: [PowerPC] Exploit the rldicl + rldicl when and with mask

When we AND a value with a constant mask such as 0xFFFFFFC00000, we currently
use several instructions to materialize this 48-bit constant, followed by a
final "and". However, we can do the same with two rotate instructions instead.

       MB          ME               MB+63-ME
+----------------------+     +----------------------+
|0000001111111111111000| ->  |0000000001111111111111|
+----------------------+     +----------------------+
 0                    63      0                    63
First rotate left by ME + 1 bits, then mask the result with (MB + 63 - ME, 63),
and finally rotate back. Note that the mask begin must be taken modulo 64 to
handle the wrapping case.

Reviewed by: ChenZheng, Nemanjai

Differential Revision: https://reviews.llvm.org/D71831

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
    llvm/test/CodeGen/PowerPC/2016-04-17-combine.ll
    llvm/test/CodeGen/PowerPC/Frames-dyn-alloca.ll
    llvm/test/CodeGen/PowerPC/and-mask.ll
    llvm/test/CodeGen/PowerPC/cmpb.ll
    llvm/test/CodeGen/PowerPC/setcc-logic.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index a3e3b128355b..dd6d9249ab4d 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -351,6 +351,7 @@ namespace {
     bool tryAsSingleRLWINM(SDNode *N);
     bool tryAsSingleRLWINM8(SDNode *N);
     bool tryAsSingleRLWIMI(SDNode *N);
+    bool tryAsPairOfRLDICL(SDNode *N);
 
     void PeepholePPC64();
     void PeepholePPC64ZExt();
@@ -4439,6 +4440,60 @@ bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) {
   return false;
 }
 
+bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
+  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
+  uint64_t Imm64;
+  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
+    return false;
+
+  // Do nothing if it is 16-bit imm as the pattern in the .td file handle
+  // it well with "andi.".
+  if (isUInt<16>(Imm64))
+    return false;
+
+  SDLoc Loc(N);
+  SDValue Val = N->getOperand(0);
+
+  // Optimized with two rldicl's as follows:
+  // Add missing bits on left to the mask and check that the mask is a
+  // wrapped run of ones, i.e.
+  // Change pattern |0001111100000011111111|
+  //             to |1111111100000011111111|.
+  unsigned NumOfLeadingZeros = countLeadingZeros(Imm64);
+  if (NumOfLeadingZeros != 0)
+    Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros);
+
+  unsigned MB, ME;
+  if (!isRunOfOnes64(Imm64, MB, ME))
+    return false;
+
+  //         ME     MB                   MB-ME+63
+  // +----------------------+     +----------------------+
+  // |1111111100000011111111| ->  |0000001111111111111111|
+  // +----------------------+     +----------------------+
+  //  0                    63      0                    63
+  // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between.
+  unsigned OnesOnLeft = ME + 1;
+  unsigned ZerosInBetween = (MB - ME + 63) & 63;
+  // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear
+  // on the left the bits that are already zeros in the mask.
+  Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val,
+                                       getI64Imm(OnesOnLeft, Loc),
+                                       getI64Imm(ZerosInBetween, Loc)),
+                0);
+  //        MB-ME+63                      ME     MB
+  // +----------------------+     +----------------------+
+  // |0000001111111111111111| ->  |0001111100000011111111|
+  // +----------------------+     +----------------------+
+  //  0                    63      0                    63
+  // Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the
+  // left the number of ones we previously added.
+  SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc),
+                   getI64Imm(NumOfLeadingZeros, Loc)};
+  CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
+  return true;
+}
+
 bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
   assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
   unsigned Imm;
@@ -4766,7 +4821,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
   case ISD::AND:
     // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
     if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) ||
-        tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N))
+        tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N))
       return;
 
     // Other cases are autogenerated.

diff  --git a/llvm/test/CodeGen/PowerPC/2016-04-17-combine.ll b/llvm/test/CodeGen/PowerPC/2016-04-17-combine.ll
index 271dcd1402a0..944bf0f29c77 100644
--- a/llvm/test/CodeGen/PowerPC/2016-04-17-combine.ll
+++ b/llvm/test/CodeGen/PowerPC/2016-04-17-combine.ll
@@ -7,8 +7,8 @@ target triple = "powerpc64le-unknown-linux-gnu"
 %typ = type { i32, i32 }
 
 ; On release builds, it doesn't crash, spewing nonsense instead.
-; To make sure it works, check that and is still alive.
-; CHECK: and
+; To make sure it works, check that rldicl is still alive.
+; CHECK: rldicl
 ; Also, in release, it emits a COPY from a 32-bit register to
 ; a 64-bit register, which happens to be emitted as cror [!]
 ; by the confused CodeGen.  Just to be sure, check there isn't one.

diff  --git a/llvm/test/CodeGen/PowerPC/Frames-dyn-alloca.ll b/llvm/test/CodeGen/PowerPC/Frames-dyn-alloca.ll
index a1773157d42b..8bfd305758dc 100644
--- a/llvm/test/CodeGen/PowerPC/Frames-dyn-alloca.ll
+++ b/llvm/test/CodeGen/PowerPC/Frames-dyn-alloca.ll
@@ -43,15 +43,13 @@ define i32* @f1(i32 %n) nounwind {
 ; PPC64-LINUX-LABEL: f1
 ; PPC64-LINUX:      std 31, -8(1)
 ; PPC64-LINUX-NEXT: stdu 1, -64(1)
-; PPC64-LINUX-NEXT: lis 4, 32767
 ; PPC64-LINUX-NEXT: rldic 3, 3, 2, 30
-; PPC64-LINUX-NEXT: ori 4, 4, 65535
-; PPC64-LINUX-NEXT: addi 3, 3, 15
-; PPC64-LINUX-NEXT: sldi 4, 4, 4
 ; PPC64-LINUX-NEXT: mr 31, 1
-; PPC64-LINUX-NEXT: and 3, 3, 4
-; PPC64-LINUX-NEXT: neg 3, 3
+; PPC64-LINUX-NEXT: addi 3, 3, 15
+; PPC64-LINUX-NEXT: rldicl 3, 3, 60, 4
 ; PPC64-LINUX-NEXT: addi 4, 31, 64
+; PPC64-LINUX-NEXT: rldicl 3, 3, 4, 29
+; PPC64-LINUX-NEXT: neg 3, 3
 ; PPC64-LINUX-NEXT: stdux 4, 1, 3
 
 ; The linkage area is always put on the top of the stack.
@@ -82,14 +80,12 @@ define i32* @f1(i32 %n) nounwind {
 ; PPC64-AIX-LABEL: f1
 ; PPC64-AIX:      std 31, -8(1)
 ; PPC64-AIX-NEXT: stdu 1, -64(1)
-; PPC64-AIX-NEXT: lis 4, 32767
 ; PPC64-AIX-NEXT: rldic 3, 3, 2, 30
-; PPC64-AIX-NEXT: ori 4, 4, 65535
-; PPC64-AIX-NEXT: addi 3, 3, 15
-; PPC64-AIX-NEXT: sldi 4, 4, 4
 ; PPC64-AIX-NEXT: mr 31, 1
-; PPC64-AIX-NEXT: and 3, 3, 4
+; PPC64-AIX-NEXT: addi 3, 3, 15
 ; PPC64-AIX-NEXT: addi 4, 31, 64
+; PPC64-AIX-NEXT: rldicl 3, 3, 60, 4 
+; PPC64-AIX-NEXT: rldicl 3, 3, 4, 29
 ; PPC64-AIX-NEXT: neg 3, 3
 ; PPC64-AIX-NEXT: stdux 4, 1, 3
 

diff  --git a/llvm/test/CodeGen/PowerPC/and-mask.ll b/llvm/test/CodeGen/PowerPC/and-mask.ll
index 89f568196327..489880b29e67 100644
--- a/llvm/test/CodeGen/PowerPC/and-mask.ll
+++ b/llvm/test/CodeGen/PowerPC/and-mask.ll
@@ -15,8 +15,8 @@ define i32 @test1(i32 %a) {
 define i64 @test2(i64 %a) {
 ; CHECK-LABEL: test2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li 4, -7
-; CHECK-NEXT:    and 3, 3, 4
+; CHECK-NEXT:    rldicl 3, 3, 61, 2
+; CHECK-NEXT:    rotldi 3, 3, 3
 ; CHECK-NEXT:    blr
   %and = and i64 %a, -7
   ret i64 %and
@@ -26,10 +26,8 @@ define i64 @test2(i64 %a) {
 define i64 @test3(i64 %a) {
 ; CHECK-LABEL: test3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lis 4, 1023
-; CHECK-NEXT:    ori 4, 4, 65535
-; CHECK-NEXT:    sldi 4, 4, 22
-; CHECK-NEXT:    and 3, 3, 4
+; CHECK-NEXT:    rldicl 3, 3, 42, 22
+; CHECK-NEXT:    rldicl 3, 3, 22, 16
 ; CHECK-NEXT:    blr
   %and = and i64 %a, 281474972516352
   ret i64 %and
@@ -39,10 +37,8 @@ define i64 @test3(i64 %a) {
 define i64 @test4(i64 %a) {
 ; CHECK-LABEL: test4:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li 4, 12
-; CHECK-NEXT:    sldi 4, 4, 32
-; CHECK-NEXT:    ori 4, 4, 255
-; CHECK-NEXT:    and 3, 3, 4
+; CHECK-NEXT:    rldicl 3, 3, 30, 26
+; CHECK-NEXT:    rldicl 3, 3, 34, 28
 ; CHECK-NEXT:    blr
   %and = and i64 %a, 51539607807
   ret i64 %and
@@ -52,10 +48,8 @@ define i64 @test4(i64 %a) {
 define i64 @test5(i64 %a) {
 ; CHECK-LABEL: test5:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li 4, 0
-; CHECK-NEXT:    oris 4, 4, 65472
-; CHECK-NEXT:    ori 4, 4, 65535
-; CHECK-NEXT:    and 3, 3, 4
+; CHECK-NEXT:    rldicl 3, 3, 42, 6
+; CHECK-NEXT:    rldicl 3, 3, 22, 32
 ; CHECK-NEXT:    blr
   %and = and i64 %a, 4290838527
   ret i64 %and
@@ -77,11 +71,8 @@ define i64 @test6(i64 %a) {
 define i64 @test7(i64 %a) {
 ; CHECK-LABEL: test7:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li 4, -32767
-; CHECK-NEXT:    sldi 4, 4, 32
-; CHECK-NEXT:    oris 4, 4, 65024
-; CHECK-NEXT:    rldicr 4, 4, 17, 63
-; CHECK-NEXT:    and 3, 3, 4
+; CHECK-NEXT:    rldicl 3, 3, 22, 25
+; CHECK-NEXT:    rldicl 3, 3, 42, 14
 ; CHECK-NEXT:    blr
   %and = and i64 %a, 1121501860462591
   ret i64 %and

diff  --git a/llvm/test/CodeGen/PowerPC/cmpb.ll b/llvm/test/CodeGen/PowerPC/cmpb.ll
index dc70af317a29..e7f5579e0a45 100644
--- a/llvm/test/CodeGen/PowerPC/cmpb.ll
+++ b/llvm/test/CodeGen/PowerPC/cmpb.ll
@@ -123,11 +123,9 @@ entry:
   ret i32 %or55
 
 ; CHECK-LABEL: @test32p1
-; CHECK: li [[REG1:[0-9]+]], 0
-; CHECK: cmpb [[REG4:[0-9]+]], 4, 3
-; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65287
-; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535
-; CHECK: and 3, [[REG4]], [[REG3]]
+; CHECK: cmpb [[REG1:[0-9]+]], 4, 3
+; CHECK: rldicl [[REG2:[0-9]+]], [[REG1]], 40, 5 
+; CHECK: rldicl 3, [[REG2]], 24, 32  
 ; CHECK: blr
 }
 
@@ -147,11 +145,9 @@ entry:
   ret i32 %or37
 
 ; CHECK-LABEL: @test32p2
-; CHECK: li [[REG1:[0-9]+]], 0
-; CHECK: cmpb [[REG4:[0-9]+]], 4, 3
-; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65280
-; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535
-; CHECK: and 3, [[REG4]], [[REG3]]
+; CHECK: cmpb [[REG1:[0-9]+]], 4, 3
+; CHECK: rldicl [[REG2:[0-9]+]], [[REG1]], 40, 8 
+; CHECK: rldicl 3, [[REG2]], 24, 32 
 ; CHECK: blr
 }
 

diff  --git a/llvm/test/CodeGen/PowerPC/setcc-logic.ll b/llvm/test/CodeGen/PowerPC/setcc-logic.ll
index 2e23611bea0a..3bed3ba9ce8f 100644
--- a/llvm/test/CodeGen/PowerPC/setcc-logic.ll
+++ b/llvm/test/CodeGen/PowerPC/setcc-logic.ll
@@ -481,9 +481,9 @@ define <4 x i1> @and_eq_vec(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32>
 define i1 @or_icmps_const_1bit_diff(i64 %x) {
 ; CHECK-LABEL: or_icmps_const_1bit_diff:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li 4, -5
 ; CHECK-NEXT:    addi 3, 3, -13
-; CHECK-NEXT:    and 3, 3, 4
+; CHECK-NEXT:    rldicl 3, 3, 61, 1
+; CHECK-NEXT:    rotldi 3, 3, 3
 ; CHECK-NEXT:    cntlzd 3, 3
 ; CHECK-NEXT:    rldicl 3, 3, 58, 63
 ; CHECK-NEXT:    blr


        


More information about the llvm-commits mailing list