[llvm] 492c1f3 - [PowerPC] Merge rotate and clear into single instruction.

Stefan Pintilie via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 7 06:25:47 PDT 2023


Author: Stefan Pintilie
Date: 2023-09-07T09:25:41-04:00
New Revision: 492c1f3d7cf672a1ae4df85350daf05f82f558c6

URL: https://github.com/llvm/llvm-project/commit/492c1f3d7cf672a1ae4df85350daf05f82f558c6
DIFF: https://github.com/llvm/llvm-project/commit/492c1f3d7cf672a1ae4df85350daf05f82f558c6.diff

LOG: [PowerPC] Merge rotate and clear into single instruction.

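This patch catches a codegen opportunity where a 64-bit rotate by a
variable (non-constant) amount followed by an AND with a low-bit mask can
be merged into a single RLDCL instruction, instead of emitting a rotld
followed by a clrldi. Rotates by a constant amount are left alone because
they are better matched by RLDICL.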

Reviewed By: lei, amyk

Differential Revision: https://reviews.llvm.org/D158328

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
    llvm/test/CodeGen/PowerPC/ppc-rotate-clear.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 98936fe4011d40..95a66442d47cf9 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -424,6 +424,7 @@ namespace {
     bool tryFoldSWTestBRCC(SDNode *N);
     bool trySelectLoopCountIntrinsic(SDNode *N);
     bool tryAsSingleRLDICL(SDNode *N);
+    bool tryAsSingleRLDCL(SDNode *N);
     bool tryAsSingleRLDICR(SDNode *N);
     bool tryAsSingleRLWINM(SDNode *N);
     bool tryAsSingleRLWINM8(SDNode *N);
@@ -5084,6 +5085,35 @@ bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
   return false;
 }
 
+bool PPCDAGToDAGISel::tryAsSingleRLDCL(SDNode *N) {
+  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
+
+  uint64_t Imm64;
+  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
+    return false;
+
+  SDValue Val = N->getOperand(0);
+
+  if (Val.getOpcode() != ISD::ROTL)
+    return false;
+
+  // Looking to try to avoid a situation like this one:
+  //   %2 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 23)
+  //   %and1 = and i64 %2, 9223372036854775807
+  // In this function we are looking to try to match RLDCL. However, the above
+  // DAG would better match RLDICL instead which is not what we are looking
+  // for here.
+  SDValue RotateAmt = Val.getOperand(1);
+  if (RotateAmt.getOpcode() == ISD::Constant)
+    return false;
+
+  unsigned MB = 64 - llvm::countr_one(Imm64);
+  SDLoc dl(N);
+  SDValue Ops[] = {Val.getOperand(0), RotateAmt, getI32Imm(MB, dl)};
+  CurDAG->SelectNodeTo(N, PPC::RLDCL, MVT::i64, Ops);
+  return true;
+}
+
 bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
   assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
   uint64_t Imm64;
@@ -5604,8 +5634,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
 
   case ISD::AND:
     // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
-    if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) ||
-        tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N))
+    if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDCL(N) ||
+        tryAsSingleRLDICL(N) || tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) ||
+        tryAsPairOfRLDICL(N))
       return;
 
     // Other cases are autogenerated.

diff  --git a/llvm/test/CodeGen/PowerPC/ppc-rotate-clear.ll b/llvm/test/CodeGen/PowerPC/ppc-rotate-clear.ll
index fa7744e2048111..17f04b9587d7c3 100644
--- a/llvm/test/CodeGen/PowerPC/ppc-rotate-clear.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc-rotate-clear.ll
@@ -75,22 +75,19 @@ define dso_local i64 @rotatemask64(i64 noundef %word) local_unnamed_addr #0 {
 ; AIX64-LABEL: rotatemask64:
 ; AIX64:       # %bb.0: # %entry
 ; AIX64-NEXT:    cntlzd r4, r3
-; AIX64-NEXT:    rotld r3, r3, r4
-; AIX64-NEXT:    clrldi r3, r3, 1
+; AIX64-NEXT:    rldcl r3, r3, r4, 1
 ; AIX64-NEXT:    blr
 ;
 ; LINUX64BE-LABEL: rotatemask64:
 ; LINUX64BE:       # %bb.0: # %entry
 ; LINUX64BE-NEXT:    cntlzd r4, r3
-; LINUX64BE-NEXT:    rotld r3, r3, r4
-; LINUX64BE-NEXT:    clrldi r3, r3, 1
+; LINUX64BE-NEXT:    rldcl r3, r3, r4, 1
 ; LINUX64BE-NEXT:    blr
 ;
 ; LINUX64LE-LABEL: rotatemask64:
 ; LINUX64LE:       # %bb.0: # %entry
 ; LINUX64LE-NEXT:    cntlzd r4, r3
-; LINUX64LE-NEXT:    rotld r3, r3, r4
-; LINUX64LE-NEXT:    clrldi r3, r3, 1
+; LINUX64LE-NEXT:    rldcl r3, r3, r4, 1
 ; LINUX64LE-NEXT:    blr
 entry:
   %0 = tail call i64 @llvm.ctlz.i64(i64 %word, i1 false)
@@ -126,22 +123,19 @@ define dso_local i64 @rotatemask64_2(i64 noundef %word) local_unnamed_addr #0 {
 ; AIX64-LABEL: rotatemask64_2:
 ; AIX64:       # %bb.0: # %entry
 ; AIX64-NEXT:    cntlzd r4, r3
-; AIX64-NEXT:    rotld r3, r3, r4
-; AIX64-NEXT:    clrldi r3, r3, 1
+; AIX64-NEXT:    rldcl r3, r3, r4, 1
 ; AIX64-NEXT:    blr
 ;
 ; LINUX64BE-LABEL: rotatemask64_2:
 ; LINUX64BE:       # %bb.0: # %entry
 ; LINUX64BE-NEXT:    cntlzd r4, r3
-; LINUX64BE-NEXT:    rotld r3, r3, r4
-; LINUX64BE-NEXT:    clrldi r3, r3, 1
+; LINUX64BE-NEXT:    rldcl r3, r3, r4, 1
 ; LINUX64BE-NEXT:    blr
 ;
 ; LINUX64LE-LABEL: rotatemask64_2:
 ; LINUX64LE:       # %bb.0: # %entry
 ; LINUX64LE-NEXT:    cntlzd r4, r3
-; LINUX64LE-NEXT:    rotld r3, r3, r4
-; LINUX64LE-NEXT:    clrldi r3, r3, 1
+; LINUX64LE-NEXT:    rldcl r3, r3, r4, 1
 ; LINUX64LE-NEXT:    blr
 entry:
   %0 = tail call i64 @llvm.ctlz.i64(i64 %word, i1 false)
@@ -222,20 +216,17 @@ define dso_local i64 @rotatemask64_nocount(i64 noundef %word, i64 noundef %clz)
 ;
 ; AIX64-LABEL: rotatemask64_nocount:
 ; AIX64:       # %bb.0: # %entry
-; AIX64-NEXT:    rotld r3, r3, r4
-; AIX64-NEXT:    clrldi r3, r3, 8
+; AIX64-NEXT:    rldcl r3, r3, r4, 8
 ; AIX64-NEXT:    blr
 ;
 ; LINUX64BE-LABEL: rotatemask64_nocount:
 ; LINUX64BE:       # %bb.0: # %entry
-; LINUX64BE-NEXT:    rotld r3, r3, r4
-; LINUX64BE-NEXT:    clrldi r3, r3, 8
+; LINUX64BE-NEXT:    rldcl r3, r3, r4, 8
 ; LINUX64BE-NEXT:    blr
 ;
 ; LINUX64LE-LABEL: rotatemask64_nocount:
 ; LINUX64LE:       # %bb.0: # %entry
-; LINUX64LE-NEXT:    rotld r3, r3, r4
-; LINUX64LE-NEXT:    clrldi r3, r3, 8
+; LINUX64LE-NEXT:    rldcl r3, r3, r4, 8
 ; LINUX64LE-NEXT:    blr
 entry:
   %0 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 %clz)
@@ -262,20 +253,17 @@ define dso_local i64 @builtincheck(i64 noundef %word, i64 noundef %shift) local_
 ;
 ; AIX64-LABEL: builtincheck:
 ; AIX64:       # %bb.0: # %entry
-; AIX64-NEXT:    rotld r3, r3, r4
-; AIX64-NEXT:    clrldi r3, r3, 1
+; AIX64-NEXT:    rldcl r3, r3, r4, 1
 ; AIX64-NEXT:    blr
 ;
 ; LINUX64BE-LABEL: builtincheck:
 ; LINUX64BE:       # %bb.0: # %entry
-; LINUX64BE-NEXT:    rotld r3, r3, r4
-; LINUX64BE-NEXT:    clrldi r3, r3, 1
+; LINUX64BE-NEXT:    rldcl r3, r3, r4, 1
 ; LINUX64BE-NEXT:    blr
 ;
 ; LINUX64LE-LABEL: builtincheck:
 ; LINUX64LE:       # %bb.0: # %entry
-; LINUX64LE-NEXT:    rotld r3, r3, r4
-; LINUX64LE-NEXT:    clrldi r3, r3, 1
+; LINUX64LE-NEXT:    rldcl r3, r3, r4, 1
 ; LINUX64LE-NEXT:    blr
 entry:
   %0 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 %shift)
@@ -352,9 +340,9 @@ define dso_local i64 @twomasks(i64 noundef %word) local_unnamed_addr #0 {
 ; AIX64-NEXT:    stdu r1, -112(r1)
 ; AIX64-NEXT:    cntlzd r4, r3
 ; AIX64-NEXT:    std r0, 128(r1)
-; AIX64-NEXT:    rotld r4, r3, r4
-; AIX64-NEXT:    clrldi r3, r4, 1
-; AIX64-NEXT:    clrldi r4, r4, 16
+; AIX64-NEXT:    rldcl r5, r3, r4, 1
+; AIX64-NEXT:    rldcl r4, r3, r4, 16
+; AIX64-NEXT:    mr r3, r5
 ; AIX64-NEXT:    bl .callee[PR]
 ; AIX64-NEXT:    nop
 ; AIX64-NEXT:    addi r1, r1, 112
@@ -368,9 +356,9 @@ define dso_local i64 @twomasks(i64 noundef %word) local_unnamed_addr #0 {
 ; LINUX64BE-NEXT:    stdu r1, -112(r1)
 ; LINUX64BE-NEXT:    cntlzd r4, r3
 ; LINUX64BE-NEXT:    std r0, 128(r1)
-; LINUX64BE-NEXT:    rotld r4, r3, r4
-; LINUX64BE-NEXT:    clrldi r3, r4, 1
-; LINUX64BE-NEXT:    clrldi r4, r4, 16
+; LINUX64BE-NEXT:    rldcl r5, r3, r4, 1
+; LINUX64BE-NEXT:    rldcl r4, r3, r4, 16
+; LINUX64BE-NEXT:    mr r3, r5
 ; LINUX64BE-NEXT:    bl callee
 ; LINUX64BE-NEXT:    nop
 ; LINUX64BE-NEXT:    addi r1, r1, 112
@@ -384,9 +372,9 @@ define dso_local i64 @twomasks(i64 noundef %word) local_unnamed_addr #0 {
 ; LINUX64LE-NEXT:    stdu r1, -32(r1)
 ; LINUX64LE-NEXT:    cntlzd r4, r3
 ; LINUX64LE-NEXT:    std r0, 48(r1)
-; LINUX64LE-NEXT:    rotld r4, r3, r4
-; LINUX64LE-NEXT:    clrldi r3, r4, 1
-; LINUX64LE-NEXT:    clrldi r4, r4, 16
+; LINUX64LE-NEXT:    rldcl r5, r3, r4, 1
+; LINUX64LE-NEXT:    rldcl r4, r3, r4, 16
+; LINUX64LE-NEXT:    mr r3, r5
 ; LINUX64LE-NEXT:    bl callee
 ; LINUX64LE-NEXT:    nop
 ; LINUX64LE-NEXT:    addi r1, r1, 32
@@ -445,8 +433,7 @@ define dso_local i64 @tworotates(i64 noundef %word) local_unnamed_addr #0 {
 ; AIX64-NEXT:    stdu r1, -112(r1)
 ; AIX64-NEXT:    cntlzd r4, r3
 ; AIX64-NEXT:    std r0, 128(r1)
-; AIX64-NEXT:    rotld r4, r3, r4
-; AIX64-NEXT:    clrldi r5, r4, 1
+; AIX64-NEXT:    rldcl r5, r3, r4, 1
 ; AIX64-NEXT:    rldicl r4, r3, 23, 1
 ; AIX64-NEXT:    mr r3, r5
 ; AIX64-NEXT:    bl .callee[PR]
@@ -462,8 +449,7 @@ define dso_local i64 @tworotates(i64 noundef %word) local_unnamed_addr #0 {
 ; LINUX64BE-NEXT:    stdu r1, -112(r1)
 ; LINUX64BE-NEXT:    cntlzd r4, r3
 ; LINUX64BE-NEXT:    std r0, 128(r1)
-; LINUX64BE-NEXT:    rotld r4, r3, r4
-; LINUX64BE-NEXT:    clrldi r5, r4, 1
+; LINUX64BE-NEXT:    rldcl r5, r3, r4, 1
 ; LINUX64BE-NEXT:    rldicl r4, r3, 23, 1
 ; LINUX64BE-NEXT:    mr r3, r5
 ; LINUX64BE-NEXT:    bl callee
@@ -479,8 +465,7 @@ define dso_local i64 @tworotates(i64 noundef %word) local_unnamed_addr #0 {
 ; LINUX64LE-NEXT:    stdu r1, -32(r1)
 ; LINUX64LE-NEXT:    cntlzd r4, r3
 ; LINUX64LE-NEXT:    std r0, 48(r1)
-; LINUX64LE-NEXT:    rotld r4, r3, r4
-; LINUX64LE-NEXT:    clrldi r5, r4, 1
+; LINUX64LE-NEXT:    rldcl r5, r3, r4, 1
 ; LINUX64LE-NEXT:    rldicl r4, r3, 23, 1
 ; LINUX64LE-NEXT:    mr r3, r5
 ; LINUX64LE-NEXT:    bl callee


        


More information about the llvm-commits mailing list