[llvm] 492c1f3 - [PowerPC] Merge rotate and clear into single instruction.
Stefan Pintilie via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 7 06:25:47 PDT 2023
Author: Stefan Pintilie
Date: 2023-09-07T09:25:41-04:00
New Revision: 492c1f3d7cf672a1ae4df85350daf05f82f558c6
URL: https://github.com/llvm/llvm-project/commit/492c1f3d7cf672a1ae4df85350daf05f82f558c6
DIFF: https://github.com/llvm/llvm-project/commit/492c1f3d7cf672a1ae4df85350daf05f82f558c6.diff
LOG: [PowerPC] Merge rotate and clear into single instruction.
This patch catches a codegen opportunity where a 64-bit rotate by a
non-constant amount, followed by an AND with a low-bit mask, can be merged
into a single RLDCL instruction.
Reviewed By: lei, amyk
Differential Revision: https://reviews.llvm.org/D158328
Added:
Modified:
llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
llvm/test/CodeGen/PowerPC/ppc-rotate-clear.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 98936fe4011d40..95a66442d47cf9 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -424,6 +424,7 @@ namespace {
bool tryFoldSWTestBRCC(SDNode *N);
bool trySelectLoopCountIntrinsic(SDNode *N);
bool tryAsSingleRLDICL(SDNode *N);
+ bool tryAsSingleRLDCL(SDNode *N);
bool tryAsSingleRLDICR(SDNode *N);
bool tryAsSingleRLWINM(SDNode *N);
bool tryAsSingleRLWINM8(SDNode *N);
@@ -5084,6 +5085,35 @@ bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
return false;
}
+bool PPCDAGToDAGISel::tryAsSingleRLDCL(SDNode *N) {
+ assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
+
+ uint64_t Imm64;
+ if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
+ return false;
+
+ SDValue Val = N->getOperand(0);
+
+ if (Val.getOpcode() != ISD::ROTL)
+ return false;
+
+ // Looking to try to avoid a situation like this one:
+ // %2 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 23)
+ // %and1 = and i64 %2, 9223372036854775807
+ // In this function we are looking to try to match RLDCL. However, the above
+ // DAG would better match RLDICL instead which is not what we are looking
+ // for here.
+ SDValue RotateAmt = Val.getOperand(1);
+ if (RotateAmt.getOpcode() == ISD::Constant)
+ return false;
+
+ unsigned MB = 64 - llvm::countr_one(Imm64);
+ SDLoc dl(N);
+ SDValue Ops[] = {Val.getOperand(0), RotateAmt, getI32Imm(MB, dl)};
+ CurDAG->SelectNodeTo(N, PPC::RLDCL, MVT::i64, Ops);
+ return true;
+}
+
bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
uint64_t Imm64;
@@ -5604,8 +5634,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
case ISD::AND:
// If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
- if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) ||
- tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N))
+ if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDCL(N) ||
+ tryAsSingleRLDICL(N) || tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) ||
+ tryAsPairOfRLDICL(N))
return;
// Other cases are autogenerated.
diff --git a/llvm/test/CodeGen/PowerPC/ppc-rotate-clear.ll b/llvm/test/CodeGen/PowerPC/ppc-rotate-clear.ll
index fa7744e2048111..17f04b9587d7c3 100644
--- a/llvm/test/CodeGen/PowerPC/ppc-rotate-clear.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc-rotate-clear.ll
@@ -75,22 +75,19 @@ define dso_local i64 @rotatemask64(i64 noundef %word) local_unnamed_addr #0 {
; AIX64-LABEL: rotatemask64:
; AIX64: # %bb.0: # %entry
; AIX64-NEXT: cntlzd r4, r3
-; AIX64-NEXT: rotld r3, r3, r4
-; AIX64-NEXT: clrldi r3, r3, 1
+; AIX64-NEXT: rldcl r3, r3, r4, 1
; AIX64-NEXT: blr
;
; LINUX64BE-LABEL: rotatemask64:
; LINUX64BE: # %bb.0: # %entry
; LINUX64BE-NEXT: cntlzd r4, r3
-; LINUX64BE-NEXT: rotld r3, r3, r4
-; LINUX64BE-NEXT: clrldi r3, r3, 1
+; LINUX64BE-NEXT: rldcl r3, r3, r4, 1
; LINUX64BE-NEXT: blr
;
; LINUX64LE-LABEL: rotatemask64:
; LINUX64LE: # %bb.0: # %entry
; LINUX64LE-NEXT: cntlzd r4, r3
-; LINUX64LE-NEXT: rotld r3, r3, r4
-; LINUX64LE-NEXT: clrldi r3, r3, 1
+; LINUX64LE-NEXT: rldcl r3, r3, r4, 1
; LINUX64LE-NEXT: blr
entry:
%0 = tail call i64 @llvm.ctlz.i64(i64 %word, i1 false)
@@ -126,22 +123,19 @@ define dso_local i64 @rotatemask64_2(i64 noundef %word) local_unnamed_addr #0 {
; AIX64-LABEL: rotatemask64_2:
; AIX64: # %bb.0: # %entry
; AIX64-NEXT: cntlzd r4, r3
-; AIX64-NEXT: rotld r3, r3, r4
-; AIX64-NEXT: clrldi r3, r3, 1
+; AIX64-NEXT: rldcl r3, r3, r4, 1
; AIX64-NEXT: blr
;
; LINUX64BE-LABEL: rotatemask64_2:
; LINUX64BE: # %bb.0: # %entry
; LINUX64BE-NEXT: cntlzd r4, r3
-; LINUX64BE-NEXT: rotld r3, r3, r4
-; LINUX64BE-NEXT: clrldi r3, r3, 1
+; LINUX64BE-NEXT: rldcl r3, r3, r4, 1
; LINUX64BE-NEXT: blr
;
; LINUX64LE-LABEL: rotatemask64_2:
; LINUX64LE: # %bb.0: # %entry
; LINUX64LE-NEXT: cntlzd r4, r3
-; LINUX64LE-NEXT: rotld r3, r3, r4
-; LINUX64LE-NEXT: clrldi r3, r3, 1
+; LINUX64LE-NEXT: rldcl r3, r3, r4, 1
; LINUX64LE-NEXT: blr
entry:
%0 = tail call i64 @llvm.ctlz.i64(i64 %word, i1 false)
@@ -222,20 +216,17 @@ define dso_local i64 @rotatemask64_nocount(i64 noundef %word, i64 noundef %clz)
;
; AIX64-LABEL: rotatemask64_nocount:
; AIX64: # %bb.0: # %entry
-; AIX64-NEXT: rotld r3, r3, r4
-; AIX64-NEXT: clrldi r3, r3, 8
+; AIX64-NEXT: rldcl r3, r3, r4, 8
; AIX64-NEXT: blr
;
; LINUX64BE-LABEL: rotatemask64_nocount:
; LINUX64BE: # %bb.0: # %entry
-; LINUX64BE-NEXT: rotld r3, r3, r4
-; LINUX64BE-NEXT: clrldi r3, r3, 8
+; LINUX64BE-NEXT: rldcl r3, r3, r4, 8
; LINUX64BE-NEXT: blr
;
; LINUX64LE-LABEL: rotatemask64_nocount:
; LINUX64LE: # %bb.0: # %entry
-; LINUX64LE-NEXT: rotld r3, r3, r4
-; LINUX64LE-NEXT: clrldi r3, r3, 8
+; LINUX64LE-NEXT: rldcl r3, r3, r4, 8
; LINUX64LE-NEXT: blr
entry:
%0 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 %clz)
@@ -262,20 +253,17 @@ define dso_local i64 @builtincheck(i64 noundef %word, i64 noundef %shift) local_
;
; AIX64-LABEL: builtincheck:
; AIX64: # %bb.0: # %entry
-; AIX64-NEXT: rotld r3, r3, r4
-; AIX64-NEXT: clrldi r3, r3, 1
+; AIX64-NEXT: rldcl r3, r3, r4, 1
; AIX64-NEXT: blr
;
; LINUX64BE-LABEL: builtincheck:
; LINUX64BE: # %bb.0: # %entry
-; LINUX64BE-NEXT: rotld r3, r3, r4
-; LINUX64BE-NEXT: clrldi r3, r3, 1
+; LINUX64BE-NEXT: rldcl r3, r3, r4, 1
; LINUX64BE-NEXT: blr
;
; LINUX64LE-LABEL: builtincheck:
; LINUX64LE: # %bb.0: # %entry
-; LINUX64LE-NEXT: rotld r3, r3, r4
-; LINUX64LE-NEXT: clrldi r3, r3, 1
+; LINUX64LE-NEXT: rldcl r3, r3, r4, 1
; LINUX64LE-NEXT: blr
entry:
%0 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 %shift)
@@ -352,9 +340,9 @@ define dso_local i64 @twomasks(i64 noundef %word) local_unnamed_addr #0 {
; AIX64-NEXT: stdu r1, -112(r1)
; AIX64-NEXT: cntlzd r4, r3
; AIX64-NEXT: std r0, 128(r1)
-; AIX64-NEXT: rotld r4, r3, r4
-; AIX64-NEXT: clrldi r3, r4, 1
-; AIX64-NEXT: clrldi r4, r4, 16
+; AIX64-NEXT: rldcl r5, r3, r4, 1
+; AIX64-NEXT: rldcl r4, r3, r4, 16
+; AIX64-NEXT: mr r3, r5
; AIX64-NEXT: bl .callee[PR]
; AIX64-NEXT: nop
; AIX64-NEXT: addi r1, r1, 112
@@ -368,9 +356,9 @@ define dso_local i64 @twomasks(i64 noundef %word) local_unnamed_addr #0 {
; LINUX64BE-NEXT: stdu r1, -112(r1)
; LINUX64BE-NEXT: cntlzd r4, r3
; LINUX64BE-NEXT: std r0, 128(r1)
-; LINUX64BE-NEXT: rotld r4, r3, r4
-; LINUX64BE-NEXT: clrldi r3, r4, 1
-; LINUX64BE-NEXT: clrldi r4, r4, 16
+; LINUX64BE-NEXT: rldcl r5, r3, r4, 1
+; LINUX64BE-NEXT: rldcl r4, r3, r4, 16
+; LINUX64BE-NEXT: mr r3, r5
; LINUX64BE-NEXT: bl callee
; LINUX64BE-NEXT: nop
; LINUX64BE-NEXT: addi r1, r1, 112
@@ -384,9 +372,9 @@ define dso_local i64 @twomasks(i64 noundef %word) local_unnamed_addr #0 {
; LINUX64LE-NEXT: stdu r1, -32(r1)
; LINUX64LE-NEXT: cntlzd r4, r3
; LINUX64LE-NEXT: std r0, 48(r1)
-; LINUX64LE-NEXT: rotld r4, r3, r4
-; LINUX64LE-NEXT: clrldi r3, r4, 1
-; LINUX64LE-NEXT: clrldi r4, r4, 16
+; LINUX64LE-NEXT: rldcl r5, r3, r4, 1
+; LINUX64LE-NEXT: rldcl r4, r3, r4, 16
+; LINUX64LE-NEXT: mr r3, r5
; LINUX64LE-NEXT: bl callee
; LINUX64LE-NEXT: nop
; LINUX64LE-NEXT: addi r1, r1, 32
@@ -445,8 +433,7 @@ define dso_local i64 @tworotates(i64 noundef %word) local_unnamed_addr #0 {
; AIX64-NEXT: stdu r1, -112(r1)
; AIX64-NEXT: cntlzd r4, r3
; AIX64-NEXT: std r0, 128(r1)
-; AIX64-NEXT: rotld r4, r3, r4
-; AIX64-NEXT: clrldi r5, r4, 1
+; AIX64-NEXT: rldcl r5, r3, r4, 1
; AIX64-NEXT: rldicl r4, r3, 23, 1
; AIX64-NEXT: mr r3, r5
; AIX64-NEXT: bl .callee[PR]
@@ -462,8 +449,7 @@ define dso_local i64 @tworotates(i64 noundef %word) local_unnamed_addr #0 {
; LINUX64BE-NEXT: stdu r1, -112(r1)
; LINUX64BE-NEXT: cntlzd r4, r3
; LINUX64BE-NEXT: std r0, 128(r1)
-; LINUX64BE-NEXT: rotld r4, r3, r4
-; LINUX64BE-NEXT: clrldi r5, r4, 1
+; LINUX64BE-NEXT: rldcl r5, r3, r4, 1
; LINUX64BE-NEXT: rldicl r4, r3, 23, 1
; LINUX64BE-NEXT: mr r3, r5
; LINUX64BE-NEXT: bl callee
@@ -479,8 +465,7 @@ define dso_local i64 @tworotates(i64 noundef %word) local_unnamed_addr #0 {
; LINUX64LE-NEXT: stdu r1, -32(r1)
; LINUX64LE-NEXT: cntlzd r4, r3
; LINUX64LE-NEXT: std r0, 48(r1)
-; LINUX64LE-NEXT: rotld r4, r3, r4
-; LINUX64LE-NEXT: clrldi r5, r4, 1
+; LINUX64LE-NEXT: rldcl r5, r3, r4, 1
; LINUX64LE-NEXT: rldicl r4, r3, 23, 1
; LINUX64LE-NEXT: mr r3, r5
; LINUX64LE-NEXT: bl callee
More information about the llvm-commits mailing list