[llvm] ff40fb0 - [PowerPC] Try to fold sqrt/sdiv test results with the branch.
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 13 18:16:37 PST 2021
Author: Esme-Yi
Date: 2021-01-14T02:15:19Z
New Revision: ff40fb07ad6309131c2448ca00572a078c7a2d59
URL: https://github.com/llvm/llvm-project/commit/ff40fb07ad6309131c2448ca00572a078c7a2d59
DIFF: https://github.com/llvm/llvm-project/commit/ff40fb07ad6309131c2448ca00572a078c7a2d59.diff
LOG: [PowerPC] Try to fold sqrt/sdiv test results with the branch.
Summary: The patch tries to fold the sqrt/sdiv test node (e.g. FTSQRT, XVTDIVDP) with the branch, i.e. br_cc, if they match these patterns:
(br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
(br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
(br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
(br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
(br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
(br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
(br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
(br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
Reviewed By: steven.zhang
Differential Revision: https://reviews.llvm.org/D94054
Added:
llvm/test/CodeGen/PowerPC/fold_swtest_br.ll
Modified:
llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 07b81a4325fc..693b0adaede4 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -352,6 +352,7 @@ namespace {
private:
bool trySETCC(SDNode *N);
+ bool tryFoldSWTestBRCC(SDNode *N);
bool tryAsSingleRLDICL(SDNode *N);
bool tryAsSingleRLDICR(SDNode *N);
bool tryAsSingleRLWINM(SDNode *N);
@@ -4378,6 +4379,81 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
return true;
}
+// Return true if it's a software square-root/divide operand: either the
+// PPCISD::FTSQRT node or one of the VSX test-for-software intrinsics.
+static bool isSWTestOp(SDValue N) {
+  if (N.getOpcode() == PPCISD::FTSQRT)
+    return true;
+  // Operand 0 carries an intrinsic ID only on an INTRINSIC_WO_CHAIN node.
+  // Without the opcode check, any node whose first operand happens to be a
+  // constant equal to one of these intrinsic IDs would be misidentified.
+  if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0)) ||
+      N.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
+    return false;
+  switch (N.getConstantOperandVal(0)) {
+  case Intrinsic::ppc_vsx_xvtdivdp:
+  case Intrinsic::ppc_vsx_xvtdivsp:
+  case Intrinsic::ppc_vsx_xvtsqrtdp:
+  case Intrinsic::ppc_vsx_xvtsqrtsp:
+    return true;
+  }
+  return false;
+}
+
+bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
+ assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.");
+ // We are looking for following patterns, where `truncate to i1` actually has
+ // the same semantic with `and 1`.
+ // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
+ // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
+ // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
+ // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
+ // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
+ // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
+ // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
+ // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
+ // BR_CC operands: 0 = chain, 1 = condition code, 2/3 = values being
+ // compared, 4 = destination basic block.
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ if (CC != ISD::SETEQ && CC != ISD::SETNE)
+ return false;
+
+ // The fold only applies when the test result is compared against zero.
+ SDValue CmpRHS = N->getOperand(3);
+ if (!isa<ConstantSDNode>(CmpRHS) ||
+ cast<ConstantSDNode>(CmpRHS)->getSExtValue() != 0)
+ return false;
+
+ // CmpLHS must be (and SWTestOp, Mask) or (truncate SWTestOp); either way
+ // operand 0 is the CR-setting test operation we fold the branch onto.
+ SDValue CmpLHS = N->getOperand(2);
+ if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0)))
+ return false;
+
+ // PCC == 0 acts as a sentinel meaning "no matching pattern found".
+ unsigned PCC = 0;
+ bool IsCCNE = CC == ISD::SETNE;
+ // Each single-bit mask selects one bit of the CR field written by the test
+ // instruction; the predicate is negated between the SETNE and SETEQ forms.
+ if (CmpLHS.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(CmpLHS.getOperand(1)))
+ switch (CmpLHS.getConstantOperandVal(1)) {
+ case 1:
+ PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
+ break;
+ case 2:
+ PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE;
+ break;
+ case 4:
+ PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE;
+ break;
+ case 8:
+ PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE;
+ break;
+ default:
+ return false;
+ }
+ // (truncate to i1) keeps only bit 0, i.e. it behaves like (and SWTestOp, 1).
+ else if (CmpLHS.getOpcode() == ISD::TRUNCATE &&
+ CmpLHS.getValueType() == MVT::i1)
+ PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
+
+ if (PCC) {
+ SDLoc dl(N);
+ // BCC operands: predicate immediate, CR-setting test op, destination
+ // block, chain.
+ SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4),
+ N->getOperand(0)};
+ CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
+ return true;
+ }
+ return false;
+}
+
bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
unsigned Imm;
@@ -5247,6 +5323,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
case ISD::BR_CC: {
+ if (tryFoldSWTestBRCC(N))
+ return;
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
unsigned PCC =
getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget);
diff --git a/llvm/test/CodeGen/PowerPC/fold_swtest_br.ll b/llvm/test/CodeGen/PowerPC/fold_swtest_br.ll
new file mode 100644
index 000000000000..0d5581f8b26f
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/fold_swtest_br.ll
@@ -0,0 +1,204 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
+; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s
+
+@val = external local_unnamed_addr global i32, align 4
+declare i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double>)
+
+define dso_local signext i32 @xvtsqrtdp_and_1_eq(<2 x double> %input) {
+; CHECK-LABEL: xvtsqrtdp_and_1_eq:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvtsqrtdp cr0, v2
+; CHECK-NEXT: bnu cr0, .LBB0_2
+; CHECK-NEXT: # %bb.1: # %if.then
+; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT: li r4, 100
+; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT: stw r4, 0(r3)
+; CHECK-NEXT: .LBB0_2: # %if.end
+; CHECK-NEXT: li r3, 1
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
+ %1 = and i32 %0, 1
+ %cmp.not = icmp eq i32 %1, 0
+ br i1 %cmp.not, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ store i32 100, i32* @val, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret i32 1
+}
+
+define dso_local signext i32 @xvtsqrtdp_and_2_eq(<2 x double> %input) {
+; CHECK-LABEL: xvtsqrtdp_and_2_eq:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvtsqrtdp cr0, v2
+; CHECK-NEXT: bne cr0, .LBB1_2
+; CHECK-NEXT: # %bb.1: # %if.then
+; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT: li r4, 100
+; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT: stw r4, 0(r3)
+; CHECK-NEXT: .LBB1_2: # %if.end
+; CHECK-NEXT: li r3, 1
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
+ %1 = and i32 %0, 2
+ %cmp.not = icmp eq i32 %1, 0
+ br i1 %cmp.not, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ store i32 100, i32* @val, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret i32 1
+}
+
+define dso_local signext i32 @xvtsqrtdp_and_4_eq(<2 x double> %input) {
+; CHECK-LABEL: xvtsqrtdp_and_4_eq:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvtsqrtdp cr0, v2
+; CHECK-NEXT: ble cr0, .LBB2_2
+; CHECK-NEXT: # %bb.1: # %if.then
+; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT: li r4, 100
+; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT: stw r4, 0(r3)
+; CHECK-NEXT: .LBB2_2: # %if.end
+; CHECK-NEXT: li r3, 1
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
+ %1 = and i32 %0, 4
+ %cmp.not = icmp eq i32 %1, 0
+ br i1 %cmp.not, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ store i32 100, i32* @val, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret i32 1
+}
+
+define dso_local signext i32 @xvtsqrtdp_and_8_eq(<2 x double> %input) {
+; CHECK-LABEL: xvtsqrtdp_and_8_eq:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvtsqrtdp cr0, v2
+; CHECK-NEXT: bge cr0, .LBB3_2
+; CHECK-NEXT: # %bb.1: # %if.then
+; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT: li r4, 100
+; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT: stw r4, 0(r3)
+; CHECK-NEXT: .LBB3_2: # %if.end
+; CHECK-NEXT: li r3, 1
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
+ %1 = and i32 %0, 8
+ %cmp.not = icmp eq i32 %1, 0
+ br i1 %cmp.not, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ store i32 100, i32* @val, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret i32 1
+}
+
+define dso_local signext i32 @xvtsqrtdp_and_1_ne(<2 x double> %input) {
+; CHECK-LABEL: xvtsqrtdp_and_1_ne:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvtsqrtdp cr0, v2
+; CHECK-NEXT: bun cr0, .LBB4_2
+; CHECK-NEXT: # %bb.1: # %if.then
+; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT: li r4, 100
+; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT: stw r4, 0(r3)
+; CHECK-NEXT: .LBB4_2: # %if.end
+; CHECK-NEXT: li r3, 1
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
+ %1 = and i32 %0, 1
+ %cmp.not = icmp ne i32 %1, 0
+ br i1 %cmp.not, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ store i32 100, i32* @val, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret i32 1
+}
+
+define dso_local signext i32 @xvtsqrtdp_and_2_ne(<2 x double> %input) {
+; CHECK-LABEL: xvtsqrtdp_and_2_ne:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvtsqrtdp cr0, v2
+; CHECK-NEXT: beq cr0, .LBB5_2
+; CHECK-NEXT: # %bb.1: # %if.then
+; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT: li r4, 100
+; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT: stw r4, 0(r3)
+; CHECK-NEXT: .LBB5_2: # %if.end
+; CHECK-NEXT: li r3, 1
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
+ %1 = and i32 %0, 2
+ %cmp.not = icmp ne i32 %1, 0
+ br i1 %cmp.not, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ store i32 100, i32* @val, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret i32 1
+}
+
+define dso_local signext i32 @xvtsqrtdp_and_4_ne(<2 x double> %input) {
+; CHECK-LABEL: xvtsqrtdp_and_4_ne:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvtsqrtdp cr0, v2
+; CHECK-NEXT: bgt cr0, .LBB6_2
+; CHECK-NEXT: # %bb.1: # %if.then
+; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT: li r4, 100
+; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT: stw r4, 0(r3)
+; CHECK-NEXT: .LBB6_2: # %if.end
+; CHECK-NEXT: li r3, 1
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
+ %1 = and i32 %0, 4
+ %cmp.not = icmp ne i32 %1, 0
+ br i1 %cmp.not, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ store i32 100, i32* @val, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret i32 1
+}
+
+define dso_local signext i32 @xvtsqrtdp_and_8_ne(<2 x double> %input) {
+; CHECK-LABEL: xvtsqrtdp_and_8_ne:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvtsqrtdp cr0, v2
+; CHECK-NEXT: blt cr0, .LBB7_2
+; CHECK-NEXT: # %bb.1: # %if.then
+; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT: li r4, 100
+; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT: stw r4, 0(r3)
+; CHECK-NEXT: .LBB7_2: # %if.end
+; CHECK-NEXT: li r3, 1
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
+ %1 = and i32 %0, 8
+ %cmp.not = icmp ne i32 %1, 0
+ br i1 %cmp.not, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ store i32 100, i32* @val, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret i32 1
+}
More information about the llvm-commits
mailing list