[llvm-branch-commits] [llvm] ff40fb0 - [PowerPC] Try to fold sqrt/sdiv test results with the branch.

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Jan 13 18:21:10 PST 2021


Author: Esme-Yi
Date: 2021-01-14T02:15:19Z
New Revision: ff40fb07ad6309131c2448ca00572a078c7a2d59

URL: https://github.com/llvm/llvm-project/commit/ff40fb07ad6309131c2448ca00572a078c7a2d59
DIFF: https://github.com/llvm/llvm-project/commit/ff40fb07ad6309131c2448ca00572a078c7a2d59.diff

LOG: [PowerPC] Try to fold sqrt/sdiv test results with the branch.

Summary: The patch tries to fold a sqrt/sdiv test node (e.g. FTSQRT, XVTDIVDP) with the branch (i.e. br_cc) when they match one of the following patterns; an illustrative sketch follows the list:
(br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
(br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
(br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
(br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
(br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
(br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
(br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
(br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
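
For illustration only, here is a minimal, self-contained C++ sketch of that mask-to-predicate mapping. The Pred enum and the predicateForSWTestFold helper are hypothetical stand-ins for PPC::Predicate and the switch inside tryFoldSWTestBRCC; they are not part of the patch itself.

// Illustrative sketch: map the constant mask from the `and` (or 1 for the
// truncate-to-i1 form) and the branch condition (setne vs. seteq) to the
// branch predicate the fold emits.
#include <cassert>
#include <cstdint>
#include <optional>

enum class Pred { UN, NU, EQ, NE, GT, LE, LT, GE }; // stand-in for PPC::Predicate

std::optional<Pred> predicateForSWTestFold(uint64_t Mask, bool IsNE) {
  switch (Mask) {
  case 1: return IsNE ? Pred::UN : Pred::NU;
  case 2: return IsNE ? Pred::EQ : Pred::NE;
  case 4: return IsNE ? Pred::GT : Pred::LE;
  case 8: return IsNE ? Pred::LT : Pred::GE;
  default: return std::nullopt; // not one of the foldable patterns
  }
}

int main() {
  // (br_cc setne, (and SWTestOp, 2), 0) folds to a branch on PRED_EQ.
  assert(predicateForSWTestFold(2, /*IsNE=*/true) == Pred::EQ);
  // (br_cc seteq, (truncateToi1 SWTestOp), 0) behaves like mask 1 -> PRED_NU.
  assert(predicateForSWTestFold(1, /*IsNE=*/false) == Pred::NU);
  return 0;
}

Each mask value selects one bit of the 4-bit CR field written by the test instruction, which is why the truncate-to-i1 form is handled the same way as `and 1`.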

Reviewed By: steven.zhang

Differential Revision: https://reviews.llvm.org/D94054

Added: 
    llvm/test/CodeGen/PowerPC/fold_swtest_br.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 07b81a4325fc..693b0adaede4 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -352,6 +352,7 @@ namespace {
 
 private:
     bool trySETCC(SDNode *N);
+    bool tryFoldSWTestBRCC(SDNode *N);
     bool tryAsSingleRLDICL(SDNode *N);
     bool tryAsSingleRLDICR(SDNode *N);
     bool tryAsSingleRLWINM(SDNode *N);
@@ -4378,6 +4379,81 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
   return true;
 }
 
+// Return true if it's a software square-root/divide operand.
+static bool isSWTestOp(SDValue N) {
+  if (N.getOpcode() == PPCISD::FTSQRT)
+    return true;
+  if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0)))
+    return false;
+  switch (N.getConstantOperandVal(0)) {
+  case Intrinsic::ppc_vsx_xvtdivdp:
+  case Intrinsic::ppc_vsx_xvtdivsp:
+  case Intrinsic::ppc_vsx_xvtsqrtdp:
+  case Intrinsic::ppc_vsx_xvtsqrtsp:
+    return true;
+  }
+  return false;
+}
+
+bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
+  assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.");
+  // We are looking for the following patterns, where `truncate to i1` has
+  // the same semantics as `and 1`.
+  // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
+  // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
+  // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
+  // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
+  // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
+  // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
+  // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
+  // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
+  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+  if (CC != ISD::SETEQ && CC != ISD::SETNE)
+    return false;
+
+  SDValue CmpRHS = N->getOperand(3);
+  if (!isa<ConstantSDNode>(CmpRHS) ||
+      cast<ConstantSDNode>(CmpRHS)->getSExtValue() != 0)
+    return false;
+
+  SDValue CmpLHS = N->getOperand(2);
+  if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0)))
+    return false;
+
+  unsigned PCC = 0;
+  bool IsCCNE = CC == ISD::SETNE;
+  if (CmpLHS.getOpcode() == ISD::AND &&
+      isa<ConstantSDNode>(CmpLHS.getOperand(1)))
+    switch (CmpLHS.getConstantOperandVal(1)) {
+    case 1:
+      PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
+      break;
+    case 2:
+      PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE;
+      break;
+    case 4:
+      PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE;
+      break;
+    case 8:
+      PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE;
+      break;
+    default:
+      return false;
+    }
+  else if (CmpLHS.getOpcode() == ISD::TRUNCATE &&
+           CmpLHS.getValueType() == MVT::i1)
+    PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
+
+  if (PCC) {
+    SDLoc dl(N);
+    SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4),
+                     N->getOperand(0)};
+    CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
+    return true;
+  }
+  return false;
+}
+
 bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
   assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
   unsigned Imm;
@@ -5247,6 +5323,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
     return;
   }
   case ISD::BR_CC: {
+    if (tryFoldSWTestBRCC(N))
+      return;
     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
     unsigned PCC =
         getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget);

diff --git a/llvm/test/CodeGen/PowerPC/fold_swtest_br.ll b/llvm/test/CodeGen/PowerPC/fold_swtest_br.ll
new file mode 100644
index 000000000000..0d5581f8b26f
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/fold_swtest_br.ll
@@ -0,0 +1,204 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
+; RUN:   -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s
+
+@val = external local_unnamed_addr global i32, align 4
+declare i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double>)
+
+define dso_local signext i32 @xvtsqrtdp_and_1_eq(<2 x double> %input) {
+; CHECK-LABEL: xvtsqrtdp_and_1_eq:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvtsqrtdp cr0, v2
+; CHECK-NEXT:    bnu cr0, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %if.then
+; CHECK-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT:    li r4, 100
+; CHECK-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT:    stw r4, 0(r3)
+; CHECK-NEXT:  .LBB0_2: # %if.end
+; CHECK-NEXT:    li r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
+  %1 = and i32 %0, 1
+  %cmp.not = icmp eq i32 %1, 0
+  br i1 %cmp.not, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  store i32 100, i32* @val, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret i32 1
+}
+
+define dso_local signext i32 @xvtsqrtdp_and_2_eq(<2 x double> %input) {
+; CHECK-LABEL: xvtsqrtdp_and_2_eq:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvtsqrtdp cr0, v2
+; CHECK-NEXT:    bne cr0, .LBB1_2
+; CHECK-NEXT:  # %bb.1: # %if.then
+; CHECK-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT:    li r4, 100
+; CHECK-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT:    stw r4, 0(r3)
+; CHECK-NEXT:  .LBB1_2: # %if.end
+; CHECK-NEXT:    li r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
+  %1 = and i32 %0, 2
+  %cmp.not = icmp eq i32 %1, 0
+  br i1 %cmp.not, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  store i32 100, i32* @val, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret i32 1
+}
+
+define dso_local signext i32 @xvtsqrtdp_and_4_eq(<2 x double> %input) {
+; CHECK-LABEL: xvtsqrtdp_and_4_eq:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvtsqrtdp cr0, v2
+; CHECK-NEXT:    ble cr0, .LBB2_2
+; CHECK-NEXT:  # %bb.1: # %if.then
+; CHECK-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT:    li r4, 100
+; CHECK-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT:    stw r4, 0(r3)
+; CHECK-NEXT:  .LBB2_2: # %if.end
+; CHECK-NEXT:    li r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
+  %1 = and i32 %0, 4
+  %cmp.not = icmp eq i32 %1, 0
+  br i1 %cmp.not, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  store i32 100, i32* @val, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret i32 1
+}
+
+define dso_local signext i32 @xvtsqrtdp_and_8_eq(<2 x double> %input) {
+; CHECK-LABEL: xvtsqrtdp_and_8_eq:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvtsqrtdp cr0, v2
+; CHECK-NEXT:    bge cr0, .LBB3_2
+; CHECK-NEXT:  # %bb.1: # %if.then
+; CHECK-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT:    li r4, 100
+; CHECK-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT:    stw r4, 0(r3)
+; CHECK-NEXT:  .LBB3_2: # %if.end
+; CHECK-NEXT:    li r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
+  %1 = and i32 %0, 8
+  %cmp.not = icmp eq i32 %1, 0
+  br i1 %cmp.not, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  store i32 100, i32* @val, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret i32 1
+}
+
+define dso_local signext i32 @xvtsqrtdp_and_1_ne(<2 x double> %input) {
+; CHECK-LABEL: xvtsqrtdp_and_1_ne:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvtsqrtdp cr0, v2
+; CHECK-NEXT:    bun cr0, .LBB4_2
+; CHECK-NEXT:  # %bb.1: # %if.then
+; CHECK-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT:    li r4, 100
+; CHECK-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT:    stw r4, 0(r3)
+; CHECK-NEXT:  .LBB4_2: # %if.end
+; CHECK-NEXT:    li r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
+  %1 = and i32 %0, 1
+  %cmp.not = icmp ne i32 %1, 0
+  br i1 %cmp.not, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  store i32 100, i32* @val, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret i32 1
+}
+
+define dso_local signext i32 @xvtsqrtdp_and_2_ne(<2 x double> %input) {
+; CHECK-LABEL: xvtsqrtdp_and_2_ne:
+; CHECK:       # %bb.0: # %if.end
+; CHECK-NEXT:    li r3, 1
+; CHECK-NEXT:    blr
+if.end:                                           ; preds = %if.then, %entry
+  ret i32 1
+}
+
+define dso_local signext i32 @xvtsqrtdp_and_4_ne(<2 x double> %input) {
+; CHECK-LABEL: xvtsqrtdp_and_4_ne:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvtsqrtdp cr0, v2
+; CHECK-NEXT:    bgt cr0, .LBB6_2
+; CHECK-NEXT:  # %bb.1: # %if.then
+; CHECK-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT:    li r4, 100
+; CHECK-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT:    stw r4, 0(r3)
+; CHECK-NEXT:  .LBB6_2: # %if.end
+; CHECK-NEXT:    li r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
+  %1 = and i32 %0, 4
+  %cmp.not = icmp ne i32 %1, 0
+  br i1 %cmp.not, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  store i32 100, i32* @val, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret i32 1
+}
+
+define dso_local signext i32 @xvtsqrtdp_and_8_ne(<2 x double> %input) {
+; CHECK-LABEL: xvtsqrtdp_and_8_ne:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvtsqrtdp cr0, v2
+; CHECK-NEXT:    blt cr0, .LBB7_2
+; CHECK-NEXT:  # %bb.1: # %if.then
+; CHECK-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT:    li r4, 100
+; CHECK-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT:    stw r4, 0(r3)
+; CHECK-NEXT:  .LBB7_2: # %if.end
+; CHECK-NEXT:    li r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
+  %1 = and i32 %0, 8
+  %cmp.not = icmp ne i32 %1, 0
+  br i1 %cmp.not, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  store i32 100, i32* @val, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret i32 1
+}

More information about the llvm-branch-commits mailing list