[llvm] r287329 - [PPC][DAGCombine] Convert SETCC to subtract when the result is zero extended

Fri Nov 18 02:41:45 PST 2016

Author: amehsan
Date: Fri Nov 18 04:41:44 2016
New Revision: 287329

URL: http://llvm.org/viewvc/llvm-project?rev=287329&view=rev
Log:
[PPC][DAGCombine] Convert SETCC to subtract when the result is zero extended

When we see a SETCC whose only users are zero extend operations, we can replace
it with a subtraction. This results in doing all calculations in GPRs and
avoids CR use.

Currently we do this only for ULT, ULE, UGT and UGE condition codes. There are
ways that this can be extended. For example for signed condition codes. In that
case we will be introducing additional sign extend instructions, so more careful
profitability analysis may be required.

Another direction to extend this is for equal, not equal conditions. Also when
users of SETCC are any_ext or sign_ext, we might be able to do something 
similar.

Added:
    llvm/trunk/test/CodeGen/PowerPC/setcc-to-sub.ll
Modified:
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=287329&r1=287328&r2=287329&view=diff
==============================================================================

--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Fri Nov 18 04:41:44 2016
@@ -9976,6 +9976,87 @@ static bool findConsecutiveLoad(LoadSDNo
   return false;
 }
 
+
+/// This function is called when we have proved that a SETCC node can be replaced
+/// by subtraction (and other supporting instructions) so that the result of
+/// comparison is kept in a GPR instead of CR. This function is purely for
+/// codegen purposes and has some flags to guide the codegen process.
+static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
+                                     bool Swap, SDLoc &DL, SelectionDAG &DAG) {
+
+  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
+
+  // Zero extend the operands to the largest legal integer. Originally, they
+  // must be of a strictly smaller size.
+  auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
+                         DAG.getConstant(Size, DL, MVT::i32));
+  auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
+                         DAG.getConstant(Size, DL, MVT::i32));
+
+  // Swap if needed. Depends on the condition code.
+  if (Swap)
+    std::swap(Op0, Op1);
+
+  // Subtract extended integers.
+  auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
+
+  // Move the sign bit to the least significant position and zero out the rest.
+  // Now the least significant bit carries the result of original comparison.
+  auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
+                             DAG.getConstant(Size - 1, DL, MVT::i32));
+  auto Final = Shifted;
+
+  // Complement the result if needed. Based on the condition code.
+  if (Complement)
+    Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
+                        DAG.getConstant(1, DL, MVT::i64));
+
+  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
+}
+
+SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
+                                                  DAGCombinerInfo &DCI) const {
+
+  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc DL(N);
+
+  // Size of integers being compared has a critical role in the following
+  // analysis, so we prefer to do this when all types are legal.
+  if (!DCI.isAfterLegalizeVectorOps())
+    return SDValue();
+
+  // If all users of SETCC extend its value to a legal integer type
+  // then we replace SETCC with a subtraction
+  for (SDNode::use_iterator UI = N->use_begin(),
+       UE = N->use_end(); UI != UE; ++UI) {
+    if (UI->getOpcode() != ISD::ZERO_EXTEND)
+      return SDValue();
+  }
+
+  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  auto OpSize = N->getOperand(0).getValueSizeInBits();
+
+  unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
+
+  if (OpSize < Size) {
+    switch (CC) {
+    default: break;
+    case ISD::SETULT:
+      return generateEquivalentSub(N, Size, false, false, DL, DAG);
+    case ISD::SETULE:
+      return generateEquivalentSub(N, Size, true, true, DL, DAG);
+    case ISD::SETUGT:
+      return generateEquivalentSub(N, Size, false, true, DL, DAG);
+    case ISD::SETUGE:
+      return generateEquivalentSub(N, Size, true, false, DL, DAG);
+    }
+  }
+
+  return SDValue();
+}
+
 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                   DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -10017,7 +10098,8 @@ SDValue PPCTargetLowering::DAGCombineTru
                                  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
           !DAG.MaskedValueIsZero(N->getOperand(1),
                                  APInt::getHighBitsSet(OpBits, OpBits-1)))
-        return SDValue();
+        return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
+                                             : SDValue());
     } else {
       // This is neither a signed nor an unsigned comparison, just make sure
       // that the high bits are equal.

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=287329&r1=287328&r2=287329&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Fri Nov 18 04:41:44 2016
@@ -977,6 +977,11 @@ namespace llvm {
     SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
 
+    /// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
+    /// SETCC with integer subtraction when (1) there is a legal way of doing it
+    /// (2) keeping the result of comparison in GPR has performance benefit.
+    SDValue ConvertSETCCToSubtract(SDNode *N, DAGCombinerInfo &DCI) const;
+
     SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps, bool &UseOneConstNR,
                             bool Reciprocal) const override;

Added: llvm/trunk/test/CodeGen/PowerPC/setcc-to-sub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/setcc-to-sub.ll?rev=287329&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/setcc-to-sub.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/setcc-to-sub.ll Fri Nov 18 04:41:44 2016
@@ -0,0 +1,96 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=pwr8 < %s | FileCheck %s
+
+%class.PB2 = type { [1 x i32], %class.PB1* }
+%class.PB1 = type { [1 x i32], i64, i64, i32 }
+
+; Function Attrs: norecurse nounwind readonly
+define zeroext i1 @test1(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr #0 {
+entry:
+  %arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
+  %0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
+  %and.i = and i32 %0, 8
+  %arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
+  %1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
+  %and.i4 = and i32 %1, 8
+  %cmp.i5 = icmp ult i32 %and.i, %and.i4
+  ret i1 %cmp.i5
+
+; CHECK-LABEL: @test1
+; CHECK: rlwinm [[REG1:[0-9]*]]
+; CHECK-NEXT: rlwinm [[REG2:[0-9]*]]
+; CHECK-NEXT: sub [[REG3:[0-9]*]], [[REG1]], [[REG2]]
+; CHECK-NEXT: rldicl 3, [[REG3]]
+; CHECK: blr
+
+}
+
+; Function Attrs: norecurse nounwind readonly
+define zeroext i1 @test2(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr #0 {
+entry:
+  %arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
+  %0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
+  %and.i = and i32 %0, 8
+  %arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
+  %1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
+  %and.i4 = and i32 %1, 8
+  %cmp.i5 = icmp ule i32 %and.i, %and.i4
+  ret i1 %cmp.i5
+
+; CHECK-LABEL: @test2
+; CHECK: rlwinm [[REG1:[0-9]*]]
+; CHECK-NEXT: rlwinm [[REG2:[0-9]*]]
+; CHECK-NEXT: sub [[REG3:[0-9]*]], [[REG2]], [[REG1]]
+; CHECK-NEXT: rldicl [[REG4:[0-9]*]], [[REG3]]
+; CHECK-NEXT: xori 3, [[REG4]], 1
+; CHECK: blr
+
+}
+
+; Function Attrs: norecurse nounwind readonly
+define zeroext i1 @test3(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr #0 {
+entry:
+  %arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
+  %0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
+  %and.i = and i32 %0, 8
+  %arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
+  %1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
+  %and.i4 = and i32 %1, 8
+  %cmp.i5 = icmp ugt i32 %and.i, %and.i4
+  ret i1 %cmp.i5
+
+; CHECK-LABEL: @test3
+; CHECK: rlwinm [[REG1:[0-9]*]]
+; CHECK-NEXT: rlwinm [[REG2:[0-9]*]]
+; CHECK-NEXT: sub [[REG3:[0-9]*]], [[REG2]], [[REG1]]
+; CHECK-NEXT: rldicl 3, [[REG3]]
+; CHECK: blr
+
+}
+
+; Function Attrs: norecurse nounwind readonly
+define zeroext i1 @test4(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr #0 {
+entry:
+  %arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
+  %0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
+  %and.i = and i32 %0, 8
+  %arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
+  %1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
+  %and.i4 = and i32 %1, 8
+  %cmp.i5 = icmp uge i32 %and.i, %and.i4
+  ret i1 %cmp.i5
+
+; CHECK-LABEL: @test4
+; CHECK: rlwinm [[REG1:[0-9]*]]
+; CHECK-NEXT: rlwinm [[REG2:[0-9]*]]
+; CHECK-NEXT: sub [[REG3:[0-9]*]], [[REG1]], [[REG2]]
+; CHECK-NEXT: rldicl [[REG4:[0-9]*]], [[REG3]]
+; CHECK-NEXT: xori 3, [[REG4]], 1
+; CHECK: blr
+
+}
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C++ TBAA"}