[llvm] r346484 - [SelectionDAG] swap select_cc operands to enable folding

Fri Nov 9 03:09:40 PST 2018

Author: alelab01
Date: Fri Nov  9 03:09:40 2018
New Revision: 346484

URL: http://llvm.org/viewvc/llvm-project?rev=346484&view=rev
Log:
[SelectionDAG] swap select_cc operands to enable folding

The DAGCombiner tries to SimplifySelectCC as follows:

  select_cc(x, y, 16, 0, cc) -> shl(zext(set_cc(x, y, cc)), 4)

It cannot handle the case where the two constant operands appear in the opposite order:

  select_cc(x, y, 0, 16, cc)

In that case we just need to swap the two constant operands and invert the condition code (written ~cc below):

  select_cc(x, y, 16, 0, ~cc)

Differential Revision: https://reviews.llvm.org/D53236

Added:
    llvm/trunk/test/CodeGen/AArch64/select_cc.ll
Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/Thumb/branchless-cmp.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=346484&r1=346483&r2=346484&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Nov  9 03:09:40 2018
@@ -18173,6 +18173,7 @@ SDValue DAGCombiner::SimplifySelectCC(co
   EVT VT = N2.getValueType();
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+  ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
 
   // Determine if the condition we're dealing with is constant
   SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
@@ -18273,48 +18274,49 @@ SDValue DAGCombiner::SimplifySelectCC(co
   }
 
   // fold select C, 16, 0 -> shl C, 4
-  if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
+  bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
+  bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
+
+  if ((Fold || Swap) &&
       TLI.getBooleanContents(N0.getValueType()) ==
-          TargetLowering::ZeroOrOneBooleanContent) {
+          TargetLowering::ZeroOrOneBooleanContent &&
+      (!LegalOperations ||
+       TLI.isOperationLegal(ISD::SETCC, N0.getValueType()))) {
+
+    if (Swap) {
+      CC = ISD::getSetCCInverse(CC, N0.getValueType().isInteger());
+      std::swap(N2C, N3C);
+    }
 
     // If the caller doesn't want us to simplify this into a zext of a compare,
     // don't do it.
     if (NotExtCompare && N2C->isOne())
       return SDValue();
 
-    // Get a SetCC of the condition
-    // NOTE: Don't create a SETCC if it's not legal on this target.
-    if (!LegalOperations ||
-        TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
-      SDValue Temp, SCC;
-      // cast from setcc result type to select result type
-      if (LegalTypes) {
-        SCC  = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
-                            N0, N1, CC);
-        if (N2.getValueType().bitsLT(SCC.getValueType()))
-          Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
-                                        N2.getValueType());
-        else
-          Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
-                             N2.getValueType(), SCC);
-      } else {
-        SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
-        Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
-                           N2.getValueType(), SCC);
-      }
+    SDValue Temp, SCC;
+    // zext (setcc n0, n1)
+    if (LegalTypes) {
+      SCC = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
+      if (N2.getValueType().bitsLT(SCC.getValueType()))
+        Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), N2.getValueType());
+      else
+        Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), N2.getValueType(), SCC);
+    } else {
+      SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
+      Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), N2.getValueType(), SCC);
+    }
 
-      AddToWorklist(SCC.getNode());
-      AddToWorklist(Temp.getNode());
+    AddToWorklist(SCC.getNode());
+    AddToWorklist(Temp.getNode());
 
-      if (N2C->isOne())
-        return Temp;
+    if (N2C->isOne())
+      return Temp;
 
-      // shl setcc result by log2 n2c
-      return DAG.getNode(
-          ISD::SHL, DL, N2.getValueType(), Temp,
-          DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
-                          getShiftAmountTy(Temp.getValueType())));
-    }
+    // shl setcc result by log2 n2c
+    return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
+                       DAG.getConstant(N2C->getAPIntValue().logBase2(),
+                                       SDLoc(Temp),
+                                       getShiftAmountTy(Temp.getValueType())));
   }
 
   // Check to see if this is an integer abs.

Added: llvm/trunk/test/CodeGen/AArch64/select_cc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/select_cc.ll?rev=346484&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/select_cc.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/select_cc.ll Fri Nov  9 03:09:40 2018
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
+
+define i64 @select_ogt_float(float %a, float %b) {
+; CHECK-LABEL: select_ogt_float:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    cset w8, gt
+; CHECK-NEXT:    lsl x0, x8, #2
+; CHECK-NEXT:    ret
+entry:
+  %cc = fcmp ogt float %a, %b
+  %sel = select i1 %cc, i64 4, i64 0
+  ret i64 %sel
+}
+
+define i64 @select_ule_float_inverse(float %a, float %b) {
+; CHECK-LABEL: select_ule_float_inverse:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    cset w8, gt
+; CHECK-NEXT:    lsl x0, x8, #2
+; CHECK-NEXT:    ret
+entry:
+  %cc = fcmp ule float %a, %b
+  %sel = select i1 %cc, i64 0, i64 4
+  ret i64 %sel
+}
+
+define i64 @select_eq_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: select_eq_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    cset w8, eq
+; CHECK-NEXT:    lsl x0, x8, #2
+; CHECK-NEXT:    ret
+entry:
+  %cc = icmp eq i32 %a, %b
+  %sel = select i1 %cc, i64 4, i64 0
+  ret i64 %sel
+}
+
+define i64 @select_ne_i32_inverse(i32 %a, i32 %b) {
+; CHECK-LABEL: select_ne_i32_inverse:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    cset w8, eq
+; CHECK-NEXT:    lsl x0, x8, #2
+; CHECK-NEXT:    ret
+entry:
+  %cc = icmp ne i32 %a, %b
+  %sel = select i1 %cc, i64 0, i64 4
+  ret i64 %sel
+}

Modified: llvm/trunk/test/CodeGen/Thumb/branchless-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb/branchless-cmp.ll?rev=346484&r1=346483&r2=346484&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb/branchless-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb/branchless-cmp.ll Fri Nov  9 03:09:40 2018
@@ -74,23 +74,17 @@ entry:
 ; CHECK-NEXT: lsls	r0, r1, #2
 }
 
-; FIXME: This one hasn't changed actually
-; but could look like test3b
 define i32 @test4a(i32 %a, i32 %b) {
 entry:
   %cmp = icmp ne i32 %a, %b
   %cond = select i1 %cmp, i32 0, i32 4
   ret i32 %cond
 ; CHECK-LABEL: test4a:
-; CHECK: bb.0:
-; CHECK-NEXT:  cmp     r0, r1
-; CHECK-NEXT:  bne     .LBB6_2
-; CHECK-NEXT: bb.1:
-; CHECK-NEXT:  movs    r0, #4
-; CHECK-NEXT:  bx      lr
-; CHECK-NEXT: .LBB6_2:
-; CHECK-NEXT:  movs    r0, #0
-; CHECK-NEXT:  bx      lr
+; CHECK-NOT: b{{(ne)|(eq)}}
+; CHECK:      subs	r0, r0, r1
+; CHECK-NEXT: rsbs	r1, r0, #0
+; CHECK-NEXT: adcs	r1, r0
+; CHECK-NEXT: lsls	r0, r1, #2
 }
 
 define i32 @test4b(i32 %a, i32 %b) {