[llvm] 54a9e99 - Add Transform for `(and/or (eq/ne A,Pow2),(eq/ne A,-Pow2))`->`(eq/ne (and (and A,Pow2),~(Pow2*2)), 0)`

Tue Feb 14 16:59:22 PST 2023

Author: Noah Goldstein
Date: 2023-02-14T18:59:04-06:00
New Revision: 54a9e992c84cc4b1668329494f4355e7d0b8e470

URL: https://github.com/llvm/llvm-project/commit/54a9e992c84cc4b1668329494f4355e7d0b8e470
DIFF: https://github.com/llvm/llvm-project/commit/54a9e992c84cc4b1668329494f4355e7d0b8e470.diff

LOG: Add Transform for `(and/or (eq/ne A,Pow2),(eq/ne A,-Pow2))`->`(eq/ne (and (and A,Pow2),~(Pow2*2)), 0)`

In many instances this can be preferable if the `icmp` -> `i1` cannot be
done in one instruction (such as X86 for scalars).

At the moment guarded behind `TLI.isDesirableToCombineLogicOpOfSETCC`.

alive2 links:
https://alive2.llvm.org/ce/z/nLm5sN
https://alive2.llvm.org/ce/z/moEcyE

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D142344

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/TargetLowering.h
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/lib/Target/X86/X86ISelLowering.h
    llvm/test/CodeGen/X86/icmp-pow2-logic-npow2.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 5b1dea3ed8a8..f6fb97df778a 100644

--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4002,6 +4002,23 @@ class TargetLowering : public TargetLoweringBase {
     return true;
   }
 
+  // Return true if its desirable to try and optimize LogicOp(SETCC0, SETCC1).
+  // An example (what is implemented as of writing this) is:
+  //    With C as a power of 2 and C != 0 and C != INT_MIN:
+  //       (icmp eq A, C) | (icmp eq A, -C)
+  //            -> (icmp eq and(add(A, C), ~(C + C)), 0)
+  //     (icmp ne A, C) & (icmp ne A, -C)w
+  //            -> (icmp ne and(add(A, C), ~(C + C)), 0)
+  //
+  // @param LogicOp the logic op
+  // @param SETCC0 the first of the SETCC nodes
+  // @param SETCC0 the second of the SETCC nodes
+  virtual bool isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
+                                                  const SDNode *SETCC0,
+                                                  const SDNode *SETCC1) const {
+    return false;
+  }
+
   /// Return true if it is profitable to combine an XOR of a logical shift
   /// to create a logical shift of NOT. This transformation may not be desirable
   /// if it disrupts a particularly auspicious target-specific tree (e.g.

diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index cd9eceb21536..742686ea2f40 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5866,6 +5866,65 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
   return SDValue();
 }
 
+static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
+  assert(
+      (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
+      "Invalid Op to combine SETCC with");
+
+  // TODO: Search past casts/truncates.
+  SDValue LHS = LogicOp->getOperand(0);
+  SDValue RHS = LogicOp->getOperand(1);
+  if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC)
+    return SDValue();
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.isDesirableToCombineLogicOpOfSETCC(LogicOp, LHS.getNode(),
+                                              RHS.getNode()))
+    return SDValue();
+
+  ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
+  ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
+
+  SDValue LHS0 = LHS->getOperand(0);
+  SDValue RHS0 = RHS->getOperand(0);
+  SDValue LHS1 = LHS->getOperand(1);
+  SDValue RHS1 = RHS->getOperand(1);
+
+  auto *LHS1C = dyn_cast<ConstantSDNode>(LHS1);
+  auto *RHS1C = dyn_cast<ConstantSDNode>(RHS1);
+  EVT VT = LogicOp->getValueType(0);
+  EVT OpVT = LHS0.getValueType();
+  SDLoc DL(LogicOp);
+
+  // With C as a power of 2 and C != 0 and C != INT_MIN:
+  //   (icmp eq A, C) | (icmp eq A, -C)
+  //        -> (icmp eq and(add(A, C), ~(C + C)), 0)
+  //   (icmp ne A, C) & (icmp ne A, -C)w
+  //        -> (icmp ne and(add(A, C), ~(C + C)), 0)
+  if (CCL == CCR &&
+      CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
+      LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger() && LHS.hasOneUse() &&
+      RHS.hasOneUse() && LHS1C->getAPIntValue() == (-RHS1C->getAPIntValue())) {
+    const ConstantSDNode *Pow2 = nullptr;
+    if (LHS1C->getAPIntValue().isPowerOf2())
+      Pow2 = LHS1C;
+    else if (RHS1C->getAPIntValue().isPowerOf2())
+      Pow2 = RHS1C;
+    // isPowerOf2 is only for non-zero powers of 2.
+    if (Pow2 != nullptr && !Pow2->getAPIntValue().isMinSignedValue()) {
+      const APInt &C = Pow2->getAPIntValue();
+      SDValue AddOp =
+          DAG.getNode(ISD::ADD, DL, OpVT, LHS0, DAG.getConstant(C, DL, OpVT));
+      SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
+                                  DAG.getConstant(~(C + C), DL, OpVT));
+      return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
+                         DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
+    }
+  }
+
+  return SDValue();
+}
+
 /// This contains all DAGCombine rules which reduce two values combined by
 /// an And operation to a single value. This makes them reusable in the context
 /// of visitSELECT(). Rules involving constants are not included as
@@ -6567,6 +6626,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
     return DAG.getConstant(0, SDLoc(N), VT);
 
+  if (SDValue R = foldAndOrOfSETCC(N, DAG))
+    return R;
+
   if (SDValue NewSel = foldBinOpIntoSelect(N))
     return NewSel;
 
@@ -7457,6 +7519,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
     return N1;
 
+  if (SDValue R = foldAndOrOfSETCC(N, DAG))
+    return R;
+
   if (SDValue Combined = visitORLike(N0, N1, N))
     return Combined;
 

diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b3f892fe80e5..792e7c6d47b6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57124,6 +57124,12 @@ SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc& dl,
   return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, DAG);
 }
 
+bool X86TargetLowering::isDesirableToCombineLogicOpOfSETCC(
+    const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const {
+  EVT VT = LogicOp->getValueType(0);
+  return VT.isScalarInteger();
+}
+
 bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
   EVT VT = Op.getValueType();
   bool Is8BitMulByConstant = VT == MVT::i8 && Op.getOpcode() == ISD::MUL &&

diff  --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index c9fd51c5ff54..cb46f48d14be 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1058,6 +1058,12 @@ namespace llvm {
     /// and some i16 instructions are slow.
     bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
 
+    /// Return true if this is operating on scalar integers.
+    bool
+    isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
+                                       const SDNode *SETCC0,
+                                       const SDNode *SETCC1) const override;
+
     /// Return the newly negated expression if the cost is not expensive and
     /// set the cost in \p Cost to indicate that if it is cheaper or neutral to
     /// do the negation.

diff  --git a/llvm/test/CodeGen/X86/icmp-pow2-logic-npow2.ll b/llvm/test/CodeGen/X86/icmp-pow2-logic-npow2.ll
index 38ea9aa97d5c..5a0c8e94a577 100644
--- a/llvm/test/CodeGen/X86/icmp-pow2-logic-npow2.ll
+++ b/llvm/test/CodeGen/X86/icmp-pow2-logic-npow2.ll
@@ -12,20 +12,16 @@ define i1 @eq_pow_or(i32 %0) nounwind {
 ; X86-LABEL: eq_pow_or:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    cmpl $32, %eax
-; X86-NEXT:    sete %cl
-; X86-NEXT:    cmpl $-32, %eax
+; X86-NEXT:    addl $32, %eax
+; X86-NEXT:    testl $-65, %eax
 ; X86-NEXT:    sete %al
-; X86-NEXT:    orb %cl, %al
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: eq_pow_or:
 ; X64:       # %bb.0:
-; X64-NEXT:    cmpl $32, %edi
-; X64-NEXT:    sete %cl
-; X64-NEXT:    cmpl $-32, %edi
+; X64-NEXT:    addl $32, %edi
+; X64-NEXT:    testl $-65, %edi
 ; X64-NEXT:    sete %al
-; X64-NEXT:    orb %cl, %al
 ; X64-NEXT:    retq
   %2 = icmp eq i32 %0, 32
   %3 = icmp eq i32 %0, -32
@@ -37,20 +33,16 @@ define i1 @ne_pow_and(i8 %0) nounwind {
 ; X86-LABEL: ne_pow_and:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    cmpb $16, %al
-; X86-NEXT:    setne %cl
-; X86-NEXT:    cmpb $-16, %al
+; X86-NEXT:    addb $16, %al
+; X86-NEXT:    testb $-33, %al
 ; X86-NEXT:    setne %al
-; X86-NEXT:    andb %cl, %al
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: ne_pow_and:
 ; X64:       # %bb.0:
-; X64-NEXT:    cmpb $16, %dil
-; X64-NEXT:    setne %cl
-; X64-NEXT:    cmpb $-16, %dil
+; X64-NEXT:    addb $16, %dil
+; X64-NEXT:    testb $-33, %dil
 ; X64-NEXT:    setne %al
-; X64-NEXT:    andb %cl, %al
 ; X64-NEXT:    retq
   %2 = icmp ne i8 %0, 16
   %3 = icmp ne i8 %0, -16