[llvm] 54a9e99 - Add Transform for `(and/or (eq/ne A,Pow2),(eq/ne A,-Pow2))`->`(eq/ne (and (add A,Pow2),~(Pow2*2)), 0)`
Noah Goldstein via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 14 16:59:22 PST 2023
Author: Noah Goldstein
Date: 2023-02-14T18:59:04-06:00
New Revision: 54a9e992c84cc4b1668329494f4355e7d0b8e470
URL: https://github.com/llvm/llvm-project/commit/54a9e992c84cc4b1668329494f4355e7d0b8e470
DIFF: https://github.com/llvm/llvm-project/commit/54a9e992c84cc4b1668329494f4355e7d0b8e470.diff
LOG: Add Transform for `(and/or (eq/ne A,Pow2),(eq/ne A,-Pow2))`->`(eq/ne (and (add A,Pow2),~(Pow2*2)), 0)`
In many instances this can be preferable if the `icmp` -> `i1` cannot be
done in one instruction (such as X86 for scalars).
At the moment guarded behind `TLI.isDesirableToCombineLogicOpOfSETCC`.
alive2 links:
https://alive2.llvm.org/ce/z/nLm5sN
https://alive2.llvm.org/ce/z/moEcyE
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D142344
Added:
Modified:
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/test/CodeGen/X86/icmp-pow2-logic-npow2.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 5b1dea3ed8a8..f6fb97df778a 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4002,6 +4002,23 @@ class TargetLowering : public TargetLoweringBase {
return true;
}
+ // Return true if it is desirable to try and optimize LogicOp(SETCC0, SETCC1).
+ // An example (what is implemented as of writing this) is:
+ // With C as a power of 2 and C != 0 and C != INT_MIN:
+ // (icmp eq A, C) | (icmp eq A, -C)
+ // -> (icmp eq and(add(A, C), ~(C + C)), 0)
+ // (icmp ne A, C) & (icmp ne A, -C)
+ // -> (icmp ne and(add(A, C), ~(C + C)), 0)
+ //
+ // @param LogicOp the logic op
+ // @param SETCC0 the first of the SETCC nodes
+ // @param SETCC1 the second of the SETCC nodes
+ virtual bool isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
+ const SDNode *SETCC0,
+ const SDNode *SETCC1) const {
+ return false; // Opt-in: the combine is skipped unless a target overrides this.
+ }
+
/// Return true if it is profitable to combine an XOR of a logical shift
/// to create a logical shift of NOT. This transformation may not be desirable
/// if it disrupts a particularly auspicious target-specific tree (e.g.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index cd9eceb21536..742686ea2f40 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5866,6 +5866,65 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
return SDValue();
}
+static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
+ assert(
+ (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
+ "Invalid Op to combine SETCC with");
+
+ // TODO: Search past casts/truncates.
+ SDValue LHS = LogicOp->getOperand(0);
+ SDValue RHS = LogicOp->getOperand(1);
+ if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isDesirableToCombineLogicOpOfSETCC(LogicOp, LHS.getNode(),
+ RHS.getNode()))
+ return SDValue();
+
+ ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
+ ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
+
+ SDValue LHS0 = LHS->getOperand(0);
+ SDValue RHS0 = RHS->getOperand(0);
+ SDValue LHS1 = LHS->getOperand(1);
+ SDValue RHS1 = RHS->getOperand(1);
+
+ auto *LHS1C = dyn_cast<ConstantSDNode>(LHS1);
+ auto *RHS1C = dyn_cast<ConstantSDNode>(RHS1);
+ EVT VT = LogicOp->getValueType(0);
+ EVT OpVT = LHS0.getValueType();
+ SDLoc DL(LogicOp);
+
+ // With C as a power of 2 and C != 0 and C != INT_MIN:
+ // (icmp eq A, C) | (icmp eq A, -C)
+ // -> (icmp eq and(add(A, C), ~(C + C)), 0)
+ // (icmp ne A, C) & (icmp ne A, -C)
+ // -> (icmp ne and(add(A, C), ~(C + C)), 0)
+ if (CCL == CCR &&
+ CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
+ LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger() && LHS.hasOneUse() &&
+ RHS.hasOneUse() && LHS1C->getAPIntValue() == (-RHS1C->getAPIntValue())) {
+ const ConstantSDNode *Pow2 = nullptr;
+ if (LHS1C->getAPIntValue().isPowerOf2())
+ Pow2 = LHS1C;
+ else if (RHS1C->getAPIntValue().isPowerOf2())
+ Pow2 = RHS1C;
+ // isPowerOf2 already rejects zero; INT_MIN (where C == -C) is excluded here.
+ if (Pow2 != nullptr && !Pow2->getAPIntValue().isMinSignedValue()) {
+ const APInt &C = Pow2->getAPIntValue();
+ SDValue AddOp =
+ DAG.getNode(ISD::ADD, DL, OpVT, LHS0, DAG.getConstant(C, DL, OpVT));
+ SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
+ DAG.getConstant(~(C + C), DL, OpVT));
+ return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
+ DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
+ }
+ }
+
+ return SDValue();
+}
+
/// This contains all DAGCombine rules which reduce two values combined by
/// an And operation to a single value. This makes them reusable in the context
/// of visitSELECT(). Rules involving constants are not included as
@@ -6567,6 +6626,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
return DAG.getConstant(0, SDLoc(N), VT);
+ if (SDValue R = foldAndOrOfSETCC(N, DAG))
+ return R;
+
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -7457,6 +7519,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
return N1;
+ if (SDValue R = foldAndOrOfSETCC(N, DAG))
+ return R;
+
if (SDValue Combined = visitORLike(N0, N1, N))
return Combined;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b3f892fe80e5..792e7c6d47b6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57124,6 +57124,12 @@ SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc& dl,
return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, DAG);
}
+bool X86TargetLowering::isDesirableToCombineLogicOpOfSETCC(
+ const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const {
+ EVT VT = LogicOp->getValueType(0);
+ return VT.isScalarInteger(); // Only the logic op's result type is checked.
+}
+
bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
EVT VT = Op.getValueType();
bool Is8BitMulByConstant = VT == MVT::i8 && Op.getOpcode() == ISD::MUL &&
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index c9fd51c5ff54..cb46f48d14be 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1058,6 +1058,12 @@ namespace llvm {
/// and some i16 instructions are slow.
bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
+ /// Return true if the result type of \p LogicOp is a scalar integer.
+ bool
+ isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
+ const SDNode *SETCC0,
+ const SDNode *SETCC1) const override;
+
/// Return the newly negated expression if the cost is not expensive and
/// set the cost in \p Cost to indicate that if it is cheaper or neutral to
/// do the negation.
diff --git a/llvm/test/CodeGen/X86/icmp-pow2-logic-npow2.ll b/llvm/test/CodeGen/X86/icmp-pow2-logic-npow2.ll
index 38ea9aa97d5c..5a0c8e94a577 100644
--- a/llvm/test/CodeGen/X86/icmp-pow2-logic-npow2.ll
+++ b/llvm/test/CodeGen/X86/icmp-pow2-logic-npow2.ll
@@ -12,20 +12,16 @@ define i1 @eq_pow_or(i32 %0) nounwind {
; X86-LABEL: eq_pow_or:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $32, %eax
-; X86-NEXT: sete %cl
-; X86-NEXT: cmpl $-32, %eax
+; X86-NEXT: addl $32, %eax
+; X86-NEXT: testl $-65, %eax
; X86-NEXT: sete %al
-; X86-NEXT: orb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: eq_pow_or:
; X64: # %bb.0:
-; X64-NEXT: cmpl $32, %edi
-; X64-NEXT: sete %cl
-; X64-NEXT: cmpl $-32, %edi
+; X64-NEXT: addl $32, %edi
+; X64-NEXT: testl $-65, %edi
; X64-NEXT: sete %al
-; X64-NEXT: orb %cl, %al
; X64-NEXT: retq
%2 = icmp eq i32 %0, 32
%3 = icmp eq i32 %0, -32
@@ -37,20 +33,16 @@ define i1 @ne_pow_and(i8 %0) nounwind {
; X86-LABEL: ne_pow_and:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpb $16, %al
-; X86-NEXT: setne %cl
-; X86-NEXT: cmpb $-16, %al
+; X86-NEXT: addb $16, %al
+; X86-NEXT: testb $-33, %al
; X86-NEXT: setne %al
-; X86-NEXT: andb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: ne_pow_and:
; X64: # %bb.0:
-; X64-NEXT: cmpb $16, %dil
-; X64-NEXT: setne %cl
-; X64-NEXT: cmpb $-16, %dil
+; X64-NEXT: addb $16, %dil
+; X64-NEXT: testb $-33, %dil
; X64-NEXT: setne %al
-; X64-NEXT: andb %cl, %al
; X64-NEXT: retq
%2 = icmp ne i8 %0, 16
%3 = icmp ne i8 %0, -16
More information about the llvm-commits
mailing list