[llvm] 71f0ec2 - [AArch64] Add `foldCSELOfCSEL` combine.
Karl Meakin via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 18 17:04:50 PDT 2022
Author: Karl Meakin
Date: 2022-08-19T01:04:29+01:00
New Revision: 71f0ec242f348e7d22988cf6ef1a5185b53df70b
URL: https://github.com/llvm/llvm-project/commit/71f0ec242f348e7d22988cf6ef1a5185b53df70b
DIFF: https://github.com/llvm/llvm-project/commit/71f0ec242f348e7d22988cf6ef1a5185b53df70b.diff
LOG: [AArch64] Add `foldCSELOfCSEL` combine.
This version of the combine is more conservative than the previous attempt.
Differential Revision: https://reviews.llvm.org/D125504
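In short: when a CSEL's flags come from comparing another CSEL of two constants against one of those constants, the outer CSEL can reuse the inner CSEL's condition directly, inverted when the comparison is against the other constant or when the outer condition is NE. The effect is visible in the updated u128_saturating_add test in i128-math.ll below, where an intermediate cset/cmp pair disappears:

  Before:
    cset  w10, hs
    cmp   w10, #0
    csinv x0, x8, xzr, eq
    csinv x1, x9, xzr, eq
  After:
    csinv x0, x8, xzr, lo
    csinv x1, x9, xzr, lo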
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
llvm/test/CodeGen/AArch64/i128-math.ll
llvm/test/CodeGen/AArch64/i256-math.ll
llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
llvm/test/CodeGen/AArch64/usub_sat_vec.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a1f423246db7a..16c3d96807674 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18410,6 +18410,58 @@ static SDValue foldCSELofCTTZ(SDNode *N, SelectionDAG &DAG) {
BitWidthMinusOne);
}
+// (CSEL l r EQ (CMP (CSEL x y cc2 cond) x)) => (CSEL l r cc2 cond)
+// (CSEL l r EQ (CMP (CSEL x y cc2 cond) y)) => (CSEL l r !cc2 cond)
+// Where x and y are constants
+
+// (CSEL l r NE (CMP (CSEL x y cc2 cond) x)) => (CSEL l r !cc2 cond)
+// (CSEL l r NE (CMP (CSEL x y cc2 cond) y)) => (CSEL l r cc2 cond)
+// Where x and y are constants
+static SDValue foldCSELOfCSEL(SDNode *Op, SelectionDAG &DAG) {
+  SDValue L = Op->getOperand(0);
+  SDValue R = Op->getOperand(1);
+  AArch64CC::CondCode OpCC =
+      static_cast<AArch64CC::CondCode>(Op->getConstantOperandVal(2));
+
+  SDValue OpCmp = Op->getOperand(3);
+  if (!isCMP(OpCmp))
+    return SDValue();
+
+  SDValue CmpLHS = OpCmp.getOperand(0);
+  SDValue CmpRHS = OpCmp.getOperand(1);
+
+  if (CmpRHS.getOpcode() == AArch64ISD::CSEL)
+    std::swap(CmpLHS, CmpRHS);
+  else if (CmpLHS.getOpcode() != AArch64ISD::CSEL)
+    return SDValue();
+
+  SDValue X = CmpLHS->getOperand(0);
+  SDValue Y = CmpLHS->getOperand(1);
+  if (!isa<ConstantSDNode>(X) || !isa<ConstantSDNode>(Y)) {
+    return SDValue();
+  }
+
+  AArch64CC::CondCode CC =
+      static_cast<AArch64CC::CondCode>(CmpLHS->getConstantOperandVal(2));
+  SDValue Cond = CmpLHS->getOperand(3);
+
+  if (CmpRHS == Y)
+    CC = AArch64CC::getInvertedCondCode(CC);
+  else if (CmpRHS != X)
+    return SDValue();
+
+  if (OpCC == AArch64CC::NE)
+    CC = AArch64CC::getInvertedCondCode(CC);
+  else if (OpCC != AArch64CC::EQ)
+    return SDValue();
+
+  SDLoc DL(Op);
+  EVT VT = Op->getValueType(0);
+
+  SDValue CCValue = DAG.getConstant(CC, DL, MVT::i32);
+  return DAG.getNode(AArch64ISD::CSEL, DL, VT, L, R, CCValue, Cond);
+}
+
// Optimize CSEL instructions
static SDValue performCSELCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
@@ -18418,6 +18470,9 @@ static SDValue performCSELCombine(SDNode *N,
if (N->getOperand(0) == N->getOperand(1))
return N->getOperand(0);
+  if (SDValue R = foldCSELOfCSEL(N, DAG))
+    return R;
+
// CSEL 0, cttz(X), eq(X, 0) -> AND cttz bitwidth-1
// CSEL cttz(X), 0, ne(X, 0) -> AND cttz bitwidth-1
if (SDValue Folded = foldCSELofCTTZ(N, DAG))
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index cf8891cff1b3e..be08a3a1a67e3 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -331,6 +331,41 @@ inline static unsigned getNZCVToSatisfyCondCode(CondCode Code) {
case LE: return Z; // Z == 1 || N != V
}
}
+
+/// Return true if Code is a reflexive relationship:
+/// forall x. (CSET Code (CMP x x)) == 1
+inline static bool isReflexive(CondCode Code) {
+  switch (Code) {
+  case EQ:
+  case HS:
+  case PL:
+  case LS:
+  case GE:
+  case LE:
+  case AL:
+  case NV:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// Return true if Code is an irreflexive relationship:
+/// forall x. (CSET Code (CMP x x)) == 0
+inline static bool isIrreflexive(CondCode Code) {
+  switch (Code) {
+  case NE:
+  case LO:
+  case MI:
+  case HI:
+  case LT:
+  case GT:
+    return true;
+  default:
+    return false;
+  }
+}
+
} // end namespace AArch64CC
struct SysAlias {
diff --git a/llvm/test/CodeGen/AArch64/i128-math.ll b/llvm/test/CodeGen/AArch64/i128-math.ll
index 7ac419621382f..86bd92bf8cee9 100644
--- a/llvm/test/CodeGen/AArch64/i128-math.ll
+++ b/llvm/test/CodeGen/AArch64/i128-math.ll
@@ -68,10 +68,8 @@ define i128 @u128_saturating_add(i128 %x, i128 %y) {
; CHECK: // %bb.0:
; CHECK-NEXT: adds x8, x0, x2
; CHECK-NEXT: adcs x9, x1, x3
-; CHECK-NEXT: cset w10, hs
-; CHECK-NEXT: cmp w10, #0
-; CHECK-NEXT: csinv x0, x8, xzr, eq
-; CHECK-NEXT: csinv x1, x9, xzr, eq
+; CHECK-NEXT: csinv x0, x8, xzr, lo
+; CHECK-NEXT: csinv x1, x9, xzr, lo
; CHECK-NEXT: ret
%1 = tail call i128 @llvm.uadd.sat.i128(i128 %x, i128 %y)
ret i128 %1
@@ -126,10 +124,8 @@ define i128 @u128_saturating_sub(i128 %x, i128 %y) {
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x2
; CHECK-NEXT: sbcs x9, x1, x3
-; CHECK-NEXT: cset w10, lo
-; CHECK-NEXT: cmp w10, #0
-; CHECK-NEXT: csel x0, xzr, x8, ne
-; CHECK-NEXT: csel x1, xzr, x9, ne
+; CHECK-NEXT: csel x0, xzr, x8, lo
+; CHECK-NEXT: csel x1, xzr, x9, lo
; CHECK-NEXT: ret
%1 = tail call i128 @llvm.usub.sat.i128(i128 %x, i128 %y)
ret i128 %1
@@ -185,11 +181,9 @@ define i128 @i128_saturating_add(i128 %x, i128 %y) {
; CHECK-NEXT: adds x8, x0, x2
; CHECK-NEXT: adcs x9, x1, x3
; CHECK-NEXT: asr x10, x9, #63
-; CHECK-NEXT: cset w11, vs
-; CHECK-NEXT: cmp w11, #0
; CHECK-NEXT: eor x11, x10, #0x8000000000000000
-; CHECK-NEXT: csel x0, x10, x8, ne
-; CHECK-NEXT: csel x1, x11, x9, ne
+; CHECK-NEXT: csel x0, x10, x8, vs
+; CHECK-NEXT: csel x1, x11, x9, vs
; CHECK-NEXT: ret
%1 = tail call i128 @llvm.sadd.sat.i128(i128 %x, i128 %y)
ret i128 %1
@@ -245,11 +239,9 @@ define i128 @i128_saturating_sub(i128 %x, i128 %y) {
; CHECK-NEXT: subs x8, x0, x2
; CHECK-NEXT: sbcs x9, x1, x3
; CHECK-NEXT: asr x10, x9, #63
-; CHECK-NEXT: cset w11, vs
-; CHECK-NEXT: cmp w11, #0
; CHECK-NEXT: eor x11, x10, #0x8000000000000000
-; CHECK-NEXT: csel x0, x10, x8, ne
-; CHECK-NEXT: csel x1, x11, x9, ne
+; CHECK-NEXT: csel x0, x10, x8, vs
+; CHECK-NEXT: csel x1, x11, x9, vs
; CHECK-NEXT: ret
%1 = tail call i128 @llvm.ssub.sat.i128(i128 %x, i128 %y)
ret i128 %1
diff --git a/llvm/test/CodeGen/AArch64/i256-math.ll b/llvm/test/CodeGen/AArch64/i256-math.ll
index 232346e7fabe4..c275337389c7e 100644
--- a/llvm/test/CodeGen/AArch64/i256-math.ll
+++ b/llvm/test/CodeGen/AArch64/i256-math.ll
@@ -70,12 +70,10 @@ define i256 @u256_saturating_add(i256 %x, i256 %y) {
; CHECK-NEXT: adcs x9, x1, x5
; CHECK-NEXT: adcs x10, x2, x6
; CHECK-NEXT: adcs x11, x3, x7
-; CHECK-NEXT: cset w12, hs
-; CHECK-NEXT: cmp w12, #0
-; CHECK-NEXT: csinv x0, x8, xzr, eq
-; CHECK-NEXT: csinv x1, x9, xzr, eq
-; CHECK-NEXT: csinv x2, x10, xzr, eq
-; CHECK-NEXT: csinv x3, x11, xzr, eq
+; CHECK-NEXT: csinv x0, x8, xzr, lo
+; CHECK-NEXT: csinv x1, x9, xzr, lo
+; CHECK-NEXT: csinv x2, x10, xzr, lo
+; CHECK-NEXT: csinv x3, x11, xzr, lo
; CHECK-NEXT: ret
%1 = tail call i256 @llvm.uadd.sat.i256(i256 %x, i256 %y)
ret i256 %1
@@ -138,12 +136,10 @@ define i256 @u256_saturating_sub(i256 %x, i256 %y) {
; CHECK-NEXT: sbcs x9, x1, x5
; CHECK-NEXT: sbcs x10, x2, x6
; CHECK-NEXT: sbcs x11, x3, x7
-; CHECK-NEXT: cset w12, lo
-; CHECK-NEXT: cmp w12, #0
-; CHECK-NEXT: csel x0, xzr, x8, ne
-; CHECK-NEXT: csel x1, xzr, x9, ne
-; CHECK-NEXT: csel x2, xzr, x10, ne
-; CHECK-NEXT: csel x3, xzr, x11, ne
+; CHECK-NEXT: csel x0, xzr, x8, lo
+; CHECK-NEXT: csel x1, xzr, x9, lo
+; CHECK-NEXT: csel x2, xzr, x10, lo
+; CHECK-NEXT: csel x3, xzr, x11, lo
; CHECK-NEXT: ret
%1 = tail call i256 @llvm.usub.sat.i256(i256 %x, i256 %y)
ret i256 %1
@@ -206,14 +202,12 @@ define i256 @i256_saturating_add(i256 %x, i256 %y) {
; CHECK-NEXT: adcs x9, x1, x5
; CHECK-NEXT: adcs x10, x2, x6
; CHECK-NEXT: adcs x11, x3, x7
-; CHECK-NEXT: cset w12, vs
-; CHECK-NEXT: asr x13, x11, #63
-; CHECK-NEXT: cmp w12, #0
-; CHECK-NEXT: csel x0, x13, x8, ne
-; CHECK-NEXT: eor x8, x13, #0x8000000000000000
-; CHECK-NEXT: csel x1, x13, x9, ne
-; CHECK-NEXT: csel x2, x13, x10, ne
-; CHECK-NEXT: csel x3, x8, x11, ne
+; CHECK-NEXT: asr x12, x11, #63
+; CHECK-NEXT: csel x0, x12, x8, vs
+; CHECK-NEXT: eor x8, x12, #0x8000000000000000
+; CHECK-NEXT: csel x1, x12, x9, vs
+; CHECK-NEXT: csel x2, x12, x10, vs
+; CHECK-NEXT: csel x3, x8, x11, vs
; CHECK-NEXT: ret
%1 = tail call i256 @llvm.sadd.sat.i256(i256 %x, i256 %y)
ret i256 %1
@@ -276,14 +270,12 @@ define i256 @i256_saturating_sub(i256 %x, i256 %y) {
; CHECK-NEXT: sbcs x9, x1, x5
; CHECK-NEXT: sbcs x10, x2, x6
; CHECK-NEXT: sbcs x11, x3, x7
-; CHECK-NEXT: cset w12, vs
-; CHECK-NEXT: asr x13, x11, #63
-; CHECK-NEXT: cmp w12, #0
-; CHECK-NEXT: csel x0, x13, x8, ne
-; CHECK-NEXT: eor x8, x13, #0x8000000000000000
-; CHECK-NEXT: csel x1, x13, x9, ne
-; CHECK-NEXT: csel x2, x13, x10, ne
-; CHECK-NEXT: csel x3, x8, x11, ne
+; CHECK-NEXT: asr x12, x11, #63
+; CHECK-NEXT: csel x0, x12, x8, vs
+; CHECK-NEXT: eor x8, x12, #0x8000000000000000
+; CHECK-NEXT: csel x1, x12, x9, vs
+; CHECK-NEXT: csel x2, x12, x10, vs
+; CHECK-NEXT: csel x3, x8, x11, vs
; CHECK-NEXT: ret
%1 = tail call i256 @llvm.ssub.sat.i256(i256 %x, i256 %y)
ret i256 %1
diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
index 8b1d961cf2834..0f3810c818281 100644
--- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
@@ -352,20 +352,16 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: adds x8, x2, x6
; CHECK-NEXT: adcs x9, x3, x7
-; CHECK-NEXT: cset w10, vs
-; CHECK-NEXT: asr x11, x9, #63
-; CHECK-NEXT: cmp w10, #0
-; CHECK-NEXT: csel x2, x11, x8, ne
-; CHECK-NEXT: eor x8, x11, #0x8000000000000000
-; CHECK-NEXT: csel x3, x8, x9, ne
+; CHECK-NEXT: asr x10, x9, #63
+; CHECK-NEXT: csel x2, x10, x8, vs
+; CHECK-NEXT: eor x8, x10, #0x8000000000000000
+; CHECK-NEXT: csel x3, x8, x9, vs
; CHECK-NEXT: adds x8, x0, x4
; CHECK-NEXT: adcs x9, x1, x5
-; CHECK-NEXT: cset w10, vs
-; CHECK-NEXT: asr x11, x9, #63
-; CHECK-NEXT: cmp w10, #0
-; CHECK-NEXT: eor x10, x11, #0x8000000000000000
-; CHECK-NEXT: csel x8, x11, x8, ne
-; CHECK-NEXT: csel x1, x10, x9, ne
+; CHECK-NEXT: asr x10, x9, #63
+; CHECK-NEXT: csel x8, x10, x8, vs
+; CHECK-NEXT: eor x10, x10, #0x8000000000000000
+; CHECK-NEXT: csel x1, x10, x9, vs
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: fmov x0, d0
diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
index e9b65363d281b..0d6a93cc63c95 100644
--- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
@@ -355,20 +355,16 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x2, x6
; CHECK-NEXT: sbcs x9, x3, x7
-; CHECK-NEXT: cset w10, vs
-; CHECK-NEXT: asr x11, x9, #63
-; CHECK-NEXT: cmp w10, #0
-; CHECK-NEXT: csel x2, x11, x8, ne
-; CHECK-NEXT: eor x8, x11, #0x8000000000000000
-; CHECK-NEXT: csel x3, x8, x9, ne
+; CHECK-NEXT: asr x10, x9, #63
+; CHECK-NEXT: csel x2, x10, x8, vs
+; CHECK-NEXT: eor x8, x10, #0x8000000000000000
+; CHECK-NEXT: csel x3, x8, x9, vs
; CHECK-NEXT: subs x8, x0, x4
; CHECK-NEXT: sbcs x9, x1, x5
-; CHECK-NEXT: cset w10, vs
-; CHECK-NEXT: asr x11, x9, #63
-; CHECK-NEXT: cmp w10, #0
-; CHECK-NEXT: eor x10, x11, #0x8000000000000000
-; CHECK-NEXT: csel x8, x11, x8, ne
-; CHECK-NEXT: csel x1, x10, x9, ne
+; CHECK-NEXT: asr x10, x9, #63
+; CHECK-NEXT: csel x8, x10, x8, vs
+; CHECK-NEXT: eor x10, x10, #0x8000000000000000
+; CHECK-NEXT: csel x1, x10, x9, vs
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: fmov x0, d0
diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
index 96f2d6e553fe7..c7c06dbc302d6 100644
--- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
@@ -350,16 +350,12 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: adds x8, x2, x6
; CHECK-NEXT: adcs x9, x3, x7
-; CHECK-NEXT: cset w10, hs
-; CHECK-NEXT: cmp w10, #0
-; CHECK-NEXT: csinv x2, x8, xzr, eq
-; CHECK-NEXT: csinv x3, x9, xzr, eq
+; CHECK-NEXT: csinv x2, x8, xzr, lo
+; CHECK-NEXT: csinv x3, x9, xzr, lo
; CHECK-NEXT: adds x8, x0, x4
; CHECK-NEXT: adcs x9, x1, x5
-; CHECK-NEXT: cset w10, hs
-; CHECK-NEXT: cmp w10, #0
-; CHECK-NEXT: csinv x8, x8, xzr, eq
-; CHECK-NEXT: csinv x1, x9, xzr, eq
+; CHECK-NEXT: csinv x8, x8, xzr, lo
+; CHECK-NEXT: csinv x1, x9, xzr, lo
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: fmov x0, d0
diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
index 9ed64b7c7b2fb..733553afb1287 100644
--- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
@@ -346,16 +346,12 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x2, x6
; CHECK-NEXT: sbcs x9, x3, x7
-; CHECK-NEXT: cset w10, lo
-; CHECK-NEXT: cmp w10, #0
-; CHECK-NEXT: csel x2, xzr, x8, ne
-; CHECK-NEXT: csel x3, xzr, x9, ne
+; CHECK-NEXT: csel x2, xzr, x8, lo
+; CHECK-NEXT: csel x3, xzr, x9, lo
; CHECK-NEXT: subs x8, x0, x4
; CHECK-NEXT: sbcs x9, x1, x5
-; CHECK-NEXT: cset w10, lo
-; CHECK-NEXT: cmp w10, #0
-; CHECK-NEXT: csel x8, xzr, x8, ne
-; CHECK-NEXT: csel x1, xzr, x9, ne
+; CHECK-NEXT: csel x8, xzr, x8, lo
+; CHECK-NEXT: csel x1, xzr, x9, lo
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: fmov x0, d0