[llvm] [AArch64] Eliminate Common SUBS by Reassociating Non-Constants (PR #123344)
Marius Kamp via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 19 04:43:08 PST 2025
https://github.com/mskamp updated https://github.com/llvm/llvm-project/pull/123344
>From d40c111801fb9caf3ccd35ce0b32bde2fd4f1b45 Mon Sep 17 00:00:00 2001
From: Marius Kamp <msk at posteo.org>
Date: Sat, 11 Jan 2025 14:33:17 +0100
Subject: [PATCH 1/2] [AArch64] Add Tests for Reassociation of SUBS with
Non-Constant; NFC
---
llvm/test/CodeGen/AArch64/csel-cmp-cse.ll | 364 +++++++++++++++++++++-
1 file changed, 348 insertions(+), 16 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll b/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll
index d8904cc6e35e34..77c5b4ba0c453e 100644
--- a/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll
+++ b/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll
@@ -335,6 +335,306 @@ define i32 @test_eq0_multi_use_sub_i32(i32 %x0, i32 %x1) {
ret i32 %ret
}
+define i32 @test_eq_nonconst_sub_add_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_eq_nonconst_sub_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, w2
+; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: csel w0, wzr, w8, eq
+; CHECK-NEXT: ret
+ %cmp = icmp eq i32 %x1, %x2
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_ne_nonconst_sub_add_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ne_nonconst_sub_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, w2
+; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: csel w0, wzr, w8, ne
+; CHECK-NEXT: ret
+ %cmp = icmp ne i32 %x1, %x2
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_ult_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ult_nonconst_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, w2
+; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: csel w0, wzr, w8, lo
+; CHECK-NEXT: ret
+ %cmp = icmp ult i32 %x1, %x2
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_ule_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ule_nonconst_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, w2
+; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: csel w0, wzr, w8, ls
+; CHECK-NEXT: ret
+ %cmp = icmp ule i32 %x1, %x2
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_ugt_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ugt_nonconst_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, w2
+; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: csel w0, wzr, w8, hi
+; CHECK-NEXT: ret
+ %cmp = icmp ugt i32 %x1, %x2
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_uge_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_uge_nonconst_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, w2
+; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: csel w0, wzr, w8, hs
+; CHECK-NEXT: ret
+ %cmp = icmp uge i32 %x1, %x2
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_slt_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_slt_nonconst_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, w2
+; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: csel w0, wzr, w8, lt
+; CHECK-NEXT: ret
+ %cmp = icmp slt i32 %x1, %x2
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_sle_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_sle_nonconst_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, w2
+; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: csel w0, wzr, w8, le
+; CHECK-NEXT: ret
+ %cmp = icmp sle i32 %x1, %x2
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_sgt_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_sgt_nonconst_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, w2
+; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: csel w0, wzr, w8, gt
+; CHECK-NEXT: ret
+ %cmp = icmp sgt i32 %x1, %x2
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_sge_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_sge_nonconst_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, w2
+; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: csel w0, wzr, w8, ge
+; CHECK-NEXT: ret
+ %cmp = icmp sge i32 %x1, %x2
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_eq_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_eq_nonconst_sub_add_comm_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w2, w1
+; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: csel w0, wzr, w8, eq
+; CHECK-NEXT: ret
+ %cmp = icmp eq i32 %x2, %x1
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_ne_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ne_nonconst_sub_add_comm_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w2, w1
+; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: csel w0, wzr, w8, ne
+; CHECK-NEXT: ret
+ %cmp = icmp ne i32 %x2, %x1
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_ult_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ult_nonconst_sub_add_comm_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w2, w1
+; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: csel w0, wzr, w8, lo
+; CHECK-NEXT: ret
+ %cmp = icmp ult i32 %x2, %x1
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_ule_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ule_nonconst_sub_add_comm_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w2, w1
+; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: csel w0, wzr, w8, ls
+; CHECK-NEXT: ret
+ %cmp = icmp ule i32 %x2, %x1
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_ugt_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ugt_nonconst_sub_add_comm_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w2, w1
+; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: csel w0, wzr, w8, hi
+; CHECK-NEXT: ret
+ %cmp = icmp ugt i32 %x2, %x1
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_uge_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_uge_nonconst_sub_add_comm_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w2, w1
+; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: csel w0, wzr, w8, hs
+; CHECK-NEXT: ret
+ %cmp = icmp uge i32 %x2, %x1
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_slt_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_slt_nonconst_sub_add_comm_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w2, w1
+; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: csel w0, wzr, w8, lt
+; CHECK-NEXT: ret
+ %cmp = icmp slt i32 %x2, %x1
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_sle_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_sle_nonconst_sub_add_comm_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w2, w1
+; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: csel w0, wzr, w8, le
+; CHECK-NEXT: ret
+ %cmp = icmp sle i32 %x2, %x1
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_sgt_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_sgt_nonconst_sub_add_comm_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w2, w1
+; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: csel w0, wzr, w8, gt
+; CHECK-NEXT: ret
+ %cmp = icmp sgt i32 %x2, %x1
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_sge_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_sge_nonconst_sub_add_comm_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w2, w1
+; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: csel w0, wzr, w8, ge
+; CHECK-NEXT: ret
+ %cmp = icmp sge i32 %x2, %x1
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
; Negative test
define i32 @test_eq0_multi_use_cmp_i32(i32 %x0, i32 %x1) {
; CHECK-LABEL: test_eq0_multi_use_cmp_i32:
@@ -421,22 +721,6 @@ define i32 @test_ugtsmax_sub_add_i32(i32 %x0, i32 %x1) {
ret i32 %ret
}
-; Negative test
-define i32 @test_ult_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
-; CHECK-LABEL: test_ult_nonconst_i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w1, w2
-; CHECK-NEXT: sub w8, w8, w2
-; CHECK-NEXT: csel w0, wzr, w8, lo
-; CHECK-NEXT: ret
- %cmp = icmp ult i32 %x1, %x2
- %add = add i32 %x0, %x1
- %sub = sub i32 %add, %x2
- %ret = select i1 %cmp, i32 0, i32 %sub
- ret i32 %ret
-}
-
; Negative test
define i32 @test_eq_const_mismatch_i32(i32 %x0, i32 %x1) {
; CHECK-LABEL: test_eq_const_mismatch_i32:
@@ -771,3 +1055,51 @@ define i32 @test_eq0_bitwidth_mismatch_2(i32 %x0, i64 %x1) {
%ret = select i1 %cmp, i32 0, i32 %sub
ret i32 %ret
}
+
+; Negative test
+define i32 @test_ult_nonconst_op_mismatch_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ult_nonconst_op_mismatch_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, w2
+; CHECK-NEXT: add w8, w8, w2
+; CHECK-NEXT: csel w0, wzr, w8, lo
+; CHECK-NEXT: ret
+ %cmp = icmp ult i32 %x1, %x2
+ %add = add i32 %x0, %x1
+ %sub = add i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_ult_nonconst_unrelated_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
+; CHECK-LABEL: test_ult_nonconst_unrelated_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, w2
+; CHECK-NEXT: sub w8, w8, w3
+; CHECK-NEXT: csel w0, wzr, w8, lo
+; CHECK-NEXT: ret
+ %cmp = icmp ult i32 %x1, %x2
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, %x3
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_ult_nonconst_unrelated_2_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
+; CHECK-LABEL: test_ult_nonconst_unrelated_2_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w2, w1
+; CHECK-NEXT: sub w8, w8, w3
+; CHECK-NEXT: csel w0, wzr, w8, lo
+; CHECK-NEXT: ret
+ %cmp = icmp ult i32 %x2, %x1
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, %x3
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
>From f2222feb427a834602f77b8a3adc1e4db587aeb0 Mon Sep 17 00:00:00 2001
From: Marius Kamp <msk at posteo.org>
Date: Sat, 11 Jan 2025 16:10:06 +0100
Subject: [PATCH 2/2] [AArch64] Eliminate Common SUBS by Reassociating
Non-Constants
Commit 1eed46960c217f9480865702f06fb730c7521e61 added logic to
reassociate a (add (add x y) -c) operand of a CSEL instruction whose
comparison involves x and c (or a similar constant) in order to obtain
a common (SUBS x c) instruction.
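This commit extends this logic to non-constants: we now also
reassociate a (sub (add x y) z) operand of a CSEL instruction to
(add (sub x z) y) if the CSEL compares x and z. If the comparison has
its operands in the opposite order (z compared with x), the SUBS
operands and the condition code are swapped accordingly.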
Alive proof: https://alive2.llvm.org/ce/z/SEVpRm
---
.../Target/AArch64/AArch64ISelLowering.cpp | 63 +++++++---
llvm/test/CodeGen/AArch64/csel-cmp-cse.ll | 116 ++++++++----------
2 files changed, 95 insertions(+), 84 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e4715018d84caf..c66a59869b798e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -24898,16 +24898,34 @@ static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) {
SDValue SubsNode = N->getOperand(3);
if (SubsNode.getOpcode() != AArch64ISD::SUBS || !SubsNode.hasOneUse())
return SDValue();
- auto *CmpOpConst = dyn_cast<ConstantSDNode>(SubsNode.getOperand(1));
- if (!CmpOpConst)
- return SDValue();
+ SDValue CmpOpToMatch = SubsNode.getOperand(1);
SDValue CmpOpOther = SubsNode.getOperand(0);
EVT VT = N->getValueType(0);
+ unsigned ExpectedOpcode;
+ std::function<bool(SDValue)> CheckOp;
+ SDValue SubsOp;
+ auto *CmpOpConst = dyn_cast<ConstantSDNode>(CmpOpToMatch);
+ if (CmpOpConst) {
+ ExpectedOpcode = ISD::ADD;
+ CheckOp = [&](SDValue Op) {
+ auto *AddOpConst = dyn_cast<ConstantSDNode>(Op);
+ return AddOpConst &&
+ AddOpConst->getAPIntValue() == -CmpOpConst->getAPIntValue();
+ };
+ SubsOp = DAG.getConstant(CmpOpConst->getAPIntValue(), SDLoc(CmpOpConst),
+ CmpOpConst->getValueType(0));
+ } else {
+ ExpectedOpcode = ISD::SUB;
+ CheckOp = [&](SDValue Op) { return Op == CmpOpToMatch; };
+ SubsOp = CmpOpToMatch;
+ }
+
// Get the operand that can be reassociated with the SUBS instruction.
- auto GetReassociationOp = [&](SDValue Op, APInt ExpectedConst) {
- if (Op.getOpcode() != ISD::ADD)
+ auto GetReassociationOp = [&](SDValue Op,
+ std::function<bool(SDValue)> CheckOp) {
+ if (Op.getOpcode() != ExpectedOpcode)
return SDValue();
if (Op.getOperand(0).getOpcode() != ISD::ADD ||
!Op.getOperand(0).hasOneUse())
@@ -24918,24 +24936,21 @@ static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) {
std::swap(X, Y);
if (X != CmpOpOther)
return SDValue();
- auto *AddOpConst = dyn_cast<ConstantSDNode>(Op.getOperand(1));
- if (!AddOpConst || AddOpConst->getAPIntValue() != ExpectedConst)
+ if (!CheckOp(Op.getOperand(1)))
return SDValue();
return Y;
};
// Try the reassociation using the given constant and condition code.
- auto Fold = [&](APInt NewCmpConst, AArch64CC::CondCode NewCC) {
- APInt ExpectedConst = -NewCmpConst;
- SDValue TReassocOp = GetReassociationOp(N->getOperand(0), ExpectedConst);
- SDValue FReassocOp = GetReassociationOp(N->getOperand(1), ExpectedConst);
+ auto Fold = [&](AArch64CC::CondCode NewCC,
+ std::function<bool(SDValue)> CheckOp, SDValue SubsOp) {
+ SDValue TReassocOp = GetReassociationOp(N->getOperand(0), CheckOp);
+ SDValue FReassocOp = GetReassociationOp(N->getOperand(1), CheckOp);
if (!TReassocOp && !FReassocOp)
return SDValue();
SDValue NewCmp = DAG.getNode(AArch64ISD::SUBS, SDLoc(SubsNode),
- DAG.getVTList(VT, MVT_CC), CmpOpOther,
- DAG.getConstant(NewCmpConst, SDLoc(CmpOpConst),
- CmpOpConst->getValueType(0)));
+ DAG.getVTList(VT, MVT_CC), CmpOpOther, SubsOp);
auto Reassociate = [&](SDValue ReassocOp, unsigned OpNum) {
if (!ReassocOp)
@@ -24957,9 +24972,19 @@ static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) {
// First, try to eliminate the compare instruction by searching for a
// subtraction with the same constant.
- if (SDValue R = Fold(CmpOpConst->getAPIntValue(), CC))
+ if (SDValue R = Fold(CC, CheckOp, SubsOp))
return R;
+ if (!CmpOpConst) {
+ // Try again with the operands of the SUBS instruction and the condition
+ // swapped. Due to canonicalization, this only helps for non-constant
+ // operands of the SUBS instruction.
+ std::swap(CmpOpToMatch, CmpOpOther);
+ if (SDValue R = Fold(getSwappedCondition(CC), CheckOp, CmpOpToMatch))
+ return R;
+ return SDValue();
+ }
+
if ((CC == AArch64CC::EQ || CC == AArch64CC::NE) && !CmpOpConst->isZero())
return SDValue();
@@ -24971,7 +24996,13 @@ static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) {
// them here but check for them nevertheless to be on the safe side.
auto CheckedFold = [&](bool Check, APInt NewCmpConst,
AArch64CC::CondCode NewCC) {
- return Check ? Fold(NewCmpConst, NewCC) : SDValue();
+ auto CheckOp = [=](SDValue Op) {
+ auto *AddOpConst = dyn_cast<ConstantSDNode>(Op);
+ return AddOpConst && AddOpConst->getAPIntValue() == -NewCmpConst;
+ };
+ auto SubsOp = DAG.getConstant(NewCmpConst, SDLoc(CmpOpConst),
+ CmpOpConst->getValueType(0));
+ return Check ? Fold(NewCC, CheckOp, SubsOp) : SDValue();
};
switch (CC) {
case AArch64CC::EQ:
diff --git a/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll b/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll
index 77c5b4ba0c453e..42b1656fa78725 100644
--- a/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll
+++ b/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll
@@ -338,9 +338,8 @@ define i32 @test_eq0_multi_use_sub_i32(i32 %x0, i32 %x1) {
define i32 @test_eq_nonconst_sub_add_i32(i32 %x0, i32 %x1, i32 %x2) {
; CHECK-LABEL: test_eq_nonconst_sub_add_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w1, w2
-; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
; CHECK-NEXT: csel w0, wzr, w8, eq
; CHECK-NEXT: ret
%cmp = icmp eq i32 %x1, %x2
@@ -353,9 +352,8 @@ define i32 @test_eq_nonconst_sub_add_i32(i32 %x0, i32 %x1, i32 %x2) {
define i32 @test_ne_nonconst_sub_add_i32(i32 %x0, i32 %x1, i32 %x2) {
; CHECK-LABEL: test_ne_nonconst_sub_add_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w1, w2
-; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
; CHECK-NEXT: csel w0, wzr, w8, ne
; CHECK-NEXT: ret
%cmp = icmp ne i32 %x1, %x2
@@ -368,9 +366,8 @@ define i32 @test_ne_nonconst_sub_add_i32(i32 %x0, i32 %x1, i32 %x2) {
define i32 @test_ult_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
; CHECK-LABEL: test_ult_nonconst_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w1, w2
-; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
; CHECK-NEXT: csel w0, wzr, w8, lo
; CHECK-NEXT: ret
%cmp = icmp ult i32 %x1, %x2
@@ -383,9 +380,8 @@ define i32 @test_ult_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
define i32 @test_ule_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
; CHECK-LABEL: test_ule_nonconst_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w1, w2
-; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
; CHECK-NEXT: csel w0, wzr, w8, ls
; CHECK-NEXT: ret
%cmp = icmp ule i32 %x1, %x2
@@ -398,9 +394,8 @@ define i32 @test_ule_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
define i32 @test_ugt_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
; CHECK-LABEL: test_ugt_nonconst_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w1, w2
-; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
; CHECK-NEXT: csel w0, wzr, w8, hi
; CHECK-NEXT: ret
%cmp = icmp ugt i32 %x1, %x2
@@ -413,9 +408,8 @@ define i32 @test_ugt_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
define i32 @test_uge_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
; CHECK-LABEL: test_uge_nonconst_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w1, w2
-; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
; CHECK-NEXT: csel w0, wzr, w8, hs
; CHECK-NEXT: ret
%cmp = icmp uge i32 %x1, %x2
@@ -428,9 +422,8 @@ define i32 @test_uge_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
define i32 @test_slt_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
; CHECK-LABEL: test_slt_nonconst_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w1, w2
-; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
; CHECK-NEXT: csel w0, wzr, w8, lt
; CHECK-NEXT: ret
%cmp = icmp slt i32 %x1, %x2
@@ -443,9 +436,8 @@ define i32 @test_slt_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
define i32 @test_sle_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
; CHECK-LABEL: test_sle_nonconst_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w1, w2
-; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
; CHECK-NEXT: csel w0, wzr, w8, le
; CHECK-NEXT: ret
%cmp = icmp sle i32 %x1, %x2
@@ -458,9 +450,8 @@ define i32 @test_sle_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
define i32 @test_sgt_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
; CHECK-LABEL: test_sgt_nonconst_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w1, w2
-; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
; CHECK-NEXT: csel w0, wzr, w8, gt
; CHECK-NEXT: ret
%cmp = icmp sgt i32 %x1, %x2
@@ -473,9 +464,8 @@ define i32 @test_sgt_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
define i32 @test_sge_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
; CHECK-LABEL: test_sge_nonconst_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w1, w2
-; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
; CHECK-NEXT: csel w0, wzr, w8, ge
; CHECK-NEXT: ret
%cmp = icmp sge i32 %x1, %x2
@@ -488,9 +478,8 @@ define i32 @test_sge_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
define i32 @test_eq_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
; CHECK-LABEL: test_eq_nonconst_sub_add_comm_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w2, w1
-; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
; CHECK-NEXT: csel w0, wzr, w8, eq
; CHECK-NEXT: ret
%cmp = icmp eq i32 %x2, %x1
@@ -503,9 +492,8 @@ define i32 @test_eq_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
define i32 @test_ne_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
; CHECK-LABEL: test_ne_nonconst_sub_add_comm_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w2, w1
-; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
; CHECK-NEXT: csel w0, wzr, w8, ne
; CHECK-NEXT: ret
%cmp = icmp ne i32 %x2, %x1
@@ -518,10 +506,9 @@ define i32 @test_ne_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
define i32 @test_ult_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
; CHECK-LABEL: test_ult_nonconst_sub_add_comm_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w2, w1
-; CHECK-NEXT: sub w8, w8, w2
-; CHECK-NEXT: csel w0, wzr, w8, lo
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, hi
; CHECK-NEXT: ret
%cmp = icmp ult i32 %x2, %x1
%add = add nuw i32 %x0, %x1
@@ -533,10 +520,9 @@ define i32 @test_ult_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
define i32 @test_ule_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
; CHECK-LABEL: test_ule_nonconst_sub_add_comm_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w2, w1
-; CHECK-NEXT: sub w8, w8, w2
-; CHECK-NEXT: csel w0, wzr, w8, ls
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, hs
; CHECK-NEXT: ret
%cmp = icmp ule i32 %x2, %x1
%add = add nuw i32 %x0, %x1
@@ -548,10 +534,9 @@ define i32 @test_ule_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
define i32 @test_ugt_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
; CHECK-LABEL: test_ugt_nonconst_sub_add_comm_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w2, w1
-; CHECK-NEXT: sub w8, w8, w2
-; CHECK-NEXT: csel w0, wzr, w8, hi
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, lo
; CHECK-NEXT: ret
%cmp = icmp ugt i32 %x2, %x1
%add = add nuw i32 %x0, %x1
@@ -563,10 +548,9 @@ define i32 @test_ugt_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
define i32 @test_uge_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
; CHECK-LABEL: test_uge_nonconst_sub_add_comm_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w2, w1
-; CHECK-NEXT: sub w8, w8, w2
-; CHECK-NEXT: csel w0, wzr, w8, hs
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, ls
; CHECK-NEXT: ret
%cmp = icmp uge i32 %x2, %x1
%add = add nuw i32 %x0, %x1
@@ -578,10 +562,9 @@ define i32 @test_uge_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
define i32 @test_slt_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
; CHECK-LABEL: test_slt_nonconst_sub_add_comm_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w2, w1
-; CHECK-NEXT: sub w8, w8, w2
-; CHECK-NEXT: csel w0, wzr, w8, lt
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, gt
; CHECK-NEXT: ret
%cmp = icmp slt i32 %x2, %x1
%add = add nuw i32 %x0, %x1
@@ -593,10 +576,9 @@ define i32 @test_slt_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
define i32 @test_sle_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
; CHECK-LABEL: test_sle_nonconst_sub_add_comm_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w2, w1
-; CHECK-NEXT: sub w8, w8, w2
-; CHECK-NEXT: csel w0, wzr, w8, le
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, ge
; CHECK-NEXT: ret
%cmp = icmp sle i32 %x2, %x1
%add = add nuw i32 %x0, %x1
@@ -608,10 +590,9 @@ define i32 @test_sle_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
define i32 @test_sgt_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
; CHECK-LABEL: test_sgt_nonconst_sub_add_comm_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w2, w1
-; CHECK-NEXT: sub w8, w8, w2
-; CHECK-NEXT: csel w0, wzr, w8, gt
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, lt
; CHECK-NEXT: ret
%cmp = icmp sgt i32 %x2, %x1
%add = add nuw i32 %x0, %x1
@@ -623,10 +604,9 @@ define i32 @test_sgt_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
define i32 @test_sge_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
; CHECK-LABEL: test_sge_nonconst_sub_add_comm_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w2, w1
-; CHECK-NEXT: sub w8, w8, w2
-; CHECK-NEXT: csel w0, wzr, w8, ge
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, le
; CHECK-NEXT: ret
%cmp = icmp sge i32 %x2, %x1
%add = add nuw i32 %x0, %x1
More information about the llvm-commits
mailing list