[llvm] [AArch64] Eliminate Common SUBS by Reassociating Non-Constants (PR #123344)

Marius Kamp via llvm-commits llvm-commits at lists.llvm.org
Sun Jan 19 04:43:08 PST 2025


https://github.com/mskamp updated https://github.com/llvm/llvm-project/pull/123344

>From d40c111801fb9caf3ccd35ce0b32bde2fd4f1b45 Mon Sep 17 00:00:00 2001
From: Marius Kamp <msk at posteo.org>
Date: Sat, 11 Jan 2025 14:33:17 +0100
Subject: [PATCH 1/2] [AArch64] Add Tests for Reassociation of SUBS with
 Non-Constant; NFC

---
 llvm/test/CodeGen/AArch64/csel-cmp-cse.ll | 364 +++++++++++++++++++++-
 1 file changed, 348 insertions(+), 16 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll b/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll
index d8904cc6e35e34..77c5b4ba0c453e 100644
--- a/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll
+++ b/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll
@@ -335,6 +335,306 @@ define i32 @test_eq0_multi_use_sub_i32(i32 %x0, i32 %x1) {
   ret i32 %ret
 }
 
+define i32 @test_eq_nonconst_sub_add_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_eq_nonconst_sub_add_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    cmp w1, w2
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    csel w0, wzr, w8, eq
+; CHECK-NEXT:    ret
+  %cmp = icmp eq i32 %x1, %x2
+  %add = add nuw i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_ne_nonconst_sub_add_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ne_nonconst_sub_add_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    cmp w1, w2
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    csel w0, wzr, w8, ne
+; CHECK-NEXT:    ret
+  %cmp = icmp ne i32 %x1, %x2
+  %add = add nuw i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_ult_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ult_nonconst_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    cmp w1, w2
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    csel w0, wzr, w8, lo
+; CHECK-NEXT:    ret
+  %cmp = icmp ult i32 %x1, %x2
+  %add = add i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_ule_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ule_nonconst_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    cmp w1, w2
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    csel w0, wzr, w8, ls
+; CHECK-NEXT:    ret
+  %cmp = icmp ule i32 %x1, %x2
+  %add = add i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_ugt_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ugt_nonconst_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    cmp w1, w2
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    csel w0, wzr, w8, hi
+; CHECK-NEXT:    ret
+  %cmp = icmp ugt i32 %x1, %x2
+  %add = add i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_uge_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_uge_nonconst_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    cmp w1, w2
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    csel w0, wzr, w8, hs
+; CHECK-NEXT:    ret
+  %cmp = icmp uge i32 %x1, %x2
+  %add = add i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_slt_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_slt_nonconst_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    cmp w1, w2
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    csel w0, wzr, w8, lt
+; CHECK-NEXT:    ret
+  %cmp = icmp slt i32 %x1, %x2
+  %add = add i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_sle_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_sle_nonconst_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    cmp w1, w2
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    csel w0, wzr, w8, le
+; CHECK-NEXT:    ret
+  %cmp = icmp sle i32 %x1, %x2
+  %add = add i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_sgt_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_sgt_nonconst_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    cmp w1, w2
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    csel w0, wzr, w8, gt
+; CHECK-NEXT:    ret
+  %cmp = icmp sgt i32 %x1, %x2
+  %add = add i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_sge_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_sge_nonconst_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    cmp w1, w2
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    csel w0, wzr, w8, ge
+; CHECK-NEXT:    ret
+  %cmp = icmp sge i32 %x1, %x2
+  %add = add i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_eq_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_eq_nonconst_sub_add_comm_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    cmp w2, w1
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    csel w0, wzr, w8, eq
+; CHECK-NEXT:    ret
+  %cmp = icmp eq i32 %x2, %x1
+  %add = add nuw i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_ne_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ne_nonconst_sub_add_comm_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    cmp w2, w1
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    csel w0, wzr, w8, ne
+; CHECK-NEXT:    ret
+  %cmp = icmp ne i32 %x2, %x1
+  %add = add nuw i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_ult_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ult_nonconst_sub_add_comm_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    cmp w2, w1
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    csel w0, wzr, w8, lo
+; CHECK-NEXT:    ret
+  %cmp = icmp ult i32 %x2, %x1
+  %add = add nuw i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_ule_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ule_nonconst_sub_add_comm_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    cmp w2, w1
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    csel w0, wzr, w8, ls
+; CHECK-NEXT:    ret
+  %cmp = icmp ule i32 %x2, %x1
+  %add = add nuw i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_ugt_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ugt_nonconst_sub_add_comm_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    cmp w2, w1
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    csel w0, wzr, w8, hi
+; CHECK-NEXT:    ret
+  %cmp = icmp ugt i32 %x2, %x1
+  %add = add nuw i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_uge_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_uge_nonconst_sub_add_comm_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    cmp w2, w1
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    csel w0, wzr, w8, hs
+; CHECK-NEXT:    ret
+  %cmp = icmp uge i32 %x2, %x1
+  %add = add nuw i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_slt_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_slt_nonconst_sub_add_comm_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    cmp w2, w1
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    csel w0, wzr, w8, lt
+; CHECK-NEXT:    ret
+  %cmp = icmp slt i32 %x2, %x1
+  %add = add nuw i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_sle_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_sle_nonconst_sub_add_comm_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    cmp w2, w1
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    csel w0, wzr, w8, le
+; CHECK-NEXT:    ret
+  %cmp = icmp sle i32 %x2, %x1
+  %add = add nuw i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_sgt_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_sgt_nonconst_sub_add_comm_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    cmp w2, w1
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    csel w0, wzr, w8, gt
+; CHECK-NEXT:    ret
+  %cmp = icmp sgt i32 %x2, %x1
+  %add = add nuw i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_sge_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_sge_nonconst_sub_add_comm_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    cmp w2, w1
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    csel w0, wzr, w8, ge
+; CHECK-NEXT:    ret
+  %cmp = icmp sge i32 %x2, %x1
+  %add = add nuw i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
 ; Negative test
 define i32 @test_eq0_multi_use_cmp_i32(i32 %x0, i32 %x1) {
 ; CHECK-LABEL: test_eq0_multi_use_cmp_i32:
@@ -421,22 +721,6 @@ define i32 @test_ugtsmax_sub_add_i32(i32 %x0, i32 %x1) {
   ret i32 %ret
 }
 
-; Negative test
-define i32 @test_ult_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
-; CHECK-LABEL: test_ult_nonconst_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    cmp w1, w2
-; CHECK-NEXT:    sub w8, w8, w2
-; CHECK-NEXT:    csel w0, wzr, w8, lo
-; CHECK-NEXT:    ret
-  %cmp = icmp ult i32 %x1, %x2
-  %add = add i32 %x0, %x1
-  %sub = sub i32 %add, %x2
-  %ret = select i1 %cmp, i32 0, i32 %sub
-  ret i32 %ret
-}
-
 ; Negative test
 define i32 @test_eq_const_mismatch_i32(i32 %x0, i32 %x1) {
 ; CHECK-LABEL: test_eq_const_mismatch_i32:
@@ -771,3 +1055,51 @@ define i32 @test_eq0_bitwidth_mismatch_2(i32 %x0, i64 %x1) {
   %ret = select i1 %cmp, i32 0, i32 %sub
   ret i32 %ret
 }
+
+; Negative test
+define i32 @test_ult_nonconst_op_mismatch_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ult_nonconst_op_mismatch_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    cmp w1, w2
+; CHECK-NEXT:    add w8, w8, w2
+; CHECK-NEXT:    csel w0, wzr, w8, lo
+; CHECK-NEXT:    ret
+  %cmp = icmp ult i32 %x1, %x2
+  %add = add i32 %x0, %x1
+  %sub = add i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+; Negative test
+define i32 @test_ult_nonconst_unrelated_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
+; CHECK-LABEL: test_ult_nonconst_unrelated_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    cmp w1, w2
+; CHECK-NEXT:    sub w8, w8, w3
+; CHECK-NEXT:    csel w0, wzr, w8, lo
+; CHECK-NEXT:    ret
+  %cmp = icmp ult i32 %x1, %x2
+  %add = add i32 %x0, %x1
+  %sub = sub i32 %add, %x3
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+; Negative test
+define i32 @test_ult_nonconst_unrelated_2_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
+; CHECK-LABEL: test_ult_nonconst_unrelated_2_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    cmp w2, w1
+; CHECK-NEXT:    sub w8, w8, w3
+; CHECK-NEXT:    csel w0, wzr, w8, lo
+; CHECK-NEXT:    ret
+  %cmp = icmp ult i32 %x2, %x1
+  %add = add i32 %x0, %x1
+  %sub = sub i32 %add, %x3
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}

>From f2222feb427a834602f77b8a3adc1e4db587aeb0 Mon Sep 17 00:00:00 2001
From: Marius Kamp <msk at posteo.org>
Date: Sat, 11 Jan 2025 16:10:06 +0100
Subject: [PATCH 2/2] [AArch64] Eliminate Common SUBS by Reassociating
 Non-Constants

Commit 1eed46960c217f9480865702f06fb730c7521e61 added logic to
reassociate an (add (add x y) -c) operand of a CSEL instruction whose
comparison involves x and c (or a similar constant) in order to obtain
a common (SUBS x c) instruction.

This commit extends this logic to non-constants. For example, a
(sub (add x y) z) operand of a CSEL instruction is now reassociated to
(add (sub x z) y) if the CSEL compares x and z.

Alive proof: https://alive2.llvm.org/ce/z/SEVpRm
---
 .../Target/AArch64/AArch64ISelLowering.cpp    |  63 +++++++---
 llvm/test/CodeGen/AArch64/csel-cmp-cse.ll     | 116 ++++++++----------
 2 files changed, 95 insertions(+), 84 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e4715018d84caf..c66a59869b798e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -24898,16 +24898,34 @@ static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) {
   SDValue SubsNode = N->getOperand(3);
   if (SubsNode.getOpcode() != AArch64ISD::SUBS || !SubsNode.hasOneUse())
     return SDValue();
-  auto *CmpOpConst = dyn_cast<ConstantSDNode>(SubsNode.getOperand(1));
-  if (!CmpOpConst)
-    return SDValue();
 
+  SDValue CmpOpToMatch = SubsNode.getOperand(1);
   SDValue CmpOpOther = SubsNode.getOperand(0);
   EVT VT = N->getValueType(0);
 
+  unsigned ExpectedOpcode;
+  std::function<bool(SDValue)> CheckOp;
+  SDValue SubsOp;
+  auto *CmpOpConst = dyn_cast<ConstantSDNode>(CmpOpToMatch);
+  if (CmpOpConst) {
+    ExpectedOpcode = ISD::ADD;
+    CheckOp = [&](SDValue Op) {
+      auto *AddOpConst = dyn_cast<ConstantSDNode>(Op);
+      return AddOpConst &&
+             AddOpConst->getAPIntValue() == -CmpOpConst->getAPIntValue();
+    };
+    SubsOp = DAG.getConstant(CmpOpConst->getAPIntValue(), SDLoc(CmpOpConst),
+                             CmpOpConst->getValueType(0));
+  } else {
+    ExpectedOpcode = ISD::SUB;
+    CheckOp = [&](SDValue Op) { return Op == CmpOpToMatch; };
+    SubsOp = CmpOpToMatch;
+  }
+
   // Get the operand that can be reassociated with the SUBS instruction.
-  auto GetReassociationOp = [&](SDValue Op, APInt ExpectedConst) {
-    if (Op.getOpcode() != ISD::ADD)
+  auto GetReassociationOp = [&](SDValue Op,
+                                std::function<bool(SDValue)> CheckOp) {
+    if (Op.getOpcode() != ExpectedOpcode)
       return SDValue();
     if (Op.getOperand(0).getOpcode() != ISD::ADD ||
         !Op.getOperand(0).hasOneUse())
@@ -24918,24 +24936,21 @@ static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) {
       std::swap(X, Y);
     if (X != CmpOpOther)
       return SDValue();
-    auto *AddOpConst = dyn_cast<ConstantSDNode>(Op.getOperand(1));
-    if (!AddOpConst || AddOpConst->getAPIntValue() != ExpectedConst)
+    if (!CheckOp(Op.getOperand(1)))
       return SDValue();
     return Y;
   };
 
   // Try the reassociation using the given constant and condition code.
-  auto Fold = [&](APInt NewCmpConst, AArch64CC::CondCode NewCC) {
-    APInt ExpectedConst = -NewCmpConst;
-    SDValue TReassocOp = GetReassociationOp(N->getOperand(0), ExpectedConst);
-    SDValue FReassocOp = GetReassociationOp(N->getOperand(1), ExpectedConst);
+  auto Fold = [&](AArch64CC::CondCode NewCC,
+                  std::function<bool(SDValue)> CheckOp, SDValue SubsOp) {
+    SDValue TReassocOp = GetReassociationOp(N->getOperand(0), CheckOp);
+    SDValue FReassocOp = GetReassociationOp(N->getOperand(1), CheckOp);
     if (!TReassocOp && !FReassocOp)
       return SDValue();
 
     SDValue NewCmp = DAG.getNode(AArch64ISD::SUBS, SDLoc(SubsNode),
-                                 DAG.getVTList(VT, MVT_CC), CmpOpOther,
-                                 DAG.getConstant(NewCmpConst, SDLoc(CmpOpConst),
-                                                 CmpOpConst->getValueType(0)));
+                                 DAG.getVTList(VT, MVT_CC), CmpOpOther, SubsOp);
 
     auto Reassociate = [&](SDValue ReassocOp, unsigned OpNum) {
       if (!ReassocOp)
@@ -24957,9 +24972,19 @@ static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) {
 
   // First, try to eliminate the compare instruction by searching for a
   // subtraction with the same constant.
-  if (SDValue R = Fold(CmpOpConst->getAPIntValue(), CC))
+  if (SDValue R = Fold(CC, CheckOp, SubsOp))
     return R;
 
+  if (!CmpOpConst) {
+    // Try again with the operands of the SUBS instruction and the condition
+    // swapped. Due to canonicalization, this only helps for non-constant
+    // operands of the SUBS instruction.
+    std::swap(CmpOpToMatch, CmpOpOther);
+    if (SDValue R = Fold(getSwappedCondition(CC), CheckOp, CmpOpToMatch))
+      return R;
+    return SDValue();
+  }
+
   if ((CC == AArch64CC::EQ || CC == AArch64CC::NE) && !CmpOpConst->isZero())
     return SDValue();
 
@@ -24971,7 +24996,13 @@ static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) {
   // them here but check for them nevertheless to be on the safe side.
   auto CheckedFold = [&](bool Check, APInt NewCmpConst,
                          AArch64CC::CondCode NewCC) {
-    return Check ? Fold(NewCmpConst, NewCC) : SDValue();
+    auto CheckOp = [=](SDValue Op) {
+      auto *AddOpConst = dyn_cast<ConstantSDNode>(Op);
+      return AddOpConst && AddOpConst->getAPIntValue() == -NewCmpConst;
+    };
+    auto SubsOp = DAG.getConstant(NewCmpConst, SDLoc(CmpOpConst),
+                                  CmpOpConst->getValueType(0));
+    return Check ? Fold(NewCC, CheckOp, SubsOp) : SDValue();
   };
   switch (CC) {
   case AArch64CC::EQ:
diff --git a/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll b/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll
index 77c5b4ba0c453e..42b1656fa78725 100644
--- a/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll
+++ b/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll
@@ -338,9 +338,8 @@ define i32 @test_eq0_multi_use_sub_i32(i32 %x0, i32 %x1) {
 define i32 @test_eq_nonconst_sub_add_i32(i32 %x0, i32 %x1, i32 %x2) {
 ; CHECK-LABEL: test_eq_nonconst_sub_add_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    cmp w1, w2
-; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
 ; CHECK-NEXT:    csel w0, wzr, w8, eq
 ; CHECK-NEXT:    ret
   %cmp = icmp eq i32 %x1, %x2
@@ -353,9 +352,8 @@ define i32 @test_eq_nonconst_sub_add_i32(i32 %x0, i32 %x1, i32 %x2) {
 define i32 @test_ne_nonconst_sub_add_i32(i32 %x0, i32 %x1, i32 %x2) {
 ; CHECK-LABEL: test_ne_nonconst_sub_add_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    cmp w1, w2
-; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
 ; CHECK-NEXT:    csel w0, wzr, w8, ne
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i32 %x1, %x2
@@ -368,9 +366,8 @@ define i32 @test_ne_nonconst_sub_add_i32(i32 %x0, i32 %x1, i32 %x2) {
 define i32 @test_ult_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
 ; CHECK-LABEL: test_ult_nonconst_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    cmp w1, w2
-; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
 ; CHECK-NEXT:    csel w0, wzr, w8, lo
 ; CHECK-NEXT:    ret
   %cmp = icmp ult i32 %x1, %x2
@@ -383,9 +380,8 @@ define i32 @test_ult_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
 define i32 @test_ule_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
 ; CHECK-LABEL: test_ule_nonconst_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    cmp w1, w2
-; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
 ; CHECK-NEXT:    csel w0, wzr, w8, ls
 ; CHECK-NEXT:    ret
   %cmp = icmp ule i32 %x1, %x2
@@ -398,9 +394,8 @@ define i32 @test_ule_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
 define i32 @test_ugt_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
 ; CHECK-LABEL: test_ugt_nonconst_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    cmp w1, w2
-; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
 ; CHECK-NEXT:    csel w0, wzr, w8, hi
 ; CHECK-NEXT:    ret
   %cmp = icmp ugt i32 %x1, %x2
@@ -413,9 +408,8 @@ define i32 @test_ugt_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
 define i32 @test_uge_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
 ; CHECK-LABEL: test_uge_nonconst_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    cmp w1, w2
-; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
 ; CHECK-NEXT:    csel w0, wzr, w8, hs
 ; CHECK-NEXT:    ret
   %cmp = icmp uge i32 %x1, %x2
@@ -428,9 +422,8 @@ define i32 @test_uge_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
 define i32 @test_slt_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
 ; CHECK-LABEL: test_slt_nonconst_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    cmp w1, w2
-; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
 ; CHECK-NEXT:    csel w0, wzr, w8, lt
 ; CHECK-NEXT:    ret
   %cmp = icmp slt i32 %x1, %x2
@@ -443,9 +436,8 @@ define i32 @test_slt_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
 define i32 @test_sle_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
 ; CHECK-LABEL: test_sle_nonconst_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    cmp w1, w2
-; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
 ; CHECK-NEXT:    csel w0, wzr, w8, le
 ; CHECK-NEXT:    ret
   %cmp = icmp sle i32 %x1, %x2
@@ -458,9 +450,8 @@ define i32 @test_sle_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
 define i32 @test_sgt_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
 ; CHECK-LABEL: test_sgt_nonconst_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    cmp w1, w2
-; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
 ; CHECK-NEXT:    csel w0, wzr, w8, gt
 ; CHECK-NEXT:    ret
   %cmp = icmp sgt i32 %x1, %x2
@@ -473,9 +464,8 @@ define i32 @test_sgt_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
 define i32 @test_sge_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
 ; CHECK-LABEL: test_sge_nonconst_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    cmp w1, w2
-; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
 ; CHECK-NEXT:    csel w0, wzr, w8, ge
 ; CHECK-NEXT:    ret
   %cmp = icmp sge i32 %x1, %x2
@@ -488,9 +478,8 @@ define i32 @test_sge_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
 define i32 @test_eq_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
 ; CHECK-LABEL: test_eq_nonconst_sub_add_comm_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    cmp w2, w1
-; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
 ; CHECK-NEXT:    csel w0, wzr, w8, eq
 ; CHECK-NEXT:    ret
   %cmp = icmp eq i32 %x2, %x1
@@ -503,9 +492,8 @@ define i32 @test_eq_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
 define i32 @test_ne_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
 ; CHECK-LABEL: test_ne_nonconst_sub_add_comm_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    cmp w2, w1
-; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
 ; CHECK-NEXT:    csel w0, wzr, w8, ne
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i32 %x2, %x1
@@ -518,10 +506,9 @@ define i32 @test_ne_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
 define i32 @test_ult_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
 ; CHECK-LABEL: test_ult_nonconst_sub_add_comm_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    cmp w2, w1
-; CHECK-NEXT:    sub w8, w8, w2
-; CHECK-NEXT:    csel w0, wzr, w8, lo
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, hi
 ; CHECK-NEXT:    ret
   %cmp = icmp ult i32 %x2, %x1
   %add = add nuw i32 %x0, %x1
@@ -533,10 +520,9 @@ define i32 @test_ult_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
 define i32 @test_ule_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
 ; CHECK-LABEL: test_ule_nonconst_sub_add_comm_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    cmp w2, w1
-; CHECK-NEXT:    sub w8, w8, w2
-; CHECK-NEXT:    csel w0, wzr, w8, ls
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, hs
 ; CHECK-NEXT:    ret
   %cmp = icmp ule i32 %x2, %x1
   %add = add nuw i32 %x0, %x1
@@ -548,10 +534,9 @@ define i32 @test_ule_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
 define i32 @test_ugt_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
 ; CHECK-LABEL: test_ugt_nonconst_sub_add_comm_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    cmp w2, w1
-; CHECK-NEXT:    sub w8, w8, w2
-; CHECK-NEXT:    csel w0, wzr, w8, hi
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, lo
 ; CHECK-NEXT:    ret
   %cmp = icmp ugt i32 %x2, %x1
   %add = add nuw i32 %x0, %x1
@@ -563,10 +548,9 @@ define i32 @test_ugt_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
 define i32 @test_uge_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
 ; CHECK-LABEL: test_uge_nonconst_sub_add_comm_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    cmp w2, w1
-; CHECK-NEXT:    sub w8, w8, w2
-; CHECK-NEXT:    csel w0, wzr, w8, hs
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, ls
 ; CHECK-NEXT:    ret
   %cmp = icmp uge i32 %x2, %x1
   %add = add nuw i32 %x0, %x1
@@ -578,10 +562,9 @@ define i32 @test_uge_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
 define i32 @test_slt_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
 ; CHECK-LABEL: test_slt_nonconst_sub_add_comm_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    cmp w2, w1
-; CHECK-NEXT:    sub w8, w8, w2
-; CHECK-NEXT:    csel w0, wzr, w8, lt
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, gt
 ; CHECK-NEXT:    ret
   %cmp = icmp slt i32 %x2, %x1
   %add = add nuw i32 %x0, %x1
@@ -593,10 +576,9 @@ define i32 @test_slt_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
 define i32 @test_sle_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
 ; CHECK-LABEL: test_sle_nonconst_sub_add_comm_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    cmp w2, w1
-; CHECK-NEXT:    sub w8, w8, w2
-; CHECK-NEXT:    csel w0, wzr, w8, le
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, ge
 ; CHECK-NEXT:    ret
   %cmp = icmp sle i32 %x2, %x1
   %add = add nuw i32 %x0, %x1
@@ -608,10 +590,9 @@ define i32 @test_sle_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
 define i32 @test_sgt_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
 ; CHECK-LABEL: test_sgt_nonconst_sub_add_comm_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    cmp w2, w1
-; CHECK-NEXT:    sub w8, w8, w2
-; CHECK-NEXT:    csel w0, wzr, w8, gt
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, lt
 ; CHECK-NEXT:    ret
   %cmp = icmp sgt i32 %x2, %x1
   %add = add nuw i32 %x0, %x1
@@ -623,10 +604,9 @@ define i32 @test_sgt_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
 define i32 @test_sge_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
 ; CHECK-LABEL: test_sge_nonconst_sub_add_comm_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    cmp w2, w1
-; CHECK-NEXT:    sub w8, w8, w2
-; CHECK-NEXT:    csel w0, wzr, w8, ge
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, le
 ; CHECK-NEXT:    ret
   %cmp = icmp sge i32 %x2, %x1
   %add = add nuw i32 %x0, %x1



More information about the llvm-commits mailing list