[llvm] [AArch64] Use isKnownNonZero to optimize to cmn instead of cmp (PR #96349)

Fri Jun 21 12:30:20 PDT 2024

https://github.com/AtariDreams created https://github.com/llvm/llvm-project/pull/96349

None

>From 9ba051d5b520c9a56c88222fc11171ff6fb9853e Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Fri, 21 Jun 2024 15:12:41 -0400
Subject: [PATCH 1/2] Pre-commit tests (NFC)

---
 llvm/test/CodeGen/AArch64/cmp-chains.ll | 32 +++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/cmp-chains.ll b/llvm/test/CodeGen/AArch64/cmp-chains.ll
index 14cb0c82b1c03..d51c9c946f467 100644
--- a/llvm/test/CodeGen/AArch64/cmp-chains.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-chains.ll
@@ -258,3 +258,35 @@ define i32 @neg_range_int(i32 %a, i32 %b, i32 %c) {
   ret i32 %retval.0
 }
 
+; (b > -3 || a < -(c | 1))
+define i32 @neg_range_int_cmn(i32 %a, i32 %b, i32 %c) {
+; SDISEL-LABEL: neg_range_int_cmn:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    orr w8, w2, #0x1
+; SDISEL-NEXT:    neg w8, w8
+; SDISEL-NEXT:    cmp w8, w0
+; SDISEL-NEXT:    ccmn w1, #3, #0, le
+; SDISEL-NEXT:    csel w0, w1, w0, gt
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: neg_range_int_cmn:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    orr w8, w2, #0x1
+; GISEL-NEXT:    cmn w1, #3
+; GISEL-NEXT:    neg w8, w8
+; GISEL-NEXT:    cset w9, gt
+; GISEL-NEXT:    cmp w8, w0
+; GISEL-NEXT:    cset w8, gt
+; GISEL-NEXT:    orr w8, w9, w8
+; GISEL-NEXT:    and w8, w8, #0x1
+; GISEL-NEXT:    tst w8, #0x1
+; GISEL-NEXT:    csel w0, w1, w0, ne
+; GISEL-NEXT:    ret
+  %or = or i32 %c, 1
+  %sub = sub nsw i32 0, %or
+  %cmp = icmp sgt i32 %b, -3
+  %cmp1 = icmp sgt i32 %sub, %a
+  %1 = select i1 %cmp, i1 true, i1 %cmp1
+  %ret = select i1 %1, i32 %b, i32 %a
+  ret i32 %ret
+}

>From 27d7956cef30a308f1655f572eff7c1796892691 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Fri, 21 Jun 2024 15:26:02 -0400
Subject: [PATCH 2/2] [AArch64] Use isKnownNonZero to optimize to cmn instead
 of cmp

---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 14 ++++++++------
 llvm/test/CodeGen/AArch64/cmp-chains.ll         |  3 +--
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0c834acacdca9..6b6cfe7e82a1e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3372,9 +3372,10 @@ static bool isLegalArithImmed(uint64_t C) {
 // So, finally, the only LLVM-native comparisons that don't mention C and V
 // are SETEQ and SETNE. They're the only ones we can safely use CMN for in
 // the absence of information about op2.
-static bool isCMN(SDValue Op, ISD::CondCode CC) {
+static bool isCMN(SDValue Op, ISD::CondCode CC, SelectionDAG &DAG) {
   return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
-         (CC == ISD::SETEQ || CC == ISD::SETNE);
+         (CC == ISD::SETEQ || CC == ISD::SETNE ||
+          DAG.isKnownNeverZero(Op.getOperand(1)));
 }
 
 static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
@@ -3419,11 +3420,11 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
   // register to WZR/XZR if it ends up being unused.
   unsigned Opcode = AArch64ISD::SUBS;
 
-  if (isCMN(RHS, CC)) {
+  if (isCMN(RHS, CC, DAG)) {
     // Can we combine a (CMP op1, (sub 0, op2) into a CMN instruction ?
     Opcode = AArch64ISD::ADDS;
     RHS = RHS.getOperand(1);
-  } else if (isCMN(LHS, CC)) {
+  } else if (isCMN(LHS, CC, DAG)) {
     // As we are looking for EQ/NE compares, the operands can be commuted ; can
     // we combine a (CMP (sub 0, op1), op2) into a CMN instruction ?
     Opcode = AArch64ISD::ADDS;
@@ -3527,7 +3528,8 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
     }
   } else if (RHS.getOpcode() == ISD::SUB) {
     SDValue SubOp0 = RHS.getOperand(0);
-    if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+    if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE ||
+                                   DAG.isKnownNeverZero(RHS.getOperand(1)))) {
       // See emitComparison() on why we can only do this for SETEQ and SETNE.
       Opcode = AArch64ISD::CCMN;
       RHS = RHS.getOperand(1);
@@ -3848,7 +3850,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
   // can be turned into:
   //    cmp     w12, w11, lsl #1
   if (!isa<ConstantSDNode>(RHS) || !isLegalArithImmed(RHS->getAsZExtVal())) {
-    SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
+    SDValue TheLHS = isCMN(LHS, CC, DAG) ? LHS.getOperand(1) : LHS;
 
     if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
       std::swap(LHS, RHS);
diff --git a/llvm/test/CodeGen/AArch64/cmp-chains.ll b/llvm/test/CodeGen/AArch64/cmp-chains.ll
index d51c9c946f467..0713fdb7283a9 100644
--- a/llvm/test/CodeGen/AArch64/cmp-chains.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-chains.ll
@@ -263,8 +263,7 @@ define i32 @neg_range_int_cmn(i32 %a, i32 %b, i32 %c) {
 ; SDISEL-LABEL: neg_range_int_cmn:
 ; SDISEL:       // %bb.0:
 ; SDISEL-NEXT:    orr w8, w2, #0x1
-; SDISEL-NEXT:    neg w8, w8
-; SDISEL-NEXT:    cmp w8, w0
+; SDISEL-NEXT:    cmn w8, w0
 ; SDISEL-NEXT:    ccmn w1, #3, #0, le
 ; SDISEL-NEXT:    csel w0, w1, w0, gt
 ; SDISEL-NEXT:    ret