[llvm] [AArch64] Lower abds and abdu on AArch64 (PR #159085)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 20 08:12:26 PDT 2025
https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/159085
>From 70a825c116e769ee12b76cab17ad92854770c6b3 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Sat, 13 Sep 2025 14:47:36 -0400
Subject: [PATCH 1/5] Remove CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc,
SUBS(x,y)
This fold should be done in the peephole optimizer, not in the DAG combiner.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 23 --
llvm/test/CodeGen/AArch64/abds-neg.ll | 15 +-
llvm/test/CodeGen/AArch64/abds.ll | 41 ++--
llvm/test/CodeGen/AArch64/abdu-neg.ll | 15 +-
llvm/test/CodeGen/AArch64/abdu.ll | 41 ++--
llvm/test/CodeGen/AArch64/alias_mask.ll | 214 +++++++++---------
.../CodeGen/AArch64/csel-subs-dag-combine.ll | 112 ---------
llvm/test/CodeGen/AArch64/midpoint-int.ll | 90 ++++----
8 files changed, 228 insertions(+), 323 deletions(-)
delete mode 100644 llvm/test/CodeGen/AArch64/csel-subs-dag-combine.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index cd7f0e719ad0c..86511ee192f11 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -25815,29 +25815,6 @@ static SDValue performCSELCombine(SDNode *N,
}
}
- // CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc, SUBS(x,y) if cc doesn't
- // use overflow flags, to avoid the comparison with zero. In case of success,
- // this also replaces the original SUB(x,y) with the newly created SUBS(x,y).
- // NOTE: Perhaps in the future use performFlagSettingCombine to replace SUB
- // nodes with their SUBS equivalent as is already done for other flag-setting
- // operators, in which case doing the replacement here becomes redundant.
- if (Cond.getOpcode() == AArch64ISD::SUBS && Cond->hasNUsesOfValue(1, 1) &&
- isNullConstant(Cond.getOperand(1))) {
- SDValue Sub = Cond.getOperand(0);
- AArch64CC::CondCode CC =
- static_cast<AArch64CC::CondCode>(N->getConstantOperandVal(2));
- if (Sub.getOpcode() == ISD::SUB &&
- (CC == AArch64CC::EQ || CC == AArch64CC::NE || CC == AArch64CC::MI ||
- CC == AArch64CC::PL)) {
- SDLoc DL(N);
- SDValue Subs = DAG.getNode(AArch64ISD::SUBS, DL, Cond->getVTList(),
- Sub.getOperand(0), Sub.getOperand(1));
- DCI.CombineTo(Sub.getNode(), Subs);
- DCI.CombineTo(Cond.getNode(), Subs, Subs.getValue(1));
- return SDValue(N, 0);
- }
- }
-
// CSEL (LASTB P, Z), X, NE(ANY P) -> CLASTB P, X, Z
if (SDValue CondLast = foldCSELofLASTB(N, DAG))
return CondLast;
diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll
index 37319642f5b34..2f74b8bb7de05 100644
--- a/llvm/test/CodeGen/AArch64/abds-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abds-neg.ll
@@ -9,7 +9,8 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: subs w8, w8, w1, sxtb
+; CHECK-NEXT: sub w8, w8, w1, sxtb
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i8 %a to i64
@@ -25,7 +26,8 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: subs w8, w8, w1, sxth
+; CHECK-NEXT: sub w8, w8, w1, sxth
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i8 %a to i64
@@ -41,7 +43,8 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: subs w8, w8, w1, sxtb
+; CHECK-NEXT: sub w8, w8, w1, sxtb
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i8 %a to i64
@@ -57,7 +60,8 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: subs w8, w8, w1, sxth
+; CHECK-NEXT: sub w8, w8, w1, sxth
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
@@ -89,7 +93,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: subs w8, w8, w1, sxth
+; CHECK-NEXT: sub w8, w8, w1, sxth
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll
index 1ef1c1c68c7bb..96c2a879c631c 100644
--- a/llvm/test/CodeGen/AArch64/abds.ll
+++ b/llvm/test/CodeGen/AArch64/abds.ll
@@ -9,7 +9,8 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: subs w8, w8, w1, sxtb
+; CHECK-NEXT: sub w8, w8, w1, sxtb
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i8 %a to i64
@@ -24,7 +25,8 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: subs w8, w8, w1, sxth
+; CHECK-NEXT: sub w8, w8, w1, sxth
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i8 %a to i64
@@ -39,7 +41,8 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: subs w8, w8, w1, sxtb
+; CHECK-NEXT: sub w8, w8, w1, sxtb
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i8 %a to i64
@@ -54,7 +57,8 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: subs w8, w8, w1, sxth
+; CHECK-NEXT: sub w8, w8, w1, sxth
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
@@ -84,7 +88,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: subs w8, w8, w1, sxth
+; CHECK-NEXT: sub w8, w8, w1, sxth
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
@@ -209,7 +214,8 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_minmax_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: subs w8, w8, w1, sxtb
+; CHECK-NEXT: sub w8, w8, w1, sxtb
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%min = call i8 @llvm.smin.i8(i8 %a, i8 %b)
@@ -222,7 +228,8 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_minmax_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: subs w8, w8, w1, sxth
+; CHECK-NEXT: sub w8, w8, w1, sxth
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%min = call i16 @llvm.smin.i16(i16 %a, i16 %b)
@@ -279,7 +286,8 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_cmp_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: subs w8, w8, w1, sxtb
+; CHECK-NEXT: sub w8, w8, w1, sxtb
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp sgt i8 %a, %b
@@ -293,7 +301,8 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_cmp_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: subs w8, w8, w1, sxth
+; CHECK-NEXT: sub w8, w8, w1, sxth
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp sge i16 %a, %b
@@ -497,8 +506,12 @@ define i64 @vector_legalized(i16 %a, i16 %b) {
; CHECK-LABEL: vector_legalized:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: subs w8, w8, w1, sxth
-; CHECK-NEXT: cneg w0, w8, mi
+; CHECK-NEXT: sub w8, w8, w1, sxth
+; CHECK-NEXT: addp d0, v0.2d
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cneg w8, w8, mi
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: add x0, x9, x8
; CHECK-NEXT: ret
%ea = sext i16 %a to i32
%eb = sext i16 %b to i32
@@ -518,7 +531,8 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_select_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: subs w8, w8, w1, sxtb
+; CHECK-NEXT: sub w8, w8, w1, sxtb
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp slt i8 %a, %b
@@ -532,7 +546,8 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_select_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: subs w8, w8, w1, sxth
+; CHECK-NEXT: sub w8, w8, w1, sxth
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp sle i16 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll
index 79fc12ea76f63..98613627f7af1 100644
--- a/llvm/test/CodeGen/AArch64/abdu-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll
@@ -9,7 +9,8 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: subs w8, w8, w1, uxtb
+; CHECK-NEXT: sub w8, w8, w1, uxtb
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
@@ -25,7 +26,8 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: subs w8, w8, w1, uxth
+; CHECK-NEXT: sub w8, w8, w1, uxth
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
@@ -41,7 +43,8 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: subs w8, w8, w1, uxtb
+; CHECK-NEXT: sub w8, w8, w1, uxtb
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
@@ -57,7 +60,8 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: subs w8, w8, w1, uxth
+; CHECK-NEXT: sub w8, w8, w1, uxth
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
@@ -89,7 +93,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: subs w8, w8, w1, uxth
+; CHECK-NEXT: sub w8, w8, w1, uxth
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll
index 6db7693fb3a1c..801e44247e6a6 100644
--- a/llvm/test/CodeGen/AArch64/abdu.ll
+++ b/llvm/test/CodeGen/AArch64/abdu.ll
@@ -9,7 +9,8 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: subs w8, w8, w1, uxtb
+; CHECK-NEXT: sub w8, w8, w1, uxtb
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
@@ -24,7 +25,8 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: subs w8, w8, w1, uxth
+; CHECK-NEXT: sub w8, w8, w1, uxth
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
@@ -39,7 +41,8 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: subs w8, w8, w1, uxtb
+; CHECK-NEXT: sub w8, w8, w1, uxtb
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
@@ -54,7 +57,8 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: subs w8, w8, w1, uxth
+; CHECK-NEXT: sub w8, w8, w1, uxth
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
@@ -84,7 +88,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: subs w8, w8, w1, uxth
+; CHECK-NEXT: sub w8, w8, w1, uxth
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
@@ -213,7 +218,8 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_minmax_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: subs w8, w8, w1, uxtb
+; CHECK-NEXT: sub w8, w8, w1, uxtb
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%min = call i8 @llvm.umin.i8(i8 %a, i8 %b)
@@ -226,7 +232,8 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_minmax_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: subs w8, w8, w1, uxth
+; CHECK-NEXT: sub w8, w8, w1, uxth
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%min = call i16 @llvm.umin.i16(i16 %a, i16 %b)
@@ -285,7 +292,8 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_cmp_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: subs w8, w8, w1, uxtb
+; CHECK-NEXT: sub w8, w8, w1, uxtb
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp ugt i8 %a, %b
@@ -299,7 +307,8 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_cmp_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: subs w8, w8, w1, uxth
+; CHECK-NEXT: sub w8, w8, w1, uxth
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp uge i16 %a, %b
@@ -362,8 +371,12 @@ define i64 @vector_legalized(i16 %a, i16 %b) {
; CHECK-LABEL: vector_legalized:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: subs w8, w8, w1, uxth
-; CHECK-NEXT: cneg w0, w8, mi
+; CHECK-NEXT: sub w8, w8, w1, uxth
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: addp d0, v0.2d
+; CHECK-NEXT: cneg w8, w8, mi
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: add x0, x9, x8
; CHECK-NEXT: ret
%ea = zext i16 %a to i32
%eb = zext i16 %b to i32
@@ -383,7 +396,8 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_select_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: subs w8, w8, w1, uxtb
+; CHECK-NEXT: sub w8, w8, w1, uxtb
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp ult i8 %a, %b
@@ -397,7 +411,8 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_select_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: subs w8, w8, w1, uxth
+; CHECK-NEXT: sub w8, w8, w1, uxth
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp ule i16 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/alias_mask.ll b/llvm/test/CodeGen/AArch64/alias_mask.ll
index 9b9c020016bab..c5d3677366480 100644
--- a/llvm/test/CodeGen/AArch64/alias_mask.ll
+++ b/llvm/test/CodeGen/AArch64/alias_mask.ll
@@ -393,70 +393,71 @@ entry:
define <32 x i1> @whilewr_32_expand3(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_32_expand3:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: subs x9, x1, x0
+; CHECK-NEXT: sub x10, x1, x0
; CHECK-NEXT: index z0.d, #0, #1
-; CHECK-NEXT: add x10, x9, #3
-; CHECK-NEXT: sub x11, x9, #61
-; CHECK-NEXT: csel x10, x10, x9, mi
-; CHECK-NEXT: subs x9, x9, #64
-; CHECK-NEXT: csel x9, x11, x9, mi
-; CHECK-NEXT: asr x10, x10, #2
-; CHECK-NEXT: asr x9, x9, #2
+; CHECK-NEXT: sub x9, x10, #61
+; CHECK-NEXT: subs x11, x10, #64
+; CHECK-NEXT: add x12, x10, #3
+; CHECK-NEXT: csel x9, x9, x11, mi
+; CHECK-NEXT: asr x11, x9, #2
; CHECK-NEXT: mov z1.d, z0.d
; CHECK-NEXT: mov z2.d, z0.d
; CHECK-NEXT: mov z3.d, z0.d
+; CHECK-NEXT: cmp x11, #1
; CHECK-NEXT: mov z4.d, z0.d
; CHECK-NEXT: mov z5.d, z0.d
+; CHECK-NEXT: cset w9, lt
+; CHECK-NEXT: cmp x10, #0
; CHECK-NEXT: mov z6.d, z0.d
-; CHECK-NEXT: dup v7.2d, x10
-; CHECK-NEXT: dup v16.2d, x9
+; CHECK-NEXT: csel x10, x12, x10, mi
+; CHECK-NEXT: dup v7.2d, x11
; CHECK-NEXT: add z1.d, z1.d, #12 // =0xc
+; CHECK-NEXT: asr x10, x10, #2
; CHECK-NEXT: add z2.d, z2.d, #10 // =0xa
-; CHECK-NEXT: cmp x9, #1
; CHECK-NEXT: add z3.d, z3.d, #8 // =0x8
; CHECK-NEXT: add z4.d, z4.d, #6 // =0x6
; CHECK-NEXT: add z5.d, z5.d, #4 // =0x4
; CHECK-NEXT: add z6.d, z6.d, #2 // =0x2
+; CHECK-NEXT: dup v16.2d, x10
; CHECK-NEXT: cmhi v17.2d, v7.2d, v0.2d
-; CHECK-NEXT: cmhi v18.2d, v16.2d, v0.2d
-; CHECK-NEXT: add z0.d, z0.d, #14 // =0xe
; CHECK-NEXT: cmhi v19.2d, v7.2d, v1.2d
; CHECK-NEXT: cmhi v20.2d, v7.2d, v2.2d
; CHECK-NEXT: cmhi v21.2d, v7.2d, v3.2d
+; CHECK-NEXT: cmp x10, #1
; CHECK-NEXT: cmhi v22.2d, v7.2d, v4.2d
-; CHECK-NEXT: cmhi v23.2d, v7.2d, v5.2d
-; CHECK-NEXT: cmhi v24.2d, v7.2d, v6.2d
+; CHECK-NEXT: cset w10, lt
+; CHECK-NEXT: cmhi v18.2d, v16.2d, v0.2d
+; CHECK-NEXT: add z0.d, z0.d, #14 // =0xe
; CHECK-NEXT: cmhi v1.2d, v16.2d, v1.2d
; CHECK-NEXT: cmhi v2.2d, v16.2d, v2.2d
; CHECK-NEXT: cmhi v3.2d, v16.2d, v3.2d
; CHECK-NEXT: cmhi v4.2d, v16.2d, v4.2d
-; CHECK-NEXT: cmhi v7.2d, v7.2d, v0.2d
-; CHECK-NEXT: cmhi v5.2d, v16.2d, v5.2d
-; CHECK-NEXT: cmhi v6.2d, v16.2d, v6.2d
-; CHECK-NEXT: cset w9, lt
-; CHECK-NEXT: cmhi v0.2d, v16.2d, v0.2d
-; CHECK-NEXT: uzp1 v16.4s, v21.4s, v20.4s
-; CHECK-NEXT: cmp x10, #1
-; CHECK-NEXT: uzp1 v20.4s, v23.4s, v22.4s
-; CHECK-NEXT: uzp1 v17.4s, v17.4s, v24.4s
-; CHECK-NEXT: cset w10, lt
+; CHECK-NEXT: cmhi v23.2d, v16.2d, v5.2d
+; CHECK-NEXT: cmhi v24.2d, v16.2d, v6.2d
+; CHECK-NEXT: cmhi v5.2d, v7.2d, v5.2d
+; CHECK-NEXT: cmhi v16.2d, v16.2d, v0.2d
+; CHECK-NEXT: cmhi v6.2d, v7.2d, v6.2d
+; CHECK-NEXT: cmhi v0.2d, v7.2d, v0.2d
+; CHECK-NEXT: uzp1 v7.4s, v21.4s, v20.4s
; CHECK-NEXT: uzp1 v2.4s, v3.4s, v2.4s
-; CHECK-NEXT: uzp1 v3.4s, v19.4s, v7.4s
-; CHECK-NEXT: uzp1 v4.4s, v5.4s, v4.4s
-; CHECK-NEXT: uzp1 v5.4s, v18.4s, v6.4s
-; CHECK-NEXT: uzp1 v0.4s, v1.4s, v0.4s
-; CHECK-NEXT: uzp1 v1.8h, v17.8h, v20.8h
-; CHECK-NEXT: uzp1 v3.8h, v16.8h, v3.8h
-; CHECK-NEXT: uzp1 v4.8h, v5.8h, v4.8h
-; CHECK-NEXT: uzp1 v0.8h, v2.8h, v0.8h
-; CHECK-NEXT: dup v2.16b, w10
-; CHECK-NEXT: uzp1 v1.16b, v1.16b, v3.16b
-; CHECK-NEXT: dup v3.16b, w9
+; CHECK-NEXT: uzp1 v3.4s, v23.4s, v4.4s
+; CHECK-NEXT: uzp1 v4.4s, v18.4s, v24.4s
+; CHECK-NEXT: uzp1 v5.4s, v5.4s, v22.4s
+; CHECK-NEXT: uzp1 v1.4s, v1.4s, v16.4s
+; CHECK-NEXT: uzp1 v6.4s, v17.4s, v6.4s
+; CHECK-NEXT: uzp1 v0.4s, v19.4s, v0.4s
+; CHECK-NEXT: uzp1 v3.8h, v4.8h, v3.8h
+; CHECK-NEXT: uzp1 v1.8h, v2.8h, v1.8h
+; CHECK-NEXT: uzp1 v2.8h, v6.8h, v5.8h
+; CHECK-NEXT: uzp1 v0.8h, v7.8h, v0.8h
+; CHECK-NEXT: uzp1 v1.16b, v3.16b, v1.16b
+; CHECK-NEXT: uzp1 v0.16b, v2.16b, v0.16b
+; CHECK-NEXT: dup v3.16b, w10
+; CHECK-NEXT: dup v2.16b, w9
; CHECK-NEXT: adrp x9, .LCPI14_0
-; CHECK-NEXT: uzp1 v0.16b, v4.16b, v0.16b
-; CHECK-NEXT: orr v1.16b, v1.16b, v2.16b
+; CHECK-NEXT: orr v1.16b, v1.16b, v3.16b
+; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI14_0]
-; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: shl v1.16b, v1.16b, #7
; CHECK-NEXT: shl v0.16b, v0.16b, #7
; CHECK-NEXT: cmlt v1.16b, v1.16b, #0
@@ -586,70 +587,71 @@ entry:
define <32 x i1> @whilewr_64_expand4(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_64_expand4:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: subs x9, x1, x0
+; CHECK-NEXT: sub x10, x1, x0
; CHECK-NEXT: index z0.d, #0, #1
-; CHECK-NEXT: add x10, x9, #7
-; CHECK-NEXT: sub x11, x9, #121
-; CHECK-NEXT: csel x10, x10, x9, mi
-; CHECK-NEXT: subs x9, x9, #128
-; CHECK-NEXT: csel x9, x11, x9, mi
-; CHECK-NEXT: asr x10, x10, #3
-; CHECK-NEXT: asr x9, x9, #3
+; CHECK-NEXT: sub x9, x10, #121
+; CHECK-NEXT: subs x11, x10, #128
+; CHECK-NEXT: add x12, x10, #7
+; CHECK-NEXT: csel x9, x9, x11, mi
+; CHECK-NEXT: asr x11, x9, #3
; CHECK-NEXT: mov z1.d, z0.d
; CHECK-NEXT: mov z2.d, z0.d
; CHECK-NEXT: mov z3.d, z0.d
+; CHECK-NEXT: cmp x11, #1
; CHECK-NEXT: mov z4.d, z0.d
; CHECK-NEXT: mov z5.d, z0.d
+; CHECK-NEXT: cset w9, lt
+; CHECK-NEXT: cmp x10, #0
; CHECK-NEXT: mov z6.d, z0.d
-; CHECK-NEXT: dup v7.2d, x10
-; CHECK-NEXT: dup v16.2d, x9
+; CHECK-NEXT: csel x10, x12, x10, mi
+; CHECK-NEXT: dup v7.2d, x11
; CHECK-NEXT: add z1.d, z1.d, #12 // =0xc
+; CHECK-NEXT: asr x10, x10, #3
; CHECK-NEXT: add z2.d, z2.d, #10 // =0xa
-; CHECK-NEXT: cmp x9, #1
; CHECK-NEXT: add z3.d, z3.d, #8 // =0x8
; CHECK-NEXT: add z4.d, z4.d, #6 // =0x6
; CHECK-NEXT: add z5.d, z5.d, #4 // =0x4
; CHECK-NEXT: add z6.d, z6.d, #2 // =0x2
+; CHECK-NEXT: dup v16.2d, x10
; CHECK-NEXT: cmhi v17.2d, v7.2d, v0.2d
-; CHECK-NEXT: cmhi v18.2d, v16.2d, v0.2d
-; CHECK-NEXT: add z0.d, z0.d, #14 // =0xe
; CHECK-NEXT: cmhi v19.2d, v7.2d, v1.2d
; CHECK-NEXT: cmhi v20.2d, v7.2d, v2.2d
; CHECK-NEXT: cmhi v21.2d, v7.2d, v3.2d
+; CHECK-NEXT: cmp x10, #1
; CHECK-NEXT: cmhi v22.2d, v7.2d, v4.2d
-; CHECK-NEXT: cmhi v23.2d, v7.2d, v5.2d
-; CHECK-NEXT: cmhi v24.2d, v7.2d, v6.2d
+; CHECK-NEXT: cset w10, lt
+; CHECK-NEXT: cmhi v18.2d, v16.2d, v0.2d
+; CHECK-NEXT: add z0.d, z0.d, #14 // =0xe
; CHECK-NEXT: cmhi v1.2d, v16.2d, v1.2d
; CHECK-NEXT: cmhi v2.2d, v16.2d, v2.2d
; CHECK-NEXT: cmhi v3.2d, v16.2d, v3.2d
; CHECK-NEXT: cmhi v4.2d, v16.2d, v4.2d
-; CHECK-NEXT: cmhi v7.2d, v7.2d, v0.2d
-; CHECK-NEXT: cmhi v5.2d, v16.2d, v5.2d
-; CHECK-NEXT: cmhi v6.2d, v16.2d, v6.2d
-; CHECK-NEXT: cset w9, lt
-; CHECK-NEXT: cmhi v0.2d, v16.2d, v0.2d
-; CHECK-NEXT: uzp1 v16.4s, v21.4s, v20.4s
-; CHECK-NEXT: cmp x10, #1
-; CHECK-NEXT: uzp1 v20.4s, v23.4s, v22.4s
-; CHECK-NEXT: uzp1 v17.4s, v17.4s, v24.4s
-; CHECK-NEXT: cset w10, lt
+; CHECK-NEXT: cmhi v23.2d, v16.2d, v5.2d
+; CHECK-NEXT: cmhi v24.2d, v16.2d, v6.2d
+; CHECK-NEXT: cmhi v5.2d, v7.2d, v5.2d
+; CHECK-NEXT: cmhi v16.2d, v16.2d, v0.2d
+; CHECK-NEXT: cmhi v6.2d, v7.2d, v6.2d
+; CHECK-NEXT: cmhi v0.2d, v7.2d, v0.2d
+; CHECK-NEXT: uzp1 v7.4s, v21.4s, v20.4s
; CHECK-NEXT: uzp1 v2.4s, v3.4s, v2.4s
-; CHECK-NEXT: uzp1 v3.4s, v19.4s, v7.4s
-; CHECK-NEXT: uzp1 v4.4s, v5.4s, v4.4s
-; CHECK-NEXT: uzp1 v5.4s, v18.4s, v6.4s
-; CHECK-NEXT: uzp1 v0.4s, v1.4s, v0.4s
-; CHECK-NEXT: uzp1 v1.8h, v17.8h, v20.8h
-; CHECK-NEXT: uzp1 v3.8h, v16.8h, v3.8h
-; CHECK-NEXT: uzp1 v4.8h, v5.8h, v4.8h
-; CHECK-NEXT: uzp1 v0.8h, v2.8h, v0.8h
-; CHECK-NEXT: dup v2.16b, w10
-; CHECK-NEXT: uzp1 v1.16b, v1.16b, v3.16b
-; CHECK-NEXT: dup v3.16b, w9
+; CHECK-NEXT: uzp1 v3.4s, v23.4s, v4.4s
+; CHECK-NEXT: uzp1 v4.4s, v18.4s, v24.4s
+; CHECK-NEXT: uzp1 v5.4s, v5.4s, v22.4s
+; CHECK-NEXT: uzp1 v1.4s, v1.4s, v16.4s
+; CHECK-NEXT: uzp1 v6.4s, v17.4s, v6.4s
+; CHECK-NEXT: uzp1 v0.4s, v19.4s, v0.4s
+; CHECK-NEXT: uzp1 v3.8h, v4.8h, v3.8h
+; CHECK-NEXT: uzp1 v1.8h, v2.8h, v1.8h
+; CHECK-NEXT: uzp1 v2.8h, v6.8h, v5.8h
+; CHECK-NEXT: uzp1 v0.8h, v7.8h, v0.8h
+; CHECK-NEXT: uzp1 v1.16b, v3.16b, v1.16b
+; CHECK-NEXT: uzp1 v0.16b, v2.16b, v0.16b
+; CHECK-NEXT: dup v3.16b, w10
+; CHECK-NEXT: dup v2.16b, w9
; CHECK-NEXT: adrp x9, .LCPI18_0
-; CHECK-NEXT: uzp1 v0.16b, v4.16b, v0.16b
-; CHECK-NEXT: orr v1.16b, v1.16b, v2.16b
+; CHECK-NEXT: orr v1.16b, v1.16b, v3.16b
+; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI18_0]
-; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: shl v1.16b, v1.16b, #7
; CHECK-NEXT: shl v0.16b, v0.16b, #7
; CHECK-NEXT: cmlt v1.16b, v1.16b, #0
@@ -790,11 +792,10 @@ entry:
define <1 x i1> @whilewr_8_scalarize(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_8_scalarize:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: subs x8, x1, x0
+; CHECK-NEXT: sub x8, x1, x0
; CHECK-NEXT: cmp x8, #0
-; CHECK-NEXT: cset w8, gt
-; CHECK-NEXT: cmp x1, x0
-; CHECK-NEXT: csinc w0, w8, wzr, ne
+; CHECK-NEXT: ccmp x8, #0, #4, le
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
entry:
%0 = call <1 x i1> @llvm.loop.dependence.war.mask.v1i1(ptr %a, ptr %b, i64 1)
@@ -804,11 +805,10 @@ entry:
define <1 x i1> @whilewr_16_scalarize(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_16_scalarize:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: subs x8, x1, x0
+; CHECK-NEXT: sub x8, x1, x0
; CHECK-NEXT: cmp x8, #1
-; CHECK-NEXT: cset w8, gt
-; CHECK-NEXT: cmp x1, x0
-; CHECK-NEXT: csinc w0, w8, wzr, ne
+; CHECK-NEXT: ccmp x8, #0, #4, le
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
entry:
%0 = call <1 x i1> @llvm.loop.dependence.war.mask.v1i1(ptr %a, ptr %b, i64 2)
@@ -818,11 +818,10 @@ entry:
define <1 x i1> @whilewr_32_scalarize(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_32_scalarize:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: subs x8, x1, x0
+; CHECK-NEXT: sub x8, x1, x0
; CHECK-NEXT: cmp x8, #3
-; CHECK-NEXT: cset w8, gt
-; CHECK-NEXT: cmp x1, x0
-; CHECK-NEXT: csinc w0, w8, wzr, ne
+; CHECK-NEXT: ccmp x8, #0, #4, le
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
entry:
%0 = call <1 x i1> @llvm.loop.dependence.war.mask.v1i1(ptr %a, ptr %b, i64 4)
@@ -832,11 +831,10 @@ entry:
define <1 x i1> @whilewr_64_scalarize(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_64_scalarize:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: subs x8, x1, x0
+; CHECK-NEXT: sub x8, x1, x0
; CHECK-NEXT: cmp x8, #7
-; CHECK-NEXT: cset w8, gt
-; CHECK-NEXT: cmp x1, x0
-; CHECK-NEXT: csinc w0, w8, wzr, ne
+; CHECK-NEXT: ccmp x8, #0, #4, le
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
entry:
%0 = call <1 x i1> @llvm.loop.dependence.war.mask.v1i1(ptr %a, ptr %b, i64 8)
@@ -846,11 +844,10 @@ entry:
define <1 x i1> @whilerw_8_scalarize(ptr %a, ptr %b) {
; CHECK-LABEL: whilerw_8_scalarize:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: subs x8, x1, x0
+; CHECK-NEXT: sub x8, x1, x0
; CHECK-NEXT: cmp x8, #0
-; CHECK-NEXT: cset w8, gt
-; CHECK-NEXT: cmp x1, x0
-; CHECK-NEXT: csinc w0, w8, wzr, ne
+; CHECK-NEXT: ccmp x8, #0, #4, le
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
entry:
%0 = call <1 x i1> @llvm.loop.dependence.raw.mask.v1i1(ptr %a, ptr %b, i64 1)
@@ -860,11 +857,10 @@ entry:
define <1 x i1> @whilerw_16_scalarize(ptr %a, ptr %b) {
; CHECK-LABEL: whilerw_16_scalarize:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: subs x8, x1, x0
+; CHECK-NEXT: sub x8, x1, x0
; CHECK-NEXT: cmp x8, #1
-; CHECK-NEXT: cset w8, gt
-; CHECK-NEXT: cmp x1, x0
-; CHECK-NEXT: csinc w0, w8, wzr, ne
+; CHECK-NEXT: ccmp x8, #0, #4, le
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
entry:
%0 = call <1 x i1> @llvm.loop.dependence.raw.mask.v1i1(ptr %a, ptr %b, i64 2)
@@ -874,11 +870,10 @@ entry:
define <1 x i1> @whilerw_32_scalarize(ptr %a, ptr %b) {
; CHECK-LABEL: whilerw_32_scalarize:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: subs x8, x1, x0
+; CHECK-NEXT: sub x8, x1, x0
; CHECK-NEXT: cmp x8, #3
-; CHECK-NEXT: cset w8, gt
-; CHECK-NEXT: cmp x1, x0
-; CHECK-NEXT: csinc w0, w8, wzr, ne
+; CHECK-NEXT: ccmp x8, #0, #4, le
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
entry:
%0 = call <1 x i1> @llvm.loop.dependence.raw.mask.v1i1(ptr %a, ptr %b, i64 4)
@@ -888,11 +883,10 @@ entry:
define <1 x i1> @whilerw_64_scalarize(ptr %a, ptr %b) {
; CHECK-LABEL: whilerw_64_scalarize:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: subs x8, x1, x0
+; CHECK-NEXT: sub x8, x1, x0
; CHECK-NEXT: cmp x8, #7
-; CHECK-NEXT: cset w8, gt
-; CHECK-NEXT: cmp x1, x0
-; CHECK-NEXT: csinc w0, w8, wzr, ne
+; CHECK-NEXT: ccmp x8, #0, #4, le
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
entry:
%0 = call <1 x i1> @llvm.loop.dependence.raw.mask.v1i1(ptr %a, ptr %b, i64 8)
diff --git a/llvm/test/CodeGen/AArch64/csel-subs-dag-combine.ll b/llvm/test/CodeGen/AArch64/csel-subs-dag-combine.ll
deleted file mode 100644
index 5036be9c45e69..0000000000000
--- a/llvm/test/CodeGen/AArch64/csel-subs-dag-combine.ll
+++ /dev/null
@@ -1,112 +0,0 @@
-; RUN: llc -debug-only=isel -o /dev/null < %s 2>&1 | FileCheck %s
-
-; REQUIRES: asserts
-
-; These tests ensure that we don't combine
-; CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc, SUBS(x,y)
-; if the flags set by SUBS(SUB(x,y), 0) have more than one use.
-;
-; This restriction exists because combining SUBS(SUB(x,y), 0) -> SUBS(x,y) is
-; only valid if there are no users of the overflow flags (C/V) generated by the
-; SUBS. Currently, we only check the flags used by the CSEL, and therefore we
-; conservatively reject cases where the SUBS's flags have other uses.
-
-target triple = "aarch64-unknown-linux-gnu"
-
-; CHECK-LABEL: Legalized selection DAG: %bb.0 'combine_subs:'
-; CHECK-NEXT: SelectionDAG has 13 nodes:
-; CHECK-NEXT: t0: ch,glue = EntryToken
-; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0
-; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1
-; CHECK-NEXT: t5: i32 = sub t2, t4
-; CHECK-NEXT: t14: i32,i32 = AArch64ISD::SUBS t5, Constant:i32<0>
-; CHECK-NEXT: t16: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t14:1
-; CHECK-NEXT: t11: ch,glue = CopyToReg t0, Register:i32 $w0, t16
-; CHECK-NEXT: t12: ch = AArch64ISD::RET_GLUE t11, Register:i32 $w0, t11:1
-
-; CHECK-LABEL: Optimized legalized selection DAG: %bb.0 'combine_subs:'
-; CHECK-NEXT: SelectionDAG has 11 nodes:
-; CHECK-NEXT: t0: ch,glue = EntryToken
-; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0
-; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1
-; CHECK-NEXT: t18: i32,i32 = AArch64ISD::SUBS t2, t4
-; CHECK-NEXT: t16: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t18:1
-; CHECK-NEXT: t11: ch,glue = CopyToReg t0, Register:i32 $w0, t16
-; CHECK-NEXT: t12: ch = AArch64ISD::RET_GLUE t11, Register:i32 $w0, t11:1
-
-define i32 @combine_subs(i32 %a, i32 %b) {
- %sub = sub i32 %a, %b
- %cc = icmp ne i32 %sub, 0
- %sel = select i1 %cc, i32 %a, i32 %b
- ret i32 %sel
-}
-
-; CHECK-LABEL: Legalized selection DAG: %bb.0 'combine_subs_multiple_sub_uses:'
-; CHECK-NEXT: SelectionDAG has 14 nodes:
-; CHECK-NEXT: t0: ch,glue = EntryToken
-; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0
-; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1
-; CHECK-NEXT: t5: i32 = sub t2, t4
-; CHECK-NEXT: t15: i32,i32 = AArch64ISD::SUBS t5, Constant:i32<0>
-; CHECK-NEXT: t17: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t15:1
-; CHECK-NEXT: t10: i32 = add t17, t5
-; CHECK-NEXT: t12: ch,glue = CopyToReg t0, Register:i32 $w0, t10
-; CHECK-NEXT: t13: ch = AArch64ISD::RET_GLUE t12, Register:i32 $w0, t12:1
-
-; CHECK-LABEL: Optimized legalized selection DAG: %bb.0 'combine_subs_multiple_sub_uses:'
-; CHECK-NEXT: SelectionDAG has 12 nodes:
-; CHECK-NEXT: t0: ch,glue = EntryToken
-; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0
-; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1
-; CHECK-NEXT: t17: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t19:1
-; CHECK-NEXT: t10: i32 = add t17, t19
-; CHECK-NEXT: t12: ch,glue = CopyToReg t0, Register:i32 $w0, t10
-; CHECK-NEXT: t19: i32,i32 = AArch64ISD::SUBS t2, t4
-; CHECK-NEXT: t13: ch = AArch64ISD::RET_GLUE t12, Register:i32 $w0, t12:1
-
-define i32 @combine_subs_multiple_sub_uses(i32 %a, i32 %b) {
- %sub = sub i32 %a, %b
- %cc = icmp ne i32 %sub, 0
- %sel = select i1 %cc, i32 %a, i32 %b
- %add = add i32 %sel, %sub
- ret i32 %add
-}
-
-; CHECK-LABEL: Legalized selection DAG: %bb.0 'do_not_combine_subs_multiple_flag_uses:'
-; CHECK-NEXT: SelectionDAG has 19 nodes:
-; CHECK-NEXT: t0: ch,glue = EntryToken
-; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0
-; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1
-; CHECK-NEXT: t24: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t21:1
-; CHECK-NEXT: t6: i32,ch = CopyFromReg t0, Register:i32 %2
-; CHECK-NEXT: t8: i32,ch = CopyFromReg t0, Register:i32 %3
-; CHECK-NEXT: t23: i32 = AArch64ISD::CSEL t6, t8, Constant:i32<1>, t21:1
-; CHECK-NEXT: t15: i32 = add t24, t23
-; CHECK-NEXT: t17: ch,glue = CopyToReg t0, Register:i32 $w0, t15
-; CHECK-NEXT: t9: i32 = sub t2, t4
-; CHECK-NEXT: t21: i32,i32 = AArch64ISD::SUBS t9, Constant:i32<0>
-; CHECK-NEXT: t18: ch = AArch64ISD::RET_GLUE t17, Register:i32 $w0, t17:1
-
-; CHECK-LABEL: Optimized legalized selection DAG: %bb.0 'do_not_combine_subs_multiple_flag_uses:'
-; CHECK-NEXT: SelectionDAG has 19 nodes:
-; CHECK-NEXT: t0: ch,glue = EntryToken
-; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0
-; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1
-; CHECK-NEXT: t24: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t21:1
-; CHECK-NEXT: t6: i32,ch = CopyFromReg t0, Register:i32 %2
-; CHECK-NEXT: t8: i32,ch = CopyFromReg t0, Register:i32 %3
-; CHECK-NEXT: t23: i32 = AArch64ISD::CSEL t6, t8, Constant:i32<1>, t21:1
-; CHECK-NEXT: t15: i32 = add t24, t23
-; CHECK-NEXT: t17: ch,glue = CopyToReg t0, Register:i32 $w0, t15
-; CHECK-NEXT: t9: i32 = sub t2, t4
-; CHECK-NEXT: t21: i32,i32 = AArch64ISD::SUBS t9, Constant:i32<0>
-; CHECK-NEXT: t18: ch = AArch64ISD::RET_GLUE t17, Register:i32 $w0, t17:1
-
-define i32 @do_not_combine_subs_multiple_flag_uses(i32 %a, i32 %b, i32 %c, i32 %d) {
- %sub = sub i32 %a, %b
- %cc = icmp ne i32 %sub, 0
- %sel = select i1 %cc, i32 %a, i32 %b
- %other = select i1 %cc, i32 %c, i32 %d
- %add = add i32 %sel, %other
- ret i32 %add
-}
diff --git a/llvm/test/CodeGen/AArch64/midpoint-int.ll b/llvm/test/CodeGen/AArch64/midpoint-int.ll
index 79bba5363188b..e2e12b9c1d4e6 100644
--- a/llvm/test/CodeGen/AArch64/midpoint-int.ll
+++ b/llvm/test/CodeGen/AArch64/midpoint-int.ll
@@ -255,13 +255,14 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind {
; CHECK-LABEL: scalar_i16_signed_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w9, w0
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: subs w9, w9, w1, sxth
-; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: cneg w9, w9, mi
-; CHECK-NEXT: lsr w9, w9, #1
-; CHECK-NEXT: madd w0, w9, w8, w0
+; CHECK-NEXT: sxth w8, w0
+; CHECK-NEXT: mov w9, #-1 // =0xffffffff
+; CHECK-NEXT: subs w8, w8, w1, sxth
+; CHECK-NEXT: cneg w9, w9, le
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cneg w8, w8, mi
+; CHECK-NEXT: lsr w8, w8, #1
+; CHECK-NEXT: madd w0, w8, w9, w0
; CHECK-NEXT: ret
%t3 = icmp sgt i16 %a1, %a2 ; signed
%t4 = select i1 %t3, i16 -1, i16 1
@@ -277,13 +278,14 @@ define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind {
define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
; CHECK-LABEL: scalar_i16_unsigned_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w9, w0, #0xffff
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: subs w9, w9, w1, uxth
-; CHECK-NEXT: cneg w8, w8, ls
-; CHECK-NEXT: cneg w9, w9, mi
-; CHECK-NEXT: lsr w9, w9, #1
-; CHECK-NEXT: madd w0, w9, w8, w0
+; CHECK-NEXT: and w8, w0, #0xffff
+; CHECK-NEXT: mov w9, #-1 // =0xffffffff
+; CHECK-NEXT: subs w8, w8, w1, uxth
+; CHECK-NEXT: cneg w9, w9, ls
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cneg w8, w8, mi
+; CHECK-NEXT: lsr w8, w8, #1
+; CHECK-NEXT: madd w0, w8, w9, w0
; CHECK-NEXT: ret
%t3 = icmp ugt i16 %a1, %a2
%t4 = select i1 %t3, i16 -1, i16 1
@@ -301,13 +303,14 @@ define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
define i16 @scalar_i16_signed_mem_reg(ptr %a1_addr, i16 %a2) nounwind {
; CHECK-LABEL: scalar_i16_signed_mem_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrsh w9, [x0]
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: subs w10, w9, w1, sxth
-; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: cneg w10, w10, mi
-; CHECK-NEXT: lsr w10, w10, #1
-; CHECK-NEXT: madd w0, w10, w8, w9
+; CHECK-NEXT: ldrsh w8, [x0]
+; CHECK-NEXT: mov w10, #-1 // =0xffffffff
+; CHECK-NEXT: subs w9, w8, w1, sxth
+; CHECK-NEXT: cneg w10, w10, le
+; CHECK-NEXT: cmp w9, #0
+; CHECK-NEXT: cneg w9, w9, mi
+; CHECK-NEXT: lsr w9, w9, #1
+; CHECK-NEXT: madd w0, w9, w10, w8
; CHECK-NEXT: ret
%a1 = load i16, ptr %a1_addr
%t3 = icmp sgt i16 %a1, %a2 ; signed
@@ -379,13 +382,14 @@ define i16 @scalar_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind {
; CHECK-LABEL: scalar_i8_signed_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w9, w0
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: subs w9, w9, w1, sxtb
-; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: cneg w9, w9, mi
-; CHECK-NEXT: lsr w9, w9, #1
-; CHECK-NEXT: madd w0, w9, w8, w0
+; CHECK-NEXT: sxtb w8, w0
+; CHECK-NEXT: mov w9, #-1 // =0xffffffff
+; CHECK-NEXT: subs w8, w8, w1, sxtb
+; CHECK-NEXT: cneg w9, w9, le
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cneg w8, w8, mi
+; CHECK-NEXT: lsr w8, w8, #1
+; CHECK-NEXT: madd w0, w8, w9, w0
; CHECK-NEXT: ret
%t3 = icmp sgt i8 %a1, %a2 ; signed
%t4 = select i1 %t3, i8 -1, i8 1
@@ -401,13 +405,14 @@ define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind {
define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind {
; CHECK-LABEL: scalar_i8_unsigned_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w9, w0, #0xff
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: subs w9, w9, w1, uxtb
-; CHECK-NEXT: cneg w8, w8, ls
-; CHECK-NEXT: cneg w9, w9, mi
-; CHECK-NEXT: lsr w9, w9, #1
-; CHECK-NEXT: madd w0, w9, w8, w0
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: mov w9, #-1 // =0xffffffff
+; CHECK-NEXT: subs w8, w8, w1, uxtb
+; CHECK-NEXT: cneg w9, w9, ls
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cneg w8, w8, mi
+; CHECK-NEXT: lsr w8, w8, #1
+; CHECK-NEXT: madd w0, w8, w9, w0
; CHECK-NEXT: ret
%t3 = icmp ugt i8 %a1, %a2
%t4 = select i1 %t3, i8 -1, i8 1
@@ -425,13 +430,14 @@ define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind {
define i8 @scalar_i8_signed_mem_reg(ptr %a1_addr, i8 %a2) nounwind {
; CHECK-LABEL: scalar_i8_signed_mem_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrsb w9, [x0]
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: subs w10, w9, w1, sxtb
-; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: cneg w10, w10, mi
-; CHECK-NEXT: lsr w10, w10, #1
-; CHECK-NEXT: madd w0, w10, w8, w9
+; CHECK-NEXT: ldrsb w8, [x0]
+; CHECK-NEXT: mov w10, #-1 // =0xffffffff
+; CHECK-NEXT: subs w9, w8, w1, sxtb
+; CHECK-NEXT: cneg w10, w10, le
+; CHECK-NEXT: cmp w9, #0
+; CHECK-NEXT: cneg w9, w9, mi
+; CHECK-NEXT: lsr w9, w9, #1
+; CHECK-NEXT: madd w0, w9, w10, w8
; CHECK-NEXT: ret
%a1 = load i8, ptr %a1_addr
%t3 = icmp sgt i8 %a1, %a2 ; signed
>From 77cbcb45929a2fcf15dfdbf0fd14965bcad523ce Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Sat, 13 Sep 2025 20:51:04 -0400
Subject: [PATCH 2/5] [AArch64] Add missing sForms to sForm
---
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 16 ++++
llvm/test/CodeGen/AArch64/abds-neg.ll | 15 ++--
llvm/test/CodeGen/AArch64/abds.ll | 36 +++-----
llvm/test/CodeGen/AArch64/abdu-neg.ll | 15 ++--
llvm/test/CodeGen/AArch64/abdu.ll | 38 +++------
llvm/test/CodeGen/AArch64/abs.ll | 30 +++++++
llvm/test/CodeGen/AArch64/midpoint-int.ll | 90 +++++++++-----------
7 files changed, 123 insertions(+), 117 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 5a51c812732e6..6f469d73761ae 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1730,12 +1730,20 @@ static unsigned sForm(MachineInstr &Instr) {
case AArch64::ADDSWrr:
case AArch64::ADDSWri:
+ case AArch64::ADDSWrx:
case AArch64::ADDSXrr:
case AArch64::ADDSXri:
+ case AArch64::ADDSXrx:
case AArch64::SUBSWrr:
case AArch64::SUBSWri:
+ case AArch64::SUBSWrx:
case AArch64::SUBSXrr:
case AArch64::SUBSXri:
+ case AArch64::SUBSXrx:
+ case AArch64::ADCSWr:
+ case AArch64::ADCSXr:
+ case AArch64::SBCSWr:
+ case AArch64::SBCSXr:
return Instr.getOpcode();
case AArch64::ADDWrr:
@@ -1746,6 +1754,10 @@ static unsigned sForm(MachineInstr &Instr) {
return AArch64::ADDSXrr;
case AArch64::ADDXri:
return AArch64::ADDSXri;
+ case AArch64::ADDWrx:
+ return AArch64::ADDSWrx;
+ case AArch64::ADDXrx:
+ return AArch64::ADDSXrx;
case AArch64::ADCWr:
return AArch64::ADCSWr;
case AArch64::ADCXr:
@@ -1758,6 +1770,10 @@ static unsigned sForm(MachineInstr &Instr) {
return AArch64::SUBSXrr;
case AArch64::SUBXri:
return AArch64::SUBSXri;
+ case AArch64::SUBWrx:
+ return AArch64::SUBSWrx;
+ case AArch64::SUBXrx:
+ return AArch64::SUBSXrx;
case AArch64::SBCWr:
return AArch64::SBCSWr;
case AArch64::SBCXr:
diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll
index 2f74b8bb7de05..37319642f5b34 100644
--- a/llvm/test/CodeGen/AArch64/abds-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abds-neg.ll
@@ -9,8 +9,7 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, sxtb
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i8 %a to i64
@@ -26,8 +25,7 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, sxth
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i8 %a to i64
@@ -43,8 +41,7 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, sxtb
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i8 %a to i64
@@ -60,8 +57,7 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, sxth
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
@@ -93,8 +89,7 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, sxth
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll
index 96c2a879c631c..ffa566ddf922c 100644
--- a/llvm/test/CodeGen/AArch64/abds.ll
+++ b/llvm/test/CodeGen/AArch64/abds.ll
@@ -9,8 +9,7 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, sxtb
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i8 %a to i64
@@ -25,8 +24,7 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, sxth
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i8 %a to i64
@@ -41,8 +39,7 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, sxtb
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i8 %a to i64
@@ -57,8 +54,7 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, sxth
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
@@ -88,8 +84,7 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, sxth
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
@@ -214,8 +209,7 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_minmax_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, sxtb
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%min = call i8 @llvm.smin.i8(i8 %a, i8 %b)
@@ -228,8 +222,7 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_minmax_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, sxth
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%min = call i16 @llvm.smin.i16(i16 %a, i16 %b)
@@ -286,8 +279,7 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_cmp_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, sxtb
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp sgt i8 %a, %b
@@ -301,8 +293,7 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_cmp_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, sxth
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp sge i16 %a, %b
@@ -506,9 +497,8 @@ define i64 @vector_legalized(i16 %a, i16 %b) {
; CHECK-LABEL: vector_legalized:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxth
+; CHECK-NEXT: subs w8, w8, w1, sxth
; CHECK-NEXT: addp d0, v0.2d
-; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cneg w8, w8, mi
; CHECK-NEXT: fmov x9, d0
; CHECK-NEXT: add x0, x9, x8
@@ -531,8 +521,7 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_select_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, sxtb
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp slt i8 %a, %b
@@ -546,8 +535,7 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_select_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, sxth
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp sle i16 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll
index 98613627f7af1..79fc12ea76f63 100644
--- a/llvm/test/CodeGen/AArch64/abdu-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll
@@ -9,8 +9,7 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: sub w8, w8, w1, uxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, uxtb
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
@@ -26,8 +25,7 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: sub w8, w8, w1, uxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, uxth
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
@@ -43,8 +41,7 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: sub w8, w8, w1, uxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, uxtb
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
@@ -60,8 +57,7 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: sub w8, w8, w1, uxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, uxth
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
@@ -93,8 +89,7 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: sub w8, w8, w1, uxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, uxth
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll
index 801e44247e6a6..3a1d5a89a9276 100644
--- a/llvm/test/CodeGen/AArch64/abdu.ll
+++ b/llvm/test/CodeGen/AArch64/abdu.ll
@@ -9,8 +9,7 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: sub w8, w8, w1, uxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, uxtb
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
@@ -25,8 +24,7 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: sub w8, w8, w1, uxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, uxth
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
@@ -41,8 +39,7 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: sub w8, w8, w1, uxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, uxtb
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
@@ -57,8 +54,7 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: sub w8, w8, w1, uxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, uxth
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
@@ -88,8 +84,7 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: sub w8, w8, w1, uxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, uxth
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
@@ -218,8 +213,7 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_minmax_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: sub w8, w8, w1, uxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, uxtb
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%min = call i8 @llvm.umin.i8(i8 %a, i8 %b)
@@ -232,8 +226,7 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_minmax_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: sub w8, w8, w1, uxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, uxth
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%min = call i16 @llvm.umin.i16(i16 %a, i16 %b)
@@ -292,8 +285,7 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_cmp_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: sub w8, w8, w1, uxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, uxtb
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp ugt i8 %a, %b
@@ -307,8 +299,7 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_cmp_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: sub w8, w8, w1, uxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, uxth
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp uge i16 %a, %b
@@ -371,10 +362,9 @@ define i64 @vector_legalized(i16 %a, i16 %b) {
; CHECK-LABEL: vector_legalized:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: sub w8, w8, w1, uxth
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: addp d0, v0.2d
+; CHECK-NEXT: subs w8, w8, w1, uxth
; CHECK-NEXT: cneg w8, w8, mi
+; CHECK-NEXT: addp d0, v0.2d
; CHECK-NEXT: fmov x9, d0
; CHECK-NEXT: add x0, x9, x8
; CHECK-NEXT: ret
@@ -396,8 +386,7 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_select_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: sub w8, w8, w1, uxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, uxtb
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp ult i8 %a, %b
@@ -411,8 +400,7 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_select_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: sub w8, w8, w1, uxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: subs w8, w8, w1, uxth
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp ule i16 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/abs.ll b/llvm/test/CodeGen/AArch64/abs.ll
index 0f56d25a47b2a..92d8ba242e0de 100644
--- a/llvm/test/CodeGen/AArch64/abs.ll
+++ b/llvm/test/CodeGen/AArch64/abs.ll
@@ -388,3 +388,33 @@ entry:
ret <3 x i32> %res
}
declare <3 x i32> @llvm.abs.v3i32(<3 x i32>, i1)
+
+define i32 @combine_subs_multiple_sub_uses(i32 %a, i32 %b) {
+; CHECK-LABEL: combine_subs_multiple_sub_uses:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: csel w9, w0, w1, ne
+; CHECK-NEXT: add w0, w9, w8
+; CHECK-NEXT: ret
+ %sub = sub i32 %a, %b
+ %cc = icmp ne i32 %sub, 0
+ %sel = select i1 %cc, i32 %a, i32 %b
+ %add = add i32 %sel, %sub
+ ret i32 %add
+}
+
+define i32 @do_not_combine_subs_multiple_flag_uses(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: do_not_combine_subs_multiple_flag_uses:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: csel w8, w0, w1, ne
+; CHECK-NEXT: csel w9, w2, w3, ne
+; CHECK-NEXT: add w0, w8, w9
+; CHECK-NEXT: ret
+ %sub = sub i32 %a, %b
+ %cc = icmp ne i32 %sub, 0
+ %sel = select i1 %cc, i32 %a, i32 %b
+ %other = select i1 %cc, i32 %c, i32 %d
+ %add = add i32 %sel, %other
+ ret i32 %add
+}
diff --git a/llvm/test/CodeGen/AArch64/midpoint-int.ll b/llvm/test/CodeGen/AArch64/midpoint-int.ll
index e2e12b9c1d4e6..79bba5363188b 100644
--- a/llvm/test/CodeGen/AArch64/midpoint-int.ll
+++ b/llvm/test/CodeGen/AArch64/midpoint-int.ll
@@ -255,14 +255,13 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind {
; CHECK-LABEL: scalar_i16_signed_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
-; CHECK-NEXT: subs w8, w8, w1, sxth
-; CHECK-NEXT: cneg w9, w9, le
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: cneg w8, w8, mi
-; CHECK-NEXT: lsr w8, w8, #1
-; CHECK-NEXT: madd w0, w8, w9, w0
+; CHECK-NEXT: sxth w9, w0
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: subs w9, w9, w1, sxth
+; CHECK-NEXT: cneg w8, w8, le
+; CHECK-NEXT: cneg w9, w9, mi
+; CHECK-NEXT: lsr w9, w9, #1
+; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
%t3 = icmp sgt i16 %a1, %a2 ; signed
%t4 = select i1 %t3, i16 -1, i16 1
@@ -278,14 +277,13 @@ define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind {
define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
; CHECK-LABEL: scalar_i16_unsigned_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
-; CHECK-NEXT: subs w8, w8, w1, uxth
-; CHECK-NEXT: cneg w9, w9, ls
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: cneg w8, w8, mi
-; CHECK-NEXT: lsr w8, w8, #1
-; CHECK-NEXT: madd w0, w8, w9, w0
+; CHECK-NEXT: and w9, w0, #0xffff
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: subs w9, w9, w1, uxth
+; CHECK-NEXT: cneg w8, w8, ls
+; CHECK-NEXT: cneg w9, w9, mi
+; CHECK-NEXT: lsr w9, w9, #1
+; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
%t3 = icmp ugt i16 %a1, %a2
%t4 = select i1 %t3, i16 -1, i16 1
@@ -303,14 +301,13 @@ define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
define i16 @scalar_i16_signed_mem_reg(ptr %a1_addr, i16 %a2) nounwind {
; CHECK-LABEL: scalar_i16_signed_mem_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrsh w8, [x0]
-; CHECK-NEXT: mov w10, #-1 // =0xffffffff
-; CHECK-NEXT: subs w9, w8, w1, sxth
-; CHECK-NEXT: cneg w10, w10, le
-; CHECK-NEXT: cmp w9, #0
-; CHECK-NEXT: cneg w9, w9, mi
-; CHECK-NEXT: lsr w9, w9, #1
-; CHECK-NEXT: madd w0, w9, w10, w8
+; CHECK-NEXT: ldrsh w9, [x0]
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: subs w10, w9, w1, sxth
+; CHECK-NEXT: cneg w8, w8, le
+; CHECK-NEXT: cneg w10, w10, mi
+; CHECK-NEXT: lsr w10, w10, #1
+; CHECK-NEXT: madd w0, w10, w8, w9
; CHECK-NEXT: ret
%a1 = load i16, ptr %a1_addr
%t3 = icmp sgt i16 %a1, %a2 ; signed
@@ -382,14 +379,13 @@ define i16 @scalar_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind {
; CHECK-LABEL: scalar_i8_signed_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
-; CHECK-NEXT: subs w8, w8, w1, sxtb
-; CHECK-NEXT: cneg w9, w9, le
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: cneg w8, w8, mi
-; CHECK-NEXT: lsr w8, w8, #1
-; CHECK-NEXT: madd w0, w8, w9, w0
+; CHECK-NEXT: sxtb w9, w0
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: subs w9, w9, w1, sxtb
+; CHECK-NEXT: cneg w8, w8, le
+; CHECK-NEXT: cneg w9, w9, mi
+; CHECK-NEXT: lsr w9, w9, #1
+; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
%t3 = icmp sgt i8 %a1, %a2 ; signed
%t4 = select i1 %t3, i8 -1, i8 1
@@ -405,14 +401,13 @@ define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind {
define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind {
; CHECK-LABEL: scalar_i8_unsigned_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
-; CHECK-NEXT: subs w8, w8, w1, uxtb
-; CHECK-NEXT: cneg w9, w9, ls
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: cneg w8, w8, mi
-; CHECK-NEXT: lsr w8, w8, #1
-; CHECK-NEXT: madd w0, w8, w9, w0
+; CHECK-NEXT: and w9, w0, #0xff
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: subs w9, w9, w1, uxtb
+; CHECK-NEXT: cneg w8, w8, ls
+; CHECK-NEXT: cneg w9, w9, mi
+; CHECK-NEXT: lsr w9, w9, #1
+; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
%t3 = icmp ugt i8 %a1, %a2
%t4 = select i1 %t3, i8 -1, i8 1
@@ -430,14 +425,13 @@ define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind {
define i8 @scalar_i8_signed_mem_reg(ptr %a1_addr, i8 %a2) nounwind {
; CHECK-LABEL: scalar_i8_signed_mem_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrsb w8, [x0]
-; CHECK-NEXT: mov w10, #-1 // =0xffffffff
-; CHECK-NEXT: subs w9, w8, w1, sxtb
-; CHECK-NEXT: cneg w10, w10, le
-; CHECK-NEXT: cmp w9, #0
-; CHECK-NEXT: cneg w9, w9, mi
-; CHECK-NEXT: lsr w9, w9, #1
-; CHECK-NEXT: madd w0, w9, w10, w8
+; CHECK-NEXT: ldrsb w9, [x0]
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: subs w10, w9, w1, sxtb
+; CHECK-NEXT: cneg w8, w8, le
+; CHECK-NEXT: cneg w10, w10, mi
+; CHECK-NEXT: lsr w10, w10, #1
+; CHECK-NEXT: madd w0, w10, w8, w9
; CHECK-NEXT: ret
%a1 = load i8, ptr %a1_addr
%t3 = icmp sgt i8 %a1, %a2 ; signed
>From 086ff8c2efde77e6dce3f50b70759fe704f4428e Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Sat, 13 Sep 2025 22:09:33 -0400
Subject: [PATCH 3/5] Fix and mitigate alias mask
---
.../Target/AArch64/AArch64ISelLowering.cpp | 71 ++++++-
llvm/test/CodeGen/AArch64/alias_mask.ll | 178 +++++++++---------
llvm/test/CodeGen/AArch64/pr72777.ll | 4 +-
3 files changed, 152 insertions(+), 101 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 86511ee192f11..17918de5e2985 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3712,7 +3712,8 @@ static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &DL,
}
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
- const SDLoc &DL, SelectionDAG &DAG) {
+ const SDLoc &DL, SelectionDAG &DAG,
+ bool MIOrPLSupported = false) {
EVT VT = LHS.getValueType();
const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
@@ -3755,6 +3756,33 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
} else if (LHS.getOpcode() == AArch64ISD::ANDS) {
// Use result of ANDS
return LHS.getValue(1);
+ } else if (MIOrPLSupported) {
+ // For MIOrPLSupported, optimize SUB/ADD operations with zero comparison
+ if (LHS.getOpcode() == ISD::SUB && CC == ISD::SETLT) {
+ // SUB(x, y) < 0 -> SUBS(x, y)
+ return DAG
+ .getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT),
+ LHS.getOperand(0), LHS.getOperand(1))
+ .getValue(1);
+ } else if (LHS.getOpcode() == ISD::ADD && CC == ISD::SETGE) {
+ // ADD(x, y) >= 0 -> ADDS(x, y)
+ return DAG
+ .getNode(AArch64ISD::ADDS, DL, DAG.getVTList(VT, FlagsVT),
+ LHS.getOperand(0), LHS.getOperand(1))
+ .getValue(1);
+ } else if (LHS.getOpcode() == ISD::ADD && CC == ISD::SETLT) {
+ // ADD(x, y) < 0 -> ADDS(x, y)
+ return DAG
+ .getNode(AArch64ISD::ADDS, DL, DAG.getVTList(VT, FlagsVT),
+ LHS.getOperand(0), LHS.getOperand(1))
+ .getValue(1);
+ } else if (LHS.getOpcode() == ISD::SUB && CC == ISD::SETGE) {
+ // SUB(x, y) >= 0 -> SUBS(x, y)
+ return DAG
+ .getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT),
+ LHS.getOperand(0), LHS.getOperand(1))
+ .getValue(1);
+ }
}
}
@@ -3819,7 +3847,8 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
ISD::CondCode CC, SDValue CCOp,
AArch64CC::CondCode Predicate,
AArch64CC::CondCode OutCC,
- const SDLoc &DL, SelectionDAG &DAG) {
+ const SDLoc &DL, SelectionDAG &DAG,
+ bool MIOrPLSupported = false) {
unsigned Opcode = 0;
const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
@@ -3846,6 +3875,30 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
// we combine a (CCMP (sub 0, op1), op2) into a CCMN instruction ?
Opcode = AArch64ISD::CCMN;
LHS = LHS.getOperand(1);
+ } else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC) &&
+ MIOrPLSupported) {
+ // For MIOrPLSupported, optimize SUB/ADD operations with zero comparison
+ if (LHS.getOpcode() == ISD::SUB && CC == ISD::SETLT) {
+ // SUB(x, y) < 0 -> CCMP(x, y) with appropriate condition
+ Opcode = AArch64ISD::CCMP;
+ RHS = LHS.getOperand(1);
+ LHS = LHS.getOperand(0);
+ } else if (LHS.getOpcode() == ISD::ADD && CC == ISD::SETGE) {
+ // ADD(x, y) >= 0 -> CCMN(x, y) with appropriate condition
+ Opcode = AArch64ISD::CCMN;
+ RHS = LHS.getOperand(1);
+ LHS = LHS.getOperand(0);
+ } else if (LHS.getOpcode() == ISD::ADD && CC == ISD::SETLT) {
+ // ADD(x, y) < 0 -> CCMN(x, y) with appropriate condition
+ Opcode = AArch64ISD::CCMN;
+ RHS = LHS.getOperand(1);
+ LHS = LHS.getOperand(0);
+ } else if (LHS.getOpcode() == ISD::SUB && CC == ISD::SETGE) {
+ // SUB(x, y) >= 0 -> CCMP(x, y) with appropriate condition
+ Opcode = AArch64ISD::CCMP;
+ RHS = LHS.getOperand(1);
+ LHS = LHS.getOperand(0);
+ }
}
if (Opcode == 0)
Opcode = AArch64ISD::CCMP;
@@ -3972,7 +4025,7 @@ static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
return emitComparison(LHS, RHS, CC, DL, DAG);
// Otherwise produce a ccmp.
return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
- DAG);
+ DAG, true);
}
assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
@@ -4251,7 +4304,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
}
if (!Cmp) {
- Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
+ Cmp = emitComparison(LHS, RHS, CC, DL, DAG, true);
AArch64CC = changeIntCCToAArch64CC(CC, RHS);
}
AArch64cc = getCondCode(DAG, AArch64CC);
@@ -7371,13 +7424,13 @@ SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
SDLoc DL(Op);
- SDValue Neg = DAG.getNegative(Op.getOperand(0), DL, VT);
- // Generate SUBS & CSEL.
- SDValue Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT),
- Op.getOperand(0), DAG.getConstant(0, DL, VT));
+ // Generate CMP & CSEL.
+ SDValue Cmp = emitComparison(Op.getOperand(0), DAG.getConstant(0, DL, VT),
+ ISD::SETGE, DL, DAG, true);
+ SDValue Neg = DAG.getNegative(Op.getOperand(0), DL, VT);
return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
- getCondCode(DAG, AArch64CC::PL), Cmp.getValue(1));
+ getCondCode(DAG, AArch64CC::PL), Cmp);
}
static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
diff --git a/llvm/test/CodeGen/AArch64/alias_mask.ll b/llvm/test/CodeGen/AArch64/alias_mask.ll
index c5d3677366480..48c66ad5bac1c 100644
--- a/llvm/test/CodeGen/AArch64/alias_mask.ll
+++ b/llvm/test/CodeGen/AArch64/alias_mask.ll
@@ -393,70 +393,69 @@ entry:
define <32 x i1> @whilewr_32_expand3(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_32_expand3:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sub x10, x1, x0
+; CHECK-NEXT: subs x10, x1, x0
; CHECK-NEXT: index z0.d, #0, #1
-; CHECK-NEXT: sub x9, x10, #61
-; CHECK-NEXT: subs x11, x10, #64
-; CHECK-NEXT: add x12, x10, #3
-; CHECK-NEXT: csel x9, x9, x11, mi
+; CHECK-NEXT: add x9, x10, #3
+; CHECK-NEXT: sub x12, x10, #61
+; CHECK-NEXT: csel x9, x9, x10, mi
; CHECK-NEXT: asr x11, x9, #2
-; CHECK-NEXT: mov z1.d, z0.d
; CHECK-NEXT: mov z2.d, z0.d
; CHECK-NEXT: mov z3.d, z0.d
-; CHECK-NEXT: cmp x11, #1
; CHECK-NEXT: mov z4.d, z0.d
+; CHECK-NEXT: cmp x11, #1
+; CHECK-NEXT: dup v1.2d, x11
; CHECK-NEXT: mov z5.d, z0.d
; CHECK-NEXT: cset w9, lt
-; CHECK-NEXT: cmp x10, #0
+; CHECK-NEXT: subs x10, x10, #64
; CHECK-NEXT: mov z6.d, z0.d
; CHECK-NEXT: csel x10, x12, x10, mi
-; CHECK-NEXT: dup v7.2d, x11
-; CHECK-NEXT: add z1.d, z1.d, #12 // =0xc
+; CHECK-NEXT: mov z7.d, z0.d
+; CHECK-NEXT: add z2.d, z2.d, #12 // =0xc
; CHECK-NEXT: asr x10, x10, #2
-; CHECK-NEXT: add z2.d, z2.d, #10 // =0xa
-; CHECK-NEXT: add z3.d, z3.d, #8 // =0x8
-; CHECK-NEXT: add z4.d, z4.d, #6 // =0x6
-; CHECK-NEXT: add z5.d, z5.d, #4 // =0x4
-; CHECK-NEXT: add z6.d, z6.d, #2 // =0x2
+; CHECK-NEXT: add z3.d, z3.d, #10 // =0xa
+; CHECK-NEXT: add z4.d, z4.d, #8 // =0x8
+; CHECK-NEXT: add z5.d, z5.d, #6 // =0x6
+; CHECK-NEXT: add z6.d, z6.d, #4 // =0x4
+; CHECK-NEXT: cmhi v17.2d, v1.2d, v0.2d
; CHECK-NEXT: dup v16.2d, x10
-; CHECK-NEXT: cmhi v17.2d, v7.2d, v0.2d
-; CHECK-NEXT: cmhi v19.2d, v7.2d, v1.2d
-; CHECK-NEXT: cmhi v20.2d, v7.2d, v2.2d
-; CHECK-NEXT: cmhi v21.2d, v7.2d, v3.2d
+; CHECK-NEXT: add z7.d, z7.d, #2 // =0x2
+; CHECK-NEXT: cmhi v19.2d, v1.2d, v2.2d
+; CHECK-NEXT: cmhi v20.2d, v1.2d, v3.2d
+; CHECK-NEXT: cmhi v21.2d, v1.2d, v4.2d
; CHECK-NEXT: cmp x10, #1
-; CHECK-NEXT: cmhi v22.2d, v7.2d, v4.2d
+; CHECK-NEXT: cmhi v22.2d, v1.2d, v5.2d
; CHECK-NEXT: cset w10, lt
; CHECK-NEXT: cmhi v18.2d, v16.2d, v0.2d
; CHECK-NEXT: add z0.d, z0.d, #14 // =0xe
-; CHECK-NEXT: cmhi v1.2d, v16.2d, v1.2d
; CHECK-NEXT: cmhi v2.2d, v16.2d, v2.2d
; CHECK-NEXT: cmhi v3.2d, v16.2d, v3.2d
; CHECK-NEXT: cmhi v4.2d, v16.2d, v4.2d
-; CHECK-NEXT: cmhi v23.2d, v16.2d, v5.2d
-; CHECK-NEXT: cmhi v24.2d, v16.2d, v6.2d
-; CHECK-NEXT: cmhi v5.2d, v7.2d, v5.2d
+; CHECK-NEXT: cmhi v5.2d, v16.2d, v5.2d
+; CHECK-NEXT: cmhi v23.2d, v16.2d, v6.2d
+; CHECK-NEXT: cmhi v24.2d, v16.2d, v7.2d
+; CHECK-NEXT: cmhi v6.2d, v1.2d, v6.2d
; CHECK-NEXT: cmhi v16.2d, v16.2d, v0.2d
-; CHECK-NEXT: cmhi v6.2d, v7.2d, v6.2d
-; CHECK-NEXT: cmhi v0.2d, v7.2d, v0.2d
-; CHECK-NEXT: uzp1 v7.4s, v21.4s, v20.4s
-; CHECK-NEXT: uzp1 v2.4s, v3.4s, v2.4s
-; CHECK-NEXT: uzp1 v3.4s, v23.4s, v4.4s
-; CHECK-NEXT: uzp1 v4.4s, v18.4s, v24.4s
-; CHECK-NEXT: uzp1 v5.4s, v5.4s, v22.4s
-; CHECK-NEXT: uzp1 v1.4s, v1.4s, v16.4s
-; CHECK-NEXT: uzp1 v6.4s, v17.4s, v6.4s
+; CHECK-NEXT: cmhi v7.2d, v1.2d, v7.2d
+; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: uzp1 v1.4s, v21.4s, v20.4s
+; CHECK-NEXT: uzp1 v3.4s, v4.4s, v3.4s
+; CHECK-NEXT: uzp1 v4.4s, v23.4s, v5.4s
+; CHECK-NEXT: uzp1 v5.4s, v18.4s, v24.4s
+; CHECK-NEXT: uzp1 v6.4s, v6.4s, v22.4s
+; CHECK-NEXT: uzp1 v2.4s, v2.4s, v16.4s
+; CHECK-NEXT: uzp1 v7.4s, v17.4s, v7.4s
; CHECK-NEXT: uzp1 v0.4s, v19.4s, v0.4s
-; CHECK-NEXT: uzp1 v3.8h, v4.8h, v3.8h
-; CHECK-NEXT: uzp1 v1.8h, v2.8h, v1.8h
-; CHECK-NEXT: uzp1 v2.8h, v6.8h, v5.8h
-; CHECK-NEXT: uzp1 v0.8h, v7.8h, v0.8h
-; CHECK-NEXT: uzp1 v1.16b, v3.16b, v1.16b
-; CHECK-NEXT: uzp1 v0.16b, v2.16b, v0.16b
-; CHECK-NEXT: dup v3.16b, w10
-; CHECK-NEXT: dup v2.16b, w9
+; CHECK-NEXT: uzp1 v4.8h, v5.8h, v4.8h
+; CHECK-NEXT: uzp1 v2.8h, v3.8h, v2.8h
+; CHECK-NEXT: uzp1 v3.8h, v7.8h, v6.8h
+; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: uzp1 v1.16b, v4.16b, v2.16b
+; CHECK-NEXT: uzp1 v0.16b, v3.16b, v0.16b
+; CHECK-NEXT: dup v2.16b, w10
+; CHECK-NEXT: dup v3.16b, w9
; CHECK-NEXT: adrp x9, .LCPI14_0
-; CHECK-NEXT: orr v1.16b, v1.16b, v3.16b
-; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: orr v1.16b, v1.16b, v2.16b
+; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI14_0]
; CHECK-NEXT: shl v1.16b, v1.16b, #7
; CHECK-NEXT: shl v0.16b, v0.16b, #7
@@ -470,8 +469,8 @@ define <32 x i1> @whilewr_32_expand3(ptr %a, ptr %b) {
; CHECK-NEXT: zip1 v0.16b, v0.16b, v3.16b
; CHECK-NEXT: addv h1, v1.8h
; CHECK-NEXT: addv h0, v0.8h
-; CHECK-NEXT: str h1, [x8]
-; CHECK-NEXT: str h0, [x8, #2]
+; CHECK-NEXT: str h1, [x8, #2]
+; CHECK-NEXT: str h0, [x8]
; CHECK-NEXT: ret
entry:
%0 = call <32 x i1> @llvm.loop.dependence.war.mask.v32i1(ptr %a, ptr %b, i64 4)
@@ -587,70 +586,69 @@ entry:
define <32 x i1> @whilewr_64_expand4(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_64_expand4:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sub x10, x1, x0
+; CHECK-NEXT: subs x10, x1, x0
; CHECK-NEXT: index z0.d, #0, #1
-; CHECK-NEXT: sub x9, x10, #121
-; CHECK-NEXT: subs x11, x10, #128
-; CHECK-NEXT: add x12, x10, #7
-; CHECK-NEXT: csel x9, x9, x11, mi
+; CHECK-NEXT: add x9, x10, #7
+; CHECK-NEXT: sub x12, x10, #121
+; CHECK-NEXT: csel x9, x9, x10, mi
; CHECK-NEXT: asr x11, x9, #3
-; CHECK-NEXT: mov z1.d, z0.d
; CHECK-NEXT: mov z2.d, z0.d
; CHECK-NEXT: mov z3.d, z0.d
-; CHECK-NEXT: cmp x11, #1
; CHECK-NEXT: mov z4.d, z0.d
+; CHECK-NEXT: cmp x11, #1
+; CHECK-NEXT: dup v1.2d, x11
; CHECK-NEXT: mov z5.d, z0.d
; CHECK-NEXT: cset w9, lt
-; CHECK-NEXT: cmp x10, #0
+; CHECK-NEXT: subs x10, x10, #128
; CHECK-NEXT: mov z6.d, z0.d
; CHECK-NEXT: csel x10, x12, x10, mi
-; CHECK-NEXT: dup v7.2d, x11
-; CHECK-NEXT: add z1.d, z1.d, #12 // =0xc
+; CHECK-NEXT: mov z7.d, z0.d
+; CHECK-NEXT: add z2.d, z2.d, #12 // =0xc
; CHECK-NEXT: asr x10, x10, #3
-; CHECK-NEXT: add z2.d, z2.d, #10 // =0xa
-; CHECK-NEXT: add z3.d, z3.d, #8 // =0x8
-; CHECK-NEXT: add z4.d, z4.d, #6 // =0x6
-; CHECK-NEXT: add z5.d, z5.d, #4 // =0x4
-; CHECK-NEXT: add z6.d, z6.d, #2 // =0x2
+; CHECK-NEXT: add z3.d, z3.d, #10 // =0xa
+; CHECK-NEXT: add z4.d, z4.d, #8 // =0x8
+; CHECK-NEXT: add z5.d, z5.d, #6 // =0x6
+; CHECK-NEXT: add z6.d, z6.d, #4 // =0x4
+; CHECK-NEXT: cmhi v17.2d, v1.2d, v0.2d
; CHECK-NEXT: dup v16.2d, x10
-; CHECK-NEXT: cmhi v17.2d, v7.2d, v0.2d
-; CHECK-NEXT: cmhi v19.2d, v7.2d, v1.2d
-; CHECK-NEXT: cmhi v20.2d, v7.2d, v2.2d
-; CHECK-NEXT: cmhi v21.2d, v7.2d, v3.2d
+; CHECK-NEXT: add z7.d, z7.d, #2 // =0x2
+; CHECK-NEXT: cmhi v19.2d, v1.2d, v2.2d
+; CHECK-NEXT: cmhi v20.2d, v1.2d, v3.2d
+; CHECK-NEXT: cmhi v21.2d, v1.2d, v4.2d
; CHECK-NEXT: cmp x10, #1
-; CHECK-NEXT: cmhi v22.2d, v7.2d, v4.2d
+; CHECK-NEXT: cmhi v22.2d, v1.2d, v5.2d
; CHECK-NEXT: cset w10, lt
; CHECK-NEXT: cmhi v18.2d, v16.2d, v0.2d
; CHECK-NEXT: add z0.d, z0.d, #14 // =0xe
-; CHECK-NEXT: cmhi v1.2d, v16.2d, v1.2d
; CHECK-NEXT: cmhi v2.2d, v16.2d, v2.2d
; CHECK-NEXT: cmhi v3.2d, v16.2d, v3.2d
; CHECK-NEXT: cmhi v4.2d, v16.2d, v4.2d
-; CHECK-NEXT: cmhi v23.2d, v16.2d, v5.2d
-; CHECK-NEXT: cmhi v24.2d, v16.2d, v6.2d
-; CHECK-NEXT: cmhi v5.2d, v7.2d, v5.2d
+; CHECK-NEXT: cmhi v5.2d, v16.2d, v5.2d
+; CHECK-NEXT: cmhi v23.2d, v16.2d, v6.2d
+; CHECK-NEXT: cmhi v24.2d, v16.2d, v7.2d
+; CHECK-NEXT: cmhi v6.2d, v1.2d, v6.2d
; CHECK-NEXT: cmhi v16.2d, v16.2d, v0.2d
-; CHECK-NEXT: cmhi v6.2d, v7.2d, v6.2d
-; CHECK-NEXT: cmhi v0.2d, v7.2d, v0.2d
-; CHECK-NEXT: uzp1 v7.4s, v21.4s, v20.4s
-; CHECK-NEXT: uzp1 v2.4s, v3.4s, v2.4s
-; CHECK-NEXT: uzp1 v3.4s, v23.4s, v4.4s
-; CHECK-NEXT: uzp1 v4.4s, v18.4s, v24.4s
-; CHECK-NEXT: uzp1 v5.4s, v5.4s, v22.4s
-; CHECK-NEXT: uzp1 v1.4s, v1.4s, v16.4s
-; CHECK-NEXT: uzp1 v6.4s, v17.4s, v6.4s
+; CHECK-NEXT: cmhi v7.2d, v1.2d, v7.2d
+; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: uzp1 v1.4s, v21.4s, v20.4s
+; CHECK-NEXT: uzp1 v3.4s, v4.4s, v3.4s
+; CHECK-NEXT: uzp1 v4.4s, v23.4s, v5.4s
+; CHECK-NEXT: uzp1 v5.4s, v18.4s, v24.4s
+; CHECK-NEXT: uzp1 v6.4s, v6.4s, v22.4s
+; CHECK-NEXT: uzp1 v2.4s, v2.4s, v16.4s
+; CHECK-NEXT: uzp1 v7.4s, v17.4s, v7.4s
; CHECK-NEXT: uzp1 v0.4s, v19.4s, v0.4s
-; CHECK-NEXT: uzp1 v3.8h, v4.8h, v3.8h
-; CHECK-NEXT: uzp1 v1.8h, v2.8h, v1.8h
-; CHECK-NEXT: uzp1 v2.8h, v6.8h, v5.8h
-; CHECK-NEXT: uzp1 v0.8h, v7.8h, v0.8h
-; CHECK-NEXT: uzp1 v1.16b, v3.16b, v1.16b
-; CHECK-NEXT: uzp1 v0.16b, v2.16b, v0.16b
-; CHECK-NEXT: dup v3.16b, w10
-; CHECK-NEXT: dup v2.16b, w9
+; CHECK-NEXT: uzp1 v4.8h, v5.8h, v4.8h
+; CHECK-NEXT: uzp1 v2.8h, v3.8h, v2.8h
+; CHECK-NEXT: uzp1 v3.8h, v7.8h, v6.8h
+; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: uzp1 v1.16b, v4.16b, v2.16b
+; CHECK-NEXT: uzp1 v0.16b, v3.16b, v0.16b
+; CHECK-NEXT: dup v2.16b, w10
+; CHECK-NEXT: dup v3.16b, w9
; CHECK-NEXT: adrp x9, .LCPI18_0
-; CHECK-NEXT: orr v1.16b, v1.16b, v3.16b
-; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: orr v1.16b, v1.16b, v2.16b
+; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI18_0]
; CHECK-NEXT: shl v1.16b, v1.16b, #7
; CHECK-NEXT: shl v0.16b, v0.16b, #7
@@ -664,8 +662,8 @@ define <32 x i1> @whilewr_64_expand4(ptr %a, ptr %b) {
; CHECK-NEXT: zip1 v0.16b, v0.16b, v3.16b
; CHECK-NEXT: addv h1, v1.8h
; CHECK-NEXT: addv h0, v0.8h
-; CHECK-NEXT: str h1, [x8]
-; CHECK-NEXT: str h0, [x8, #2]
+; CHECK-NEXT: str h1, [x8, #2]
+; CHECK-NEXT: str h0, [x8]
; CHECK-NEXT: ret
entry:
%0 = call <32 x i1> @llvm.loop.dependence.war.mask.v32i1(ptr %a, ptr %b, i64 8)
diff --git a/llvm/test/CodeGen/AArch64/pr72777.ll b/llvm/test/CodeGen/AArch64/pr72777.ll
index fa9f82f8c93c2..c8966bf9ce68b 100644
--- a/llvm/test/CodeGen/AArch64/pr72777.ll
+++ b/llvm/test/CodeGen/AArch64/pr72777.ll
@@ -5,9 +5,9 @@ define i64 @f(i64 %0, i64 %1) {
; CHECK-LABEL: f:
; CHECK: // %bb.0:
; CHECK-NEXT: orr x8, x1, #0x1
-; CHECK-NEXT: add x9, x0, x0
; CHECK-NEXT: mov x10, #-9223372036854775808 // =0x8000000000000000
-; CHECK-NEXT: adds x8, x8, x9
+; CHECK-NEXT: add x8, x8, x0
+; CHECK-NEXT: adds x8, x8, x0
; CHECK-NEXT: lsl x9, x8, #1
; CHECK-NEXT: cinv x10, x10, pl
; CHECK-NEXT: cmp x8, x9, asr #1
>From 367e689228cb4539f42139ea4e009447f0ac2294 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Tue, 16 Sep 2025 09:07:21 -0400
Subject: [PATCH 4/5] Custom Lower abds and abdu
---
.../Target/AArch64/AArch64ISelLowering.cpp | 82 ++++++++++++++++++-
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 1 +
llvm/test/CodeGen/AArch64/abds-neg.ll | 50 +++++------
llvm/test/CodeGen/AArch64/abds.ll | 32 ++++----
llvm/test/CodeGen/AArch64/abdu-neg.ll | 42 ++++------
llvm/test/CodeGen/AArch64/abdu.ll | 24 +++---
llvm/test/CodeGen/AArch64/arm64-csel.ll | 4 +-
llvm/test/CodeGen/AArch64/midpoint-int.ll | 20 ++---
8 files changed, 157 insertions(+), 98 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 17918de5e2985..3538f8fdd6bc0 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -698,6 +698,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ABS, MVT::i64, Custom);
}
+ setOperationAction(ISD::ABDS, MVT::i32, Custom);
+ setOperationAction(ISD::ABDS, MVT::i64, Custom);
+ setOperationAction(ISD::ABDU, MVT::i32, Custom);
+ setOperationAction(ISD::ABDU, MVT::i64, Custom);
+
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
@@ -7433,6 +7438,80 @@ SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
getCondCode(DAG, AArch64CC::PL), Cmp);
}
+// Lower ABDS/ABDU: vectors via ABD[SU]_PRED, scalars via SUBS/ADDS + CSEL.
+SDValue AArch64TargetLowering::LowerABD(SDValue Op, SelectionDAG &DAG) const {
+ MVT VT = Op.getSimpleValueType();
+
+ bool IsSigned = Op.getOpcode() == ISD::ABDS;
+ if (VT.isVector()) {
+ if (IsSigned)
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED);
+ else
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
+ }
+
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDLoc DL(Op);
+
+ // If the subtract doesn't overflow then just use abs(sub())
+ bool IsNonNegative = DAG.SignBitIsZero(LHS) && DAG.SignBitIsZero(RHS);
+
+ if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, LHS, RHS))
+ return DAG.getNode(ISD::ABS, DL, VT,
+ DAG.getNode(ISD::SUB, DL, VT, LHS, RHS));
+
+ if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, RHS, LHS))
+ return DAG.getNode(ISD::ABS, DL, VT,
+ DAG.getNode(ISD::SUB, DL, VT, RHS, LHS));
+
+ unsigned Opcode = AArch64ISD::SUBS;
+ // Check if RHS is a negation, i.e. a subtraction from 0: (0 - X)
+ if (RHS.getOpcode() == ISD::SUB) {
+ SDValue SubLHS = RHS.getOperand(0);
+ SDValue SubRHS = RHS.getOperand(1);
+
+ // Check if it's 0 - X
+ if (isNullConstant(SubLHS)) {
+ bool CanUseAdd = false;
+ if (IsSigned) {
+ // For ABDS: only if X is known to never be INT_MIN (to avoid overflow)
+ if (RHS->getFlags().hasNoSignedWrap() || !DAG.computeKnownBits(SubRHS)
+ .getSignedMinValue()
+ .isMinSignedValue()) {
+ CanUseAdd = true;
+ }
+ } else {
+ // For ABDU: only if X is known to never be zero
+ if (DAG.isKnownNeverZero(SubRHS)) {
+ CanUseAdd = true;
+ }
+ }
+
+ if (CanUseAdd) {
+ Opcode = AArch64ISD::ADDS;
+ RHS = SubRHS; // Replace RHS with X, so we do LHS + X instead of
+ // LHS - (0 - X)
+ }
+ }
+ }
+
+ // Generate SUBS (or ADDS) and CSEL for absolute difference (like LowerABS)
+ // Compute a - b with flags
+ SDValue Cmp = DAG.getNode(Opcode, DL, DAG.getVTList(VT, FlagsVT), LHS, RHS);
+
+ // Compute b - a (negative of a - b)
+ SDValue Neg = DAG.getNegative(Cmp.getValue(0), DL, VT);
+
+ // For unsigned: use HS (a >= b) to select a-b, otherwise b-a
+ // For signed: use GE (a >= b) to select a-b, otherwise b-a
+ AArch64CC::CondCode CC = IsSigned ? AArch64CC::GE : AArch64CC::HS;
+
+ // CSEL: if a >= b, select a-b, otherwise b-a
+ return DAG.getNode(AArch64ISD::CSEL, DL, VT, Cmp.getValue(0), Neg,
+ getCondCode(DAG, CC), Cmp.getValue(1));
+}
+
static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
SDValue Chain = Op.getOperand(0);
SDValue Cond = Op.getOperand(1);
@@ -7885,9 +7964,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::ABS:
return LowerABS(Op, DAG);
case ISD::ABDS:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED);
case ISD::ABDU:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
+ return LowerABD(Op, DAG);
case ISD::AVGFLOORS:
return LowerAVG(Op, DAG, AArch64ISD::HADDS_PRED);
case ISD::AVGFLOORU:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index ff073d3eafb1f..4de8985a44ff8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -604,6 +604,7 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerABD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll
index 37319642f5b34..8512bf43815bf 100644
--- a/llvm/test/CodeGen/AArch64/abds-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abds-neg.ll
@@ -74,7 +74,7 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: subs w8, w8, w1
-; CHECK-NEXT: cneg w0, w8, gt
+; CHECK-NEXT: cneg w0, w8, ge
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
%bext = sext i32 %b to i64
@@ -105,7 +105,7 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, gt
+; CHECK-NEXT: cneg w0, w8, ge
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -120,7 +120,7 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1, sxth
-; CHECK-NEXT: cneg w0, w8, gt
+; CHECK-NEXT: cneg w0, w8, ge
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i16 %b to i64
@@ -135,7 +135,7 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, gt
+; CHECK-NEXT: cneg w0, w8, ge
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -150,7 +150,7 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, gt
+; CHECK-NEXT: cneg x0, x8, ge
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -165,7 +165,7 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, gt
+; CHECK-NEXT: cneg x0, x8, ge
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -225,12 +225,9 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_minmax_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w8, w1
-; CHECK-NEXT: sxtb w9, w0
-; CHECK-NEXT: cmp w9, w8
-; CHECK-NEXT: csel w10, w9, w8, lt
-; CHECK-NEXT: csel w8, w9, w8, gt
-; CHECK-NEXT: sub w0, w10, w8
+; CHECK-NEXT: sxtb w8, w0
+; CHECK-NEXT: subs w8, w8, w1, sxtb
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%min = call i8 @llvm.smin.i8(i8 %a, i8 %b)
%max = call i8 @llvm.smax.i8(i8 %a, i8 %b)
@@ -241,12 +238,9 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_minmax_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w1
-; CHECK-NEXT: sxth w9, w0
-; CHECK-NEXT: cmp w9, w8
-; CHECK-NEXT: csel w10, w9, w8, lt
-; CHECK-NEXT: csel w8, w9, w8, gt
-; CHECK-NEXT: sub w0, w10, w8
+; CHECK-NEXT: sxth w8, w0
+; CHECK-NEXT: subs w8, w8, w1, sxth
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%min = call i16 @llvm.smin.i16(i16 %a, i16 %b)
%max = call i16 @llvm.smax.i16(i16 %a, i16 %b)
@@ -257,10 +251,8 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_minmax_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w0, w1
-; CHECK-NEXT: csel w8, w0, w1, lt
-; CHECK-NEXT: csel w9, w0, w1, gt
-; CHECK-NEXT: sub w0, w8, w9
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, ge
; CHECK-NEXT: ret
%min = call i32 @llvm.smin.i32(i32 %a, i32 %b)
%max = call i32 @llvm.smax.i32(i32 %a, i32 %b)
@@ -271,10 +263,8 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_minmax_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x1
-; CHECK-NEXT: csel x8, x0, x1, lt
-; CHECK-NEXT: csel x9, x0, x1, gt
-; CHECK-NEXT: sub x0, x8, x9
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, ge
; CHECK-NEXT: ret
%min = call i64 @llvm.smin.i64(i64 %a, i64 %b)
%max = call i64 @llvm.smax.i64(i64 %a, i64 %b)
@@ -445,7 +435,7 @@ define i32 @abd_subnsw_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_subnsw_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, pl
+; CHECK-NEXT: cneg w0, w8, ge
; CHECK-NEXT: ret
%sub = sub nsw i32 %a, %b
%abs = call i32 @llvm.abs.i32(i32 %sub, i1 false)
@@ -457,7 +447,7 @@ define i32 @abd_subnsw_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_subnsw_i32_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, pl
+; CHECK-NEXT: cneg w0, w8, ge
; CHECK-NEXT: ret
%sub = sub nsw i32 %a, %b
%abs = call i32 @llvm.abs.i32(i32 %sub, i1 true)
@@ -469,7 +459,7 @@ define i64 @abd_subnsw_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_subnsw_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, pl
+; CHECK-NEXT: cneg x0, x8, ge
; CHECK-NEXT: ret
%sub = sub nsw i64 %a, %b
%abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
@@ -481,7 +471,7 @@ define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_subnsw_i64_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, pl
+; CHECK-NEXT: cneg x0, x8, ge
; CHECK-NEXT: ret
%sub = sub nsw i64 %a, %b
%abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll
index ffa566ddf922c..281eccb3f3459 100644
--- a/llvm/test/CodeGen/AArch64/abds.ll
+++ b/llvm/test/CodeGen/AArch64/abds.ll
@@ -70,7 +70,7 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: subs w8, w8, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, lt
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
%bext = sext i32 %b to i64
@@ -99,7 +99,7 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, lt
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -113,7 +113,7 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1, sxth
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, lt
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i16 %b to i64
@@ -127,7 +127,7 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, lt
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -141,7 +141,7 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, le
+; CHECK-NEXT: cneg x0, x8, lt
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -155,7 +155,7 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, le
+; CHECK-NEXT: cneg x0, x8, lt
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -235,7 +235,7 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_minmax_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, lt
; CHECK-NEXT: ret
%min = call i32 @llvm.smin.i32(i32 %a, i32 %b)
%max = call i32 @llvm.smax.i32(i32 %a, i32 %b)
@@ -247,7 +247,7 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_minmax_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, le
+; CHECK-NEXT: cneg x0, x8, lt
; CHECK-NEXT: ret
%min = call i64 @llvm.smin.i64(i64 %a, i64 %b)
%max = call i64 @llvm.smax.i64(i64 %a, i64 %b)
@@ -307,7 +307,7 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_cmp_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, lt
; CHECK-NEXT: ret
%cmp = icmp slt i32 %a, %b
%ab = sub i32 %a, %b
@@ -320,7 +320,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_cmp_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, le
+; CHECK-NEXT: cneg x0, x8, lt
; CHECK-NEXT: ret
%cmp = icmp sge i64 %a, %b
%ab = sub i64 %a, %b
@@ -406,7 +406,7 @@ define i32 @abd_subnsw_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_subnsw_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, mi
+; CHECK-NEXT: cneg w0, w8, lt
; CHECK-NEXT: ret
%sub = sub nsw i32 %a, %b
%abs = call i32 @llvm.abs.i32(i32 %sub, i1 false)
@@ -417,7 +417,7 @@ define i32 @abd_subnsw_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_subnsw_i32_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, mi
+; CHECK-NEXT: cneg w0, w8, lt
; CHECK-NEXT: ret
%sub = sub nsw i32 %a, %b
%abs = call i32 @llvm.abs.i32(i32 %sub, i1 true)
@@ -428,7 +428,7 @@ define i64 @abd_subnsw_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_subnsw_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, mi
+; CHECK-NEXT: cneg x0, x8, lt
; CHECK-NEXT: ret
%sub = sub nsw i64 %a, %b
%abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
@@ -439,7 +439,7 @@ define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_subnsw_i64_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, mi
+; CHECK-NEXT: cneg x0, x8, lt
; CHECK-NEXT: ret
%sub = sub nsw i64 %a, %b
%abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
@@ -549,7 +549,7 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_select_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, lt
; CHECK-NEXT: ret
%cmp = icmp sgt i32 %a, %b
%ab = select i1 %cmp, i32 %a, i32 %b
@@ -562,7 +562,7 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_select_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, le
+; CHECK-NEXT: cneg x0, x8, lt
; CHECK-NEXT: ret
%cmp = icmp sge i64 %a, %b
%ab = select i1 %cmp, i64 %a, i64 %b
diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll
index 79fc12ea76f63..92867408ba502 100644
--- a/llvm/test/CodeGen/AArch64/abdu-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll
@@ -74,7 +74,7 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: subs w8, w8, w1
-; CHECK-NEXT: cneg w0, w8, hi
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
%bext = zext i32 %b to i64
@@ -105,7 +105,7 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, hi
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
%bext = zext i32 %b to i64
@@ -120,7 +120,7 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1, uxth
-; CHECK-NEXT: cneg w0, w8, hi
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
%bext = zext i16 %b to i64
@@ -135,7 +135,7 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, hi
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
%bext = zext i32 %b to i64
@@ -150,7 +150,7 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, hi
+; CHECK-NEXT: cneg x0, x8, hs
; CHECK-NEXT: ret
%aext = zext i64 %a to i128
%bext = zext i64 %b to i128
@@ -165,7 +165,7 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, hi
+; CHECK-NEXT: cneg x0, x8, hs
; CHECK-NEXT: ret
%aext = zext i64 %a to i128
%bext = zext i64 %b to i128
@@ -229,12 +229,9 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_minmax_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w1, #0xff
-; CHECK-NEXT: and w9, w0, #0xff
-; CHECK-NEXT: cmp w9, w8
-; CHECK-NEXT: csel w10, w9, w8, lo
-; CHECK-NEXT: csel w8, w9, w8, hi
-; CHECK-NEXT: sub w0, w10, w8
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: subs w8, w8, w1, uxtb
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%min = call i8 @llvm.umin.i8(i8 %a, i8 %b)
%max = call i8 @llvm.umax.i8(i8 %a, i8 %b)
@@ -245,12 +242,9 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_minmax_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w1, #0xffff
-; CHECK-NEXT: and w9, w0, #0xffff
-; CHECK-NEXT: cmp w9, w8
-; CHECK-NEXT: csel w10, w9, w8, lo
-; CHECK-NEXT: csel w8, w9, w8, hi
-; CHECK-NEXT: sub w0, w10, w8
+; CHECK-NEXT: and w8, w0, #0xffff
+; CHECK-NEXT: subs w8, w8, w1, uxth
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%min = call i16 @llvm.umin.i16(i16 %a, i16 %b)
%max = call i16 @llvm.umax.i16(i16 %a, i16 %b)
@@ -261,10 +255,8 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_minmax_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w0, w1
-; CHECK-NEXT: csel w8, w0, w1, lo
-; CHECK-NEXT: csel w9, w0, w1, hi
-; CHECK-NEXT: sub w0, w8, w9
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%min = call i32 @llvm.umin.i32(i32 %a, i32 %b)
%max = call i32 @llvm.umax.i32(i32 %a, i32 %b)
@@ -275,10 +267,8 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_minmax_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x1
-; CHECK-NEXT: csel x8, x0, x1, lo
-; CHECK-NEXT: csel x9, x0, x1, hi
-; CHECK-NEXT: sub x0, x8, x9
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, hs
; CHECK-NEXT: ret
%min = call i64 @llvm.umin.i64(i64 %a, i64 %b)
%max = call i64 @llvm.umax.i64(i64 %a, i64 %b)
diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll
index 3a1d5a89a9276..fee82cc5f42d3 100644
--- a/llvm/test/CodeGen/AArch64/abdu.ll
+++ b/llvm/test/CodeGen/AArch64/abdu.ll
@@ -70,7 +70,7 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: subs w8, w8, w1
-; CHECK-NEXT: cneg w0, w8, ls
+; CHECK-NEXT: cneg w0, w8, lo
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
%bext = zext i32 %b to i64
@@ -99,7 +99,7 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, ls
+; CHECK-NEXT: cneg w0, w8, lo
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
%bext = zext i32 %b to i64
@@ -113,7 +113,7 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1, uxth
-; CHECK-NEXT: cneg w0, w8, ls
+; CHECK-NEXT: cneg w0, w8, lo
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
%bext = zext i16 %b to i64
@@ -127,7 +127,7 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, ls
+; CHECK-NEXT: cneg w0, w8, lo
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
%bext = zext i32 %b to i64
@@ -141,7 +141,7 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, ls
+; CHECK-NEXT: cneg x0, x8, lo
; CHECK-NEXT: ret
%aext = zext i64 %a to i128
%bext = zext i64 %b to i128
@@ -155,7 +155,7 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, ls
+; CHECK-NEXT: cneg x0, x8, lo
; CHECK-NEXT: ret
%aext = zext i64 %a to i128
%bext = zext i64 %b to i128
@@ -239,7 +239,7 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_minmax_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, ls
+; CHECK-NEXT: cneg w0, w8, lo
; CHECK-NEXT: ret
%min = call i32 @llvm.umin.i32(i32 %a, i32 %b)
%max = call i32 @llvm.umax.i32(i32 %a, i32 %b)
@@ -251,7 +251,7 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_minmax_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, ls
+; CHECK-NEXT: cneg x0, x8, lo
; CHECK-NEXT: ret
%min = call i64 @llvm.umin.i64(i64 %a, i64 %b)
%max = call i64 @llvm.umax.i64(i64 %a, i64 %b)
@@ -313,7 +313,7 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_cmp_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, ls
+; CHECK-NEXT: cneg w0, w8, lo
; CHECK-NEXT: ret
%cmp = icmp ult i32 %a, %b
%ab = sub i32 %a, %b
@@ -326,7 +326,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_cmp_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, ls
+; CHECK-NEXT: cneg x0, x8, lo
; CHECK-NEXT: ret
%cmp = icmp uge i64 %a, %b
%ab = sub i64 %a, %b
@@ -414,7 +414,7 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_select_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, ls
+; CHECK-NEXT: cneg w0, w8, lo
; CHECK-NEXT: ret
%cmp = icmp ugt i32 %a, %b
%ab = select i1 %cmp, i32 %a, i32 %b
@@ -427,7 +427,7 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_select_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, ls
+; CHECK-NEXT: cneg x0, x8, lo
; CHECK-NEXT: ret
%cmp = icmp uge i64 %a, %b
%ab = select i1 %cmp, i64 %a, i64 %b
diff --git a/llvm/test/CodeGen/AArch64/arm64-csel.ll b/llvm/test/CodeGen/AArch64/arm64-csel.ll
index 69fad57a683ac..584e61b734527 100644
--- a/llvm/test/CodeGen/AArch64/arm64-csel.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-csel.ll
@@ -65,7 +65,7 @@ define i32@foo5(i32 %a, i32 %b) nounwind ssp {
; CHECK-LABEL: foo5:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, mi
+; CHECK-NEXT: cneg w0, w8, lt
; CHECK-NEXT: ret
entry:
%sub = sub nsw i32 %a, %b
@@ -98,7 +98,7 @@ define i32 @foo7(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: foo7:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w9, w8, mi
+; CHECK-NEXT: cneg w9, w8, lt
; CHECK-NEXT: cmn w8, #1
; CHECK-NEXT: csel w8, w9, w0, lt
; CHECK-NEXT: csel w0, w8, w9, gt
diff --git a/llvm/test/CodeGen/AArch64/midpoint-int.ll b/llvm/test/CodeGen/AArch64/midpoint-int.ll
index 79bba5363188b..02bbca72b5a97 100644
--- a/llvm/test/CodeGen/AArch64/midpoint-int.ll
+++ b/llvm/test/CodeGen/AArch64/midpoint-int.ll
@@ -15,7 +15,7 @@ define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: subs w9, w0, w1
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w9, w9, le
+; CHECK-NEXT: cneg w9, w9, lt
; CHECK-NEXT: cneg w8, w8, le
; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
@@ -36,7 +36,7 @@ define i32 @scalar_i32_unsigned_reg_reg(i32 %a1, i32 %a2) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: subs w9, w0, w1
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cneg w9, w9, ls
+; CHECK-NEXT: cneg w9, w9, lo
; CHECK-NEXT: cneg w8, w8, ls
; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
@@ -60,7 +60,7 @@ define i32 @scalar_i32_signed_mem_reg(ptr %a1_addr, i32 %a2) nounwind {
; CHECK-NEXT: ldr w9, [x0]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: subs w10, w9, w1
-; CHECK-NEXT: cneg w10, w10, le
+; CHECK-NEXT: cneg w10, w10, lt
; CHECK-NEXT: cneg w8, w8, le
; CHECK-NEXT: lsr w10, w10, #1
; CHECK-NEXT: madd w0, w10, w8, w9
@@ -83,7 +83,7 @@ define i32 @scalar_i32_signed_reg_mem(i32 %a1, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldr w9, [x1]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: subs w9, w0, w9
-; CHECK-NEXT: cneg w9, w9, le
+; CHECK-NEXT: cneg w9, w9, lt
; CHECK-NEXT: cneg w8, w8, le
; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
@@ -107,7 +107,7 @@ define i32 @scalar_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldr w10, [x1]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: subs w10, w9, w10
-; CHECK-NEXT: cneg w10, w10, le
+; CHECK-NEXT: cneg w10, w10, lt
; CHECK-NEXT: cneg w8, w8, le
; CHECK-NEXT: lsr w10, w10, #1
; CHECK-NEXT: madd w0, w10, w8, w9
@@ -136,7 +136,7 @@ define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: subs x9, x0, x1
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: cneg x9, x9, le
+; CHECK-NEXT: cneg x9, x9, lt
; CHECK-NEXT: cneg x8, x8, le
; CHECK-NEXT: lsr x9, x9, #1
; CHECK-NEXT: madd x0, x9, x8, x0
@@ -157,7 +157,7 @@ define i64 @scalar_i64_unsigned_reg_reg(i64 %a1, i64 %a2) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: subs x9, x0, x1
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: cneg x9, x9, ls
+; CHECK-NEXT: cneg x9, x9, lo
; CHECK-NEXT: cneg x8, x8, ls
; CHECK-NEXT: lsr x9, x9, #1
; CHECK-NEXT: madd x0, x9, x8, x0
@@ -181,7 +181,7 @@ define i64 @scalar_i64_signed_mem_reg(ptr %a1_addr, i64 %a2) nounwind {
; CHECK-NEXT: ldr x9, [x0]
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-NEXT: subs x10, x9, x1
-; CHECK-NEXT: cneg x10, x10, le
+; CHECK-NEXT: cneg x10, x10, lt
; CHECK-NEXT: cneg x8, x8, le
; CHECK-NEXT: lsr x10, x10, #1
; CHECK-NEXT: madd x0, x10, x8, x9
@@ -204,7 +204,7 @@ define i64 @scalar_i64_signed_reg_mem(i64 %a1, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldr x9, [x1]
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-NEXT: subs x9, x0, x9
-; CHECK-NEXT: cneg x9, x9, le
+; CHECK-NEXT: cneg x9, x9, lt
; CHECK-NEXT: cneg x8, x8, le
; CHECK-NEXT: lsr x9, x9, #1
; CHECK-NEXT: madd x0, x9, x8, x0
@@ -228,7 +228,7 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldr x10, [x1]
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-NEXT: subs x10, x9, x10
-; CHECK-NEXT: cneg x10, x10, le
+; CHECK-NEXT: cneg x10, x10, lt
; CHECK-NEXT: cneg x8, x8, le
; CHECK-NEXT: lsr x10, x10, #1
; CHECK-NEXT: madd x0, x10, x8, x9
>From 9b10191201aafddfa8d395dca9be7883eab02843 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Tue, 16 Sep 2025 15:44:22 -0400
Subject: [PATCH 5/5] One more
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 13 +++++++++++++
llvm/test/CodeGen/AArch64/abds-neg.ll | 3 +--
llvm/test/CodeGen/AArch64/abds.ll | 8 ++------
llvm/test/CodeGen/AArch64/abdu-neg.ll | 3 +--
llvm/test/CodeGen/AArch64/abdu.ll | 8 ++------
5 files changed, 19 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3538f8fdd6bc0..9dbcab630d5a5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7454,6 +7454,19 @@ SDValue AArch64TargetLowering::LowerABD(SDValue Op, SelectionDAG &DAG) const {
SDValue RHS = Op.getOperand(1);
SDLoc DL(Op);
+ if (!isa<ConstantSDNode>(RHS) || !isLegalCmpImmed(RHS->getAsAPIntVal())) {
+ SDValue TheLHS = isCMN(LHS, IsSigned ? ISD::SETGE : ISD::SETUGE, DAG)
+ ? LHS.getOperand(1)
+ : LHS;
+ SDValue TheRHS = isCMN(RHS, IsSigned ? ISD::SETGE : ISD::SETUGE, DAG)
+ ? RHS.getOperand(1)
+ : RHS;
+ if (getCmpOperandFoldingProfit(TheLHS) >
+ getCmpOperandFoldingProfit(TheRHS)) {
+ std::swap(LHS, RHS);
+ }
+ }
+
// If the subtract doesn't overflow then just use abs(sub())
bool IsNonNegative = DAG.SignBitIsZero(LHS) && DAG.SignBitIsZero(RHS);
diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll
index 8512bf43815bf..11bfcb92593cb 100644
--- a/llvm/test/CodeGen/AArch64/abds-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abds-neg.ll
@@ -72,8 +72,7 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: subs w8, w8, w1
+; CHECK-NEXT: subs w8, w1, w0, sxth
; CHECK-NEXT: cneg w0, w8, ge
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll
index 281eccb3f3459..42618ac0cf68d 100644
--- a/llvm/test/CodeGen/AArch64/abds.ll
+++ b/llvm/test/CodeGen/AArch64/abds.ll
@@ -68,8 +68,7 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: subs w8, w8, w1
+; CHECK-NEXT: subs w8, w1, w0, sxth
; CHECK-NEXT: cneg w0, w8, lt
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
@@ -498,10 +497,7 @@ define i64 @vector_legalized(i16 %a, i16 %b) {
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: subs w8, w8, w1, sxth
-; CHECK-NEXT: addp d0, v0.2d
-; CHECK-NEXT: cneg w8, w8, mi
-; CHECK-NEXT: fmov x9, d0
-; CHECK-NEXT: add x0, x9, x8
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%ea = sext i16 %a to i32
%eb = sext i16 %b to i32
diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll
index 92867408ba502..8da576121d881 100644
--- a/llvm/test/CodeGen/AArch64/abdu-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll
@@ -72,8 +72,7 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: subs w8, w8, w1
+; CHECK-NEXT: subs w8, w1, w0, uxth
; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll
index fee82cc5f42d3..e7e38b6cd5d99 100644
--- a/llvm/test/CodeGen/AArch64/abdu.ll
+++ b/llvm/test/CodeGen/AArch64/abdu.ll
@@ -68,8 +68,7 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: subs w8, w8, w1
+; CHECK-NEXT: subs w8, w1, w0, uxth
; CHECK-NEXT: cneg w0, w8, lo
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
@@ -363,10 +362,7 @@ define i64 @vector_legalized(i16 %a, i16 %b) {
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: subs w8, w8, w1, uxth
-; CHECK-NEXT: cneg w8, w8, mi
-; CHECK-NEXT: addp d0, v0.2d
-; CHECK-NEXT: fmov x9, d0
-; CHECK-NEXT: add x0, x9, x8
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%ea = zext i16 %a to i32
%eb = zext i16 %b to i32
More information about the llvm-commits
mailing list