[llvm] 4fdf07f - [AArch64] Use CNEG for absolute difference patterns. (#151177)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 30 04:29:16 PDT 2025
Author: Ricardo Jesus
Date: 2025-07-30T12:29:13+01:00
New Revision: 4fdf07fd46f250804bc1ce5f9193a3ed990e308b
URL: https://github.com/llvm/llvm-project/commit/4fdf07fd46f250804bc1ce5f9193a3ed990e308b
DIFF: https://github.com/llvm/llvm-project/commit/4fdf07fd46f250804bc1ce5f9193a3ed990e308b.diff
LOG: [AArch64] Use CNEG for absolute difference patterns. (#151177)
The current code generated for absolute difference patterns
(a > b ? a - b : b - a) typically consists of sequences of:
```
sub w8, w1, w0
subs w9, w0, w1
csel w0, w9, w8, hi
```
The first sub is redundant if the csel is replaced by a cneg:
```
subs w8, w0, w1
cneg w0, w8, ls
```
This is achieved by canonicalising
```
select_cc lhs, rhs, sub(lhs, rhs), sub(rhs, lhs), cc ->
select_cc lhs, rhs, sub(lhs, rhs), neg(sub(lhs, rhs)), cc
select_cc lhs, rhs, sub(rhs, lhs), sub(lhs, rhs), cc ->
select_cc lhs, rhs, neg(sub(lhs, rhs)), sub(lhs, rhs), cc
```
as the second forms can already be matched.
This helps with some of the patterns in #118413.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/abds-neg.ll
llvm/test/CodeGen/AArch64/abds.ll
llvm/test/CodeGen/AArch64/abdu-neg.ll
llvm/test/CodeGen/AArch64/abdu.ll
llvm/test/CodeGen/AArch64/midpoint-int.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 77c0773d28945..11ce56b32c695 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11386,6 +11386,22 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
return DAG.getNode(ISD::AND, DL, VT, LHS, Shift);
}
+ // Canonicalise absolute difference patterns:
+ // select_cc lhs, rhs, sub(lhs, rhs), sub(rhs, lhs), cc ->
+ // select_cc lhs, rhs, sub(lhs, rhs), neg(sub(lhs, rhs)), cc
+ //
+ // select_cc lhs, rhs, sub(rhs, lhs), sub(lhs, rhs), cc ->
+ // select_cc lhs, rhs, neg(sub(lhs, rhs)), sub(lhs, rhs), cc
+ // The second forms can be matched into subs+cneg.
+ if (TVal.getOpcode() == ISD::SUB && FVal.getOpcode() == ISD::SUB) {
+ if (TVal.getOperand(0) == LHS && TVal.getOperand(1) == RHS &&
+ FVal.getOperand(0) == RHS && FVal.getOperand(1) == LHS)
+ FVal = DAG.getNegative(TVal, DL, TVal.getValueType());
+ else if (TVal.getOperand(0) == RHS && TVal.getOperand(1) == LHS &&
+ FVal.getOperand(0) == LHS && FVal.getOperand(1) == RHS)
+ TVal = DAG.getNegative(FVal, DL, FVal.getValueType());
+ }
+
unsigned Opcode = AArch64ISD::CSEL;
// If both the TVal and the FVal are constants, see if we can swap them in
diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll
index 432ffc30eec5e..75247823ee793 100644
--- a/llvm/test/CodeGen/AArch64/abds-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abds-neg.ll
@@ -77,10 +77,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: sub w9, w1, w8
-; CHECK-NEXT: subs w8, w8, w1
-; CHECK-NEXT: csel w8, w8, w9, gt
-; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: subs w8, w1, w8
+; CHECK-NEXT: cneg w0, w8, ge
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
%bext = sext i32 %b to i64
@@ -111,10 +109,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w8, w9, w8, gt
-; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: subs w8, w1, w0
+; CHECK-NEXT: cneg w0, w8, ge
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -129,10 +125,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w1
-; CHECK-NEXT: sub w9, w8, w0
-; CHECK-NEXT: subs w8, w0, w8
-; CHECK-NEXT: csel w8, w8, w9, gt
-; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: subs w8, w8, w0
+; CHECK-NEXT: cneg w0, w8, ge
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i16 %b to i64
@@ -146,10 +140,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w8, w9, w8, gt
-; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: subs w8, w1, w0
+; CHECK-NEXT: cneg w0, w8, ge
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -163,10 +155,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x8, x9, x8, gt
-; CHECK-NEXT: neg x0, x8
+; CHECK-NEXT: subs x8, x1, x0
+; CHECK-NEXT: cneg x0, x8, ge
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -180,10 +170,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x8, x9, x8, gt
-; CHECK-NEXT: neg x0, x8
+; CHECK-NEXT: subs x8, x1, x0
+; CHECK-NEXT: cneg x0, x8, ge
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -359,9 +347,8 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_cmp_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w0, w8, w9, ge
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, ge
; CHECK-NEXT: ret
%cmp = icmp sge i32 %a, %b
%ab = sub i32 %a, %b
@@ -373,9 +360,8 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_cmp_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x0, x9, x8, lt
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, ge
; CHECK-NEXT: ret
%cmp = icmp slt i64 %a, %b
%ab = sub i64 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll
index ed1e6077948ee..bbdb116851710 100644
--- a/llvm/test/CodeGen/AArch64/abds.ll
+++ b/llvm/test/CodeGen/AArch64/abds.ll
@@ -73,9 +73,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: sub w9, w1, w8
; CHECK-NEXT: subs w8, w8, w1
-; CHECK-NEXT: csel w0, w8, w9, gt
+; CHECK-NEXT: cneg w0, w8, le
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
%bext = sext i32 %b to i64
@@ -104,9 +103,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w0, w9, w8, gt
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, le
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -120,9 +118,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w1
-; CHECK-NEXT: sub w9, w8, w0
; CHECK-NEXT: subs w8, w0, w8
-; CHECK-NEXT: csel w0, w8, w9, gt
+; CHECK-NEXT: cneg w0, w8, le
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i16 %b to i64
@@ -135,9 +132,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w0, w9, w8, gt
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, le
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -150,9 +146,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x0, x9, x8, gt
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, le
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -165,9 +160,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x0, x9, x8, gt
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, le
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -248,9 +242,8 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_minmax_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w0, w9, w8, gt
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, le
; CHECK-NEXT: ret
%min = call i32 @llvm.smin.i32(i32 %a, i32 %b)
%max = call i32 @llvm.smax.i32(i32 %a, i32 %b)
@@ -261,9 +254,8 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_minmax_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x0, x9, x8, gt
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, le
; CHECK-NEXT: ret
%min = call i64 @llvm.smin.i64(i64 %a, i64 %b)
%max = call i64 @llvm.smax.i64(i64 %a, i64 %b)
@@ -324,9 +316,8 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_cmp_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w0, w9, w8, gt
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, le
; CHECK-NEXT: ret
%cmp = icmp slt i32 %a, %b
%ab = sub i32 %a, %b
@@ -338,9 +329,8 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_cmp_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x0, x9, x8, gt
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, le
; CHECK-NEXT: ret
%cmp = icmp sge i64 %a, %b
%ab = sub i64 %a, %b
@@ -572,9 +562,8 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_select_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w0, w9, w8, gt
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, le
; CHECK-NEXT: ret
%cmp = icmp sgt i32 %a, %b
%ab = select i1 %cmp, i32 %a, i32 %b
@@ -586,9 +575,8 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_select_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x0, x9, x8, gt
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, le
; CHECK-NEXT: ret
%cmp = icmp sge i64 %a, %b
%ab = select i1 %cmp, i64 %a, i64 %b
diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll
index 8fb106e92866e..d07f099a536ab 100644
--- a/llvm/test/CodeGen/AArch64/abdu-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll
@@ -77,10 +77,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: sub w9, w1, w8
-; CHECK-NEXT: subs w8, w8, w1
-; CHECK-NEXT: csel w8, w8, w9, hi
-; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: subs w8, w1, w8
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
%bext = zext i32 %b to i64
@@ -111,10 +109,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w8, w9, w8, hi
-; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: subs w8, w1, w0
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
%bext = zext i32 %b to i64
@@ -129,10 +125,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w1, #0xffff
-; CHECK-NEXT: sub w9, w8, w0
-; CHECK-NEXT: subs w8, w0, w8
-; CHECK-NEXT: csel w8, w8, w9, hi
-; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: subs w8, w8, w0
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
%bext = zext i16 %b to i64
@@ -146,10 +140,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w8, w9, w8, hi
-; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: subs w8, w1, w0
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
%bext = zext i32 %b to i64
@@ -163,10 +155,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x8, x9, x8, hi
-; CHECK-NEXT: neg x0, x8
+; CHECK-NEXT: subs x8, x1, x0
+; CHECK-NEXT: cneg x0, x8, hs
; CHECK-NEXT: ret
%aext = zext i64 %a to i128
%bext = zext i64 %b to i128
@@ -180,10 +170,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x8, x9, x8, hi
-; CHECK-NEXT: neg x0, x8
+; CHECK-NEXT: subs x8, x1, x0
+; CHECK-NEXT: cneg x0, x8, hs
; CHECK-NEXT: ret
%aext = zext i64 %a to i128
%bext = zext i64 %b to i128
@@ -363,9 +351,8 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_cmp_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w0, w8, w9, hs
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%cmp = icmp uge i32 %a, %b
%ab = sub i32 %a, %b
@@ -377,9 +364,8 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_cmp_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x0, x9, x8, lo
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, hs
; CHECK-NEXT: ret
%cmp = icmp ult i64 %a, %b
%ab = sub i64 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll
index 4585de96c848f..1045ee20dc734 100644
--- a/llvm/test/CodeGen/AArch64/abdu.ll
+++ b/llvm/test/CodeGen/AArch64/abdu.ll
@@ -73,9 +73,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: sub w9, w1, w8
; CHECK-NEXT: subs w8, w8, w1
-; CHECK-NEXT: csel w0, w8, w9, hi
+; CHECK-NEXT: cneg w0, w8, ls
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
%bext = zext i32 %b to i64
@@ -104,9 +103,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w0, w9, w8, hi
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, ls
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
%bext = zext i32 %b to i64
@@ -120,9 +118,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w1, #0xffff
-; CHECK-NEXT: sub w9, w8, w0
; CHECK-NEXT: subs w8, w0, w8
-; CHECK-NEXT: csel w0, w8, w9, hi
+; CHECK-NEXT: cneg w0, w8, ls
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
%bext = zext i16 %b to i64
@@ -135,9 +132,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w0, w9, w8, hi
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, ls
; CHECK-NEXT: ret
%aext = zext i32 %a to i64
%bext = zext i32 %b to i64
@@ -150,9 +146,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x0, x9, x8, hi
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, ls
; CHECK-NEXT: ret
%aext = zext i64 %a to i128
%bext = zext i64 %b to i128
@@ -165,9 +160,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x0, x9, x8, hi
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, ls
; CHECK-NEXT: ret
%aext = zext i64 %a to i128
%bext = zext i64 %b to i128
@@ -252,9 +246,8 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_minmax_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w0, w9, w8, hi
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, ls
; CHECK-NEXT: ret
%min = call i32 @llvm.umin.i32(i32 %a, i32 %b)
%max = call i32 @llvm.umax.i32(i32 %a, i32 %b)
@@ -265,9 +258,8 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_minmax_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x0, x9, x8, hi
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, ls
; CHECK-NEXT: ret
%min = call i64 @llvm.umin.i64(i64 %a, i64 %b)
%max = call i64 @llvm.umax.i64(i64 %a, i64 %b)
@@ -330,9 +322,8 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_cmp_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w0, w9, w8, hi
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, ls
; CHECK-NEXT: ret
%cmp = icmp ult i32 %a, %b
%ab = sub i32 %a, %b
@@ -344,9 +335,8 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_cmp_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x0, x9, x8, hi
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, ls
; CHECK-NEXT: ret
%cmp = icmp uge i64 %a, %b
%ab = sub i64 %a, %b
@@ -437,9 +427,8 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_select_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w0, w9, w8, hi
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, ls
; CHECK-NEXT: ret
%cmp = icmp ugt i32 %a, %b
%ab = select i1 %cmp, i32 %a, i32 %b
@@ -451,9 +440,8 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_select_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x0, x9, x8, hi
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, ls
; CHECK-NEXT: ret
%cmp = icmp uge i64 %a, %b
%ab = select i1 %cmp, i64 %a, i64 %b
diff --git a/llvm/test/CodeGen/AArch64/midpoint-int.ll b/llvm/test/CodeGen/AArch64/midpoint-int.ll
index bbdce7c6e933b..15c1dffae749e 100644
--- a/llvm/test/CodeGen/AArch64/midpoint-int.ll
+++ b/llvm/test/CodeGen/AArch64/midpoint-int.ll
@@ -13,10 +13,9 @@
define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: scalar_i32_signed_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w9, w1, w0
-; CHECK-NEXT: subs w10, w0, w1
+; CHECK-NEXT: subs w9, w0, w1
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: csel w9, w10, w9, gt
+; CHECK-NEXT: cneg w9, w9, le
; CHECK-NEXT: cneg w8, w8, le
; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
@@ -35,10 +34,9 @@ define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind {
define i32 @scalar_i32_unsigned_reg_reg(i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: scalar_i32_unsigned_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w9, w1, w0
-; CHECK-NEXT: subs w10, w0, w1
+; CHECK-NEXT: subs w9, w0, w1
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: csel w9, w10, w9, hi
+; CHECK-NEXT: cneg w9, w9, ls
; CHECK-NEXT: cneg w8, w8, ls
; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
@@ -61,11 +59,9 @@ define i32 @scalar_i32_signed_mem_reg(ptr %a1_addr, i32 %a2) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr w9, [x0]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cmp w9, w1
-; CHECK-NEXT: sub w10, w1, w9
+; CHECK-NEXT: subs w10, w9, w1
+; CHECK-NEXT: cneg w10, w10, le
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: subs w11, w9, w1
-; CHECK-NEXT: csel w10, w11, w10, gt
; CHECK-NEXT: lsr w10, w10, #1
; CHECK-NEXT: madd w0, w10, w8, w9
; CHECK-NEXT: ret
@@ -86,11 +82,9 @@ define i32 @scalar_i32_signed_reg_mem(i32 %a1, ptr %a2_addr) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr w9, [x1]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cmp w0, w9
-; CHECK-NEXT: sub w10, w9, w0
-; CHECK-NEXT: cneg w8, w8, le
; CHECK-NEXT: subs w9, w0, w9
-; CHECK-NEXT: csel w9, w9, w10, gt
+; CHECK-NEXT: cneg w9, w9, le
+; CHECK-NEXT: cneg w8, w8, le
; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
@@ -112,11 +106,9 @@ define i32 @scalar_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldr w9, [x0]
; CHECK-NEXT: ldr w10, [x1]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: sub w11, w10, w9
-; CHECK-NEXT: cneg w8, w8, le
; CHECK-NEXT: subs w10, w9, w10
-; CHECK-NEXT: csel w10, w10, w11, gt
+; CHECK-NEXT: cneg w10, w10, le
+; CHECK-NEXT: cneg w8, w8, le
; CHECK-NEXT: lsr w10, w10, #1
; CHECK-NEXT: madd w0, w10, w8, w9
; CHECK-NEXT: ret
@@ -142,10 +134,9 @@ define i32 @scalar_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind {
; CHECK-LABEL: scalar_i64_signed_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x9, x1, x0
-; CHECK-NEXT: subs x10, x0, x1
+; CHECK-NEXT: subs x9, x0, x1
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: csel x9, x10, x9, gt
+; CHECK-NEXT: cneg x9, x9, le
; CHECK-NEXT: cneg x8, x8, le
; CHECK-NEXT: lsr x9, x9, #1
; CHECK-NEXT: madd x0, x9, x8, x0
@@ -164,10 +155,9 @@ define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind {
define i64 @scalar_i64_unsigned_reg_reg(i64 %a1, i64 %a2) nounwind {
; CHECK-LABEL: scalar_i64_unsigned_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x9, x1, x0
-; CHECK-NEXT: subs x10, x0, x1
+; CHECK-NEXT: subs x9, x0, x1
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: csel x9, x10, x9, hi
+; CHECK-NEXT: cneg x9, x9, ls
; CHECK-NEXT: cneg x8, x8, ls
; CHECK-NEXT: lsr x9, x9, #1
; CHECK-NEXT: madd x0, x9, x8, x0
@@ -190,11 +180,9 @@ define i64 @scalar_i64_signed_mem_reg(ptr %a1_addr, i64 %a2) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr x9, [x0]
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: cmp x9, x1
-; CHECK-NEXT: sub x10, x1, x9
+; CHECK-NEXT: subs x10, x9, x1
+; CHECK-NEXT: cneg x10, x10, le
; CHECK-NEXT: cneg x8, x8, le
-; CHECK-NEXT: subs x11, x9, x1
-; CHECK-NEXT: csel x10, x11, x10, gt
; CHECK-NEXT: lsr x10, x10, #1
; CHECK-NEXT: madd x0, x10, x8, x9
; CHECK-NEXT: ret
@@ -215,11 +203,9 @@ define i64 @scalar_i64_signed_reg_mem(i64 %a1, ptr %a2_addr) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr x9, [x1]
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: cmp x0, x9
-; CHECK-NEXT: sub x10, x9, x0
-; CHECK-NEXT: cneg x8, x8, le
; CHECK-NEXT: subs x9, x0, x9
-; CHECK-NEXT: csel x9, x9, x10, gt
+; CHECK-NEXT: cneg x9, x9, le
+; CHECK-NEXT: cneg x8, x8, le
; CHECK-NEXT: lsr x9, x9, #1
; CHECK-NEXT: madd x0, x9, x8, x0
; CHECK-NEXT: ret
@@ -241,11 +227,9 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldr x9, [x0]
; CHECK-NEXT: ldr x10, [x1]
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: cmp x9, x10
-; CHECK-NEXT: sub x11, x10, x9
-; CHECK-NEXT: cneg x8, x8, le
; CHECK-NEXT: subs x10, x9, x10
-; CHECK-NEXT: csel x10, x10, x11, gt
+; CHECK-NEXT: cneg x10, x10, le
+; CHECK-NEXT: cneg x8, x8, le
; CHECK-NEXT: lsr x10, x10, #1
; CHECK-NEXT: madd x0, x10, x8, x9
; CHECK-NEXT: ret
More information about the llvm-commits
mailing list