[llvm] [AArch64] Improve lowering of scalar abs(sub(a, b)). (PR #151180)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 29 09:09:43 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Ricardo Jesus (rj-jesus)
<details>
<summary>Changes</summary>
This patch avoids a comparison against zero when lowering abs(sub(a, b)) patterns, instead reusing the condition codes generated by a subs of the operands directly.
For example, currently:
```
sxtb w8, w0
sub w8, w8, w1, sxtb
cmp w8, #<!-- -->0
cneg w0, w8, pl
```
becomes:
```
sxtb w8, w0
sxtb w9, w1
subs w8, w9, w8
cneg w0, w8, gt
```
Whilst this doesn't decrease the number of instructions, the new version should expose more ILP and use ``cheaper'' instructions having lower latency and/or higher throughput.
This patch also includes a somewhat orthogonal change in `performNegCSelCombine`, which I included in this patch to avoid a code generation regression in `CodeGen/AArch64/abd[su]-neg.ll` due to the combine negating the operands of the csel, leading to an extra sub instruction. If preferable, I can open a separate PR for this.
Together with #<!-- -->151177, this should handle the remaining patterns in #<!-- -->118413 (although I believe now some of the `tgt` cases in the issue should generate slightly worse code than the `src` cases, presumably because the former are not combined as abd[us] nodes, but I believe these can be taken on separately).
---
Full diff: https://github.com/llvm/llvm-project/pull/151180.diff
5 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+24-9)
- (modified) llvm/test/CodeGen/AArch64/abds-neg.ll (+15-15)
- (modified) llvm/test/CodeGen/AArch64/abds.ll (+37-37)
- (modified) llvm/test/CodeGen/AArch64/abdu-neg.ll (+15-15)
- (modified) llvm/test/CodeGen/AArch64/abdu.ll (+37-37)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7b49754ee7e1f..8b515fb649ee7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7118,12 +7118,21 @@ SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
SDLoc DL(Op);
- SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
- Op.getOperand(0));
- // Generate SUBS & CSEL.
- SDValue Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT),
- Op.getOperand(0), DAG.getConstant(0, DL, VT));
- return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
+ SDValue Val = Op.getOperand(0);
+ SDValue Neg = DAG.getNegative(Val, DL, VT);
+ SDValue Cmp;
+
+ // For abs(sub(lhs, rhs)), we can compare lhs and rhs directly. This allows
+ // reusing the subs operation for the calculation and comparison.
+ if (Val.getOpcode() == ISD::SUB)
+ Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT),
+ Val.getOperand(0), Val.getOperand(1));
+ else
+ // Otherwise, compare with zero.
+ Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT), Val,
+ DAG.getConstant(0, DL, VT));
+
+ return DAG.getNode(AArch64ISD::CSEL, DL, VT, Val, Neg,
DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
Cmp.getValue(1));
}
@@ -20835,11 +20844,17 @@ static SDValue performNegCSelCombine(SDNode *N, SelectionDAG &DAG) {
if (!isNegatedInteger(N0) && !isNegatedInteger(N1))
return SDValue();
- SDValue N0N = getNegatedInteger(N0, DAG);
- SDValue N1N = getNegatedInteger(N1, DAG);
-
SDLoc DL(N);
EVT VT = CSel.getValueType();
+
+ // If the operands are negations of each other, reverse them.
+ if ((isNegatedInteger(N0) && N0.getOperand(1) == N1) ||
+ (isNegatedInteger(N1) && N1.getOperand(1) == N0))
+ return DAG.getNode(AArch64ISD::CSEL, DL, VT, N1, N0, CSel.getOperand(2),
+ CSel.getOperand(3));
+
+ SDValue N0N = getNegatedInteger(N0, DAG);
+ SDValue N1N = getNegatedInteger(N1, DAG);
return DAG.getNode(AArch64ISD::CSEL, DL, VT, N0N, N1N, CSel.getOperand(2),
CSel.getOperand(3));
}
diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll
index 432ffc30eec5e..a7c27894e6125 100644
--- a/llvm/test/CodeGen/AArch64/abds-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abds-neg.ll
@@ -8,9 +8,9 @@
define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: sxtb w8, w1
+; CHECK-NEXT: sxtb w9, w0
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i8 %a to i64
@@ -25,9 +25,9 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: sxth w8, w1
+; CHECK-NEXT: sxtb w9, w0
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i8 %a to i64
@@ -42,9 +42,9 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: sxtb w8, w1
+; CHECK-NEXT: sxtb w9, w0
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i8 %a to i64
@@ -59,9 +59,9 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: sxth w8, w1
+; CHECK-NEXT: sxth w9, w0
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
@@ -94,9 +94,9 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: sxth w8, w1
+; CHECK-NEXT: sxth w9, w0
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll
index ed1e6077948ee..3e1d8522aa9d8 100644
--- a/llvm/test/CodeGen/AArch64/abds.ll
+++ b/llvm/test/CodeGen/AArch64/abds.ll
@@ -8,9 +8,9 @@
define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: sxtb w8, w1
+; CHECK-NEXT: sxtb w9, w0
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i8 %a to i64
@@ -24,9 +24,9 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: sxth w8, w1
+; CHECK-NEXT: sxtb w9, w0
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i8 %a to i64
@@ -40,9 +40,9 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: sxtb w8, w1
+; CHECK-NEXT: sxtb w9, w0
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i8 %a to i64
@@ -56,9 +56,9 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: sxth w8, w1
+; CHECK-NEXT: sxth w9, w0
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
@@ -88,9 +88,9 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: sxth w8, w1
+; CHECK-NEXT: sxth w9, w0
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
@@ -220,9 +220,9 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_minmax_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: sxtb w8, w1
+; CHECK-NEXT: sxtb w9, w0
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%min = call i8 @llvm.smin.i8(i8 %a, i8 %b)
@@ -234,9 +234,9 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_minmax_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: sxth w8, w1
+; CHECK-NEXT: sxth w9, w0
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%min = call i16 @llvm.smin.i16(i16 %a, i16 %b)
@@ -294,9 +294,9 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_cmp_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: sxtb w8, w1
+; CHECK-NEXT: sxtb w9, w0
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp sgt i8 %a, %b
@@ -309,9 +309,9 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_cmp_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: sxth w8, w1
+; CHECK-NEXT: sxth w9, w0
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp sge i16 %a, %b
@@ -517,11 +517,11 @@ define i64 @vector_legalized(i16 %a, i16 %b) {
; CHECK-LABEL: vector_legalized:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxth
-; CHECK-NEXT: addp d0, v0.2d
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: sxth w8, w1
+; CHECK-NEXT: sxth w9, w0
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w8, w8, mi
+; CHECK-NEXT: addp d0, v0.2d
; CHECK-NEXT: fmov x9, d0
; CHECK-NEXT: add x0, x9, x8
; CHECK-NEXT: ret
@@ -542,9 +542,9 @@ define i64 @vector_legalized(i16 %a, i16 %b) {
define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_select_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: sxtb w8, w1
+; CHECK-NEXT: sxtb w9, w0
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp slt i8 %a, %b
@@ -557,9 +557,9 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_select_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: sxth w8, w1
+; CHECK-NEXT: sxth w9, w0
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp sle i16 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll
index 8fb106e92866e..a83323650330b 100644
--- a/llvm/test/CodeGen/AArch64/abdu-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll
@@ -8,9 +8,9 @@
define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: sub w8, w8, w1, uxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: and w8, w1, #0xff
+; CHECK-NEXT: and w9, w0, #0xff
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
@@ -25,9 +25,9 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: sub w8, w8, w1, uxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: and w8, w1, #0xffff
+; CHECK-NEXT: and w9, w0, #0xff
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
@@ -42,9 +42,9 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: sub w8, w8, w1, uxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: and w8, w1, #0xff
+; CHECK-NEXT: and w9, w0, #0xff
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
@@ -59,9 +59,9 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: sub w8, w8, w1, uxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: and w8, w1, #0xffff
+; CHECK-NEXT: and w9, w0, #0xffff
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
@@ -94,9 +94,9 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: sub w8, w8, w1, uxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: and w8, w1, #0xffff
+; CHECK-NEXT: and w9, w0, #0xffff
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll
index 4585de96c848f..a058121236a5b 100644
--- a/llvm/test/CodeGen/AArch64/abdu.ll
+++ b/llvm/test/CodeGen/AArch64/abdu.ll
@@ -8,9 +8,9 @@
define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: sub w8, w8, w1, uxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: and w8, w1, #0xff
+; CHECK-NEXT: and w9, w0, #0xff
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
@@ -24,9 +24,9 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: sub w8, w8, w1, uxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: and w8, w1, #0xffff
+; CHECK-NEXT: and w9, w0, #0xff
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
@@ -40,9 +40,9 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_ext_i8_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: sub w8, w8, w1, uxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: and w8, w1, #0xff
+; CHECK-NEXT: and w9, w0, #0xff
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
@@ -56,9 +56,9 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: sub w8, w8, w1, uxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: and w8, w1, #0xffff
+; CHECK-NEXT: and w9, w0, #0xffff
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
@@ -88,9 +88,9 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: sub w8, w8, w1, uxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: and w8, w1, #0xffff
+; CHECK-NEXT: and w9, w0, #0xffff
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
@@ -224,9 +224,9 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_minmax_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: sub w8, w8, w1, uxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: and w8, w1, #0xff
+; CHECK-NEXT: and w9, w0, #0xff
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%min = call i8 @llvm.umin.i8(i8 %a, i8 %b)
@@ -238,9 +238,9 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_minmax_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: sub w8, w8, w1, uxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: and w8, w1, #0xffff
+; CHECK-NEXT: and w9, w0, #0xffff
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%min = call i16 @llvm.umin.i16(i16 %a, i16 %b)
@@ -300,9 +300,9 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_cmp_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: sub w8, w8, w1, uxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: and w8, w1, #0xff
+; CHECK-NEXT: and w9, w0, #0xff
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp ugt i8 %a, %b
@@ -315,9 +315,9 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_cmp_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: sub w8, w8, w1, uxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: and w8, w1, #0xffff
+; CHECK-NEXT: and w9, w0, #0xffff
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp uge i16 %a, %b
@@ -382,11 +382,11 @@ define i64 @vector_legalized(i16 %a, i16 %b) {
; CHECK-LABEL: vector_legalized:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: sub w8, w8, w1, uxth
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: addp d0, v0.2d
+; CHECK-NEXT: and w8, w1, #0xffff
+; CHECK-NEXT: and w9, w0, #0xffff
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w8, w8, mi
+; CHECK-NEXT: addp d0, v0.2d
; CHECK-NEXT: fmov x9, d0
; CHECK-NEXT: add x0, x9, x8
; CHECK-NEXT: ret
@@ -407,9 +407,9 @@ define i64 @vector_legalized(i16 %a, i16 %b) {
define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_select_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: sub w8, w8, w1, uxtb
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: and w8, w1, #0xff
+; CHECK-NEXT: and w9, w0, #0xff
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp ult i8 %a, %b
@@ -422,9 +422,9 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_select_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: sub w8, w8, w1, uxth
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: and w8, w1, #0xffff
+; CHECK-NEXT: and w9, w0, #0xffff
+; CHECK-NEXT: subs w8, w9, w8
; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp ule i16 %a, %b
``````````
</details>
https://github.com/llvm/llvm-project/pull/151180
More information about the llvm-commits
mailing list