[llvm] [DAG] foldABSToABD - fallback to value tracking if the (ABS (SUB LHS, RHS)) operands aren't extended (PR #147053)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 4 07:05:32 PDT 2025
https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/147053
>From ada675714cba2de7e2eefeee0db8afa60669ef14 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Fri, 4 Jul 2025 14:26:40 +0100
Subject: [PATCH] [DAG] foldABSToABD - fallback to value tracking if the (ABS
(SUB LHS, RHS)) operands aren't extended
ISD::ABDS can be used if the subtraction will not overflow (this is an extension to handle cases where the NSW flag has been lost)
ISD::ABDU can be used if the sign bit of both operands is known to be zero.
Fixes #147049
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 21 +++++---
llvm/test/CodeGen/AArch64/abd-combine.ll | 53 +++++++------------
llvm/test/CodeGen/AArch64/sve-abd.ll | 3 +-
llvm/test/CodeGen/RISCV/rvv/abd.ll | 10 ++--
4 files changed, 40 insertions(+), 47 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 586eb2f3cf45e..1556e893ca050 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11402,16 +11402,25 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
SDValue AbsOp0 = N->getOperand(0);
unsigned Opc0 = Op0.getOpcode();
- // Check if the operands of the sub are (zero|sign)-extended.
- // TODO: Should we use ValueTracking instead?
+ // Check if the operands of the sub are (zero|sign)-extended, otherwise
+ // fallback to ValueTracking.
if (Opc0 != Op1.getOpcode() ||
(Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
Opc0 != ISD::SIGN_EXTEND_INREG)) {
// fold (abs (sub nsw x, y)) -> abds(x, y)
- // Don't fold this for unsupported types as we lose the NSW handling.
- if (AbsOp0->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
- TLI.preferABDSToABSWithNSW(VT)) {
- SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
+ if (hasOperation(ISD::ABDS, VT)) {
+ // Don't fold this for unsupported types as we lose the NSW handling.
+ if (TLI.preferABDSToABSWithNSW(VT) &&
+ (AbsOp0->getFlags().hasNoSignedWrap() ||
+ DAG.willNotOverflowSub(/*IsSigned=*/true, Op0, Op1))) {
+ SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
+ return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
+ }
+ }
+ // fold (abs (sub x, y)) -> abdu(x, y)
+ if (hasOperation(ISD::ABDU, VT) && DAG.SignBitIsZero(Op0) &&
+ DAG.SignBitIsZero(Op1)) {
+ SDValue ABD = DAG.getNode(ISD::ABDU, DL, VT, Op0, Op1);
return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
}
return SDValue();
diff --git a/llvm/test/CodeGen/AArch64/abd-combine.ll b/llvm/test/CodeGen/AArch64/abd-combine.ll
index e48680f4be98b..d0257890d2c43 100644
--- a/llvm/test/CodeGen/AArch64/abd-combine.ll
+++ b/llvm/test/CodeGen/AArch64/abd-combine.ll
@@ -18,13 +18,11 @@ define <8 x i16> @abdu_const(<8 x i16> %src1) {
; CHECK-LABEL: abdu_const:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: ushll2 v2.4s, v0.8h, #0
-; CHECK-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: sub v1.4s, v2.4s, v1.4s
-; CHECK-NEXT: abs v1.4s, v1.4s
-; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-NEXT: uabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: uabd v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT: ret
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
%sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -37,10 +35,10 @@ define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: abdu_const_lhs:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: usubw v2.4s, v1.4s, v0.4h
-; CHECK-NEXT: usubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: abs v1.4s, v2.4s
+; CHECK-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-NEXT: uabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: uabd v1.4s, v2.4s, v1.4s
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT: ret
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
@@ -53,13 +51,6 @@ define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
define <8 x i16> @abdu_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: abdu_const_zero:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEXT: ushll v2.4s, v0.4h, #0
-; CHECK-NEXT: usubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT: neg v1.4s, v2.4s
-; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: abs v1.4s, v1.4s
-; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT: ret
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
%sub = sub <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
@@ -328,13 +319,11 @@ define <8 x i16> @abds_const(<8 x i16> %src1) {
; CHECK-LABEL: abds_const:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: sshll2 v2.4s, v0.8h, #0
-; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: sub v1.4s, v2.4s, v1.4s
-; CHECK-NEXT: abs v1.4s, v1.4s
-; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: sshll v2.4s, v0.4h, #0
+; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: sabd v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT: ret
%zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
%sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -347,10 +336,10 @@ define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: abds_const_lhs:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: ssubw v2.4s, v1.4s, v0.4h
-; CHECK-NEXT: ssubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: abs v1.4s, v2.4s
+; CHECK-NEXT: sshll v2.4s, v0.4h, #0
+; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: sabd v1.4s, v2.4s, v1.4s
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT: ret
%zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
@@ -363,10 +352,8 @@ define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
define <8 x i16> @abds_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: abds_const_zero:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEXT: sshll v2.4s, v0.4h, #0
-; CHECK-NEXT: ssubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT: neg v1.4s, v2.4s
+; CHECK-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
; CHECK-NEXT: abs v0.4s, v0.4s
; CHECK-NEXT: abs v1.4s, v1.4s
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
diff --git a/llvm/test/CodeGen/AArch64/sve-abd.ll b/llvm/test/CodeGen/AArch64/sve-abd.ll
index 72790155d046f..92bbd4e7f2759 100644
--- a/llvm/test/CodeGen/AArch64/sve-abd.ll
+++ b/llvm/test/CodeGen/AArch64/sve-abd.ll
@@ -283,8 +283,7 @@ define <vscale x 4 x i32> @uabd_non_matching_promotion(<vscale x 4 x i8> %a, <vs
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: and z0.s, z0.s, #0xff
; CHECK-NEXT: sxtb z1.s, p0/m, z1.s
-; CHECK-NEXT: sub z0.s, z0.s, z1.s
-; CHECK-NEXT: abs z0.s, p0/m, z0.s
+; CHECK-NEXT: sabd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%a.zext = zext <vscale x 4 x i8> %a to <vscale x 4 x i32>
%b.zext = sext <vscale x 4 x i8> %b to <vscale x 4 x i32>
diff --git a/llvm/test/CodeGen/RISCV/rvv/abd.ll b/llvm/test/CodeGen/RISCV/rvv/abd.ll
index 583d872238df7..be4292c9902eb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/abd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/abd.ll
@@ -316,12 +316,10 @@ define <vscale x 4 x i32> @uabd_non_matching_promotion(<vscale x 4 x i8> %a, <vs
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vzext.vf4 v10, v8
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vsext.vf2 v8, v9
-; CHECK-NEXT: vwsub.wv v10, v10, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vrsub.vi v8, v10, 0
-; CHECK-NEXT: vmax.vv v8, v10, v8
+; CHECK-NEXT: vsext.vf4 v12, v9
+; CHECK-NEXT: vmin.vv v8, v10, v12
+; CHECK-NEXT: vmax.vv v10, v10, v12
+; CHECK-NEXT: vsub.vv v8, v10, v8
; CHECK-NEXT: ret
%a.zext = zext <vscale x 4 x i8> %a to <vscale x 4 x i32>
%b.zext = sext <vscale x 4 x i8> %b to <vscale x 4 x i32>
More information about the llvm-commits
mailing list