[llvm] [DAG] foldABSToABD - fallback to value tracking if the (ABS (SUB LHS, RHS)) operands aren't extended (PR #147053)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 4 06:28:24 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-selectiondag

Author: Simon Pilgrim (RKSimon)

<details>
<summary>Changes</summary>

ISD::ABDS can be used if the subtraction will not overflow (this is an extension to handle cases where the NSW flag has been lost).

ISD::ABDU can be used if the sign bit of both operands is known to be zero.

Fixes #<!-- -->147049

---
Full diff: https://github.com/llvm/llvm-project/pull/147053.diff


3 Files Affected:

- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+15-6) 
- (modified) llvm/test/CodeGen/AArch64/abd-combine.ll (+20-33) 
- (modified) llvm/test/CodeGen/AArch64/sve-abd.ll (+1-2) 


``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 586eb2f3cf45e..1556e893ca050 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11402,16 +11402,25 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
   SDValue AbsOp0 = N->getOperand(0);
   unsigned Opc0 = Op0.getOpcode();
 
-  // Check if the operands of the sub are (zero|sign)-extended.
-  // TODO: Should we use ValueTracking instead?
+  // Check if the operands of the sub are (zero|sign)-extended, otherwise
+  // fallback to ValueTracking.
   if (Opc0 != Op1.getOpcode() ||
       (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
        Opc0 != ISD::SIGN_EXTEND_INREG)) {
     // fold (abs (sub nsw x, y)) -> abds(x, y)
-    // Don't fold this for unsupported types as we lose the NSW handling.
-    if (AbsOp0->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
-        TLI.preferABDSToABSWithNSW(VT)) {
-      SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
+    if (hasOperation(ISD::ABDS, VT)) {
+      // Don't fold this for unsupported types as we lose the NSW handling.
+      if (TLI.preferABDSToABSWithNSW(VT) &&
+          (AbsOp0->getFlags().hasNoSignedWrap() ||
+           DAG.willNotOverflowSub(/*IsSigned=*/true, Op0, Op1))) {
+        SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
+        return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
+      }
+    }
+    // fold (abs (sub x, y)) -> abdu(x, y)
+    if (hasOperation(ISD::ABDU, VT) && DAG.SignBitIsZero(Op0) &&
+        DAG.SignBitIsZero(Op1)) {
+      SDValue ABD = DAG.getNode(ISD::ABDU, DL, VT, Op0, Op1);
       return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
     }
     return SDValue();
diff --git a/llvm/test/CodeGen/AArch64/abd-combine.ll b/llvm/test/CodeGen/AArch64/abd-combine.ll
index e48680f4be98b..d0257890d2c43 100644
--- a/llvm/test/CodeGen/AArch64/abd-combine.ll
+++ b/llvm/test/CodeGen/AArch64/abd-combine.ll
@@ -18,13 +18,11 @@ define <8 x i16> @abdu_const(<8 x i16> %src1) {
 ; CHECK-LABEL: abdu_const:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.4s, #1
-; CHECK-NEXT:    ushll2 v2.4s, v0.8h, #0
-; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    sub v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    abs v1.4s, v1.4s
-; CHECK-NEXT:    abs v0.4s, v0.4s
-; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ushll v2.4s, v0.4h, #0
+; CHECK-NEXT:    ushll2 v0.4s, v0.8h, #0
+; CHECK-NEXT:    uabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    uabd v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT:    ret
   %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
   %sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -37,10 +35,10 @@ define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
 ; CHECK-LABEL: abdu_const_lhs:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.4s, #1
-; CHECK-NEXT:    usubw v2.4s, v1.4s, v0.4h
-; CHECK-NEXT:    usubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT:    abs v0.4s, v0.4s
-; CHECK-NEXT:    abs v1.4s, v2.4s
+; CHECK-NEXT:    ushll v2.4s, v0.4h, #0
+; CHECK-NEXT:    ushll2 v0.4s, v0.8h, #0
+; CHECK-NEXT:    uabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    uabd v1.4s, v2.4s, v1.4s
 ; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT:    ret
   %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
@@ -53,13 +51,6 @@ define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
 define <8 x i16> @abdu_const_zero(<8 x i16> %src1) {
 ; CHECK-LABEL: abdu_const_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-NEXT:    ushll v2.4s, v0.4h, #0
-; CHECK-NEXT:    usubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT:    neg v1.4s, v2.4s
-; CHECK-NEXT:    abs v0.4s, v0.4s
-; CHECK-NEXT:    abs v1.4s, v1.4s
-; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT:    ret
   %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
   %sub = sub <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
@@ -328,13 +319,11 @@ define <8 x i16> @abds_const(<8 x i16> %src1) {
 ; CHECK-LABEL: abds_const:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.4s, #1
-; CHECK-NEXT:    sshll2 v2.4s, v0.8h, #0
-; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    sub v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    abs v1.4s, v1.4s
-; CHECK-NEXT:    abs v0.4s, v0.4s
-; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    sshll v2.4s, v0.4h, #0
+; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
+; CHECK-NEXT:    sabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    sabd v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT:    ret
   %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
   %sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -347,10 +336,10 @@ define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
 ; CHECK-LABEL: abds_const_lhs:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.4s, #1
-; CHECK-NEXT:    ssubw v2.4s, v1.4s, v0.4h
-; CHECK-NEXT:    ssubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT:    abs v0.4s, v0.4s
-; CHECK-NEXT:    abs v1.4s, v2.4s
+; CHECK-NEXT:    sshll v2.4s, v0.4h, #0
+; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
+; CHECK-NEXT:    sabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    sabd v1.4s, v2.4s, v1.4s
 ; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT:    ret
   %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
@@ -363,10 +352,8 @@ define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
 define <8 x i16> @abds_const_zero(<8 x i16> %src1) {
 ; CHECK-LABEL: abds_const_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-NEXT:    sshll v2.4s, v0.4h, #0
-; CHECK-NEXT:    ssubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT:    neg v1.4s, v2.4s
+; CHECK-NEXT:    sshll v1.4s, v0.4h, #0
+; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
 ; CHECK-NEXT:    abs v0.4s, v0.4s
 ; CHECK-NEXT:    abs v1.4s, v1.4s
 ; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
diff --git a/llvm/test/CodeGen/AArch64/sve-abd.ll b/llvm/test/CodeGen/AArch64/sve-abd.ll
index 72790155d046f..92bbd4e7f2759 100644
--- a/llvm/test/CodeGen/AArch64/sve-abd.ll
+++ b/llvm/test/CodeGen/AArch64/sve-abd.ll
@@ -283,8 +283,7 @@ define <vscale x 4 x i32> @uabd_non_matching_promotion(<vscale x 4 x i8> %a, <vs
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    and z0.s, z0.s, #0xff
 ; CHECK-NEXT:    sxtb z1.s, p0/m, z1.s
-; CHECK-NEXT:    sub z0.s, z0.s, z1.s
-; CHECK-NEXT:    abs z0.s, p0/m, z0.s
+; CHECK-NEXT:    sabd z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    ret
   %a.zext = zext <vscale x 4 x i8> %a to <vscale x 4 x i32>
   %b.zext = sext <vscale x 4 x i8> %b to <vscale x 4 x i32>

``````````

</details>


https://github.com/llvm/llvm-project/pull/147053


More information about the llvm-commits mailing list