[llvm] Retry "[SDAG] (abs (add nsw a, -b)) -> (abds a, b) (#175801)" (PR #186659)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 15 05:10:55 PDT 2026
https://github.com/DaKnig created https://github.com/llvm/llvm-project/pull/186659
A better version of https://github.com/llvm/llvm-project/pull/175801. See that PR for more information.
Fixes #185467.
The original patch was checking the correctness of the transformation based on the original Op1, which was then negated (in the IsAdd case). This patch fixes that issue by inverting the sign bit in that case.
Also pushed a slight NFC change there to simplify the code and remove some duplication.
alive2 proofs:
...
>From a06011815d3ebf864111bb3400f02f6c21f0f673 Mon Sep 17 00:00:00 2001
From: DaKnig <ZannyKnig at disroot.org>
Date: Sun, 15 Mar 2026 13:59:45 +0200
Subject: [PATCH 1/2] [SDAG] Add regression test
see https://github.com/llvm/llvm-project/issues/185467
---
llvm/test/CodeGen/X86/abds.ll | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
diff --git a/llvm/test/CodeGen/X86/abds.ll b/llvm/test/CodeGen/X86/abds.ll
index a1a4ba81ae493..c18de18c174a5 100644
--- a/llvm/test/CodeGen/X86/abds.ll
+++ b/llvm/test/CodeGen/X86/abds.ll
@@ -1364,6 +1364,31 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
ret i128 %sub
}
+; This used to be miscompiled into (abdu %v, i32:-1)
+; https://github.com/llvm/llvm-project/issues/185467
+define i32 @issue185467(i32 range(i32 0, 2147483647) %v) {
+; X86-LABEL: issue185467:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: incl %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: cmovsl %ecx, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: issue185467:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal 1(%rdi), %ecx
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: cmovsl %ecx, %eax
+; X64-NEXT: retq
+ %v1 = add i32 %v, 1
+ %absx = call i32 @llvm.abs.i32(i32 %v1, i1 false)
+ ret i32 %absx
+}
+
declare i8 @llvm.abs.i8(i8, i1)
declare i16 @llvm.abs.i16(i16, i1)
declare i32 @llvm.abs.i32(i32, i1)
>From e017bc4a059fc7354d5c537ccd3954befc54ca88 Mon Sep 17 00:00:00 2001
From: DaKnig <ZannyKnig at disroot.org>
Date: Sun, 15 Mar 2026 14:00:00 +0200
Subject: [PATCH 2/2] [SDAG] (abs (add nsw a, -b)) -> (abds a, b) (#175801)
This is beneficial for build vectors (BVs) of constants.
alive2: https://alive2.llvm.org/ce/z/e3GsWZ
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 61 +++++++++++++++----
llvm/test/CodeGen/AArch64/neon-abd.ll | 54 ++++++++++++++++
2 files changed, 104 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0f4503ae27998..783a060301b89 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11847,10 +11847,29 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
EVT VT = N->getValueType(0);
SDValue Op0, Op1;
- if (!sd_match(N, m_Abs(m_Sub(m_Value(Op0), m_Value(Op1)))))
+ if (!sd_match(N, m_Abs(m_AnyOf(m_Sub(m_Value(Op0), m_Value(Op1)),
+ m_Add(m_Value(Op0), m_Value(Op1))))))
return SDValue();
SDValue AbsOp0 = N->getOperand(0);
+ bool IsAdd = AbsOp0.getOpcode() == ISD::ADD;
+ // Make sure (neg B) is positive.
+ if (IsAdd) {
+ // Elements of Op1 must be constant and != VT.minSignedValue() (or undef)
+ std::function<bool(ConstantSDNode *)> IsNotMinSignedInt =
+ [VT](ConstantSDNode *C) {
+ if (C == nullptr)
+ return true;
+ return !C->getAPIntValue()
+ .trunc(VT.getScalarSizeInBits())
+ .isMinSignedValue();
+ };
+
+ if (!ISD::matchUnaryPredicate(Op1, IsNotMinSignedInt, /*AllowUndefs=*/true,
+ /*AllowTruncation=*/true))
+ return SDValue();
+ }
+
unsigned Opc0 = Op0.getOpcode();
// Check if the operands of the sub are (zero|sign)-extended, otherwise
@@ -11858,23 +11877,43 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
if (Opc0 != Op1.getOpcode() ||
(Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
Opc0 != ISD::SIGN_EXTEND_INREG)) {
+
+ auto CreateZextedAbd = [&](unsigned AbdOpc) {
+ if (IsAdd)
+ Op1 = DAG.getNegative(Op1, SDLoc(Op1), VT);
+ SDValue ABD = DAG.getNode(AbdOpc, DL, VT, Op0, Op1);
+ return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
+ };
+
// fold (abs (sub nsw x, y)) -> abds(x, y)
+ // fold (abs (add nsw x, -y)) -> abds(x, y)
+ bool AbsOpWillNSW =
+ AbsOp0->getFlags().hasNoSignedWrap() ||
+ (IsAdd ? DAG.willNotOverflowAdd(/*IsSigned=*/true, Op0, Op1)
+ : DAG.willNotOverflowSub(/*IsSigned=*/true, Op0, Op1));
+
// Don't fold this for unsupported types as we lose the NSW handling.
if (hasOperation(ISD::ABDS, VT) && TLI.preferABDSToABSWithNSW(VT) &&
- (AbsOp0->getFlags().hasNoSignedWrap() ||
- DAG.willNotOverflowSub(/*IsSigned=*/true, Op0, Op1))) {
- SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
- return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
- }
+ AbsOpWillNSW)
+ return CreateZextedAbd(ISD::ABDS);
+
// fold (abs (sub x, y)) -> abdu(x, y)
- if (hasOperation(ISD::ABDU, VT) && DAG.SignBitIsZero(Op0) &&
- DAG.SignBitIsZero(Op1)) {
- SDValue ABD = DAG.getNode(ISD::ABDU, DL, VT, Op0, Op1);
- return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
- }
+ // fold (abs (add x, -y)) -> abdu(x, y)
+ bool Op1SignBitIsOne = DAG.computeKnownBits(Op1).countMinLeadingOnes() > 0;
+ bool AbsOpWillNUW = DAG.SignBitIsZero(Op0) &&
+ (IsAdd ? DAG.SignBitIsZero(Op1) : Op1SignBitIsOne);
+
+ if (hasOperation(ISD::ABDU, VT) && AbsOpWillNUW)
+ return CreateZextedAbd(ISD::ABDU);
+
return SDValue();
}
+ // The IsAdd case explicitly checks for const/bv-of-const. This implies
+ // either Opc0 != Op1.getOpcode() or Opc0 is not one of
+ // {zext/sext/sign_ext_inreg}, which means it was already handled by the
+ // above if statement.
+ assert(!IsAdd && "Unexpected abs(add(x,y)) pattern");
+
EVT VT0, VT1;
if (Opc0 == ISD::SIGN_EXTEND_INREG) {
VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
diff --git a/llvm/test/CodeGen/AArch64/neon-abd.ll b/llvm/test/CodeGen/AArch64/neon-abd.ll
index 98833d36dbb91..931963ee0e1e5 100644
--- a/llvm/test/CodeGen/AArch64/neon-abd.ll
+++ b/llvm/test/CodeGen/AArch64/neon-abd.ll
@@ -744,6 +744,60 @@ entry:
ret <8 x i32> %r
}
+define <4 x i32> @abs_sub(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: abs_sub:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sabd v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %add = sub nsw <4 x i32> %b, %a
+ %cmp.i = icmp slt <4 x i32> %add, zeroinitializer
+ %sub.i = sub nsw <4 x i32> zeroinitializer, %add
+ %cond.i = select <4 x i1> %cmp.i, <4 x i32> %sub.i, <4 x i32> %add
+ ret <4 x i32> %cond.i
+}
+
+; short abs_diff_add_i16_rir(short a, short c) {
+; return abs(a - 0x492) + c;
+; }
+define <4 x i16> @abs_diff_add_v4i16(<4 x i16> %a, <4 x i16> %c) {
+; CHECK-LABEL: abs_diff_add_v4i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1170 // =0x492
+; CHECK-NEXT: dup v2.4h, w8
+; CHECK-NEXT: saba v1.4h, v0.4h, v2.4h
+; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: ret
+entry:
+ %conv = sext <4 x i16> %a to <4 x i32>
+ %sub = add nsw <4 x i32> %conv, splat(i32 -1170)
+ %0 = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
+ %1 = trunc <4 x i32> %0 to <4 x i16>
+ %conv2 = add <4 x i16> %1, %c
+ ret <4 x i16> %conv2
+}
+
+; short abs_diff_add_i16_rii(short a) {
+; return abs(a - 0x93) + 0x943;
+; }
+define <4 x i16> @abs_diff_add_v4i16_rii(<4 x i16> %a) {
+; CHECK-LABEL: abs_diff_add_v4i16_rii:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #2371 // =0x943
+; CHECK-NEXT: movi v2.4h, #147
+; CHECK-NEXT: dup v1.4h, w8
+; CHECK-NEXT: saba v1.4h, v0.4h, v2.4h
+; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: ret
+entry:
+ %conv = sext <4 x i16> %a to <4 x i32>
+ %sub = add nsw <4 x i32> %conv, splat(i32 -147)
+ %0 = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
+ %1 = trunc <4 x i32> %0 to <4 x i16>
+ %conv1 = add nuw <4 x i16> %1, splat(i16 2371)
+ ret <4 x i16> %conv1
+}
+
declare <8 x i8> @llvm.abs.v8i8(<8 x i8>, i1)
declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)
More information about the llvm-commits
mailing list