[llvm] [SDAG] (abs (add nsw a, -b)) -> (abds a, b) (PR #175801)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 30 14:58:57 PST 2026
https://github.com/DaKnig updated https://github.com/llvm/llvm-project/pull/175801
>From 681266d946bf31a0e278d2be5cdb93571e5cb972 Mon Sep 17 00:00:00 2001
From: DaKnig <ZannyKnig at disroot.org>
Date: Thu, 29 Jan 2026 02:04:32 +0200
Subject: [PATCH 1/3] [SDAG] (abs (add nsw a, -b)) -> (abds a, b)
This is beneficial for constants
Note to self: we should really do something about constants and sub
turning to add...
>From b7b046327ded7d0eb49470b6b6fd86edb652eade Mon Sep 17 00:00:00 2001
From: DaKnig <ZannyKnig at disroot.org>
Date: Thu, 29 Jan 2026 02:04:49 +0200
Subject: [PATCH 2/3] lit
---
llvm/test/CodeGen/AArch64/neon-abd.ll | 58 +++++++++++++++++++++++++++
1 file changed, 58 insertions(+)
diff --git a/llvm/test/CodeGen/AArch64/neon-abd.ll b/llvm/test/CodeGen/AArch64/neon-abd.ll
index c9f3fc44ddcb1..87a88f8c8e41f 100644
--- a/llvm/test/CodeGen/AArch64/neon-abd.ll
+++ b/llvm/test/CodeGen/AArch64/neon-abd.ll
@@ -743,6 +743,64 @@ entry:
ret <8 x i32> %r
}
+define <4 x i32> @abs_sub(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: abs_sub:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sabd v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %add = sub nsw <4 x i32> %b, %a
+ %cmp.i = icmp slt <4 x i32> %add, zeroinitializer
+ %sub.i = sub nsw <4 x i32> zeroinitializer, %add
+ %cond.i = select <4 x i1> %cmp.i, <4 x i32> %sub.i, <4 x i32> %add
+ ret <4 x i32> %cond.i
+}
+
+; short abs_diff_add_i16_rir(short a, short c) {
+; return abs(a - 0x492) + c;
+; }
+define <4 x i16> @abs_diff_add_v4i16(<4 x i16> %a, <4 x i16> %c) {
+; CHECK-LABEL: abs_diff_add_v4i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #-1170 // =0xfffffb6e
+; CHECK-NEXT: dup v2.4s, w8
+; CHECK-NEXT: saddw v0.4s, v2.4s, v0.4h
+; CHECK-NEXT: abs v0.4s, v0.4s
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+entry:
+ %conv = sext <4 x i16> %a to <4 x i32>
+ %sub = add nsw <4 x i32> %conv, splat(i32 -1170)
+ %0 = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
+ %1 = trunc <4 x i32> %0 to <4 x i16>
+ %conv2 = add <4 x i16> %1, %c
+ ret <4 x i16> %conv2
+}
+
+; short abs_diff_add_i16_rii(short a) {
+; return abs(a - 0x93) + 0x943;
+; }
+define <4 x i16> @abs_diff_add_v4i16_rii(<4 x i16> %a) {
+; CHECK-LABEL: abs_diff_add_v4i16_rii:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mvni v1.4s, #146
+; CHECK-NEXT: mov w8, #2371 // =0x943
+; CHECK-NEXT: saddw v0.4s, v1.4s, v0.4h
+; CHECK-NEXT: dup v1.4h, w8
+; CHECK-NEXT: abs v0.4s, v0.4s
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+entry:
+ %conv = sext <4 x i16> %a to <4 x i32>
+ %sub = add nsw <4 x i32> %conv, splat(i32 -147)
+ %0 = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
+ %1 = trunc <4 x i32> %0 to <4 x i16>
+ %conv1 = add nuw <4 x i16> %1, splat(i16 2371)
+ ret <4 x i16> %conv1
+}
+
declare <8 x i8> @llvm.abs.v8i8(<8 x i8>, i1)
declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)
>From 92b2d68e882ccdec8b6c22f66bc90c3922597029 Mon Sep 17 00:00:00 2001
From: DaKnig <ZannyKnig at disroot.org>
Date: Thu, 29 Jan 2026 02:05:09 +0200
Subject: [PATCH 3/3] code
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 39 +++++++++++++++++--
llvm/test/CodeGen/AArch64/neon-abd.ll | 18 ++++-----
2 files changed, 43 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 02ca8197161a5..33a09d66c5460 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11738,10 +11738,29 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
EVT VT = N->getValueType(0);
SDValue Op0, Op1;
- if (!sd_match(N, m_Abs(m_Sub(m_Value(Op0), m_Value(Op1)))))
+ if (!sd_match(N, m_Abs(m_AnyOf(m_Sub(m_Value(Op0), m_Value(Op1)),
+ m_Add(m_Value(Op0), m_Value(Op1))))))
return SDValue();
SDValue AbsOp0 = N->getOperand(0);
+ bool IsAdd = AbsOp0.getOpcode() == ISD::ADD;
+ // Make sure negating Op1 cannot overflow.
+ if (IsAdd) {
+ // Elements of Op1 must be constant and != VT.minSignedValue() (or undef)
+ std::function<bool(ConstantSDNode *)> IsNotMinSignedInt =
+ [VT](ConstantSDNode *C) {
+ if (C == nullptr)
+ return true;
+ return !C->getAPIntValue()
+ .trunc(VT.getScalarSizeInBits())
+ .isMinSignedValue();
+ };
+
+ if (!ISD::matchUnaryPredicate(Op1, IsNotMinSignedInt, /*AllowUndefs=*/true,
+ /*AllowTruncation=*/true))
+ return SDValue();
+ }
+
unsigned Opc0 = Op0.getOpcode();
// Check if the operands of the sub are (zero|sign)-extended, otherwise
@@ -11750,22 +11769,36 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
(Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
Opc0 != ISD::SIGN_EXTEND_INREG)) {
// fold (abs (sub nsw x, y)) -> abds(x, y)
+ // fold (abs (add nsw x, -y)) -> abds(x, y)
+ bool AbsOpWillNSW =
+ AbsOp0->getFlags().hasNoSignedWrap() ||
+ (IsAdd ? DAG.willNotOverflowAdd(/*IsSigned=*/true, Op0, Op1)
+ : DAG.willNotOverflowSub(/*IsSigned=*/true, Op0, Op1));
+
// Don't fold this for unsupported types as we lose the NSW handling.
if (hasOperation(ISD::ABDS, VT) && TLI.preferABDSToABSWithNSW(VT) &&
- (AbsOp0->getFlags().hasNoSignedWrap() ||
- DAG.willNotOverflowSub(/*IsSigned=*/true, Op0, Op1))) {
+ AbsOpWillNSW) {
+ if (IsAdd)
+ Op1 = DAG.getNegative(Op1, SDLoc(Op1), VT);
SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
}
// fold (abs (sub x, y)) -> abdu(x, y)
if (hasOperation(ISD::ABDU, VT) && DAG.SignBitIsZero(Op0) &&
DAG.SignBitIsZero(Op1)) {
+ if (IsAdd)
+ Op1 = DAG.getNegative(Op1, SDLoc(Op1), VT);
SDValue ABD = DAG.getNode(ISD::ABDU, DL, VT, Op0, Op1);
return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
}
return SDValue();
}
+ // The IsAdd case explicitly checks for const/bv-of-const. This implies either
+ // Opc0 != Op1.getOpcode() or Opc0 is not in {zext/sext/sign_ext_inreg}, so
+ // the add was already handled by the if statement above.
+ assert(!IsAdd);
+
EVT VT0, VT1;
if (Opc0 == ISD::SIGN_EXTEND_INREG) {
VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
diff --git a/llvm/test/CodeGen/AArch64/neon-abd.ll b/llvm/test/CodeGen/AArch64/neon-abd.ll
index 87a88f8c8e41f..e0406e40ff6de 100644
--- a/llvm/test/CodeGen/AArch64/neon-abd.ll
+++ b/llvm/test/CodeGen/AArch64/neon-abd.ll
@@ -762,12 +762,10 @@ entry:
define <4 x i16> @abs_diff_add_v4i16(<4 x i16> %a, <4 x i16> %c) {
; CHECK-LABEL: abs_diff_add_v4i16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #-1170 // =0xfffffb6e
-; CHECK-NEXT: dup v2.4s, w8
-; CHECK-NEXT: saddw v0.4s, v2.4s, v0.4h
-; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: xtn v0.4h, v0.4s
-; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: mov w8, #1170 // =0x492
+; CHECK-NEXT: dup v2.4h, w8
+; CHECK-NEXT: saba v1.4h, v0.4h, v2.4h
+; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%conv = sext <4 x i16> %a to <4 x i32>
@@ -784,13 +782,11 @@ entry:
define <4 x i16> @abs_diff_add_v4i16_rii(<4 x i16> %a) {
; CHECK-LABEL: abs_diff_add_v4i16_rii:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mvni v1.4s, #146
; CHECK-NEXT: mov w8, #2371 // =0x943
-; CHECK-NEXT: saddw v0.4s, v1.4s, v0.4h
+; CHECK-NEXT: movi v2.4h, #147
; CHECK-NEXT: dup v1.4h, w8
-; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: xtn v0.4h, v0.4s
-; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: saba v1.4h, v0.4h, v2.4h
+; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
entry:
%conv = sext <4 x i16> %a to <4 x i32>
More information about the llvm-commits
mailing list