[llvm] [SDAG] (abd? (?ext x), c) -> (zext (abd? x, c)) (PR #176366)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 22 07:38:31 PST 2026
https://github.com/DaKnig updated https://github.com/llvm/llvm-project/pull/176366
>From 6109225498aaee149d584e4a67ef302b0ef8a039 Mon Sep 17 00:00:00 2001
From: DaKnig <37626476+DaKnig at users.noreply.github.com>
Date: Fri, 16 Jan 2026 00:24:32 +0200
Subject: [PATCH 1/2] [SDAG] (abd? (?ext x), c) -> (zext (abd? x, c))
`abd?` on smaller types
---
llvm/test/CodeGen/AArch64/neon-abd.ll | 193 ++++++++++++++++++++++++++
1 file changed, 193 insertions(+)
diff --git a/llvm/test/CodeGen/AArch64/neon-abd.ll b/llvm/test/CodeGen/AArch64/neon-abd.ll
index 314edd2fc81a7..45d368882e110 100644
--- a/llvm/test/CodeGen/AArch64/neon-abd.ll
+++ b/llvm/test/CodeGen/AArch64/neon-abd.ll
@@ -554,6 +554,199 @@ define <2 x i32> @combine_sabd_2s_zerosign_negative(<2 x i32> %a, <2 x i32> %b)
ret <2 x i32> %mask
}
+; select pattern with constant and type shrinking
+define <8 x i32> @sabd_8h_splat_imm(<8 x i16> %a) {
+; CHECK-LABEL: sabd_8h_splat_imm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v2.4s, #89
+; CHECK-NEXT: sshll v3.4s, v0.4h, #0
+; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-NEXT: sabd v1.4s, v0.4s, v2.4s
+; CHECK-NEXT: sabd v0.4s, v3.4s, v2.4s
+; CHECK-NEXT: ret
+entry:
+ %conv = sext <8 x i16> %a to <8 x i32>
+ %sub = sub <8 x i32> %conv, splat(i32 89)
+ %cmp.i = icmp slt <8 x i32> %sub, splat(i32 0)
+ %sub.i = sub <8 x i32> splat(i32 89), %conv
+ %cond.i = select <8 x i1> %cmp.i, <8 x i32> %sub.i, <8 x i32> %sub
+ %conv1 = trunc <8 x i32> %cond.i to <8 x i16>
+ %r = zext <8 x i16> %conv1 to <8 x i32>
+ ret <8 x i32> %r
+}
+
+; ... with uabd
+define <8 x i32> @uabd_8h_splat_imm(<8 x i16> %a) {
+; CHECK-LABEL: uabd_8h_splat_imm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v2.4s, #89
+; CHECK-NEXT: ushll v3.4s, v0.4h, #0
+; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-NEXT: uabd v1.4s, v0.4s, v2.4s
+; CHECK-NEXT: uabd v0.4s, v3.4s, v2.4s
+; CHECK-NEXT: ret
+entry:
+ %conv = zext <8 x i16> %a to <8 x i32>
+ %sub = sub <8 x i32> %conv, splat(i32 89)
+ %cmp.i = icmp slt <8 x i32> %sub, splat(i32 0)
+ %sub.i = sub <8 x i32> splat(i32 89), %conv
+ %cond.i = select <8 x i1> %cmp.i, <8 x i32> %sub.i, <8 x i32> %sub
+ %conv1 = trunc <8 x i32> %cond.i to <8 x i16>
+ %r = zext <8 x i16> %conv1 to <8 x i32>
+ ret <8 x i32> %r
+}
+
+; Same pattern, but the constant operand is a non-splat build_vector of constants
+define <8 x i32> @sabd_8h_bv_imm(<8 x i16> %a) {
+; CHECK-LABEL: sabd_8h_bv_imm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: adrp x8, .LCPI45_0
+; CHECK-NEXT: sshll v2.4s, v0.4h, #0
+; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-NEXT: adrp x9, .LCPI45_1
+; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI45_0]
+; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI45_1]
+; CHECK-NEXT: sabd v1.4s, v0.4s, v1.4s
+; CHECK-NEXT: sabd v0.4s, v2.4s, v3.4s
+; CHECK-NEXT: ret
+entry:
+ %conv = sext <8 x i16> %a to <8 x i32>
+ %sub = sub <8 x i32> %conv, <i32 39, i32 42, i32 51, i32 51, i32 0, i32 -128, i32 127, i32 69>
+ %cmp.i = icmp slt <8 x i32> %sub, splat(i32 0)
+ %sub.i = sub <8 x i32> <i32 39, i32 42, i32 51, i32 51, i32 0, i32 -128, i32 127, i32 69>, %conv
+ %cond.i = select <8 x i1> %cmp.i, <8 x i32> %sub.i, <8 x i32> %sub
+ %conv1 = trunc <8 x i32> %cond.i to <8 x i16>
+ %r = zext <8 x i16> %conv1 to <8 x i32>
+ ret <8 x i32> %r
+}
+
+; ... uabd version
+define <8 x i32> @uabd_8h_bv_imm(<8 x i16> %a) {
+; CHECK-LABEL: uabd_8h_bv_imm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: adrp x8, .LCPI46_0
+; CHECK-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-NEXT: adrp x9, .LCPI46_1
+; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI46_0]
+; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI46_1]
+; CHECK-NEXT: uabd v1.4s, v0.4s, v1.4s
+; CHECK-NEXT: uabd v0.4s, v2.4s, v3.4s
+; CHECK-NEXT: ret
+entry:
+ %conv = zext <8 x i16> %a to <8 x i32>
+ %sub = sub <8 x i32> %conv, <i32 39, i32 42, i32 51, i32 51, i32 0, i32 64000, i32 127, i32 69>
+ %cmp.i = icmp slt <8 x i32> %sub, splat(i32 0)
+ %sub.i = sub <8 x i32> <i32 39, i32 42, i32 51, i32 51, i32 0, i32 64000, i32 127, i32 69>, %conv
+ %cond.i = select <8 x i1> %cmp.i, <8 x i32> %sub.i, <8 x i32> %sub
+ %conv1 = trunc <8 x i32> %cond.i to <8 x i16>
+ %r = zext <8 x i16> %conv1 to <8 x i32>
+ ret <8 x i32> %r
+}
+
+; Now the build_vector mixes a sign-extended scalar with constants
+define <4 x i32> @sabd_4h_bv_non_imm(<4 x i16> %a, i16 %b) {
+; CHECK-LABEL: sabd_4h_bv_non_imm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sxth w9, w0
+; CHECK-NEXT: mov w8, #-128 // =0xffffff80
+; CHECK-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-NEXT: fmov s1, w9
+; CHECK-NEXT: mov v1.s[1], w8
+; CHECK-NEXT: mov w8, #42 // =0x2a
+; CHECK-NEXT: mov v1.s[2], w8
+; CHECK-NEXT: mov w8, #69 // =0x45
+; CHECK-NEXT: mov v1.s[3], w8
+; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+entry:
+ %conv = sext <4 x i16> %a to <4 x i32>
+ %exted.b = sext i16 %b to i32
+ %ze.vec = insertelement <4 x i32> <i32 poison, i32 -128, i32 42, i32 69>, i32 %exted.b, i32 0
+ %sub = sub <4 x i32> %conv, %ze.vec
+ %cmp.i = icmp slt <4 x i32> %sub, splat(i32 0)
+ %sub.i = sub <4 x i32> %ze.vec, %conv
+ %cond.i = select <4 x i1> %cmp.i, <4 x i32> %sub.i, <4 x i32> %sub
+ %conv1 = trunc <4 x i32> %cond.i to <4 x i16>
+ %r = zext <4 x i16> %conv1 to <4 x i32>
+ ret <4 x i32> %r
+}
+
+; ... uabd
+define <4 x i32> @uabd_4h_bv_non_imm(<4 x i16> %a, i16 %b) {
+; CHECK-LABEL: uabd_4h_bv_non_imm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and w9, w0, #0xffff
+; CHECK-NEXT: mov w8, #64000 // =0xfa00
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: fmov s1, w9
+; CHECK-NEXT: mov v1.s[1], w8
+; CHECK-NEXT: mov w8, #13 // =0xd
+; CHECK-NEXT: mov v1.s[2], w8
+; CHECK-NEXT: mov w8, #37 // =0x25
+; CHECK-NEXT: mov v1.s[3], w8
+; CHECK-NEXT: uabd v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %conv = zext <4 x i16> %a to <4 x i32>
+ %exted.b = zext i16 %b to i32
+ %ze.vec = insertelement <4 x i32> <i32 poison, i32 64000, i32 13, i32 37>, i32 %exted.b, i32 0
+ %sub = sub <4 x i32> %conv, %ze.vec
+ %cmp.i = icmp slt <4 x i32> %sub, splat(i32 0)
+ %sub.i = sub <4 x i32> %ze.vec, %conv
+ %cond.i = select <4 x i1> %cmp.i, <4 x i32> %sub.i, <4 x i32> %sub
+ %conv1 = trunc <4 x i32> %cond.i to <4 x i16>
+ %r = zext <4 x i16> %conv1 to <4 x i32>
+ ret <4 x i32> %r
+}
+
+; negative: immediate won't fit in signed i16
+define <8 x i32> @sabd_8s_splat_imm_no_shrink(<8 x i16> %a) {
+; CHECK-LABEL: sabd_8s_splat_imm_no_shrink:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v1.4s, #250, lsl #8
+; CHECK-NEXT: sshll2 v2.4s, v0.8h, #0
+; CHECK-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: sabd v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: bic v1.4s, #3, lsl #16
+; CHECK-NEXT: bic v0.4s, #3, lsl #16
+; CHECK-NEXT: ret
+entry:
+ %conv = sext <8 x i16> %a to <8 x i32>
+ %sub = sub <8 x i32> %conv, splat(i32 64000)
+ %cmp.i = icmp slt <8 x i32> %sub, splat(i32 0)
+ %sub.i = sub <8 x i32> splat(i32 64000), %conv
+ %cond.i = select <8 x i1> %cmp.i, <8 x i32> %sub.i, <8 x i32> %sub
+ %conv1 = trunc <8 x i32> %cond.i to <8 x i16>
+ %r = zext <8 x i16> %conv1 to <8 x i32>
+ ret <8 x i32> %r
+}
+
+; negative: value out of range of unsigned i16
+define <8 x i32> @uabd_8s_splat_imm_no_shrink(<8 x i16> %a) {
+; CHECK-LABEL: uabd_8s_splat_imm_no_shrink:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff
+; CHECK-NEXT: ushll2 v2.4s, v0.8h, #0
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: movi v3.2d, #0x00ffff0000ffff
+; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: sub v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: and v1.16b, v1.16b, v3.16b
+; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT: ret
+entry:
+ %conv = zext <8 x i16> %a to <8 x i32>
+ %sub = sub <8 x i32> %conv, splat(i32 -1)
+ %cmp.i = icmp slt <8 x i32> %sub, splat(i32 0)
+ %sub.i = sub <8 x i32> splat(i32 -1), %conv
+ %cond.i = select <8 x i1> %cmp.i, <8 x i32> %sub.i, <8 x i32> %sub
+ %conv1 = trunc <8 x i32> %cond.i to <8 x i16>
+ %r = zext <8 x i16> %conv1 to <8 x i32>
+ ret <8 x i32> %r
+}
+
declare <8 x i8> @llvm.abs.v8i8(<8 x i8>, i1)
declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)
>From 08d59992e2c1a14376afdd74f465f3052dcb1b6e Mon Sep 17 00:00:00 2001
From: DaKnig <37626476+DaKnig at users.noreply.github.com>
Date: Fri, 16 Jan 2026 00:06:29 +0200
Subject: [PATCH 2/2] code and changes to lit
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 28 +++++++++++++
llvm/test/CodeGen/AArch64/arm64-vabs.ll | 22 ++++------
llvm/test/CodeGen/AArch64/neon-abd.ll | 40 +++++++++----------
3 files changed, 54 insertions(+), 36 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9cd4f6378b4f7..862b128633a94 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5812,6 +5812,34 @@ SDValue DAGCombiner::visitABD(SDNode *N) {
}
}
+ // fold (abd? (?ext ty:x), small_const:c) -> (zext (abd? x, (trunc c)))
+ if (sd_match(N, m_c_BinOp(ISD::ABDU, m_ZExt(m_Value(X)), m_Value(Y))) ||
+ sd_match(N, m_c_BinOp(ISD::ABDS, m_SExt(m_Value(X)), m_Value(Y)))) {
+ EVT SmallVT = X.getValueType();
+ if (!LegalOperations || hasOperation(Opcode, SmallVT)) {
+ uint64_t Bits = SmallVT.getScalarSizeInBits();
+ unsigned RelevantBits =
+ (Opcode == ISD::ABDS) ? DAG.ComputeMaxSignificantBits(Y)
+ : DAG.computeKnownBits(Y).countMaxActiveBits();
+ bool TruncatingYIsCheap = TLI.isTruncateFree(Y, SmallVT) ||
+ ISD::matchUnaryPredicate(
+ Y,
+ [&](auto *C) {
+ const APInt &YConst = C->getAsAPIntVal();
+ return (Opcode == ISD::ABDS)
+ ? YConst.isSignedIntN(Bits)
+ : YConst.isIntN(Bits);
+ },
+ /*AllowUndefs=*/true);
+
+ if (RelevantBits <= Bits && TruncatingYIsCheap) {
+ SDValue NewY = DAG.getNode(ISD::TRUNCATE, SDLoc(Y), SmallVT, Y);
+ SDValue SmallABD = DAG.getNode(Opcode, DL, SmallVT, {X, NewY});
+ return DAG.getZExtOrTrunc(SmallABD, DL, VT);
+ }
+ }
+ }
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index 3271b2f49cbbf..4ceba23df55a8 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -2024,13 +2024,10 @@ define <16 x i16> @uabd16b_i16_const_select(<16 x i8> %a) {
; CHECK-SD-LABEL: uabd16b_i16_const_select:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: adrp x8, .LCPI106_0
-; CHECK-SD-NEXT: ushll.8h v2, v0, #0
-; CHECK-SD-NEXT: ushll2.8h v0, v0, #0
-; CHECK-SD-NEXT: adrp x9, .LCPI106_1
-; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI106_0]
-; CHECK-SD-NEXT: ldr q1, [x9, :lo12:.LCPI106_1]
-; CHECK-SD-NEXT: uabd.8h v1, v0, v1
-; CHECK-SD-NEXT: uabd.8h v0, v2, v3
+; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI106_0]
+; CHECK-SD-NEXT: uabd.16b v0, v0, v1
+; CHECK-SD-NEXT: ushll2.8h v1, v0, #0
+; CHECK-SD-NEXT: ushll.8h v0, v0, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uabd16b_i16_const_select:
@@ -2069,13 +2066,10 @@ define <16 x i16> @sabd16b_i16_const_select(<16 x i8> %a) {
; CHECK-SD-LABEL: sabd16b_i16_const_select:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: adrp x8, .LCPI107_0
-; CHECK-SD-NEXT: sshll.8h v2, v0, #0
-; CHECK-SD-NEXT: sshll2.8h v0, v0, #0
-; CHECK-SD-NEXT: adrp x9, .LCPI107_1
-; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI107_0]
-; CHECK-SD-NEXT: ldr q1, [x9, :lo12:.LCPI107_1]
-; CHECK-SD-NEXT: sabd.8h v1, v0, v1
-; CHECK-SD-NEXT: sabd.8h v0, v2, v3
+; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI107_0]
+; CHECK-SD-NEXT: sabd.16b v0, v0, v1
+; CHECK-SD-NEXT: ushll2.8h v1, v0, #0
+; CHECK-SD-NEXT: ushll.8h v0, v0, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sabd16b_i16_const_select:
diff --git a/llvm/test/CodeGen/AArch64/neon-abd.ll b/llvm/test/CodeGen/AArch64/neon-abd.ll
index 45d368882e110..c9f3fc44ddcb1 100644
--- a/llvm/test/CodeGen/AArch64/neon-abd.ll
+++ b/llvm/test/CodeGen/AArch64/neon-abd.ll
@@ -558,11 +558,10 @@ define <2 x i32> @combine_sabd_2s_zerosign_negative(<2 x i32> %a, <2 x i32> %b)
define <8 x i32> @sabd_8h_splat_imm(<8 x i16> %a) {
; CHECK-LABEL: sabd_8h_splat_imm:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v2.4s, #89
-; CHECK-NEXT: sshll v3.4s, v0.4h, #0
-; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
-; CHECK-NEXT: sabd v1.4s, v0.4s, v2.4s
-; CHECK-NEXT: sabd v0.4s, v3.4s, v2.4s
+; CHECK-NEXT: movi v2.4h, #89
+; CHECK-NEXT: movi v1.8h, #89
+; CHECK-NEXT: sabdl2 v1.4s, v0.8h, v1.8h
+; CHECK-NEXT: sabdl v0.4s, v0.4h, v2.4h
; CHECK-NEXT: ret
entry:
%conv = sext <8 x i16> %a to <8 x i32>
@@ -579,11 +578,10 @@ entry:
define <8 x i32> @uabd_8h_splat_imm(<8 x i16> %a) {
; CHECK-LABEL: uabd_8h_splat_imm:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v2.4s, #89
-; CHECK-NEXT: ushll v3.4s, v0.4h, #0
-; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
-; CHECK-NEXT: uabd v1.4s, v0.4s, v2.4s
-; CHECK-NEXT: uabd v0.4s, v3.4s, v2.4s
+; CHECK-NEXT: movi v2.4h, #89
+; CHECK-NEXT: movi v1.8h, #89
+; CHECK-NEXT: uabdl2 v1.4s, v0.8h, v1.8h
+; CHECK-NEXT: uabdl v0.4s, v0.4h, v2.4h
; CHECK-NEXT: ret
entry:
%conv = zext <8 x i16> %a to <8 x i32>
@@ -600,14 +598,13 @@ entry:
define <8 x i32> @sabd_8h_bv_imm(<8 x i16> %a) {
; CHECK-LABEL: sabd_8h_bv_imm:
; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: adrp x8, .LCPI45_0
-; CHECK-NEXT: sshll v2.4s, v0.4h, #0
-; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
; CHECK-NEXT: adrp x9, .LCPI45_1
-; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI45_0]
-; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI45_1]
-; CHECK-NEXT: sabd v1.4s, v0.4s, v1.4s
-; CHECK-NEXT: sabd v0.4s, v2.4s, v3.4s
+; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI45_0]
+; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI45_1]
+; CHECK-NEXT: sabdl v0.4s, v0.4h, v3.4h
+; CHECK-NEXT: sabdl v1.4s, v1.4h, v2.4h
; CHECK-NEXT: ret
entry:
%conv = sext <8 x i16> %a to <8 x i32>
@@ -624,14 +621,13 @@ entry:
define <8 x i32> @uabd_8h_bv_imm(<8 x i16> %a) {
; CHECK-LABEL: uabd_8h_bv_imm:
; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: adrp x8, .LCPI46_0
-; CHECK-NEXT: ushll v2.4s, v0.4h, #0
-; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
; CHECK-NEXT: adrp x9, .LCPI46_1
-; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI46_0]
-; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI46_1]
-; CHECK-NEXT: uabd v1.4s, v0.4s, v1.4s
-; CHECK-NEXT: uabd v0.4s, v2.4s, v3.4s
+; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI46_0]
+; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI46_1]
+; CHECK-NEXT: uabdl v0.4s, v0.4h, v3.4h
+; CHECK-NEXT: uabdl v1.4s, v1.4h, v2.4h
; CHECK-NEXT: ret
entry:
%conv = zext <8 x i16> %a to <8 x i32>
More information about the llvm-commits
mailing list