[llvm] [DAGCombiner] Handle type-promoted constants in UDIV lowering (PR #169491)
Valeriy Savchenko via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 25 06:31:43 PST 2025
https://github.com/SavchenkoValeriy updated https://github.com/llvm/llvm-project/pull/169491
>From da782143cf53c46352fbb08a060baf028917d096 Mon Sep 17 00:00:00 2001
From: Valeriy Savchenko <vsavchenko at apple.com>
Date: Tue, 25 Nov 2025 12:01:50 +0000
Subject: [PATCH 1/2] [AArch64][NFC] Add test for vector udiv scalarization
---
.../AArch64/udiv-by-const-promoted-ops.ll | 99 +++++++++++++++++++
1 file changed, 99 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll
diff --git a/llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll b/llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll
new file mode 100644
index 0000000000000..d552d0b8f9ba9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll
@@ -0,0 +1,99 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; This test verifies that udiv by constant works correctly even when type
+; legalization promotes constant operands (e.g., i16 -> i32 in BUILD_VECTOR).
+; This is a regression test for a bug where v16i16 would be split into two
+; v8i16 operations during legalization, the i16 constants would be promoted
+; to i32, and then the second DAGCombine round would fail to recognize the
+; promoted constants when trying to convert udiv into mul+shift.
+
+define <8 x i16> @udiv_v8i16_by_255(<8 x i16> %x) {
+; CHECK-LABEL: udiv_v8i16_by_255:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #32897 // =0x8081
+; CHECK-NEXT: dup v1.8h, w8
+; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h
+; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
+; CHECK-NEXT: ushr v0.8h, v0.8h, #7
+; CHECK-NEXT: ret
+ %div = udiv <8 x i16> %x, splat (i16 255)
+ ret <8 x i16> %div
+}
+
+define <16 x i16> @udiv_v16i16_by_255(<16 x i16> %x) {
+; CHECK-LABEL: udiv_v16i16_by_255:
+; CHECK: // %bb.0:
+; CHECK-NEXT: umov w9, v0.h[0]
+; CHECK-NEXT: umov w11, v1.h[0]
+; CHECK-NEXT: mov w8, #258 // =0x102
+; CHECK-NEXT: movk w8, #257, lsl #16
+; CHECK-NEXT: umov w10, v0.h[1]
+; CHECK-NEXT: umov w12, v1.h[1]
+; CHECK-NEXT: umov w13, v0.h[2]
+; CHECK-NEXT: umov w14, v1.h[2]
+; CHECK-NEXT: umull x9, w9, w8
+; CHECK-NEXT: umull x11, w11, w8
+; CHECK-NEXT: umull x10, w10, w8
+; CHECK-NEXT: umull x12, w12, w8
+; CHECK-NEXT: lsr x9, x9, #32
+; CHECK-NEXT: lsr x11, x11, #32
+; CHECK-NEXT: umull x13, w13, w8
+; CHECK-NEXT: fmov s2, w9
+; CHECK-NEXT: lsr x10, x10, #32
+; CHECK-NEXT: umov w9, v0.h[3]
+; CHECK-NEXT: fmov s3, w11
+; CHECK-NEXT: lsr x12, x12, #32
+; CHECK-NEXT: umull x11, w14, w8
+; CHECK-NEXT: umov w14, v1.h[3]
+; CHECK-NEXT: mov v2.h[1], w10
+; CHECK-NEXT: lsr x10, x13, #32
+; CHECK-NEXT: mov v3.h[1], w12
+; CHECK-NEXT: umov w12, v0.h[4]
+; CHECK-NEXT: lsr x11, x11, #32
+; CHECK-NEXT: umull x9, w9, w8
+; CHECK-NEXT: umull x13, w14, w8
+; CHECK-NEXT: umov w14, v1.h[4]
+; CHECK-NEXT: mov v2.h[2], w10
+; CHECK-NEXT: mov v3.h[2], w11
+; CHECK-NEXT: lsr x9, x9, #32
+; CHECK-NEXT: umull x10, w12, w8
+; CHECK-NEXT: lsr x12, x13, #32
+; CHECK-NEXT: umov w11, v0.h[5]
+; CHECK-NEXT: umull x13, w14, w8
+; CHECK-NEXT: umov w14, v1.h[5]
+; CHECK-NEXT: mov v2.h[3], w9
+; CHECK-NEXT: lsr x9, x10, #32
+; CHECK-NEXT: mov v3.h[3], w12
+; CHECK-NEXT: lsr x12, x13, #32
+; CHECK-NEXT: umull x10, w11, w8
+; CHECK-NEXT: umov w11, v0.h[6]
+; CHECK-NEXT: umull x13, w14, w8
+; CHECK-NEXT: umov w14, v1.h[6]
+; CHECK-NEXT: mov v2.h[4], w9
+; CHECK-NEXT: umov w9, v0.h[7]
+; CHECK-NEXT: mov v3.h[4], w12
+; CHECK-NEXT: lsr x10, x10, #32
+; CHECK-NEXT: lsr x12, x13, #32
+; CHECK-NEXT: umull x11, w11, w8
+; CHECK-NEXT: umull x13, w14, w8
+; CHECK-NEXT: umov w14, v1.h[7]
+; CHECK-NEXT: mov v2.h[5], w10
+; CHECK-NEXT: umull x9, w9, w8
+; CHECK-NEXT: mov v3.h[5], w12
+; CHECK-NEXT: lsr x10, x11, #32
+; CHECK-NEXT: lsr x11, x13, #32
+; CHECK-NEXT: umull x8, w14, w8
+; CHECK-NEXT: lsr x9, x9, #32
+; CHECK-NEXT: mov v2.h[6], w10
+; CHECK-NEXT: mov v3.h[6], w11
+; CHECK-NEXT: lsr x8, x8, #32
+; CHECK-NEXT: mov v2.h[7], w9
+; CHECK-NEXT: mov v3.h[7], w8
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: ret
+ %div = udiv <16 x i16> %x, splat (i16 255)
+ ret <16 x i16> %div
+}
>From 3850f0f7ae3a1b21d87356ab045dcee2b47359eb Mon Sep 17 00:00:00 2001
From: Valeriy Savchenko <vsavchenko at apple.com>
Date: Tue, 25 Nov 2025 12:04:21 +0000
Subject: [PATCH 2/2] [DAGCombiner] Allow promoted constants when lowering
vector UDIVs
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 26 +++++--
.../CodeGen/SelectionDAG/TargetLowering.cpp | 7 +-
llvm/test/CodeGen/AArch64/rem-by-const.ll | 65 +++-------------
.../AArch64/udiv-by-const-promoted-ops.ll | 78 +++----------------
4 files changed, 46 insertions(+), 130 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6b79dbb46cadc..33c6855349928 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1065,8 +1065,9 @@ static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
// Determines if it is a constant integer or a splat/build vector of constant
// integers (and undefs).
-// Do not permit build vector implicit truncation.
-static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
+// Do not permit build vector implicit truncation unless AllowTruncation is set.
+static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false,
+ bool AllowTruncation = false) {
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
return !(Const->isOpaque() && NoOpaques);
if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
@@ -1076,9 +1077,17 @@ static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
if (Op.isUndef())
continue;
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
- if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
- (Const->isOpaque() && NoOpaques))
+ if (!Const || (Const->isOpaque() && NoOpaques))
return false;
+ // When AllowTruncation is true, allow constants that have been promoted
+ // during type legalization as long as the value fits in the target type.
+ if (AllowTruncation) {
+ if (Const->getAPIntValue().getActiveBits() > BitWidth)
+ return false;
+ } else {
+ if (Const->getAPIntValue().getBitWidth() != BitWidth)
+ return false;
+ }
}
return true;
}
@@ -5322,7 +5331,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
EVT VT = N->getValueType(0);
// fold (udiv x, (1 << c)) -> x >>u c
- if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
+ if (isConstantOrConstantVector(N1, /*NoOpaques=*/true,
+ /*AllowTruncation=*/true)) {
if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
AddToWorklist(LogBase2.getNode());
@@ -5336,7 +5346,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
if (N1.getOpcode() == ISD::SHL) {
SDValue N10 = N1.getOperand(0);
- if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
+ if (isConstantOrConstantVector(N10, /*NoOpaques=*/true,
+ /*AllowTruncation=*/true)) {
if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
AddToWorklist(LogBase2.getNode());
@@ -5352,7 +5363,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// fold (udiv x, c) -> alternate
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
- if (isConstantOrConstantVector(N1) &&
+ if (isConstantOrConstantVector(N1, /*NoOpaques=*/false,
+ /*AllowTruncation=*/true) &&
!TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildUDIV(N))
return Op;
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 5684e0e4c26c4..a3a9b98f0ce6e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6738,7 +6738,9 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
auto BuildUDIVPattern = [&](ConstantSDNode *C) {
if (C->isZero())
return false;
- const APInt& Divisor = C->getAPIntValue();
+ // Truncate the divisor to the target scalar type in case it was promoted
+ // during type legalization.
+ APInt Divisor = C->getAPIntValue().trunc(EltBits);
SDValue PreShift, MagicFactor, NPQFactor, PostShift;
@@ -6779,7 +6781,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
};
// Collect the shifts/magic values from each element.
- if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
+ if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern, /*AllowUndefs=*/false,
+ /*AllowTruncation=*/true))
return SDValue();
SDValue PreShift, PostShift, MagicFactor, NPQFactor;
diff --git a/llvm/test/CodeGen/AArch64/rem-by-const.ll b/llvm/test/CodeGen/AArch64/rem-by-const.ll
index a55aaeb62830f..ffaf045fa45c2 100644
--- a/llvm/test/CodeGen/AArch64/rem-by-const.ll
+++ b/llvm/test/CodeGen/AArch64/rem-by-const.ll
@@ -1433,35 +1433,13 @@ entry:
define <4 x i8> @uv4i8_7(<4 x i8> %d, <4 x i8> %e) {
; CHECK-SD-LABEL: uv4i8_7:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov w8, #18725 // =0x4925
+; CHECK-SD-NEXT: mov w8, #9363 // =0x2493
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-SD-NEXT: movk w8, #9362, lsl #16
-; CHECK-SD-NEXT: umov w9, v0.h[0]
-; CHECK-SD-NEXT: umov w10, v0.h[1]
-; CHECK-SD-NEXT: umov w13, v0.h[2]
-; CHECK-SD-NEXT: umov w15, v0.h[3]
-; CHECK-SD-NEXT: umull x11, w9, w8
-; CHECK-SD-NEXT: umull x12, w10, w8
-; CHECK-SD-NEXT: umull x14, w13, w8
-; CHECK-SD-NEXT: lsr x11, x11, #32
-; CHECK-SD-NEXT: umull x8, w15, w8
-; CHECK-SD-NEXT: lsr x12, x12, #32
-; CHECK-SD-NEXT: sub w11, w11, w11, lsl #3
-; CHECK-SD-NEXT: sub w12, w12, w12, lsl #3
-; CHECK-SD-NEXT: lsr x8, x8, #32
-; CHECK-SD-NEXT: add w9, w9, w11
-; CHECK-SD-NEXT: fmov s0, w9
-; CHECK-SD-NEXT: add w10, w10, w12
-; CHECK-SD-NEXT: lsr x9, x14, #32
-; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3
-; CHECK-SD-NEXT: sub w9, w9, w9, lsl #3
-; CHECK-SD-NEXT: mov v0.h[1], w10
-; CHECK-SD-NEXT: add w8, w15, w8
-; CHECK-SD-NEXT: add w9, w13, w9
-; CHECK-SD-NEXT: mov v0.h[2], w9
-; CHECK-SD-NEXT: mov v0.h[3], w8
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: movi v2.4h, #7
+; CHECK-SD-NEXT: dup v1.4h, w8
+; CHECK-SD-NEXT: umull v1.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT: shrn v1.4h, v1.4s, #16
+; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv4i8_7:
@@ -1508,32 +1486,13 @@ entry:
define <4 x i8> @uv4i8_100(<4 x i8> %d, <4 x i8> %e) {
; CHECK-SD-LABEL: uv4i8_100:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov w8, #23593 // =0x5c29
-; CHECK-SD-NEXT: mov w14, #100 // =0x64
+; CHECK-SD-NEXT: mov w8, #656 // =0x290
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-SD-NEXT: movk w8, #655, lsl #16
-; CHECK-SD-NEXT: umov w9, v0.h[0]
-; CHECK-SD-NEXT: umov w10, v0.h[1]
-; CHECK-SD-NEXT: umov w12, v0.h[2]
-; CHECK-SD-NEXT: umov w15, v0.h[3]
-; CHECK-SD-NEXT: umull x11, w9, w8
-; CHECK-SD-NEXT: umull x13, w10, w8
-; CHECK-SD-NEXT: lsr x11, x11, #32
-; CHECK-SD-NEXT: lsr x13, x13, #32
-; CHECK-SD-NEXT: msub w9, w11, w14, w9
-; CHECK-SD-NEXT: umull x11, w12, w8
-; CHECK-SD-NEXT: msub w10, w13, w14, w10
-; CHECK-SD-NEXT: fmov s0, w9
-; CHECK-SD-NEXT: umull x8, w15, w8
-; CHECK-SD-NEXT: lsr x9, x11, #32
-; CHECK-SD-NEXT: mov v0.h[1], w10
-; CHECK-SD-NEXT: msub w9, w9, w14, w12
-; CHECK-SD-NEXT: lsr x8, x8, #32
-; CHECK-SD-NEXT: msub w8, w8, w14, w15
-; CHECK-SD-NEXT: mov v0.h[2], w9
-; CHECK-SD-NEXT: mov v0.h[3], w8
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: movi v2.4h, #100
+; CHECK-SD-NEXT: dup v1.4h, w8
+; CHECK-SD-NEXT: umull v1.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT: shrn v1.4h, v1.4s, #16
+; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv4i8_100:
diff --git a/llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll b/llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll
index d552d0b8f9ba9..efe3b84e7a0f0 100644
--- a/llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll
+++ b/llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll
@@ -25,74 +25,16 @@ define <8 x i16> @udiv_v8i16_by_255(<8 x i16> %x) {
define <16 x i16> @udiv_v16i16_by_255(<16 x i16> %x) {
; CHECK-LABEL: udiv_v16i16_by_255:
; CHECK: // %bb.0:
-; CHECK-NEXT: umov w9, v0.h[0]
-; CHECK-NEXT: umov w11, v1.h[0]
-; CHECK-NEXT: mov w8, #258 // =0x102
-; CHECK-NEXT: movk w8, #257, lsl #16
-; CHECK-NEXT: umov w10, v0.h[1]
-; CHECK-NEXT: umov w12, v1.h[1]
-; CHECK-NEXT: umov w13, v0.h[2]
-; CHECK-NEXT: umov w14, v1.h[2]
-; CHECK-NEXT: umull x9, w9, w8
-; CHECK-NEXT: umull x11, w11, w8
-; CHECK-NEXT: umull x10, w10, w8
-; CHECK-NEXT: umull x12, w12, w8
-; CHECK-NEXT: lsr x9, x9, #32
-; CHECK-NEXT: lsr x11, x11, #32
-; CHECK-NEXT: umull x13, w13, w8
-; CHECK-NEXT: fmov s2, w9
-; CHECK-NEXT: lsr x10, x10, #32
-; CHECK-NEXT: umov w9, v0.h[3]
-; CHECK-NEXT: fmov s3, w11
-; CHECK-NEXT: lsr x12, x12, #32
-; CHECK-NEXT: umull x11, w14, w8
-; CHECK-NEXT: umov w14, v1.h[3]
-; CHECK-NEXT: mov v2.h[1], w10
-; CHECK-NEXT: lsr x10, x13, #32
-; CHECK-NEXT: mov v3.h[1], w12
-; CHECK-NEXT: umov w12, v0.h[4]
-; CHECK-NEXT: lsr x11, x11, #32
-; CHECK-NEXT: umull x9, w9, w8
-; CHECK-NEXT: umull x13, w14, w8
-; CHECK-NEXT: umov w14, v1.h[4]
-; CHECK-NEXT: mov v2.h[2], w10
-; CHECK-NEXT: mov v3.h[2], w11
-; CHECK-NEXT: lsr x9, x9, #32
-; CHECK-NEXT: umull x10, w12, w8
-; CHECK-NEXT: lsr x12, x13, #32
-; CHECK-NEXT: umov w11, v0.h[5]
-; CHECK-NEXT: umull x13, w14, w8
-; CHECK-NEXT: umov w14, v1.h[5]
-; CHECK-NEXT: mov v2.h[3], w9
-; CHECK-NEXT: lsr x9, x10, #32
-; CHECK-NEXT: mov v3.h[3], w12
-; CHECK-NEXT: lsr x12, x13, #32
-; CHECK-NEXT: umull x10, w11, w8
-; CHECK-NEXT: umov w11, v0.h[6]
-; CHECK-NEXT: umull x13, w14, w8
-; CHECK-NEXT: umov w14, v1.h[6]
-; CHECK-NEXT: mov v2.h[4], w9
-; CHECK-NEXT: umov w9, v0.h[7]
-; CHECK-NEXT: mov v3.h[4], w12
-; CHECK-NEXT: lsr x10, x10, #32
-; CHECK-NEXT: lsr x12, x13, #32
-; CHECK-NEXT: umull x11, w11, w8
-; CHECK-NEXT: umull x13, w14, w8
-; CHECK-NEXT: umov w14, v1.h[7]
-; CHECK-NEXT: mov v2.h[5], w10
-; CHECK-NEXT: umull x9, w9, w8
-; CHECK-NEXT: mov v3.h[5], w12
-; CHECK-NEXT: lsr x10, x11, #32
-; CHECK-NEXT: lsr x11, x13, #32
-; CHECK-NEXT: umull x8, w14, w8
-; CHECK-NEXT: lsr x9, x9, #32
-; CHECK-NEXT: mov v2.h[6], w10
-; CHECK-NEXT: mov v3.h[6], w11
-; CHECK-NEXT: lsr x8, x8, #32
-; CHECK-NEXT: mov v2.h[7], w9
-; CHECK-NEXT: mov v3.h[7], w8
-; CHECK-NEXT: mov v0.16b, v2.16b
-; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov w8, #32897 // =0x8081
+; CHECK-NEXT: dup v2.8h, w8
+; CHECK-NEXT: umull2 v3.4s, v0.8h, v2.8h
+; CHECK-NEXT: umull v0.4s, v0.4h, v2.4h
+; CHECK-NEXT: umull2 v4.4s, v1.8h, v2.8h
+; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h
+; CHECK-NEXT: uzp2 v0.8h, v0.8h, v3.8h
+; CHECK-NEXT: uzp2 v1.8h, v1.8h, v4.8h
+; CHECK-NEXT: ushr v0.8h, v0.8h, #7
+; CHECK-NEXT: ushr v1.8h, v1.8h, #7
; CHECK-NEXT: ret
%div = udiv <16 x i16> %x, splat (i16 255)
ret <16 x i16> %div
More information about the llvm-commits
mailing list