[llvm] [DAGCombiner] Handle type-promoted constants in SDIV exact lowering (PR #169950)

Valeriy Savchenko via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 28 10:19:31 PST 2025


https://github.com/SavchenkoValeriy created https://github.com/llvm/llvm-project/pull/169950

Builds upon the solution proposed for #169491 and #169924 and applies it to SDIV exact as well. Almost a carbon copy of the UDIV exact solution from #169949.

>From da782143cf53c46352fbb08a060baf028917d096 Mon Sep 17 00:00:00 2001
From: Valeriy Savchenko <vsavchenko at apple.com>
Date: Tue, 25 Nov 2025 12:01:50 +0000
Subject: [PATCH 1/6] [AArch64][NFC] Add test for vector udiv scalarization

---
 .../AArch64/udiv-by-const-promoted-ops.ll     | 99 +++++++++++++++++++
 1 file changed, 99 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll

diff --git a/llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll b/llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll
new file mode 100644
index 0000000000000..d552d0b8f9ba9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll
@@ -0,0 +1,99 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; This test verifies that udiv by constant works correctly even when type
+; legalization promotes constant operands (e.g., i16 -> i32 in BUILD_VECTOR).
+; This is a regression test for a bug where v16i16 would be split into two
+; v8i16 operations during legalization, the i16 constants would be promoted
+; to i32, and then the second DAGCombine round would fail to recognize the
+; promoted constants when trying to convert udiv into mul+shift.
+
+define <8 x i16> @udiv_v8i16_by_255(<8 x i16> %x) {
+; CHECK-LABEL: udiv_v8i16_by_255:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32897 // =0x8081
+; CHECK-NEXT:    dup v1.8h, w8
+; CHECK-NEXT:    umull2 v2.4s, v0.8h, v1.8h
+; CHECK-NEXT:    umull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v2.8h
+; CHECK-NEXT:    ushr v0.8h, v0.8h, #7
+; CHECK-NEXT:    ret
+  %div = udiv <8 x i16> %x, splat (i16 255)
+  ret <8 x i16> %div
+}
+
+define <16 x i16> @udiv_v16i16_by_255(<16 x i16> %x) {
+; CHECK-LABEL: udiv_v16i16_by_255:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w9, v0.h[0]
+; CHECK-NEXT:    umov w11, v1.h[0]
+; CHECK-NEXT:    mov w8, #258 // =0x102
+; CHECK-NEXT:    movk w8, #257, lsl #16
+; CHECK-NEXT:    umov w10, v0.h[1]
+; CHECK-NEXT:    umov w12, v1.h[1]
+; CHECK-NEXT:    umov w13, v0.h[2]
+; CHECK-NEXT:    umov w14, v1.h[2]
+; CHECK-NEXT:    umull x9, w9, w8
+; CHECK-NEXT:    umull x11, w11, w8
+; CHECK-NEXT:    umull x10, w10, w8
+; CHECK-NEXT:    umull x12, w12, w8
+; CHECK-NEXT:    lsr x9, x9, #32
+; CHECK-NEXT:    lsr x11, x11, #32
+; CHECK-NEXT:    umull x13, w13, w8
+; CHECK-NEXT:    fmov s2, w9
+; CHECK-NEXT:    lsr x10, x10, #32
+; CHECK-NEXT:    umov w9, v0.h[3]
+; CHECK-NEXT:    fmov s3, w11
+; CHECK-NEXT:    lsr x12, x12, #32
+; CHECK-NEXT:    umull x11, w14, w8
+; CHECK-NEXT:    umov w14, v1.h[3]
+; CHECK-NEXT:    mov v2.h[1], w10
+; CHECK-NEXT:    lsr x10, x13, #32
+; CHECK-NEXT:    mov v3.h[1], w12
+; CHECK-NEXT:    umov w12, v0.h[4]
+; CHECK-NEXT:    lsr x11, x11, #32
+; CHECK-NEXT:    umull x9, w9, w8
+; CHECK-NEXT:    umull x13, w14, w8
+; CHECK-NEXT:    umov w14, v1.h[4]
+; CHECK-NEXT:    mov v2.h[2], w10
+; CHECK-NEXT:    mov v3.h[2], w11
+; CHECK-NEXT:    lsr x9, x9, #32
+; CHECK-NEXT:    umull x10, w12, w8
+; CHECK-NEXT:    lsr x12, x13, #32
+; CHECK-NEXT:    umov w11, v0.h[5]
+; CHECK-NEXT:    umull x13, w14, w8
+; CHECK-NEXT:    umov w14, v1.h[5]
+; CHECK-NEXT:    mov v2.h[3], w9
+; CHECK-NEXT:    lsr x9, x10, #32
+; CHECK-NEXT:    mov v3.h[3], w12
+; CHECK-NEXT:    lsr x12, x13, #32
+; CHECK-NEXT:    umull x10, w11, w8
+; CHECK-NEXT:    umov w11, v0.h[6]
+; CHECK-NEXT:    umull x13, w14, w8
+; CHECK-NEXT:    umov w14, v1.h[6]
+; CHECK-NEXT:    mov v2.h[4], w9
+; CHECK-NEXT:    umov w9, v0.h[7]
+; CHECK-NEXT:    mov v3.h[4], w12
+; CHECK-NEXT:    lsr x10, x10, #32
+; CHECK-NEXT:    lsr x12, x13, #32
+; CHECK-NEXT:    umull x11, w11, w8
+; CHECK-NEXT:    umull x13, w14, w8
+; CHECK-NEXT:    umov w14, v1.h[7]
+; CHECK-NEXT:    mov v2.h[5], w10
+; CHECK-NEXT:    umull x9, w9, w8
+; CHECK-NEXT:    mov v3.h[5], w12
+; CHECK-NEXT:    lsr x10, x11, #32
+; CHECK-NEXT:    lsr x11, x13, #32
+; CHECK-NEXT:    umull x8, w14, w8
+; CHECK-NEXT:    lsr x9, x9, #32
+; CHECK-NEXT:    mov v2.h[6], w10
+; CHECK-NEXT:    mov v3.h[6], w11
+; CHECK-NEXT:    lsr x8, x8, #32
+; CHECK-NEXT:    mov v2.h[7], w9
+; CHECK-NEXT:    mov v3.h[7], w8
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    mov v1.16b, v3.16b
+; CHECK-NEXT:    ret
+  %div = udiv <16 x i16> %x, splat (i16 255)
+  ret <16 x i16> %div
+}

>From 3850f0f7ae3a1b21d87356ab045dcee2b47359eb Mon Sep 17 00:00:00 2001
From: Valeriy Savchenko <vsavchenko at apple.com>
Date: Tue, 25 Nov 2025 12:04:21 +0000
Subject: [PATCH 2/6] [DAGCombiner] Allow promoted constants when lowering
 vector UDIVs

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 26 +++++--
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |  7 +-
 llvm/test/CodeGen/AArch64/rem-by-const.ll     | 65 +++-------------
 .../AArch64/udiv-by-const-promoted-ops.ll     | 78 +++----------------
 4 files changed, 46 insertions(+), 130 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6b79dbb46cadc..33c6855349928 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1065,8 +1065,9 @@ static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
 
 // Determines if it is a constant integer or a splat/build vector of constant
 // integers (and undefs).
-// Do not permit build vector implicit truncation.
-static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
+// Do not permit build vector implicit truncation unless AllowTruncation is set.
+static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false,
+                                       bool AllowTruncation = false) {
   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
     return !(Const->isOpaque() && NoOpaques);
   if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
@@ -1076,9 +1077,17 @@ static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
     if (Op.isUndef())
       continue;
     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
-    if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
-        (Const->isOpaque() && NoOpaques))
+    if (!Const || (Const->isOpaque() && NoOpaques))
       return false;
+    // When AllowTruncation is true, allow constants that have been promoted
+    // during type legalization as long as the value fits in the target type.
+    if (AllowTruncation) {
+      if (Const->getAPIntValue().getActiveBits() > BitWidth)
+        return false;
+    } else {
+      if (Const->getAPIntValue().getBitWidth() != BitWidth)
+        return false;
+    }
   }
   return true;
 }
@@ -5322,7 +5331,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
   EVT VT = N->getValueType(0);
 
   // fold (udiv x, (1 << c)) -> x >>u c
-  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
+  if (isConstantOrConstantVector(N1, /*NoOpaques=*/true,
+                                 /*AllowTruncation=*/true)) {
     if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
       AddToWorklist(LogBase2.getNode());
 
@@ -5336,7 +5346,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
   if (N1.getOpcode() == ISD::SHL) {
     SDValue N10 = N1.getOperand(0);
-    if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
+    if (isConstantOrConstantVector(N10, /*NoOpaques=*/true,
+                                   /*AllowTruncation=*/true)) {
       if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
         AddToWorklist(LogBase2.getNode());
 
@@ -5352,7 +5363,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
 
   // fold (udiv x, c) -> alternate
   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
-  if (isConstantOrConstantVector(N1) &&
+  if (isConstantOrConstantVector(N1, /*NoOpaques=*/false,
+                                 /*AllowTruncation=*/true) &&
       !TLI.isIntDivCheap(N->getValueType(0), Attr))
     if (SDValue Op = BuildUDIV(N))
       return Op;
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 5684e0e4c26c4..a3a9b98f0ce6e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6738,7 +6738,9 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
   auto BuildUDIVPattern = [&](ConstantSDNode *C) {
     if (C->isZero())
       return false;
-    const APInt& Divisor = C->getAPIntValue();
+    // Truncate the divisor to the target scalar type in case it was promoted
+    // during type legalization.
+    APInt Divisor = C->getAPIntValue().trunc(EltBits);
 
     SDValue PreShift, MagicFactor, NPQFactor, PostShift;
 
@@ -6779,7 +6781,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
   };
 
   // Collect the shifts/magic values from each element.
-  if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
+  if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern, /*AllowUndefs=*/false,
+                                /*AllowTruncation=*/true))
     return SDValue();
 
   SDValue PreShift, PostShift, MagicFactor, NPQFactor;
diff --git a/llvm/test/CodeGen/AArch64/rem-by-const.ll b/llvm/test/CodeGen/AArch64/rem-by-const.ll
index a55aaeb62830f..ffaf045fa45c2 100644
--- a/llvm/test/CodeGen/AArch64/rem-by-const.ll
+++ b/llvm/test/CodeGen/AArch64/rem-by-const.ll
@@ -1433,35 +1433,13 @@ entry:
 define <4 x i8> @uv4i8_7(<4 x i8> %d, <4 x i8> %e) {
 ; CHECK-SD-LABEL: uv4i8_7:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    mov w8, #18725 // =0x4925
+; CHECK-SD-NEXT:    mov w8, #9363 // =0x2493
 ; CHECK-SD-NEXT:    bic v0.4h, #255, lsl #8
-; CHECK-SD-NEXT:    movk w8, #9362, lsl #16
-; CHECK-SD-NEXT:    umov w9, v0.h[0]
-; CHECK-SD-NEXT:    umov w10, v0.h[1]
-; CHECK-SD-NEXT:    umov w13, v0.h[2]
-; CHECK-SD-NEXT:    umov w15, v0.h[3]
-; CHECK-SD-NEXT:    umull x11, w9, w8
-; CHECK-SD-NEXT:    umull x12, w10, w8
-; CHECK-SD-NEXT:    umull x14, w13, w8
-; CHECK-SD-NEXT:    lsr x11, x11, #32
-; CHECK-SD-NEXT:    umull x8, w15, w8
-; CHECK-SD-NEXT:    lsr x12, x12, #32
-; CHECK-SD-NEXT:    sub w11, w11, w11, lsl #3
-; CHECK-SD-NEXT:    sub w12, w12, w12, lsl #3
-; CHECK-SD-NEXT:    lsr x8, x8, #32
-; CHECK-SD-NEXT:    add w9, w9, w11
-; CHECK-SD-NEXT:    fmov s0, w9
-; CHECK-SD-NEXT:    add w10, w10, w12
-; CHECK-SD-NEXT:    lsr x9, x14, #32
-; CHECK-SD-NEXT:    sub w8, w8, w8, lsl #3
-; CHECK-SD-NEXT:    sub w9, w9, w9, lsl #3
-; CHECK-SD-NEXT:    mov v0.h[1], w10
-; CHECK-SD-NEXT:    add w8, w15, w8
-; CHECK-SD-NEXT:    add w9, w13, w9
-; CHECK-SD-NEXT:    mov v0.h[2], w9
-; CHECK-SD-NEXT:    mov v0.h[3], w8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    movi v2.4h, #7
+; CHECK-SD-NEXT:    dup v1.4h, w8
+; CHECK-SD-NEXT:    umull v1.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT:    shrn v1.4h, v1.4s, #16
+; CHECK-SD-NEXT:    mls v0.4h, v1.4h, v2.4h
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: uv4i8_7:
@@ -1508,32 +1486,13 @@ entry:
 define <4 x i8> @uv4i8_100(<4 x i8> %d, <4 x i8> %e) {
 ; CHECK-SD-LABEL: uv4i8_100:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    mov w8, #23593 // =0x5c29
-; CHECK-SD-NEXT:    mov w14, #100 // =0x64
+; CHECK-SD-NEXT:    mov w8, #656 // =0x290
 ; CHECK-SD-NEXT:    bic v0.4h, #255, lsl #8
-; CHECK-SD-NEXT:    movk w8, #655, lsl #16
-; CHECK-SD-NEXT:    umov w9, v0.h[0]
-; CHECK-SD-NEXT:    umov w10, v0.h[1]
-; CHECK-SD-NEXT:    umov w12, v0.h[2]
-; CHECK-SD-NEXT:    umov w15, v0.h[3]
-; CHECK-SD-NEXT:    umull x11, w9, w8
-; CHECK-SD-NEXT:    umull x13, w10, w8
-; CHECK-SD-NEXT:    lsr x11, x11, #32
-; CHECK-SD-NEXT:    lsr x13, x13, #32
-; CHECK-SD-NEXT:    msub w9, w11, w14, w9
-; CHECK-SD-NEXT:    umull x11, w12, w8
-; CHECK-SD-NEXT:    msub w10, w13, w14, w10
-; CHECK-SD-NEXT:    fmov s0, w9
-; CHECK-SD-NEXT:    umull x8, w15, w8
-; CHECK-SD-NEXT:    lsr x9, x11, #32
-; CHECK-SD-NEXT:    mov v0.h[1], w10
-; CHECK-SD-NEXT:    msub w9, w9, w14, w12
-; CHECK-SD-NEXT:    lsr x8, x8, #32
-; CHECK-SD-NEXT:    msub w8, w8, w14, w15
-; CHECK-SD-NEXT:    mov v0.h[2], w9
-; CHECK-SD-NEXT:    mov v0.h[3], w8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    movi v2.4h, #100
+; CHECK-SD-NEXT:    dup v1.4h, w8
+; CHECK-SD-NEXT:    umull v1.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT:    shrn v1.4h, v1.4s, #16
+; CHECK-SD-NEXT:    mls v0.4h, v1.4h, v2.4h
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: uv4i8_100:
diff --git a/llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll b/llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll
index d552d0b8f9ba9..efe3b84e7a0f0 100644
--- a/llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll
+++ b/llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll
@@ -25,74 +25,16 @@ define <8 x i16> @udiv_v8i16_by_255(<8 x i16> %x) {
 define <16 x i16> @udiv_v16i16_by_255(<16 x i16> %x) {
 ; CHECK-LABEL: udiv_v16i16_by_255:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    umov w9, v0.h[0]
-; CHECK-NEXT:    umov w11, v1.h[0]
-; CHECK-NEXT:    mov w8, #258 // =0x102
-; CHECK-NEXT:    movk w8, #257, lsl #16
-; CHECK-NEXT:    umov w10, v0.h[1]
-; CHECK-NEXT:    umov w12, v1.h[1]
-; CHECK-NEXT:    umov w13, v0.h[2]
-; CHECK-NEXT:    umov w14, v1.h[2]
-; CHECK-NEXT:    umull x9, w9, w8
-; CHECK-NEXT:    umull x11, w11, w8
-; CHECK-NEXT:    umull x10, w10, w8
-; CHECK-NEXT:    umull x12, w12, w8
-; CHECK-NEXT:    lsr x9, x9, #32
-; CHECK-NEXT:    lsr x11, x11, #32
-; CHECK-NEXT:    umull x13, w13, w8
-; CHECK-NEXT:    fmov s2, w9
-; CHECK-NEXT:    lsr x10, x10, #32
-; CHECK-NEXT:    umov w9, v0.h[3]
-; CHECK-NEXT:    fmov s3, w11
-; CHECK-NEXT:    lsr x12, x12, #32
-; CHECK-NEXT:    umull x11, w14, w8
-; CHECK-NEXT:    umov w14, v1.h[3]
-; CHECK-NEXT:    mov v2.h[1], w10
-; CHECK-NEXT:    lsr x10, x13, #32
-; CHECK-NEXT:    mov v3.h[1], w12
-; CHECK-NEXT:    umov w12, v0.h[4]
-; CHECK-NEXT:    lsr x11, x11, #32
-; CHECK-NEXT:    umull x9, w9, w8
-; CHECK-NEXT:    umull x13, w14, w8
-; CHECK-NEXT:    umov w14, v1.h[4]
-; CHECK-NEXT:    mov v2.h[2], w10
-; CHECK-NEXT:    mov v3.h[2], w11
-; CHECK-NEXT:    lsr x9, x9, #32
-; CHECK-NEXT:    umull x10, w12, w8
-; CHECK-NEXT:    lsr x12, x13, #32
-; CHECK-NEXT:    umov w11, v0.h[5]
-; CHECK-NEXT:    umull x13, w14, w8
-; CHECK-NEXT:    umov w14, v1.h[5]
-; CHECK-NEXT:    mov v2.h[3], w9
-; CHECK-NEXT:    lsr x9, x10, #32
-; CHECK-NEXT:    mov v3.h[3], w12
-; CHECK-NEXT:    lsr x12, x13, #32
-; CHECK-NEXT:    umull x10, w11, w8
-; CHECK-NEXT:    umov w11, v0.h[6]
-; CHECK-NEXT:    umull x13, w14, w8
-; CHECK-NEXT:    umov w14, v1.h[6]
-; CHECK-NEXT:    mov v2.h[4], w9
-; CHECK-NEXT:    umov w9, v0.h[7]
-; CHECK-NEXT:    mov v3.h[4], w12
-; CHECK-NEXT:    lsr x10, x10, #32
-; CHECK-NEXT:    lsr x12, x13, #32
-; CHECK-NEXT:    umull x11, w11, w8
-; CHECK-NEXT:    umull x13, w14, w8
-; CHECK-NEXT:    umov w14, v1.h[7]
-; CHECK-NEXT:    mov v2.h[5], w10
-; CHECK-NEXT:    umull x9, w9, w8
-; CHECK-NEXT:    mov v3.h[5], w12
-; CHECK-NEXT:    lsr x10, x11, #32
-; CHECK-NEXT:    lsr x11, x13, #32
-; CHECK-NEXT:    umull x8, w14, w8
-; CHECK-NEXT:    lsr x9, x9, #32
-; CHECK-NEXT:    mov v2.h[6], w10
-; CHECK-NEXT:    mov v3.h[6], w11
-; CHECK-NEXT:    lsr x8, x8, #32
-; CHECK-NEXT:    mov v2.h[7], w9
-; CHECK-NEXT:    mov v3.h[7], w8
-; CHECK-NEXT:    mov v0.16b, v2.16b
-; CHECK-NEXT:    mov v1.16b, v3.16b
+; CHECK-NEXT:    mov w8, #32897 // =0x8081
+; CHECK-NEXT:    dup v2.8h, w8
+; CHECK-NEXT:    umull2 v3.4s, v0.8h, v2.8h
+; CHECK-NEXT:    umull v0.4s, v0.4h, v2.4h
+; CHECK-NEXT:    umull2 v4.4s, v1.8h, v2.8h
+; CHECK-NEXT:    umull v1.4s, v1.4h, v2.4h
+; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v3.8h
+; CHECK-NEXT:    uzp2 v1.8h, v1.8h, v4.8h
+; CHECK-NEXT:    ushr v0.8h, v0.8h, #7
+; CHECK-NEXT:    ushr v1.8h, v1.8h, #7
 ; CHECK-NEXT:    ret
   %div = udiv <16 x i16> %x, splat (i16 255)
   ret <16 x i16> %div

>From 46635125ed367289bf94e9356d0cefe37b602363 Mon Sep 17 00:00:00 2001
From: Valeriy Savchenko <vsavchenko at apple.com>
Date: Fri, 28 Nov 2025 14:21:13 +0000
Subject: [PATCH 3/6] [AArch64][NFC] Add test for vector sdiv scalarization

---
 .../AArch64/sdiv-by-const-promoted-ops.ll     | 141 ++++++++++++++++++
 1 file changed, 141 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll

diff --git a/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll b/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll
new file mode 100644
index 0000000000000..1960a633c0229
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll
@@ -0,0 +1,141 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+define <8 x i16> @sdiv_v8i16_by_7(<8 x i16> %x) {
+; CHECK-LABEL: sdiv_v8i16_by_7:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #18725 // =0x4925
+; CHECK-NEXT:    dup v1.8h, w8
+; CHECK-NEXT:    smull2 v2.4s, v0.8h, v1.8h
+; CHECK-NEXT:    smull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v2.8h
+; CHECK-NEXT:    sshr v0.8h, v0.8h, #1
+; CHECK-NEXT:    usra v0.8h, v0.8h, #15
+; CHECK-NEXT:    ret
+  %div = sdiv <8 x i16> %x, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  ret <8 x i16> %div
+}
+
+define <16 x i16> @sdiv_v16i16_by_7(<16 x i16> %x) {
+; CHECK-LABEL: sdiv_v16i16_by_7:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smov x11, v0.h[1]
+; CHECK-NEXT:    smov x10, v0.h[0]
+; CHECK-NEXT:    mov x8, #-56173 // =0xffffffffffff2493
+; CHECK-NEXT:    smov x13, v0.h[3]
+; CHECK-NEXT:    smov x14, v1.h[1]
+; CHECK-NEXT:    movk x8, #37449, lsl #16
+; CHECK-NEXT:    smov x16, v1.h[0]
+; CHECK-NEXT:    smov w12, v0.h[1]
+; CHECK-NEXT:    smov w15, v0.h[0]
+; CHECK-NEXT:    smov x18, v1.h[2]
+; CHECK-NEXT:    smov w0, v0.h[3]
+; CHECK-NEXT:    smov w1, v1.h[1]
+; CHECK-NEXT:    smull x11, w11, w8
+; CHECK-NEXT:    smov w2, v1.h[0]
+; CHECK-NEXT:    smov x9, v0.h[2]
+; CHECK-NEXT:    smull x10, w10, w8
+; CHECK-NEXT:    smov w17, v0.h[2]
+; CHECK-NEXT:    smov w3, v1.h[2]
+; CHECK-NEXT:    smull x13, w13, w8
+; CHECK-NEXT:    smull x14, w14, w8
+; CHECK-NEXT:    add x12, x12, x11, lsr #32
+; CHECK-NEXT:    smull x16, w16, w8
+; CHECK-NEXT:    add x10, x15, x10, lsr #32
+; CHECK-NEXT:    smull x15, w18, w8
+; CHECK-NEXT:    add x11, x0, x13, lsr #32
+; CHECK-NEXT:    smov x0, v0.h[4]
+; CHECK-NEXT:    add x13, x1, x14, lsr #32
+; CHECK-NEXT:    asr w18, w10, #2
+; CHECK-NEXT:    smull x9, w9, w8
+; CHECK-NEXT:    add x14, x2, x16, lsr #32
+; CHECK-NEXT:    asr w16, w12, #2
+; CHECK-NEXT:    smov x2, v1.h[3]
+; CHECK-NEXT:    add w18, w18, w10, lsr #31
+; CHECK-NEXT:    add x15, x3, x15, lsr #32
+; CHECK-NEXT:    smov w10, v0.h[5]
+; CHECK-NEXT:    add w12, w16, w12, lsr #31
+; CHECK-NEXT:    asr w16, w14, #2
+; CHECK-NEXT:    add x9, x17, x9, lsr #32
+; CHECK-NEXT:    fmov s2, w18
+; CHECK-NEXT:    smov w17, v0.h[4]
+; CHECK-NEXT:    smull x0, w0, w8
+; CHECK-NEXT:    add w14, w16, w14, lsr #31
+; CHECK-NEXT:    asr w16, w13, #2
+; CHECK-NEXT:    asr w1, w9, #2
+; CHECK-NEXT:    smov x18, v0.h[5]
+; CHECK-NEXT:    fmov s3, w14
+; CHECK-NEXT:    mov v2.h[1], w12
+; CHECK-NEXT:    add w12, w16, w13, lsr #31
+; CHECK-NEXT:    smov w13, v1.h[3]
+; CHECK-NEXT:    smov x14, v1.h[4]
+; CHECK-NEXT:    smull x16, w2, w8
+; CHECK-NEXT:    add w1, w1, w9, lsr #31
+; CHECK-NEXT:    add x17, x17, x0, lsr #32
+; CHECK-NEXT:    asr w0, w15, #2
+; CHECK-NEXT:    mov v3.h[1], w12
+; CHECK-NEXT:    smov w12, v1.h[4]
+; CHECK-NEXT:    smull x18, w18, w8
+; CHECK-NEXT:    mov v2.h[2], w1
+; CHECK-NEXT:    asr w1, w11, #2
+; CHECK-NEXT:    add w15, w0, w15, lsr #31
+; CHECK-NEXT:    add x13, x13, x16, lsr #32
+; CHECK-NEXT:    smov x16, v1.h[5]
+; CHECK-NEXT:    smull x14, w14, w8
+; CHECK-NEXT:    add w11, w1, w11, lsr #31
+; CHECK-NEXT:    smov x0, v0.h[6]
+; CHECK-NEXT:    add x10, x10, x18, lsr #32
+; CHECK-NEXT:    asr w1, w13, #2
+; CHECK-NEXT:    mov v3.h[2], w15
+; CHECK-NEXT:    smov w15, v1.h[5]
+; CHECK-NEXT:    add x12, x12, x14, lsr #32
+; CHECK-NEXT:    mov v2.h[3], w11
+; CHECK-NEXT:    asr w11, w17, #2
+; CHECK-NEXT:    add w13, w1, w13, lsr #31
+; CHECK-NEXT:    smull x16, w16, w8
+; CHECK-NEXT:    smov x14, v1.h[6]
+; CHECK-NEXT:    asr w18, w12, #2
+; CHECK-NEXT:    add w11, w11, w17, lsr #31
+; CHECK-NEXT:    smov w9, v0.h[6]
+; CHECK-NEXT:    mov v3.h[3], w13
+; CHECK-NEXT:    smull x17, w0, w8
+; CHECK-NEXT:    smov x0, v1.h[7]
+; CHECK-NEXT:    add x13, x15, x16, lsr #32
+; CHECK-NEXT:    add w12, w18, w12, lsr #31
+; CHECK-NEXT:    smov w16, v1.h[6]
+; CHECK-NEXT:    mov v2.h[4], w11
+; CHECK-NEXT:    smov x11, v0.h[7]
+; CHECK-NEXT:    smull x14, w14, w8
+; CHECK-NEXT:    asr w15, w10, #2
+; CHECK-NEXT:    asr w18, w13, #2
+; CHECK-NEXT:    smov w1, v0.h[7]
+; CHECK-NEXT:    mov v3.h[4], w12
+; CHECK-NEXT:    add x9, x9, x17, lsr #32
+; CHECK-NEXT:    add w10, w15, w10, lsr #31
+; CHECK-NEXT:    add w12, w18, w13, lsr #31
+; CHECK-NEXT:    add x13, x16, x14, lsr #32
+; CHECK-NEXT:    smov w14, v1.h[7]
+; CHECK-NEXT:    smull x11, w11, w8
+; CHECK-NEXT:    smull x8, w0, w8
+; CHECK-NEXT:    mov v2.h[5], w10
+; CHECK-NEXT:    asr w10, w9, #2
+; CHECK-NEXT:    mov v3.h[5], w12
+; CHECK-NEXT:    asr w12, w13, #2
+; CHECK-NEXT:    add w9, w10, w9, lsr #31
+; CHECK-NEXT:    add x10, x1, x11, lsr #32
+; CHECK-NEXT:    add w11, w12, w13, lsr #31
+; CHECK-NEXT:    add x8, x14, x8, lsr #32
+; CHECK-NEXT:    mov v2.h[6], w9
+; CHECK-NEXT:    asr w9, w10, #2
+; CHECK-NEXT:    mov v3.h[6], w11
+; CHECK-NEXT:    asr w11, w8, #2
+; CHECK-NEXT:    add w9, w9, w10, lsr #31
+; CHECK-NEXT:    add w8, w11, w8, lsr #31
+; CHECK-NEXT:    mov v2.h[7], w9
+; CHECK-NEXT:    mov v3.h[7], w8
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    mov v1.16b, v3.16b
+; CHECK-NEXT:    ret
+  %div = sdiv <16 x i16> %x, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  ret <16 x i16> %div
+}

>From 27f623c513cd3027656b5cd3f941544ecbeb7ab2 Mon Sep 17 00:00:00 2001
From: Valeriy Savchenko <vsavchenko at apple.com>
Date: Fri, 28 Nov 2025 14:22:01 +0000
Subject: [PATCH 4/6] [DAGCombiner] Allow promoted constants when lowering
 vector SDIVs

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |   6 +-
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |   8 +-
 llvm/test/CodeGen/AArch64/rem-by-const.ll     |  89 +++---------
 .../AArch64/sdiv-by-const-promoted-ops.ll     | 128 ++----------------
 4 files changed, 38 insertions(+), 193 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 33c6855349928..82a5a4c3744c1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5189,7 +5189,8 @@ static bool isDivisorPowerOfTwo(SDValue Divisor) {
     return false;
   };
 
-  return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
+  return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo, /*AllowUndefs=*/false,
+                                  /*AllowTruncation=*/true);
 }
 
 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
@@ -5253,7 +5254,8 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
   // alternate sequence.  Targets may check function attributes for size/speed
   // trade-offs.
   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
-  if (isConstantOrConstantVector(N1) &&
+  if (isConstantOrConstantVector(N1, /*NoOpaques=*/false,
+                                 /*AllowTruncation=*/true) &&
       !TLI.isIntDivCheap(N->getValueType(0), Attr))
     if (SDValue Op = BuildSDIV(N))
       return Op;
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a3a9b98f0ce6e..df211d9a318d0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6562,8 +6562,9 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
   auto BuildSDIVPattern = [&](ConstantSDNode *C) {
     if (C->isZero())
       return false;
-
-    const APInt &Divisor = C->getAPIntValue();
+    // Truncate the divisor to the target scalar type in case it was promoted
+    // during type legalization.
+    APInt Divisor = C->getAPIntValue().trunc(EltBits);
     SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
     int NumeratorFactor = 0;
     int ShiftMask = -1;
@@ -6593,7 +6594,8 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
   SDValue N1 = N->getOperand(1);
 
   // Collect the shifts / magic values from each element.
-  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
+  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern, /*AllowUndefs=*/false,
+                                /*AllowTruncation=*/true))
     return SDValue();
 
   SDValue MagicFactor, Factor, Shift, ShiftMask;
diff --git a/llvm/test/CodeGen/AArch64/rem-by-const.ll b/llvm/test/CodeGen/AArch64/rem-by-const.ll
index ffaf045fa45c2..c19ded18c94c9 100644
--- a/llvm/test/CodeGen/AArch64/rem-by-const.ll
+++ b/llvm/test/CodeGen/AArch64/rem-by-const.ll
@@ -893,46 +893,15 @@ define <4 x i8> @sv4i8_7(<4 x i8> %d, <4 x i8> %e) {
 ; CHECK-SD-LABEL: sv4i8_7:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    shl v0.4h, v0.4h, #8
-; CHECK-SD-NEXT:    mov x8, #-56173 // =0xffffffffffff2493
-; CHECK-SD-NEXT:    movk x8, #37449, lsl #16
+; CHECK-SD-NEXT:    mov w8, #18725 // =0x4925
+; CHECK-SD-NEXT:    movi v2.4h, #7
+; CHECK-SD-NEXT:    dup v1.4h, w8
 ; CHECK-SD-NEXT:    sshr v0.4h, v0.4h, #8
-; CHECK-SD-NEXT:    smov x10, v0.h[0]
-; CHECK-SD-NEXT:    smov x9, v0.h[1]
-; CHECK-SD-NEXT:    smov w12, v0.h[0]
-; CHECK-SD-NEXT:    smov w11, v0.h[1]
-; CHECK-SD-NEXT:    smov x13, v0.h[2]
-; CHECK-SD-NEXT:    smov w14, v0.h[2]
-; CHECK-SD-NEXT:    smov x17, v0.h[3]
-; CHECK-SD-NEXT:    smull x10, w10, w8
-; CHECK-SD-NEXT:    smull x9, w9, w8
-; CHECK-SD-NEXT:    smull x13, w13, w8
-; CHECK-SD-NEXT:    add x10, x12, x10, lsr #32
-; CHECK-SD-NEXT:    smull x8, w17, w8
-; CHECK-SD-NEXT:    add x9, x11, x9, lsr #32
-; CHECK-SD-NEXT:    asr w16, w10, #2
-; CHECK-SD-NEXT:    add x13, x14, x13, lsr #32
-; CHECK-SD-NEXT:    asr w15, w9, #2
-; CHECK-SD-NEXT:    add w10, w16, w10, lsr #31
-; CHECK-SD-NEXT:    asr w16, w13, #2
-; CHECK-SD-NEXT:    add w9, w15, w9, lsr #31
-; CHECK-SD-NEXT:    smov w15, v0.h[3]
-; CHECK-SD-NEXT:    sub w10, w10, w10, lsl #3
-; CHECK-SD-NEXT:    sub w9, w9, w9, lsl #3
-; CHECK-SD-NEXT:    add w10, w12, w10
-; CHECK-SD-NEXT:    fmov s0, w10
-; CHECK-SD-NEXT:    add w9, w11, w9
-; CHECK-SD-NEXT:    add w10, w16, w13, lsr #31
-; CHECK-SD-NEXT:    add x8, x15, x8, lsr #32
-; CHECK-SD-NEXT:    mov v0.h[1], w9
-; CHECK-SD-NEXT:    sub w9, w10, w10, lsl #3
-; CHECK-SD-NEXT:    asr w10, w8, #2
-; CHECK-SD-NEXT:    add w9, w14, w9
-; CHECK-SD-NEXT:    add w8, w10, w8, lsr #31
-; CHECK-SD-NEXT:    mov v0.h[2], w9
-; CHECK-SD-NEXT:    sub w8, w8, w8, lsl #3
-; CHECK-SD-NEXT:    add w8, w15, w8
-; CHECK-SD-NEXT:    mov v0.h[3], w8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    smull v1.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT:    sshr v1.4s, v1.4s, #17
+; CHECK-SD-NEXT:    xtn v1.4h, v1.4s
+; CHECK-SD-NEXT:    usra v1.4h, v1.4h, #15
+; CHECK-SD-NEXT:    mls v0.4h, v1.4h, v2.4h
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sv4i8_7:
@@ -978,39 +947,15 @@ define <4 x i8> @sv4i8_100(<4 x i8> %d, <4 x i8> %e) {
 ; CHECK-SD-LABEL: sv4i8_100:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    shl v0.4h, v0.4h, #8
-; CHECK-SD-NEXT:    mov w8, #34079 // =0x851f
-; CHECK-SD-NEXT:    mov w14, #100 // =0x64
-; CHECK-SD-NEXT:    movk w8, #20971, lsl #16
-; CHECK-SD-NEXT:    sshr v1.4h, v0.4h, #8
-; CHECK-SD-NEXT:    smov x9, v1.h[0]
-; CHECK-SD-NEXT:    smov x10, v1.h[1]
-; CHECK-SD-NEXT:    smov x11, v1.h[2]
-; CHECK-SD-NEXT:    smov w12, v1.h[0]
-; CHECK-SD-NEXT:    smov x13, v1.h[3]
-; CHECK-SD-NEXT:    smov w15, v1.h[1]
-; CHECK-SD-NEXT:    smull x9, w9, w8
-; CHECK-SD-NEXT:    smull x10, w10, w8
-; CHECK-SD-NEXT:    smull x11, w11, w8
-; CHECK-SD-NEXT:    asr x9, x9, #37
-; CHECK-SD-NEXT:    smull x8, w13, w8
-; CHECK-SD-NEXT:    asr x10, x10, #37
-; CHECK-SD-NEXT:    add w9, w9, w9, lsr #31
-; CHECK-SD-NEXT:    asr x11, x11, #37
-; CHECK-SD-NEXT:    add w10, w10, w10, lsr #31
-; CHECK-SD-NEXT:    asr x8, x8, #37
-; CHECK-SD-NEXT:    msub w9, w9, w14, w12
-; CHECK-SD-NEXT:    msub w10, w10, w14, w15
-; CHECK-SD-NEXT:    add w8, w8, w8, lsr #31
-; CHECK-SD-NEXT:    fmov s0, w9
-; CHECK-SD-NEXT:    add w9, w11, w11, lsr #31
-; CHECK-SD-NEXT:    smov w11, v1.h[2]
-; CHECK-SD-NEXT:    msub w9, w9, w14, w11
-; CHECK-SD-NEXT:    mov v0.h[1], w10
-; CHECK-SD-NEXT:    smov w10, v1.h[3]
-; CHECK-SD-NEXT:    msub w8, w8, w14, w10
-; CHECK-SD-NEXT:    mov v0.h[2], w9
-; CHECK-SD-NEXT:    mov v0.h[3], w8
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    mov w8, #5243 // =0x147b
+; CHECK-SD-NEXT:    movi v2.4h, #100
+; CHECK-SD-NEXT:    dup v1.4h, w8
+; CHECK-SD-NEXT:    sshr v0.4h, v0.4h, #8
+; CHECK-SD-NEXT:    smull v1.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT:    sshr v1.4s, v1.4s, #19
+; CHECK-SD-NEXT:    xtn v1.4h, v1.4s
+; CHECK-SD-NEXT:    usra v1.4h, v1.4h, #15
+; CHECK-SD-NEXT:    mls v0.4h, v1.4h, v2.4h
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sv4i8_100:
diff --git a/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll b/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll
index 1960a633c0229..4298e1e398c70 100644
--- a/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll
+++ b/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll
@@ -19,122 +19,18 @@ define <8 x i16> @sdiv_v8i16_by_7(<8 x i16> %x) {
 define <16 x i16> @sdiv_v16i16_by_7(<16 x i16> %x) {
 ; CHECK-LABEL: sdiv_v16i16_by_7:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smov x11, v0.h[1]
-; CHECK-NEXT:    smov x10, v0.h[0]
-; CHECK-NEXT:    mov x8, #-56173 // =0xffffffffffff2493
-; CHECK-NEXT:    smov x13, v0.h[3]
-; CHECK-NEXT:    smov x14, v1.h[1]
-; CHECK-NEXT:    movk x8, #37449, lsl #16
-; CHECK-NEXT:    smov x16, v1.h[0]
-; CHECK-NEXT:    smov w12, v0.h[1]
-; CHECK-NEXT:    smov w15, v0.h[0]
-; CHECK-NEXT:    smov x18, v1.h[2]
-; CHECK-NEXT:    smov w0, v0.h[3]
-; CHECK-NEXT:    smov w1, v1.h[1]
-; CHECK-NEXT:    smull x11, w11, w8
-; CHECK-NEXT:    smov w2, v1.h[0]
-; CHECK-NEXT:    smov x9, v0.h[2]
-; CHECK-NEXT:    smull x10, w10, w8
-; CHECK-NEXT:    smov w17, v0.h[2]
-; CHECK-NEXT:    smov w3, v1.h[2]
-; CHECK-NEXT:    smull x13, w13, w8
-; CHECK-NEXT:    smull x14, w14, w8
-; CHECK-NEXT:    add x12, x12, x11, lsr #32
-; CHECK-NEXT:    smull x16, w16, w8
-; CHECK-NEXT:    add x10, x15, x10, lsr #32
-; CHECK-NEXT:    smull x15, w18, w8
-; CHECK-NEXT:    add x11, x0, x13, lsr #32
-; CHECK-NEXT:    smov x0, v0.h[4]
-; CHECK-NEXT:    add x13, x1, x14, lsr #32
-; CHECK-NEXT:    asr w18, w10, #2
-; CHECK-NEXT:    smull x9, w9, w8
-; CHECK-NEXT:    add x14, x2, x16, lsr #32
-; CHECK-NEXT:    asr w16, w12, #2
-; CHECK-NEXT:    smov x2, v1.h[3]
-; CHECK-NEXT:    add w18, w18, w10, lsr #31
-; CHECK-NEXT:    add x15, x3, x15, lsr #32
-; CHECK-NEXT:    smov w10, v0.h[5]
-; CHECK-NEXT:    add w12, w16, w12, lsr #31
-; CHECK-NEXT:    asr w16, w14, #2
-; CHECK-NEXT:    add x9, x17, x9, lsr #32
-; CHECK-NEXT:    fmov s2, w18
-; CHECK-NEXT:    smov w17, v0.h[4]
-; CHECK-NEXT:    smull x0, w0, w8
-; CHECK-NEXT:    add w14, w16, w14, lsr #31
-; CHECK-NEXT:    asr w16, w13, #2
-; CHECK-NEXT:    asr w1, w9, #2
-; CHECK-NEXT:    smov x18, v0.h[5]
-; CHECK-NEXT:    fmov s3, w14
-; CHECK-NEXT:    mov v2.h[1], w12
-; CHECK-NEXT:    add w12, w16, w13, lsr #31
-; CHECK-NEXT:    smov w13, v1.h[3]
-; CHECK-NEXT:    smov x14, v1.h[4]
-; CHECK-NEXT:    smull x16, w2, w8
-; CHECK-NEXT:    add w1, w1, w9, lsr #31
-; CHECK-NEXT:    add x17, x17, x0, lsr #32
-; CHECK-NEXT:    asr w0, w15, #2
-; CHECK-NEXT:    mov v3.h[1], w12
-; CHECK-NEXT:    smov w12, v1.h[4]
-; CHECK-NEXT:    smull x18, w18, w8
-; CHECK-NEXT:    mov v2.h[2], w1
-; CHECK-NEXT:    asr w1, w11, #2
-; CHECK-NEXT:    add w15, w0, w15, lsr #31
-; CHECK-NEXT:    add x13, x13, x16, lsr #32
-; CHECK-NEXT:    smov x16, v1.h[5]
-; CHECK-NEXT:    smull x14, w14, w8
-; CHECK-NEXT:    add w11, w1, w11, lsr #31
-; CHECK-NEXT:    smov x0, v0.h[6]
-; CHECK-NEXT:    add x10, x10, x18, lsr #32
-; CHECK-NEXT:    asr w1, w13, #2
-; CHECK-NEXT:    mov v3.h[2], w15
-; CHECK-NEXT:    smov w15, v1.h[5]
-; CHECK-NEXT:    add x12, x12, x14, lsr #32
-; CHECK-NEXT:    mov v2.h[3], w11
-; CHECK-NEXT:    asr w11, w17, #2
-; CHECK-NEXT:    add w13, w1, w13, lsr #31
-; CHECK-NEXT:    smull x16, w16, w8
-; CHECK-NEXT:    smov x14, v1.h[6]
-; CHECK-NEXT:    asr w18, w12, #2
-; CHECK-NEXT:    add w11, w11, w17, lsr #31
-; CHECK-NEXT:    smov w9, v0.h[6]
-; CHECK-NEXT:    mov v3.h[3], w13
-; CHECK-NEXT:    smull x17, w0, w8
-; CHECK-NEXT:    smov x0, v1.h[7]
-; CHECK-NEXT:    add x13, x15, x16, lsr #32
-; CHECK-NEXT:    add w12, w18, w12, lsr #31
-; CHECK-NEXT:    smov w16, v1.h[6]
-; CHECK-NEXT:    mov v2.h[4], w11
-; CHECK-NEXT:    smov x11, v0.h[7]
-; CHECK-NEXT:    smull x14, w14, w8
-; CHECK-NEXT:    asr w15, w10, #2
-; CHECK-NEXT:    asr w18, w13, #2
-; CHECK-NEXT:    smov w1, v0.h[7]
-; CHECK-NEXT:    mov v3.h[4], w12
-; CHECK-NEXT:    add x9, x9, x17, lsr #32
-; CHECK-NEXT:    add w10, w15, w10, lsr #31
-; CHECK-NEXT:    add w12, w18, w13, lsr #31
-; CHECK-NEXT:    add x13, x16, x14, lsr #32
-; CHECK-NEXT:    smov w14, v1.h[7]
-; CHECK-NEXT:    smull x11, w11, w8
-; CHECK-NEXT:    smull x8, w0, w8
-; CHECK-NEXT:    mov v2.h[5], w10
-; CHECK-NEXT:    asr w10, w9, #2
-; CHECK-NEXT:    mov v3.h[5], w12
-; CHECK-NEXT:    asr w12, w13, #2
-; CHECK-NEXT:    add w9, w10, w9, lsr #31
-; CHECK-NEXT:    add x10, x1, x11, lsr #32
-; CHECK-NEXT:    add w11, w12, w13, lsr #31
-; CHECK-NEXT:    add x8, x14, x8, lsr #32
-; CHECK-NEXT:    mov v2.h[6], w9
-; CHECK-NEXT:    asr w9, w10, #2
-; CHECK-NEXT:    mov v3.h[6], w11
-; CHECK-NEXT:    asr w11, w8, #2
-; CHECK-NEXT:    add w9, w9, w10, lsr #31
-; CHECK-NEXT:    add w8, w11, w8, lsr #31
-; CHECK-NEXT:    mov v2.h[7], w9
-; CHECK-NEXT:    mov v3.h[7], w8
-; CHECK-NEXT:    mov v0.16b, v2.16b
-; CHECK-NEXT:    mov v1.16b, v3.16b
+; CHECK-NEXT:    mov w8, #18725 // =0x4925
+; CHECK-NEXT:    dup v2.8h, w8
+; CHECK-NEXT:    smull2 v3.4s, v0.8h, v2.8h
+; CHECK-NEXT:    smull v0.4s, v0.4h, v2.4h
+; CHECK-NEXT:    smull2 v4.4s, v1.8h, v2.8h
+; CHECK-NEXT:    smull v1.4s, v1.4h, v2.4h
+; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v3.8h
+; CHECK-NEXT:    uzp2 v1.8h, v1.8h, v4.8h
+; CHECK-NEXT:    sshr v0.8h, v0.8h, #1
+; CHECK-NEXT:    sshr v1.8h, v1.8h, #1
+; CHECK-NEXT:    usra v0.8h, v0.8h, #15
+; CHECK-NEXT:    usra v1.8h, v1.8h, #15
 ; CHECK-NEXT:    ret
   %div = sdiv <16 x i16> %x, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
   ret <16 x i16> %div

>From 0fb68f43c94b8be584e3de4e87d1b7e259a75b11 Mon Sep 17 00:00:00 2001
From: Valeriy Savchenko <vsavchenko at apple.com>
Date: Fri, 28 Nov 2025 18:13:26 +0000
Subject: [PATCH 5/6] [AArch64][NFC] Add test for vector sdiv exact
 scalarization

---
 .../AArch64/sdiv-by-const-promoted-ops.ll     | 134 ++++++++++++++++++
 1 file changed, 134 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll b/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll
index 4298e1e398c70..fa4b742901e8f 100644
--- a/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll
+++ b/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll
@@ -35,3 +35,137 @@ define <16 x i16> @sdiv_v16i16_by_7(<16 x i16> %x) {
   %div = sdiv <16 x i16> %x, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
   ret <16 x i16> %div
 }
+
+define <8 x i16> @sdiv_exact_v8i16_by_255(<8 x i16> %x) {
+; CHECK-LABEL: sdiv_exact_v8i16_by_255:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvni v1.8h, #1, lsl #8
+; CHECK-NEXT:    mul v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %div = sdiv exact <8 x i16> %x, splat (i16 255)
+  ret <8 x i16> %div
+}
+
+define <16 x i16> @sdiv_exact_v16i16_by_255(<16 x i16> %x) {
+; CHECK-LABEL: sdiv_exact_v16i16_by_255:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smov x11, v0.h[1]
+; CHECK-NEXT:    smov x10, v0.h[0]
+; CHECK-NEXT:    mov x8, #-32639 // =0xffffffffffff8081
+; CHECK-NEXT:    smov x13, v0.h[3]
+; CHECK-NEXT:    smov x14, v1.h[1]
+; CHECK-NEXT:    movk x8, #32896, lsl #16
+; CHECK-NEXT:    smov x16, v1.h[0]
+; CHECK-NEXT:    smov w12, v0.h[1]
+; CHECK-NEXT:    smov w15, v0.h[0]
+; CHECK-NEXT:    smov x18, v1.h[2]
+; CHECK-NEXT:    smov w0, v0.h[3]
+; CHECK-NEXT:    smov w1, v1.h[1]
+; CHECK-NEXT:    smull x11, w11, w8
+; CHECK-NEXT:    smov w2, v1.h[0]
+; CHECK-NEXT:    smov x9, v0.h[2]
+; CHECK-NEXT:    smull x10, w10, w8
+; CHECK-NEXT:    smov w17, v0.h[2]
+; CHECK-NEXT:    smov w3, v1.h[2]
+; CHECK-NEXT:    smull x13, w13, w8
+; CHECK-NEXT:    smull x14, w14, w8
+; CHECK-NEXT:    add x12, x12, x11, lsr #32
+; CHECK-NEXT:    smull x16, w16, w8
+; CHECK-NEXT:    add x10, x15, x10, lsr #32
+; CHECK-NEXT:    smull x15, w18, w8
+; CHECK-NEXT:    add x11, x0, x13, lsr #32
+; CHECK-NEXT:    smov x0, v0.h[4]
+; CHECK-NEXT:    add x13, x1, x14, lsr #32
+; CHECK-NEXT:    asr w18, w10, #7
+; CHECK-NEXT:    smull x9, w9, w8
+; CHECK-NEXT:    add x14, x2, x16, lsr #32
+; CHECK-NEXT:    asr w16, w12, #7
+; CHECK-NEXT:    smov x2, v1.h[3]
+; CHECK-NEXT:    add w18, w18, w10, lsr #31
+; CHECK-NEXT:    add x15, x3, x15, lsr #32
+; CHECK-NEXT:    smov w10, v0.h[5]
+; CHECK-NEXT:    add w12, w16, w12, lsr #31
+; CHECK-NEXT:    asr w16, w14, #7
+; CHECK-NEXT:    add x9, x17, x9, lsr #32
+; CHECK-NEXT:    fmov s2, w18
+; CHECK-NEXT:    smov w17, v0.h[4]
+; CHECK-NEXT:    smull x0, w0, w8
+; CHECK-NEXT:    add w14, w16, w14, lsr #31
+; CHECK-NEXT:    asr w16, w13, #7
+; CHECK-NEXT:    asr w1, w9, #7
+; CHECK-NEXT:    smov x18, v0.h[5]
+; CHECK-NEXT:    fmov s3, w14
+; CHECK-NEXT:    mov v2.h[1], w12
+; CHECK-NEXT:    add w12, w16, w13, lsr #31
+; CHECK-NEXT:    smov w13, v1.h[3]
+; CHECK-NEXT:    smov x14, v1.h[4]
+; CHECK-NEXT:    smull x16, w2, w8
+; CHECK-NEXT:    add w1, w1, w9, lsr #31
+; CHECK-NEXT:    add x17, x17, x0, lsr #32
+; CHECK-NEXT:    asr w0, w15, #7
+; CHECK-NEXT:    mov v3.h[1], w12
+; CHECK-NEXT:    smov w12, v1.h[4]
+; CHECK-NEXT:    smull x18, w18, w8
+; CHECK-NEXT:    mov v2.h[2], w1
+; CHECK-NEXT:    asr w1, w11, #7
+; CHECK-NEXT:    add w15, w0, w15, lsr #31
+; CHECK-NEXT:    add x13, x13, x16, lsr #32
+; CHECK-NEXT:    smov x16, v1.h[5]
+; CHECK-NEXT:    smull x14, w14, w8
+; CHECK-NEXT:    add w11, w1, w11, lsr #31
+; CHECK-NEXT:    smov x0, v0.h[6]
+; CHECK-NEXT:    add x10, x10, x18, lsr #32
+; CHECK-NEXT:    asr w1, w13, #7
+; CHECK-NEXT:    mov v3.h[2], w15
+; CHECK-NEXT:    smov w15, v1.h[5]
+; CHECK-NEXT:    add x12, x12, x14, lsr #32
+; CHECK-NEXT:    mov v2.h[3], w11
+; CHECK-NEXT:    asr w11, w17, #7
+; CHECK-NEXT:    add w13, w1, w13, lsr #31
+; CHECK-NEXT:    smull x16, w16, w8
+; CHECK-NEXT:    smov x14, v1.h[6]
+; CHECK-NEXT:    asr w18, w12, #7
+; CHECK-NEXT:    add w11, w11, w17, lsr #31
+; CHECK-NEXT:    smov w9, v0.h[6]
+; CHECK-NEXT:    mov v3.h[3], w13
+; CHECK-NEXT:    smull x17, w0, w8
+; CHECK-NEXT:    smov x0, v1.h[7]
+; CHECK-NEXT:    add x13, x15, x16, lsr #32
+; CHECK-NEXT:    add w12, w18, w12, lsr #31
+; CHECK-NEXT:    smov w16, v1.h[6]
+; CHECK-NEXT:    mov v2.h[4], w11
+; CHECK-NEXT:    smov x11, v0.h[7]
+; CHECK-NEXT:    smull x14, w14, w8
+; CHECK-NEXT:    asr w15, w10, #7
+; CHECK-NEXT:    asr w18, w13, #7
+; CHECK-NEXT:    smov w1, v0.h[7]
+; CHECK-NEXT:    mov v3.h[4], w12
+; CHECK-NEXT:    add x9, x9, x17, lsr #32
+; CHECK-NEXT:    add w10, w15, w10, lsr #31
+; CHECK-NEXT:    add w12, w18, w13, lsr #31
+; CHECK-NEXT:    add x13, x16, x14, lsr #32
+; CHECK-NEXT:    smov w14, v1.h[7]
+; CHECK-NEXT:    smull x11, w11, w8
+; CHECK-NEXT:    smull x8, w0, w8
+; CHECK-NEXT:    mov v2.h[5], w10
+; CHECK-NEXT:    asr w10, w9, #7
+; CHECK-NEXT:    mov v3.h[5], w12
+; CHECK-NEXT:    asr w12, w13, #7
+; CHECK-NEXT:    add w9, w10, w9, lsr #31
+; CHECK-NEXT:    add x10, x1, x11, lsr #32
+; CHECK-NEXT:    add w11, w12, w13, lsr #31
+; CHECK-NEXT:    add x8, x14, x8, lsr #32
+; CHECK-NEXT:    mov v2.h[6], w9
+; CHECK-NEXT:    asr w9, w10, #7
+; CHECK-NEXT:    mov v3.h[6], w11
+; CHECK-NEXT:    asr w11, w8, #7
+; CHECK-NEXT:    add w9, w9, w10, lsr #31
+; CHECK-NEXT:    add w8, w11, w8, lsr #31
+; CHECK-NEXT:    mov v2.h[7], w9
+; CHECK-NEXT:    mov v3.h[7], w8
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    mov v1.16b, v3.16b
+; CHECK-NEXT:    ret
+  %div = sdiv exact <16 x i16> %x, splat (i16 255)
+  ret <16 x i16> %div
+}

>From feebe2672a336bccf3cfff599cd33d042eb44109 Mon Sep 17 00:00:00 2001
From: Valeriy Savchenko <vsavchenko at apple.com>
Date: Fri, 28 Nov 2025 18:16:13 +0000
Subject: [PATCH 6/6] [DAGCombiner] Allow promoted constants when lowering
 vector SDIV exacts

---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |   8 +-
 .../AArch64/sdiv-by-const-promoted-ops.ll     | 119 +-----------------
 2 files changed, 8 insertions(+), 119 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index df211d9a318d0..21b105db6b82c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6345,7 +6345,6 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
   SDValue Op0 = N->getOperand(0);
   SDValue Op1 = N->getOperand(1);
   EVT VT = N->getValueType(0);
-  EVT SVT = VT.getScalarType();
   EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
   EVT ShSVT = ShVT.getScalarType();
 
@@ -6355,6 +6354,8 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
   auto BuildSDIVPattern = [&](ConstantSDNode *C) {
     if (C->isZero())
       return false;
+
+    EVT CT = C->getValueType(0);
     APInt Divisor = C->getAPIntValue();
     unsigned Shift = Divisor.countr_zero();
     if (Shift) {
@@ -6363,12 +6364,13 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
     }
     APInt Factor = Divisor.multiplicativeInverse();
     Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
-    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
+    Factors.push_back(DAG.getConstant(Factor, dl, CT));
     return true;
   };
 
   // Collect all magic values from the build vector.
-  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
+  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern, /*AllowUndefs=*/false,
+                                /*AllowTruncation=*/true))
     return SDValue();
 
   SDValue Shift, Factor;
diff --git a/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll b/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll
index fa4b742901e8f..b686a855b5276 100644
--- a/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll
+++ b/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll
@@ -49,122 +49,9 @@ define <8 x i16> @sdiv_exact_v8i16_by_255(<8 x i16> %x) {
 define <16 x i16> @sdiv_exact_v16i16_by_255(<16 x i16> %x) {
 ; CHECK-LABEL: sdiv_exact_v16i16_by_255:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    smov x11, v0.h[1]
-; CHECK-NEXT:    smov x10, v0.h[0]
-; CHECK-NEXT:    mov x8, #-32639 // =0xffffffffffff8081
-; CHECK-NEXT:    smov x13, v0.h[3]
-; CHECK-NEXT:    smov x14, v1.h[1]
-; CHECK-NEXT:    movk x8, #32896, lsl #16
-; CHECK-NEXT:    smov x16, v1.h[0]
-; CHECK-NEXT:    smov w12, v0.h[1]
-; CHECK-NEXT:    smov w15, v0.h[0]
-; CHECK-NEXT:    smov x18, v1.h[2]
-; CHECK-NEXT:    smov w0, v0.h[3]
-; CHECK-NEXT:    smov w1, v1.h[1]
-; CHECK-NEXT:    smull x11, w11, w8
-; CHECK-NEXT:    smov w2, v1.h[0]
-; CHECK-NEXT:    smov x9, v0.h[2]
-; CHECK-NEXT:    smull x10, w10, w8
-; CHECK-NEXT:    smov w17, v0.h[2]
-; CHECK-NEXT:    smov w3, v1.h[2]
-; CHECK-NEXT:    smull x13, w13, w8
-; CHECK-NEXT:    smull x14, w14, w8
-; CHECK-NEXT:    add x12, x12, x11, lsr #32
-; CHECK-NEXT:    smull x16, w16, w8
-; CHECK-NEXT:    add x10, x15, x10, lsr #32
-; CHECK-NEXT:    smull x15, w18, w8
-; CHECK-NEXT:    add x11, x0, x13, lsr #32
-; CHECK-NEXT:    smov x0, v0.h[4]
-; CHECK-NEXT:    add x13, x1, x14, lsr #32
-; CHECK-NEXT:    asr w18, w10, #7
-; CHECK-NEXT:    smull x9, w9, w8
-; CHECK-NEXT:    add x14, x2, x16, lsr #32
-; CHECK-NEXT:    asr w16, w12, #7
-; CHECK-NEXT:    smov x2, v1.h[3]
-; CHECK-NEXT:    add w18, w18, w10, lsr #31
-; CHECK-NEXT:    add x15, x3, x15, lsr #32
-; CHECK-NEXT:    smov w10, v0.h[5]
-; CHECK-NEXT:    add w12, w16, w12, lsr #31
-; CHECK-NEXT:    asr w16, w14, #7
-; CHECK-NEXT:    add x9, x17, x9, lsr #32
-; CHECK-NEXT:    fmov s2, w18
-; CHECK-NEXT:    smov w17, v0.h[4]
-; CHECK-NEXT:    smull x0, w0, w8
-; CHECK-NEXT:    add w14, w16, w14, lsr #31
-; CHECK-NEXT:    asr w16, w13, #7
-; CHECK-NEXT:    asr w1, w9, #7
-; CHECK-NEXT:    smov x18, v0.h[5]
-; CHECK-NEXT:    fmov s3, w14
-; CHECK-NEXT:    mov v2.h[1], w12
-; CHECK-NEXT:    add w12, w16, w13, lsr #31
-; CHECK-NEXT:    smov w13, v1.h[3]
-; CHECK-NEXT:    smov x14, v1.h[4]
-; CHECK-NEXT:    smull x16, w2, w8
-; CHECK-NEXT:    add w1, w1, w9, lsr #31
-; CHECK-NEXT:    add x17, x17, x0, lsr #32
-; CHECK-NEXT:    asr w0, w15, #7
-; CHECK-NEXT:    mov v3.h[1], w12
-; CHECK-NEXT:    smov w12, v1.h[4]
-; CHECK-NEXT:    smull x18, w18, w8
-; CHECK-NEXT:    mov v2.h[2], w1
-; CHECK-NEXT:    asr w1, w11, #7
-; CHECK-NEXT:    add w15, w0, w15, lsr #31
-; CHECK-NEXT:    add x13, x13, x16, lsr #32
-; CHECK-NEXT:    smov x16, v1.h[5]
-; CHECK-NEXT:    smull x14, w14, w8
-; CHECK-NEXT:    add w11, w1, w11, lsr #31
-; CHECK-NEXT:    smov x0, v0.h[6]
-; CHECK-NEXT:    add x10, x10, x18, lsr #32
-; CHECK-NEXT:    asr w1, w13, #7
-; CHECK-NEXT:    mov v3.h[2], w15
-; CHECK-NEXT:    smov w15, v1.h[5]
-; CHECK-NEXT:    add x12, x12, x14, lsr #32
-; CHECK-NEXT:    mov v2.h[3], w11
-; CHECK-NEXT:    asr w11, w17, #7
-; CHECK-NEXT:    add w13, w1, w13, lsr #31
-; CHECK-NEXT:    smull x16, w16, w8
-; CHECK-NEXT:    smov x14, v1.h[6]
-; CHECK-NEXT:    asr w18, w12, #7
-; CHECK-NEXT:    add w11, w11, w17, lsr #31
-; CHECK-NEXT:    smov w9, v0.h[6]
-; CHECK-NEXT:    mov v3.h[3], w13
-; CHECK-NEXT:    smull x17, w0, w8
-; CHECK-NEXT:    smov x0, v1.h[7]
-; CHECK-NEXT:    add x13, x15, x16, lsr #32
-; CHECK-NEXT:    add w12, w18, w12, lsr #31
-; CHECK-NEXT:    smov w16, v1.h[6]
-; CHECK-NEXT:    mov v2.h[4], w11
-; CHECK-NEXT:    smov x11, v0.h[7]
-; CHECK-NEXT:    smull x14, w14, w8
-; CHECK-NEXT:    asr w15, w10, #7
-; CHECK-NEXT:    asr w18, w13, #7
-; CHECK-NEXT:    smov w1, v0.h[7]
-; CHECK-NEXT:    mov v3.h[4], w12
-; CHECK-NEXT:    add x9, x9, x17, lsr #32
-; CHECK-NEXT:    add w10, w15, w10, lsr #31
-; CHECK-NEXT:    add w12, w18, w13, lsr #31
-; CHECK-NEXT:    add x13, x16, x14, lsr #32
-; CHECK-NEXT:    smov w14, v1.h[7]
-; CHECK-NEXT:    smull x11, w11, w8
-; CHECK-NEXT:    smull x8, w0, w8
-; CHECK-NEXT:    mov v2.h[5], w10
-; CHECK-NEXT:    asr w10, w9, #7
-; CHECK-NEXT:    mov v3.h[5], w12
-; CHECK-NEXT:    asr w12, w13, #7
-; CHECK-NEXT:    add w9, w10, w9, lsr #31
-; CHECK-NEXT:    add x10, x1, x11, lsr #32
-; CHECK-NEXT:    add w11, w12, w13, lsr #31
-; CHECK-NEXT:    add x8, x14, x8, lsr #32
-; CHECK-NEXT:    mov v2.h[6], w9
-; CHECK-NEXT:    asr w9, w10, #7
-; CHECK-NEXT:    mov v3.h[6], w11
-; CHECK-NEXT:    asr w11, w8, #7
-; CHECK-NEXT:    add w9, w9, w10, lsr #31
-; CHECK-NEXT:    add w8, w11, w8, lsr #31
-; CHECK-NEXT:    mov v2.h[7], w9
-; CHECK-NEXT:    mov v3.h[7], w8
-; CHECK-NEXT:    mov v0.16b, v2.16b
-; CHECK-NEXT:    mov v1.16b, v3.16b
+; CHECK-NEXT:    mvni v2.8h, #1, lsl #8
+; CHECK-NEXT:    mul v0.8h, v0.8h, v2.8h
+; CHECK-NEXT:    mul v1.8h, v1.8h, v2.8h
 ; CHECK-NEXT:    ret
   %div = sdiv exact <16 x i16> %x, splat (i16 255)
   ret <16 x i16> %div



More information about the llvm-commits mailing list