[llvm] [DAGCombiner] Handle type-promoted constants in SDIV lowering (PR #169924)
Valeriy Savchenko via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 3 13:55:19 PST 2025
https://github.com/SavchenkoValeriy updated https://github.com/llvm/llvm-project/pull/169924
>From 2850ba8793728310cda396014c2468e43e5f5161 Mon Sep 17 00:00:00 2001
From: Valeriy Savchenko <vsavchenko at apple.com>
Date: Fri, 28 Nov 2025 14:21:13 +0000
Subject: [PATCH 1/2] [AArch64][NFC] Add test for vector sdiv scalarization
---
.../AArch64/sdiv-by-const-promoted-ops.ll | 318 ++++++++++++++++++
1 file changed, 318 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll
diff --git a/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll b/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll
new file mode 100644
index 0000000000000..c6b1ab871d81f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll
@@ -0,0 +1,318 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+define <8 x i16> @sdiv_v8i16_by_7(<8 x i16> %x) {
+; CHECK-LABEL: sdiv_v8i16_by_7:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #18725 // =0x4925
+; CHECK-NEXT: dup v1.8h, w8
+; CHECK-NEXT: smull2 v2.4s, v0.8h, v1.8h
+; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
+; CHECK-NEXT: sshr v0.8h, v0.8h, #1
+; CHECK-NEXT: usra v0.8h, v0.8h, #15
+; CHECK-NEXT: ret
+ %div = sdiv <8 x i16> %x, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+ ret <8 x i16> %div
+}
+
+define <16 x i16> @sdiv_v16i16_by_7(<16 x i16> %x) {
+; CHECK-LABEL: sdiv_v16i16_by_7:
+; CHECK: // %bb.0:
+; CHECK-NEXT: smov x11, v0.h[1]
+; CHECK-NEXT: smov x10, v0.h[0]
+; CHECK-NEXT: mov x8, #-56173 // =0xffffffffffff2493
+; CHECK-NEXT: smov x13, v0.h[3]
+; CHECK-NEXT: smov x14, v1.h[1]
+; CHECK-NEXT: movk x8, #37449, lsl #16
+; CHECK-NEXT: smov x16, v1.h[0]
+; CHECK-NEXT: smov w12, v0.h[1]
+; CHECK-NEXT: smov w15, v0.h[0]
+; CHECK-NEXT: smov x18, v1.h[2]
+; CHECK-NEXT: smov w0, v0.h[3]
+; CHECK-NEXT: smov w1, v1.h[1]
+; CHECK-NEXT: smull x11, w11, w8
+; CHECK-NEXT: smov w2, v1.h[0]
+; CHECK-NEXT: smov x9, v0.h[2]
+; CHECK-NEXT: smull x10, w10, w8
+; CHECK-NEXT: smov w17, v0.h[2]
+; CHECK-NEXT: smov w3, v1.h[2]
+; CHECK-NEXT: smull x13, w13, w8
+; CHECK-NEXT: smull x14, w14, w8
+; CHECK-NEXT: add x12, x12, x11, lsr #32
+; CHECK-NEXT: smull x16, w16, w8
+; CHECK-NEXT: add x10, x15, x10, lsr #32
+; CHECK-NEXT: smull x15, w18, w8
+; CHECK-NEXT: add x11, x0, x13, lsr #32
+; CHECK-NEXT: smov x0, v0.h[4]
+; CHECK-NEXT: add x13, x1, x14, lsr #32
+; CHECK-NEXT: asr w18, w10, #2
+; CHECK-NEXT: smull x9, w9, w8
+; CHECK-NEXT: add x14, x2, x16, lsr #32
+; CHECK-NEXT: asr w16, w12, #2
+; CHECK-NEXT: smov x2, v1.h[3]
+; CHECK-NEXT: add w18, w18, w10, lsr #31
+; CHECK-NEXT: add x15, x3, x15, lsr #32
+; CHECK-NEXT: smov w10, v0.h[5]
+; CHECK-NEXT: add w12, w16, w12, lsr #31
+; CHECK-NEXT: asr w16, w14, #2
+; CHECK-NEXT: add x9, x17, x9, lsr #32
+; CHECK-NEXT: fmov s2, w18
+; CHECK-NEXT: smov w17, v0.h[4]
+; CHECK-NEXT: smull x0, w0, w8
+; CHECK-NEXT: add w14, w16, w14, lsr #31
+; CHECK-NEXT: asr w16, w13, #2
+; CHECK-NEXT: asr w1, w9, #2
+; CHECK-NEXT: smov x18, v0.h[5]
+; CHECK-NEXT: fmov s3, w14
+; CHECK-NEXT: mov v2.h[1], w12
+; CHECK-NEXT: add w12, w16, w13, lsr #31
+; CHECK-NEXT: smov w13, v1.h[3]
+; CHECK-NEXT: smov x14, v1.h[4]
+; CHECK-NEXT: smull x16, w2, w8
+; CHECK-NEXT: add w1, w1, w9, lsr #31
+; CHECK-NEXT: add x17, x17, x0, lsr #32
+; CHECK-NEXT: asr w0, w15, #2
+; CHECK-NEXT: mov v3.h[1], w12
+; CHECK-NEXT: smov w12, v1.h[4]
+; CHECK-NEXT: smull x18, w18, w8
+; CHECK-NEXT: mov v2.h[2], w1
+; CHECK-NEXT: asr w1, w11, #2
+; CHECK-NEXT: add w15, w0, w15, lsr #31
+; CHECK-NEXT: add x13, x13, x16, lsr #32
+; CHECK-NEXT: smov x16, v1.h[5]
+; CHECK-NEXT: smull x14, w14, w8
+; CHECK-NEXT: add w11, w1, w11, lsr #31
+; CHECK-NEXT: smov x0, v0.h[6]
+; CHECK-NEXT: add x10, x10, x18, lsr #32
+; CHECK-NEXT: asr w1, w13, #2
+; CHECK-NEXT: mov v3.h[2], w15
+; CHECK-NEXT: smov w15, v1.h[5]
+; CHECK-NEXT: add x12, x12, x14, lsr #32
+; CHECK-NEXT: mov v2.h[3], w11
+; CHECK-NEXT: asr w11, w17, #2
+; CHECK-NEXT: add w13, w1, w13, lsr #31
+; CHECK-NEXT: smull x16, w16, w8
+; CHECK-NEXT: smov x14, v1.h[6]
+; CHECK-NEXT: asr w18, w12, #2
+; CHECK-NEXT: add w11, w11, w17, lsr #31
+; CHECK-NEXT: smov w9, v0.h[6]
+; CHECK-NEXT: mov v3.h[3], w13
+; CHECK-NEXT: smull x17, w0, w8
+; CHECK-NEXT: smov x0, v1.h[7]
+; CHECK-NEXT: add x13, x15, x16, lsr #32
+; CHECK-NEXT: add w12, w18, w12, lsr #31
+; CHECK-NEXT: smov w16, v1.h[6]
+; CHECK-NEXT: mov v2.h[4], w11
+; CHECK-NEXT: smov x11, v0.h[7]
+; CHECK-NEXT: smull x14, w14, w8
+; CHECK-NEXT: asr w15, w10, #2
+; CHECK-NEXT: asr w18, w13, #2
+; CHECK-NEXT: smov w1, v0.h[7]
+; CHECK-NEXT: mov v3.h[4], w12
+; CHECK-NEXT: add x9, x9, x17, lsr #32
+; CHECK-NEXT: add w10, w15, w10, lsr #31
+; CHECK-NEXT: add w12, w18, w13, lsr #31
+; CHECK-NEXT: add x13, x16, x14, lsr #32
+; CHECK-NEXT: smov w14, v1.h[7]
+; CHECK-NEXT: smull x11, w11, w8
+; CHECK-NEXT: smull x8, w0, w8
+; CHECK-NEXT: mov v2.h[5], w10
+; CHECK-NEXT: asr w10, w9, #2
+; CHECK-NEXT: mov v3.h[5], w12
+; CHECK-NEXT: asr w12, w13, #2
+; CHECK-NEXT: add w9, w10, w9, lsr #31
+; CHECK-NEXT: add x10, x1, x11, lsr #32
+; CHECK-NEXT: add w11, w12, w13, lsr #31
+; CHECK-NEXT: add x8, x14, x8, lsr #32
+; CHECK-NEXT: mov v2.h[6], w9
+; CHECK-NEXT: asr w9, w10, #2
+; CHECK-NEXT: mov v3.h[6], w11
+; CHECK-NEXT: asr w11, w8, #2
+; CHECK-NEXT: add w9, w9, w10, lsr #31
+; CHECK-NEXT: add w8, w11, w8, lsr #31
+; CHECK-NEXT: mov v2.h[7], w9
+; CHECK-NEXT: mov v3.h[7], w8
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: ret
+ %div = sdiv <16 x i16> %x, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+ ret <16 x i16> %div
+}
+
+define <8 x i16> @srem_v8i16_by_7(<8 x i16> %x) {
+; CHECK-LABEL: srem_v8i16_by_7:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #18725 // =0x4925
+; CHECK-NEXT: dup v1.8h, w8
+; CHECK-NEXT: smull2 v2.4s, v0.8h, v1.8h
+; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h
+; CHECK-NEXT: uzp2 v1.8h, v1.8h, v2.8h
+; CHECK-NEXT: movi v2.8h, #7
+; CHECK-NEXT: sshr v1.8h, v1.8h, #1
+; CHECK-NEXT: usra v1.8h, v1.8h, #15
+; CHECK-NEXT: mls v0.8h, v1.8h, v2.8h
+; CHECK-NEXT: ret
+ %rem = srem <8 x i16> %x, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+ ret <8 x i16> %rem
+}
+
+define <16 x i16> @srem_v16i16_by_7(<16 x i16> %x) {
+; CHECK-LABEL: srem_v16i16_by_7:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x19, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w19, -16
+; CHECK-NEXT: smov x10, v0.h[0]
+; CHECK-NEXT: smov x9, v0.h[1]
+; CHECK-NEXT: mov x8, #-56173 // =0xffffffffffff2493
+; CHECK-NEXT: smov x15, v0.h[2]
+; CHECK-NEXT: movk x8, #37449, lsl #16
+; CHECK-NEXT: smov x16, v0.h[3]
+; CHECK-NEXT: smov w13, v0.h[0]
+; CHECK-NEXT: smov w12, v0.h[1]
+; CHECK-NEXT: smov x18, v1.h[0]
+; CHECK-NEXT: smov w17, v0.h[2]
+; CHECK-NEXT: smov w14, v0.h[3]
+; CHECK-NEXT: smov w2, v1.h[0]
+; CHECK-NEXT: smull x10, w10, w8
+; CHECK-NEXT: smov x4, v1.h[1]
+; CHECK-NEXT: smov x0, v0.h[4]
+; CHECK-NEXT: smull x9, w9, w8
+; CHECK-NEXT: smov w11, v0.h[4]
+; CHECK-NEXT: smull x15, w15, w8
+; CHECK-NEXT: smull x1, w16, w8
+; CHECK-NEXT: add x10, x13, x10, lsr #32
+; CHECK-NEXT: smov x16, v0.h[5]
+; CHECK-NEXT: add x3, x12, x9, lsr #32
+; CHECK-NEXT: smull x18, w18, w8
+; CHECK-NEXT: smov w9, v0.h[5]
+; CHECK-NEXT: add x15, x17, x15, lsr #32
+; CHECK-NEXT: asr w6, w10, #2
+; CHECK-NEXT: smull x4, w4, w8
+; CHECK-NEXT: asr w5, w3, #2
+; CHECK-NEXT: add x1, x14, x1, lsr #32
+; CHECK-NEXT: smull x0, w0, w8
+; CHECK-NEXT: add x7, x2, x18, lsr #32
+; CHECK-NEXT: asr w19, w15, #2
+; CHECK-NEXT: smov w18, v1.h[1]
+; CHECK-NEXT: add w10, w6, w10, lsr #31
+; CHECK-NEXT: add w3, w5, w3, lsr #31
+; CHECK-NEXT: asr w5, w1, #2
+; CHECK-NEXT: add w6, w19, w15, lsr #31
+; CHECK-NEXT: asr w15, w7, #2
+; CHECK-NEXT: sub w19, w10, w10, lsl #3
+; CHECK-NEXT: add w1, w5, w1, lsr #31
+; CHECK-NEXT: smov x5, v1.h[2]
+; CHECK-NEXT: sub w3, w3, w3, lsl #3
+; CHECK-NEXT: add x4, x18, x4, lsr #32
+; CHECK-NEXT: add w7, w15, w7, lsr #31
+; CHECK-NEXT: add w13, w13, w19
+; CHECK-NEXT: sub w6, w6, w6, lsl #3
+; CHECK-NEXT: sub w1, w1, w1, lsl #3
+; CHECK-NEXT: fmov s2, w13
+; CHECK-NEXT: add w12, w12, w3
+; CHECK-NEXT: add x3, x11, x0, lsr #32
+; CHECK-NEXT: smov w0, v1.h[2]
+; CHECK-NEXT: asr w13, w4, #2
+; CHECK-NEXT: smull x5, w5, w8
+; CHECK-NEXT: sub w7, w7, w7, lsl #3
+; CHECK-NEXT: add w6, w17, w6
+; CHECK-NEXT: smov w17, v1.h[3]
+; CHECK-NEXT: mov v2.h[1], w12
+; CHECK-NEXT: add w13, w13, w4, lsr #31
+; CHECK-NEXT: smov x4, v1.h[3]
+; CHECK-NEXT: add w2, w2, w7
+; CHECK-NEXT: smov x7, v1.h[4]
+; CHECK-NEXT: add w14, w14, w1
+; CHECK-NEXT: add x5, x0, x5, lsr #32
+; CHECK-NEXT: sub w13, w13, w13, lsl #3
+; CHECK-NEXT: fmov s3, w2
+; CHECK-NEXT: smull x2, w16, w8
+; CHECK-NEXT: smov w16, v1.h[4]
+; CHECK-NEXT: asr w12, w3, #2
+; CHECK-NEXT: mov v2.h[2], w6
+; CHECK-NEXT: asr w6, w5, #2
+; CHECK-NEXT: smull x4, w4, w8
+; CHECK-NEXT: add w18, w18, w13
+; CHECK-NEXT: smov x15, v0.h[6]
+; CHECK-NEXT: add w3, w12, w3, lsr #31
+; CHECK-NEXT: add w5, w6, w5, lsr #31
+; CHECK-NEXT: smull x6, w7, w8
+; CHECK-NEXT: mov v3.h[1], w18
+; CHECK-NEXT: add x4, x17, x4, lsr #32
+; CHECK-NEXT: add x1, x9, x2, lsr #32
+; CHECK-NEXT: sub w3, w3, w3, lsl #3
+; CHECK-NEXT: sub w18, w5, w5, lsl #3
+; CHECK-NEXT: smov x5, v1.h[5]
+; CHECK-NEXT: mov v2.h[3], w14
+; CHECK-NEXT: asr w2, w4, #2
+; CHECK-NEXT: smov w10, v0.h[6]
+; CHECK-NEXT: smov x13, v0.h[7]
+; CHECK-NEXT: add w18, w0, w18
+; CHECK-NEXT: add x0, x16, x6, lsr #32
+; CHECK-NEXT: add w11, w11, w3
+; CHECK-NEXT: add w2, w2, w4, lsr #31
+; CHECK-NEXT: smov w4, v1.h[5]
+; CHECK-NEXT: mov v3.h[2], w18
+; CHECK-NEXT: asr w18, w0, #2
+; CHECK-NEXT: smull x5, w5, w8
+; CHECK-NEXT: smov x3, v1.h[7]
+; CHECK-NEXT: sub w14, w2, w2, lsl #3
+; CHECK-NEXT: smov x2, v1.h[6]
+; CHECK-NEXT: smull x15, w15, w8
+; CHECK-NEXT: add w18, w18, w0, lsr #31
+; CHECK-NEXT: asr w6, w1, #2
+; CHECK-NEXT: mov v2.h[4], w11
+; CHECK-NEXT: add w14, w17, w14
+; CHECK-NEXT: add x17, x4, x5, lsr #32
+; CHECK-NEXT: smov w12, v0.h[7]
+; CHECK-NEXT: mov v3.h[3], w14
+; CHECK-NEXT: sub w14, w18, w18, lsl #3
+; CHECK-NEXT: smov w18, v1.h[6]
+; CHECK-NEXT: smull x2, w2, w8
+; CHECK-NEXT: asr w0, w17, #2
+; CHECK-NEXT: add w1, w6, w1, lsr #31
+; CHECK-NEXT: add w11, w16, w14
+; CHECK-NEXT: add x15, x10, x15, lsr #32
+; CHECK-NEXT: smull x13, w13, w8
+; CHECK-NEXT: add w16, w0, w17, lsr #31
+; CHECK-NEXT: smov w17, v1.h[7]
+; CHECK-NEXT: smull x8, w3, w8
+; CHECK-NEXT: mov v3.h[4], w11
+; CHECK-NEXT: add x11, x18, x2, lsr #32
+; CHECK-NEXT: sub w14, w1, w1, lsl #3
+; CHECK-NEXT: asr w0, w15, #2
+; CHECK-NEXT: sub w16, w16, w16, lsl #3
+; CHECK-NEXT: add x13, x12, x13, lsr #32
+; CHECK-NEXT: asr w1, w11, #2
+; CHECK-NEXT: add w9, w9, w14
+; CHECK-NEXT: add w14, w0, w15, lsr #31
+; CHECK-NEXT: add w15, w4, w16
+; CHECK-NEXT: add x8, x17, x8, lsr #32
+; CHECK-NEXT: add w11, w1, w11, lsr #31
+; CHECK-NEXT: mov v2.h[5], w9
+; CHECK-NEXT: mov v3.h[5], w15
+; CHECK-NEXT: sub w9, w14, w14, lsl #3
+; CHECK-NEXT: asr w14, w13, #2
+; CHECK-NEXT: asr w15, w8, #2
+; CHECK-NEXT: sub w11, w11, w11, lsl #3
+; CHECK-NEXT: add w9, w10, w9
+; CHECK-NEXT: add w10, w14, w13, lsr #31
+; CHECK-NEXT: add w8, w15, w8, lsr #31
+; CHECK-NEXT: add w11, w18, w11
+; CHECK-NEXT: mov v2.h[6], w9
+; CHECK-NEXT: mov v3.h[6], w11
+; CHECK-NEXT: sub w9, w10, w10, lsl #3
+; CHECK-NEXT: sub w8, w8, w8, lsl #3
+; CHECK-NEXT: add w9, w12, w9
+; CHECK-NEXT: add w8, w17, w8
+; CHECK-NEXT: mov v2.h[7], w9
+; CHECK-NEXT: mov v3.h[7], w8
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %rem = srem <16 x i16> %x, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+ ret <16 x i16> %rem
+}
>From a758c8a3b76905deb31671aa2edc196047ac8f22 Mon Sep 17 00:00:00 2001
From: Valeriy Savchenko <vsavchenko at apple.com>
Date: Fri, 28 Nov 2025 14:22:01 +0000
Subject: [PATCH 2/2] [DAGCombiner] Allow promoted constants when lowering vector SDIVs
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 +-
.../CodeGen/SelectionDAG/TargetLowering.cpp | 8 +-
llvm/test/CodeGen/AArch64/rem-by-const.ll | 89 +-----
.../AArch64/sdiv-by-const-promoted-ops.ll | 295 ++----------------
4 files changed, 53 insertions(+), 345 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5377f22e5c61f..0f539655ff0ed 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5186,7 +5186,8 @@ static bool isDivisorPowerOfTwo(SDValue Divisor) {
return false;
};
- return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
+ return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo, /*AllowUndefs=*/false,
+ /*AllowTruncation=*/true);
}
SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
@@ -5250,7 +5251,8 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// alternate sequence. Targets may check function attributes for size/speed
// trade-offs.
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
- if (isConstantOrConstantVector(N1) &&
+ if (isConstantOrConstantVector(N1, /*NoOpaques=*/false,
+ /*AllowTruncation=*/true) &&
!TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildSDIV(N))
return Op;
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 1e71937372159..e8110ed549653 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6562,8 +6562,9 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
auto BuildSDIVPattern = [&](ConstantSDNode *C) {
if (C->isZero())
return false;
-
- const APInt &Divisor = C->getAPIntValue();
+ // Truncate the divisor to the target scalar type in case it was promoted
+ // during type legalization.
+ APInt Divisor = C->getAPIntValue().trunc(EltBits);
SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
int NumeratorFactor = 0;
int ShiftMask = -1;
@@ -6593,7 +6594,8 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
SDValue N1 = N->getOperand(1);
// Collect the shifts / magic values from each element.
- if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
+ if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern, /*AllowUndefs=*/false,
+ /*AllowTruncation=*/true))
return SDValue();
SDValue MagicFactor, Factor, Shift, ShiftMask;
diff --git a/llvm/test/CodeGen/AArch64/rem-by-const.ll b/llvm/test/CodeGen/AArch64/rem-by-const.ll
index ffaf045fa45c2..c19ded18c94c9 100644
--- a/llvm/test/CodeGen/AArch64/rem-by-const.ll
+++ b/llvm/test/CodeGen/AArch64/rem-by-const.ll
@@ -893,46 +893,15 @@ define <4 x i8> @sv4i8_7(<4 x i8> %d, <4 x i8> %e) {
; CHECK-SD-LABEL: sv4i8_7:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8
-; CHECK-SD-NEXT: mov x8, #-56173 // =0xffffffffffff2493
-; CHECK-SD-NEXT: movk x8, #37449, lsl #16
+; CHECK-SD-NEXT: mov w8, #18725 // =0x4925
+; CHECK-SD-NEXT: movi v2.4h, #7
+; CHECK-SD-NEXT: dup v1.4h, w8
; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
-; CHECK-SD-NEXT: smov x10, v0.h[0]
-; CHECK-SD-NEXT: smov x9, v0.h[1]
-; CHECK-SD-NEXT: smov w12, v0.h[0]
-; CHECK-SD-NEXT: smov w11, v0.h[1]
-; CHECK-SD-NEXT: smov x13, v0.h[2]
-; CHECK-SD-NEXT: smov w14, v0.h[2]
-; CHECK-SD-NEXT: smov x17, v0.h[3]
-; CHECK-SD-NEXT: smull x10, w10, w8
-; CHECK-SD-NEXT: smull x9, w9, w8
-; CHECK-SD-NEXT: smull x13, w13, w8
-; CHECK-SD-NEXT: add x10, x12, x10, lsr #32
-; CHECK-SD-NEXT: smull x8, w17, w8
-; CHECK-SD-NEXT: add x9, x11, x9, lsr #32
-; CHECK-SD-NEXT: asr w16, w10, #2
-; CHECK-SD-NEXT: add x13, x14, x13, lsr #32
-; CHECK-SD-NEXT: asr w15, w9, #2
-; CHECK-SD-NEXT: add w10, w16, w10, lsr #31
-; CHECK-SD-NEXT: asr w16, w13, #2
-; CHECK-SD-NEXT: add w9, w15, w9, lsr #31
-; CHECK-SD-NEXT: smov w15, v0.h[3]
-; CHECK-SD-NEXT: sub w10, w10, w10, lsl #3
-; CHECK-SD-NEXT: sub w9, w9, w9, lsl #3
-; CHECK-SD-NEXT: add w10, w12, w10
-; CHECK-SD-NEXT: fmov s0, w10
-; CHECK-SD-NEXT: add w9, w11, w9
-; CHECK-SD-NEXT: add w10, w16, w13, lsr #31
-; CHECK-SD-NEXT: add x8, x15, x8, lsr #32
-; CHECK-SD-NEXT: mov v0.h[1], w9
-; CHECK-SD-NEXT: sub w9, w10, w10, lsl #3
-; CHECK-SD-NEXT: asr w10, w8, #2
-; CHECK-SD-NEXT: add w9, w14, w9
-; CHECK-SD-NEXT: add w8, w10, w8, lsr #31
-; CHECK-SD-NEXT: mov v0.h[2], w9
-; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3
-; CHECK-SD-NEXT: add w8, w15, w8
-; CHECK-SD-NEXT: mov v0.h[3], w8
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: smull v1.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT: sshr v1.4s, v1.4s, #17
+; CHECK-SD-NEXT: xtn v1.4h, v1.4s
+; CHECK-SD-NEXT: usra v1.4h, v1.4h, #15
+; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv4i8_7:
@@ -978,39 +947,15 @@ define <4 x i8> @sv4i8_100(<4 x i8> %d, <4 x i8> %e) {
; CHECK-SD-LABEL: sv4i8_100:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8
-; CHECK-SD-NEXT: mov w8, #34079 // =0x851f
-; CHECK-SD-NEXT: mov w14, #100 // =0x64
-; CHECK-SD-NEXT: movk w8, #20971, lsl #16
-; CHECK-SD-NEXT: sshr v1.4h, v0.4h, #8
-; CHECK-SD-NEXT: smov x9, v1.h[0]
-; CHECK-SD-NEXT: smov x10, v1.h[1]
-; CHECK-SD-NEXT: smov x11, v1.h[2]
-; CHECK-SD-NEXT: smov w12, v1.h[0]
-; CHECK-SD-NEXT: smov x13, v1.h[3]
-; CHECK-SD-NEXT: smov w15, v1.h[1]
-; CHECK-SD-NEXT: smull x9, w9, w8
-; CHECK-SD-NEXT: smull x10, w10, w8
-; CHECK-SD-NEXT: smull x11, w11, w8
-; CHECK-SD-NEXT: asr x9, x9, #37
-; CHECK-SD-NEXT: smull x8, w13, w8
-; CHECK-SD-NEXT: asr x10, x10, #37
-; CHECK-SD-NEXT: add w9, w9, w9, lsr #31
-; CHECK-SD-NEXT: asr x11, x11, #37
-; CHECK-SD-NEXT: add w10, w10, w10, lsr #31
-; CHECK-SD-NEXT: asr x8, x8, #37
-; CHECK-SD-NEXT: msub w9, w9, w14, w12
-; CHECK-SD-NEXT: msub w10, w10, w14, w15
-; CHECK-SD-NEXT: add w8, w8, w8, lsr #31
-; CHECK-SD-NEXT: fmov s0, w9
-; CHECK-SD-NEXT: add w9, w11, w11, lsr #31
-; CHECK-SD-NEXT: smov w11, v1.h[2]
-; CHECK-SD-NEXT: msub w9, w9, w14, w11
-; CHECK-SD-NEXT: mov v0.h[1], w10
-; CHECK-SD-NEXT: smov w10, v1.h[3]
-; CHECK-SD-NEXT: msub w8, w8, w14, w10
-; CHECK-SD-NEXT: mov v0.h[2], w9
-; CHECK-SD-NEXT: mov v0.h[3], w8
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: mov w8, #5243 // =0x147b
+; CHECK-SD-NEXT: movi v2.4h, #100
+; CHECK-SD-NEXT: dup v1.4h, w8
+; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
+; CHECK-SD-NEXT: smull v1.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT: sshr v1.4s, v1.4s, #19
+; CHECK-SD-NEXT: xtn v1.4h, v1.4s
+; CHECK-SD-NEXT: usra v1.4h, v1.4h, #15
+; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv4i8_100:
diff --git a/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll b/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll
index c6b1ab871d81f..053cbc0616454 100644
--- a/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll
+++ b/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll
@@ -19,122 +19,18 @@ define <8 x i16> @sdiv_v8i16_by_7(<8 x i16> %x) {
define <16 x i16> @sdiv_v16i16_by_7(<16 x i16> %x) {
; CHECK-LABEL: sdiv_v16i16_by_7:
; CHECK: // %bb.0:
-; CHECK-NEXT: smov x11, v0.h[1]
-; CHECK-NEXT: smov x10, v0.h[0]
-; CHECK-NEXT: mov x8, #-56173 // =0xffffffffffff2493
-; CHECK-NEXT: smov x13, v0.h[3]
-; CHECK-NEXT: smov x14, v1.h[1]
-; CHECK-NEXT: movk x8, #37449, lsl #16
-; CHECK-NEXT: smov x16, v1.h[0]
-; CHECK-NEXT: smov w12, v0.h[1]
-; CHECK-NEXT: smov w15, v0.h[0]
-; CHECK-NEXT: smov x18, v1.h[2]
-; CHECK-NEXT: smov w0, v0.h[3]
-; CHECK-NEXT: smov w1, v1.h[1]
-; CHECK-NEXT: smull x11, w11, w8
-; CHECK-NEXT: smov w2, v1.h[0]
-; CHECK-NEXT: smov x9, v0.h[2]
-; CHECK-NEXT: smull x10, w10, w8
-; CHECK-NEXT: smov w17, v0.h[2]
-; CHECK-NEXT: smov w3, v1.h[2]
-; CHECK-NEXT: smull x13, w13, w8
-; CHECK-NEXT: smull x14, w14, w8
-; CHECK-NEXT: add x12, x12, x11, lsr #32
-; CHECK-NEXT: smull x16, w16, w8
-; CHECK-NEXT: add x10, x15, x10, lsr #32
-; CHECK-NEXT: smull x15, w18, w8
-; CHECK-NEXT: add x11, x0, x13, lsr #32
-; CHECK-NEXT: smov x0, v0.h[4]
-; CHECK-NEXT: add x13, x1, x14, lsr #32
-; CHECK-NEXT: asr w18, w10, #2
-; CHECK-NEXT: smull x9, w9, w8
-; CHECK-NEXT: add x14, x2, x16, lsr #32
-; CHECK-NEXT: asr w16, w12, #2
-; CHECK-NEXT: smov x2, v1.h[3]
-; CHECK-NEXT: add w18, w18, w10, lsr #31
-; CHECK-NEXT: add x15, x3, x15, lsr #32
-; CHECK-NEXT: smov w10, v0.h[5]
-; CHECK-NEXT: add w12, w16, w12, lsr #31
-; CHECK-NEXT: asr w16, w14, #2
-; CHECK-NEXT: add x9, x17, x9, lsr #32
-; CHECK-NEXT: fmov s2, w18
-; CHECK-NEXT: smov w17, v0.h[4]
-; CHECK-NEXT: smull x0, w0, w8
-; CHECK-NEXT: add w14, w16, w14, lsr #31
-; CHECK-NEXT: asr w16, w13, #2
-; CHECK-NEXT: asr w1, w9, #2
-; CHECK-NEXT: smov x18, v0.h[5]
-; CHECK-NEXT: fmov s3, w14
-; CHECK-NEXT: mov v2.h[1], w12
-; CHECK-NEXT: add w12, w16, w13, lsr #31
-; CHECK-NEXT: smov w13, v1.h[3]
-; CHECK-NEXT: smov x14, v1.h[4]
-; CHECK-NEXT: smull x16, w2, w8
-; CHECK-NEXT: add w1, w1, w9, lsr #31
-; CHECK-NEXT: add x17, x17, x0, lsr #32
-; CHECK-NEXT: asr w0, w15, #2
-; CHECK-NEXT: mov v3.h[1], w12
-; CHECK-NEXT: smov w12, v1.h[4]
-; CHECK-NEXT: smull x18, w18, w8
-; CHECK-NEXT: mov v2.h[2], w1
-; CHECK-NEXT: asr w1, w11, #2
-; CHECK-NEXT: add w15, w0, w15, lsr #31
-; CHECK-NEXT: add x13, x13, x16, lsr #32
-; CHECK-NEXT: smov x16, v1.h[5]
-; CHECK-NEXT: smull x14, w14, w8
-; CHECK-NEXT: add w11, w1, w11, lsr #31
-; CHECK-NEXT: smov x0, v0.h[6]
-; CHECK-NEXT: add x10, x10, x18, lsr #32
-; CHECK-NEXT: asr w1, w13, #2
-; CHECK-NEXT: mov v3.h[2], w15
-; CHECK-NEXT: smov w15, v1.h[5]
-; CHECK-NEXT: add x12, x12, x14, lsr #32
-; CHECK-NEXT: mov v2.h[3], w11
-; CHECK-NEXT: asr w11, w17, #2
-; CHECK-NEXT: add w13, w1, w13, lsr #31
-; CHECK-NEXT: smull x16, w16, w8
-; CHECK-NEXT: smov x14, v1.h[6]
-; CHECK-NEXT: asr w18, w12, #2
-; CHECK-NEXT: add w11, w11, w17, lsr #31
-; CHECK-NEXT: smov w9, v0.h[6]
-; CHECK-NEXT: mov v3.h[3], w13
-; CHECK-NEXT: smull x17, w0, w8
-; CHECK-NEXT: smov x0, v1.h[7]
-; CHECK-NEXT: add x13, x15, x16, lsr #32
-; CHECK-NEXT: add w12, w18, w12, lsr #31
-; CHECK-NEXT: smov w16, v1.h[6]
-; CHECK-NEXT: mov v2.h[4], w11
-; CHECK-NEXT: smov x11, v0.h[7]
-; CHECK-NEXT: smull x14, w14, w8
-; CHECK-NEXT: asr w15, w10, #2
-; CHECK-NEXT: asr w18, w13, #2
-; CHECK-NEXT: smov w1, v0.h[7]
-; CHECK-NEXT: mov v3.h[4], w12
-; CHECK-NEXT: add x9, x9, x17, lsr #32
-; CHECK-NEXT: add w10, w15, w10, lsr #31
-; CHECK-NEXT: add w12, w18, w13, lsr #31
-; CHECK-NEXT: add x13, x16, x14, lsr #32
-; CHECK-NEXT: smov w14, v1.h[7]
-; CHECK-NEXT: smull x11, w11, w8
-; CHECK-NEXT: smull x8, w0, w8
-; CHECK-NEXT: mov v2.h[5], w10
-; CHECK-NEXT: asr w10, w9, #2
-; CHECK-NEXT: mov v3.h[5], w12
-; CHECK-NEXT: asr w12, w13, #2
-; CHECK-NEXT: add w9, w10, w9, lsr #31
-; CHECK-NEXT: add x10, x1, x11, lsr #32
-; CHECK-NEXT: add w11, w12, w13, lsr #31
-; CHECK-NEXT: add x8, x14, x8, lsr #32
-; CHECK-NEXT: mov v2.h[6], w9
-; CHECK-NEXT: asr w9, w10, #2
-; CHECK-NEXT: mov v3.h[6], w11
-; CHECK-NEXT: asr w11, w8, #2
-; CHECK-NEXT: add w9, w9, w10, lsr #31
-; CHECK-NEXT: add w8, w11, w8, lsr #31
-; CHECK-NEXT: mov v2.h[7], w9
-; CHECK-NEXT: mov v3.h[7], w8
-; CHECK-NEXT: mov v0.16b, v2.16b
-; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov w8, #18725 // =0x4925
+; CHECK-NEXT: dup v2.8h, w8
+; CHECK-NEXT: smull2 v3.4s, v0.8h, v2.8h
+; CHECK-NEXT: smull v0.4s, v0.4h, v2.4h
+; CHECK-NEXT: smull2 v4.4s, v1.8h, v2.8h
+; CHECK-NEXT: smull v1.4s, v1.4h, v2.4h
+; CHECK-NEXT: uzp2 v0.8h, v0.8h, v3.8h
+; CHECK-NEXT: uzp2 v1.8h, v1.8h, v4.8h
+; CHECK-NEXT: sshr v0.8h, v0.8h, #1
+; CHECK-NEXT: sshr v1.8h, v1.8h, #1
+; CHECK-NEXT: usra v0.8h, v0.8h, #15
+; CHECK-NEXT: usra v1.8h, v1.8h, #15
; CHECK-NEXT: ret
%div = sdiv <16 x i16> %x, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
ret <16 x i16> %div
@@ -160,158 +56,21 @@ define <8 x i16> @srem_v8i16_by_7(<8 x i16> %x) {
define <16 x i16> @srem_v16i16_by_7(<16 x i16> %x) {
; CHECK-LABEL: srem_v16i16_by_7:
; CHECK: // %bb.0:
-; CHECK-NEXT: str x19, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w19, -16
-; CHECK-NEXT: smov x10, v0.h[0]
-; CHECK-NEXT: smov x9, v0.h[1]
-; CHECK-NEXT: mov x8, #-56173 // =0xffffffffffff2493
-; CHECK-NEXT: smov x15, v0.h[2]
-; CHECK-NEXT: movk x8, #37449, lsl #16
-; CHECK-NEXT: smov x16, v0.h[3]
-; CHECK-NEXT: smov w13, v0.h[0]
-; CHECK-NEXT: smov w12, v0.h[1]
-; CHECK-NEXT: smov x18, v1.h[0]
-; CHECK-NEXT: smov w17, v0.h[2]
-; CHECK-NEXT: smov w14, v0.h[3]
-; CHECK-NEXT: smov w2, v1.h[0]
-; CHECK-NEXT: smull x10, w10, w8
-; CHECK-NEXT: smov x4, v1.h[1]
-; CHECK-NEXT: smov x0, v0.h[4]
-; CHECK-NEXT: smull x9, w9, w8
-; CHECK-NEXT: smov w11, v0.h[4]
-; CHECK-NEXT: smull x15, w15, w8
-; CHECK-NEXT: smull x1, w16, w8
-; CHECK-NEXT: add x10, x13, x10, lsr #32
-; CHECK-NEXT: smov x16, v0.h[5]
-; CHECK-NEXT: add x3, x12, x9, lsr #32
-; CHECK-NEXT: smull x18, w18, w8
-; CHECK-NEXT: smov w9, v0.h[5]
-; CHECK-NEXT: add x15, x17, x15, lsr #32
-; CHECK-NEXT: asr w6, w10, #2
-; CHECK-NEXT: smull x4, w4, w8
-; CHECK-NEXT: asr w5, w3, #2
-; CHECK-NEXT: add x1, x14, x1, lsr #32
-; CHECK-NEXT: smull x0, w0, w8
-; CHECK-NEXT: add x7, x2, x18, lsr #32
-; CHECK-NEXT: asr w19, w15, #2
-; CHECK-NEXT: smov w18, v1.h[1]
-; CHECK-NEXT: add w10, w6, w10, lsr #31
-; CHECK-NEXT: add w3, w5, w3, lsr #31
-; CHECK-NEXT: asr w5, w1, #2
-; CHECK-NEXT: add w6, w19, w15, lsr #31
-; CHECK-NEXT: asr w15, w7, #2
-; CHECK-NEXT: sub w19, w10, w10, lsl #3
-; CHECK-NEXT: add w1, w5, w1, lsr #31
-; CHECK-NEXT: smov x5, v1.h[2]
-; CHECK-NEXT: sub w3, w3, w3, lsl #3
-; CHECK-NEXT: add x4, x18, x4, lsr #32
-; CHECK-NEXT: add w7, w15, w7, lsr #31
-; CHECK-NEXT: add w13, w13, w19
-; CHECK-NEXT: sub w6, w6, w6, lsl #3
-; CHECK-NEXT: sub w1, w1, w1, lsl #3
-; CHECK-NEXT: fmov s2, w13
-; CHECK-NEXT: add w12, w12, w3
-; CHECK-NEXT: add x3, x11, x0, lsr #32
-; CHECK-NEXT: smov w0, v1.h[2]
-; CHECK-NEXT: asr w13, w4, #2
-; CHECK-NEXT: smull x5, w5, w8
-; CHECK-NEXT: sub w7, w7, w7, lsl #3
-; CHECK-NEXT: add w6, w17, w6
-; CHECK-NEXT: smov w17, v1.h[3]
-; CHECK-NEXT: mov v2.h[1], w12
-; CHECK-NEXT: add w13, w13, w4, lsr #31
-; CHECK-NEXT: smov x4, v1.h[3]
-; CHECK-NEXT: add w2, w2, w7
-; CHECK-NEXT: smov x7, v1.h[4]
-; CHECK-NEXT: add w14, w14, w1
-; CHECK-NEXT: add x5, x0, x5, lsr #32
-; CHECK-NEXT: sub w13, w13, w13, lsl #3
-; CHECK-NEXT: fmov s3, w2
-; CHECK-NEXT: smull x2, w16, w8
-; CHECK-NEXT: smov w16, v1.h[4]
-; CHECK-NEXT: asr w12, w3, #2
-; CHECK-NEXT: mov v2.h[2], w6
-; CHECK-NEXT: asr w6, w5, #2
-; CHECK-NEXT: smull x4, w4, w8
-; CHECK-NEXT: add w18, w18, w13
-; CHECK-NEXT: smov x15, v0.h[6]
-; CHECK-NEXT: add w3, w12, w3, lsr #31
-; CHECK-NEXT: add w5, w6, w5, lsr #31
-; CHECK-NEXT: smull x6, w7, w8
-; CHECK-NEXT: mov v3.h[1], w18
-; CHECK-NEXT: add x4, x17, x4, lsr #32
-; CHECK-NEXT: add x1, x9, x2, lsr #32
-; CHECK-NEXT: sub w3, w3, w3, lsl #3
-; CHECK-NEXT: sub w18, w5, w5, lsl #3
-; CHECK-NEXT: smov x5, v1.h[5]
-; CHECK-NEXT: mov v2.h[3], w14
-; CHECK-NEXT: asr w2, w4, #2
-; CHECK-NEXT: smov w10, v0.h[6]
-; CHECK-NEXT: smov x13, v0.h[7]
-; CHECK-NEXT: add w18, w0, w18
-; CHECK-NEXT: add x0, x16, x6, lsr #32
-; CHECK-NEXT: add w11, w11, w3
-; CHECK-NEXT: add w2, w2, w4, lsr #31
-; CHECK-NEXT: smov w4, v1.h[5]
-; CHECK-NEXT: mov v3.h[2], w18
-; CHECK-NEXT: asr w18, w0, #2
-; CHECK-NEXT: smull x5, w5, w8
-; CHECK-NEXT: smov x3, v1.h[7]
-; CHECK-NEXT: sub w14, w2, w2, lsl #3
-; CHECK-NEXT: smov x2, v1.h[6]
-; CHECK-NEXT: smull x15, w15, w8
-; CHECK-NEXT: add w18, w18, w0, lsr #31
-; CHECK-NEXT: asr w6, w1, #2
-; CHECK-NEXT: mov v2.h[4], w11
-; CHECK-NEXT: add w14, w17, w14
-; CHECK-NEXT: add x17, x4, x5, lsr #32
-; CHECK-NEXT: smov w12, v0.h[7]
-; CHECK-NEXT: mov v3.h[3], w14
-; CHECK-NEXT: sub w14, w18, w18, lsl #3
-; CHECK-NEXT: smov w18, v1.h[6]
-; CHECK-NEXT: smull x2, w2, w8
-; CHECK-NEXT: asr w0, w17, #2
-; CHECK-NEXT: add w1, w6, w1, lsr #31
-; CHECK-NEXT: add w11, w16, w14
-; CHECK-NEXT: add x15, x10, x15, lsr #32
-; CHECK-NEXT: smull x13, w13, w8
-; CHECK-NEXT: add w16, w0, w17, lsr #31
-; CHECK-NEXT: smov w17, v1.h[7]
-; CHECK-NEXT: smull x8, w3, w8
-; CHECK-NEXT: mov v3.h[4], w11
-; CHECK-NEXT: add x11, x18, x2, lsr #32
-; CHECK-NEXT: sub w14, w1, w1, lsl #3
-; CHECK-NEXT: asr w0, w15, #2
-; CHECK-NEXT: sub w16, w16, w16, lsl #3
-; CHECK-NEXT: add x13, x12, x13, lsr #32
-; CHECK-NEXT: asr w1, w11, #2
-; CHECK-NEXT: add w9, w9, w14
-; CHECK-NEXT: add w14, w0, w15, lsr #31
-; CHECK-NEXT: add w15, w4, w16
-; CHECK-NEXT: add x8, x17, x8, lsr #32
-; CHECK-NEXT: add w11, w1, w11, lsr #31
-; CHECK-NEXT: mov v2.h[5], w9
-; CHECK-NEXT: mov v3.h[5], w15
-; CHECK-NEXT: sub w9, w14, w14, lsl #3
-; CHECK-NEXT: asr w14, w13, #2
-; CHECK-NEXT: asr w15, w8, #2
-; CHECK-NEXT: sub w11, w11, w11, lsl #3
-; CHECK-NEXT: add w9, w10, w9
-; CHECK-NEXT: add w10, w14, w13, lsr #31
-; CHECK-NEXT: add w8, w15, w8, lsr #31
-; CHECK-NEXT: add w11, w18, w11
-; CHECK-NEXT: mov v2.h[6], w9
-; CHECK-NEXT: mov v3.h[6], w11
-; CHECK-NEXT: sub w9, w10, w10, lsl #3
-; CHECK-NEXT: sub w8, w8, w8, lsl #3
-; CHECK-NEXT: add w9, w12, w9
-; CHECK-NEXT: add w8, w17, w8
-; CHECK-NEXT: mov v2.h[7], w9
-; CHECK-NEXT: mov v3.h[7], w8
-; CHECK-NEXT: mov v0.16b, v2.16b
-; CHECK-NEXT: mov v1.16b, v3.16b
-; CHECK-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: mov w8, #18725 // =0x4925
+; CHECK-NEXT: dup v2.8h, w8
+; CHECK-NEXT: smull2 v3.4s, v0.8h, v2.8h
+; CHECK-NEXT: smull v4.4s, v0.4h, v2.4h
+; CHECK-NEXT: smull2 v5.4s, v1.8h, v2.8h
+; CHECK-NEXT: smull v2.4s, v1.4h, v2.4h
+; CHECK-NEXT: uzp2 v3.8h, v4.8h, v3.8h
+; CHECK-NEXT: movi v4.8h, #7
+; CHECK-NEXT: uzp2 v2.8h, v2.8h, v5.8h
+; CHECK-NEXT: sshr v3.8h, v3.8h, #1
+; CHECK-NEXT: sshr v2.8h, v2.8h, #1
+; CHECK-NEXT: usra v3.8h, v3.8h, #15
+; CHECK-NEXT: usra v2.8h, v2.8h, #15
+; CHECK-NEXT: mls v0.8h, v3.8h, v4.8h
+; CHECK-NEXT: mls v1.8h, v2.8h, v4.8h
; CHECK-NEXT: ret
%rem = srem <16 x i16> %x, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
ret <16 x i16> %rem
More information about the llvm-commits mailing list