[llvm] [DAG] Fold mul 0 -> 0 when expanding mul into parts. (PR #168780)

David Green via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 20 10:08:15 PST 2025


https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/168780

>From 2e3cc1e33afcce97b00fda31fabdc6af97e143e9 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 20 Nov 2025 18:08:00 +0000
Subject: [PATCH] [DAG] Optimize Mul x, 0 -> 0 in getNode.

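This regresses a few AArch64 tests, where the sign-bit add is no longer folded
into a shifted-operand add / usra (see the extra lsr/ushr+xtn instructions in
the updated checks), but they can be fixed with some better tablegen patterns.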
---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |   2 +
 llvm/test/CodeGen/AArch64/combine-sdiv.ll     |   3 +-
 llvm/test/CodeGen/AArch64/rem-by-const.ll     | 130 ++++++++++--------
 llvm/test/CodeGen/AArch64/srem-lkk.ll         |  15 +-
 llvm/test/CodeGen/AArch64/srem-vector-lkk.ll  |   8 +-
 llvm/test/CodeGen/RISCV/mul.ll                |  66 ++++-----
 .../Thumb/umulo-128-legalisation-lowering.ll  |  92 +++++--------
 7 files changed, 150 insertions(+), 166 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 8827bff111c22..e7e761a5eb8e6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7615,6 +7615,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
            N1.getValueType() == VT && "Binary operator types must match!");
     if (VT.getScalarType() == MVT::i1)
       return getNode(ISD::AND, DL, VT, N1, N2);
+    if (N2CV && N2CV->isZero())
+      return N2;
     if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) {
       const APInt &MulImm = N1->getConstantOperandAPInt(0);
       const APInt &N2CImm = N2C->getAPIntValue();
diff --git a/llvm/test/CodeGen/AArch64/combine-sdiv.ll b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
index cca190f08df2b..557627bf8eaf1 100644
--- a/llvm/test/CodeGen/AArch64/combine-sdiv.ll
+++ b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
@@ -1674,8 +1674,9 @@ define i32 @combine_i32_sdiv_const100(i32 %x) {
 ; CHECK-SD-NEXT:    mov w8, #34079 // =0x851f
 ; CHECK-SD-NEXT:    movk w8, #20971, lsl #16
 ; CHECK-SD-NEXT:    smull x8, w0, w8
+; CHECK-SD-NEXT:    lsr x9, x8, #63
 ; CHECK-SD-NEXT:    asr x8, x8, #37
-; CHECK-SD-NEXT:    add w0, w8, w8, lsr #31
+; CHECK-SD-NEXT:    add w0, w8, w9
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: combine_i32_sdiv_const100:
diff --git a/llvm/test/CodeGen/AArch64/rem-by-const.ll b/llvm/test/CodeGen/AArch64/rem-by-const.ll
index 87b11086e28d5..d988afd24d15a 100644
--- a/llvm/test/CodeGen/AArch64/rem-by-const.ll
+++ b/llvm/test/CodeGen/AArch64/rem-by-const.ll
@@ -279,11 +279,12 @@ define i32 @si32_100(i32 %a, i32 %b) {
 ; CHECK-SD-LABEL: si32_100:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    mov w8, #34079 // =0x851f
-; CHECK-SD-NEXT:    mov w9, #100 // =0x64
 ; CHECK-SD-NEXT:    movk w8, #20971, lsl #16
 ; CHECK-SD-NEXT:    smull x8, w0, w8
+; CHECK-SD-NEXT:    lsr x9, x8, #63
 ; CHECK-SD-NEXT:    asr x8, x8, #37
-; CHECK-SD-NEXT:    add w8, w8, w8, lsr #31
+; CHECK-SD-NEXT:    add w8, w8, w9
+; CHECK-SD-NEXT:    mov w9, #100 // =0x64
 ; CHECK-SD-NEXT:    msub w0, w8, w9, w0
 ; CHECK-SD-NEXT:    ret
 ;
@@ -723,17 +724,19 @@ entry:
 define <2 x i8> @sv2i8_100(<2 x i8> %d, <2 x i8> %e) {
 ; CHECK-SD-LABEL: sv2i8_100:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    shl v0.2s, v0.2s, #24
 ; CHECK-SD-NEXT:    mov w8, #34079 // =0x851f
-; CHECK-SD-NEXT:    movi v2.2s, #100
+; CHECK-SD-NEXT:    shl v0.2s, v0.2s, #24
+; CHECK-SD-NEXT:    movi v3.2s, #100
 ; CHECK-SD-NEXT:    movk w8, #20971, lsl #16
 ; CHECK-SD-NEXT:    dup v1.2s, w8
 ; CHECK-SD-NEXT:    sshr v0.2s, v0.2s, #24
 ; CHECK-SD-NEXT:    smull v1.2d, v0.2s, v1.2s
+; CHECK-SD-NEXT:    ushr v2.2d, v1.2d, #63
 ; CHECK-SD-NEXT:    sshr v1.2d, v1.2d, #37
+; CHECK-SD-NEXT:    xtn v2.2s, v2.2d
 ; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
-; CHECK-SD-NEXT:    usra v1.2s, v1.2s, #31
-; CHECK-SD-NEXT:    mls v0.2s, v1.2s, v2.2s
+; CHECK-SD-NEXT:    add v1.2s, v1.2s, v2.2s
+; CHECK-SD-NEXT:    mls v0.2s, v1.2s, v3.2s
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sv2i8_100:
@@ -856,22 +859,25 @@ define <3 x i8> @sv3i8_100(<3 x i8> %d, <3 x i8> %e) {
 ; CHECK-SD-NEXT:    sxtb x10, w1
 ; CHECK-SD-NEXT:    movk w9, #20971, lsl #16
 ; CHECK-SD-NEXT:    sxtb x11, w2
-; CHECK-SD-NEXT:    sxtb w12, w0
+; CHECK-SD-NEXT:    mov w12, #100 // =0x64
 ; CHECK-SD-NEXT:    smull x8, w8, w9
 ; CHECK-SD-NEXT:    smull x10, w10, w9
 ; CHECK-SD-NEXT:    smull x9, w11, w9
-; CHECK-SD-NEXT:    mov w11, #100 // =0x64
+; CHECK-SD-NEXT:    lsr x11, x8, #63
 ; CHECK-SD-NEXT:    asr x8, x8, #37
+; CHECK-SD-NEXT:    lsr x13, x10, #63
 ; CHECK-SD-NEXT:    asr x10, x10, #37
+; CHECK-SD-NEXT:    add w8, w8, w11
+; CHECK-SD-NEXT:    lsr x11, x9, #63
 ; CHECK-SD-NEXT:    asr x9, x9, #37
-; CHECK-SD-NEXT:    add w8, w8, w8, lsr #31
-; CHECK-SD-NEXT:    add w10, w10, w10, lsr #31
-; CHECK-SD-NEXT:    add w9, w9, w9, lsr #31
-; CHECK-SD-NEXT:    msub w0, w8, w11, w12
+; CHECK-SD-NEXT:    add w10, w10, w13
+; CHECK-SD-NEXT:    sxtb w13, w0
+; CHECK-SD-NEXT:    msub w0, w8, w12, w13
 ; CHECK-SD-NEXT:    sxtb w8, w1
-; CHECK-SD-NEXT:    msub w1, w10, w11, w8
+; CHECK-SD-NEXT:    add w9, w9, w11
+; CHECK-SD-NEXT:    msub w1, w10, w12, w8
 ; CHECK-SD-NEXT:    sxtb w8, w2
-; CHECK-SD-NEXT:    msub w2, w9, w11, w8
+; CHECK-SD-NEXT:    msub w2, w9, w12, w8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sv3i8_100:
@@ -989,33 +995,37 @@ define <4 x i8> @sv4i8_100(<4 x i8> %d, <4 x i8> %e) {
 ; CHECK-SD-NEXT:    mov w14, #100 // =0x64
 ; CHECK-SD-NEXT:    movk w8, #20971, lsl #16
 ; CHECK-SD-NEXT:    sshr v1.4h, v0.4h, #8
-; CHECK-SD-NEXT:    smov x9, v1.h[0]
-; CHECK-SD-NEXT:    smov x10, v1.h[1]
+; CHECK-SD-NEXT:    smov x10, v1.h[0]
+; CHECK-SD-NEXT:    smov x9, v1.h[1]
 ; CHECK-SD-NEXT:    smov x11, v1.h[2]
-; CHECK-SD-NEXT:    smov w12, v1.h[0]
-; CHECK-SD-NEXT:    smov x13, v1.h[3]
+; CHECK-SD-NEXT:    smov w16, v1.h[0]
 ; CHECK-SD-NEXT:    smov w15, v1.h[1]
-; CHECK-SD-NEXT:    smull x9, w9, w8
 ; CHECK-SD-NEXT:    smull x10, w10, w8
+; CHECK-SD-NEXT:    smull x9, w9, w8
 ; CHECK-SD-NEXT:    smull x11, w11, w8
-; CHECK-SD-NEXT:    asr x9, x9, #37
-; CHECK-SD-NEXT:    smull x8, w13, w8
+; CHECK-SD-NEXT:    lsr x13, x10, #63
 ; CHECK-SD-NEXT:    asr x10, x10, #37
-; CHECK-SD-NEXT:    add w9, w9, w9, lsr #31
+; CHECK-SD-NEXT:    lsr x12, x9, #63
+; CHECK-SD-NEXT:    asr x9, x9, #37
+; CHECK-SD-NEXT:    add w10, w10, w13
+; CHECK-SD-NEXT:    smov x13, v1.h[3]
+; CHECK-SD-NEXT:    msub w10, w10, w14, w16
+; CHECK-SD-NEXT:    add w9, w9, w12
+; CHECK-SD-NEXT:    lsr x12, x11, #63
+; CHECK-SD-NEXT:    msub w9, w9, w14, w15
 ; CHECK-SD-NEXT:    asr x11, x11, #37
-; CHECK-SD-NEXT:    add w10, w10, w10, lsr #31
+; CHECK-SD-NEXT:    fmov s0, w10
+; CHECK-SD-NEXT:    smull x8, w13, w8
+; CHECK-SD-NEXT:    smov w10, v1.h[2]
+; CHECK-SD-NEXT:    mov v0.h[1], w9
+; CHECK-SD-NEXT:    add w9, w11, w12
+; CHECK-SD-NEXT:    smov w11, v1.h[3]
+; CHECK-SD-NEXT:    msub w9, w9, w14, w10
+; CHECK-SD-NEXT:    lsr x10, x8, #63
 ; CHECK-SD-NEXT:    asr x8, x8, #37
-; CHECK-SD-NEXT:    msub w9, w9, w14, w12
-; CHECK-SD-NEXT:    msub w10, w10, w14, w15
-; CHECK-SD-NEXT:    add w8, w8, w8, lsr #31
-; CHECK-SD-NEXT:    fmov s0, w9
-; CHECK-SD-NEXT:    add w9, w11, w11, lsr #31
-; CHECK-SD-NEXT:    smov w11, v1.h[2]
-; CHECK-SD-NEXT:    msub w9, w9, w14, w11
-; CHECK-SD-NEXT:    mov v0.h[1], w10
-; CHECK-SD-NEXT:    smov w10, v1.h[3]
-; CHECK-SD-NEXT:    msub w8, w8, w14, w10
+; CHECK-SD-NEXT:    add w8, w8, w10
 ; CHECK-SD-NEXT:    mov v0.h[2], w9
+; CHECK-SD-NEXT:    msub w8, w8, w14, w11
 ; CHECK-SD-NEXT:    mov v0.h[3], w8
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
@@ -1716,17 +1726,19 @@ entry:
 define <2 x i16> @sv2i16_100(<2 x i16> %d, <2 x i16> %e) {
 ; CHECK-SD-LABEL: sv2i16_100:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    shl v0.2s, v0.2s, #16
 ; CHECK-SD-NEXT:    mov w8, #34079 // =0x851f
-; CHECK-SD-NEXT:    movi v2.2s, #100
+; CHECK-SD-NEXT:    shl v0.2s, v0.2s, #16
+; CHECK-SD-NEXT:    movi v3.2s, #100
 ; CHECK-SD-NEXT:    movk w8, #20971, lsl #16
 ; CHECK-SD-NEXT:    dup v1.2s, w8
 ; CHECK-SD-NEXT:    sshr v0.2s, v0.2s, #16
 ; CHECK-SD-NEXT:    smull v1.2d, v0.2s, v1.2s
+; CHECK-SD-NEXT:    ushr v2.2d, v1.2d, #63
 ; CHECK-SD-NEXT:    sshr v1.2d, v1.2d, #37
+; CHECK-SD-NEXT:    xtn v2.2s, v2.2d
 ; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
-; CHECK-SD-NEXT:    usra v1.2s, v1.2s, #31
-; CHECK-SD-NEXT:    mls v0.2s, v1.2s, v2.2s
+; CHECK-SD-NEXT:    add v1.2s, v1.2s, v2.2s
+; CHECK-SD-NEXT:    mls v0.2s, v1.2s, v3.2s
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sv2i16_100:
@@ -1839,23 +1851,26 @@ define <3 x i16> @sv3i16_100(<3 x i16> %d, <3 x i16> %e) {
 ; CHECK-SD-NEXT:    smov x10, v0.h[1]
 ; CHECK-SD-NEXT:    movk w8, #20971, lsl #16
 ; CHECK-SD-NEXT:    smov x11, v0.h[2]
-; CHECK-SD-NEXT:    mov w12, #100 // =0x64
-; CHECK-SD-NEXT:    smov w13, v0.h[1]
+; CHECK-SD-NEXT:    smov w13, v0.h[0]
+; CHECK-SD-NEXT:    mov w14, #100 // =0x64
 ; CHECK-SD-NEXT:    smull x9, w9, w8
 ; CHECK-SD-NEXT:    smull x10, w10, w8
 ; CHECK-SD-NEXT:    smull x8, w11, w8
-; CHECK-SD-NEXT:    smov w11, v0.h[0]
+; CHECK-SD-NEXT:    lsr x11, x9, #63
 ; CHECK-SD-NEXT:    asr x9, x9, #37
+; CHECK-SD-NEXT:    lsr x12, x10, #63
 ; CHECK-SD-NEXT:    asr x10, x10, #37
-; CHECK-SD-NEXT:    add w9, w9, w9, lsr #31
+; CHECK-SD-NEXT:    add w9, w9, w11
+; CHECK-SD-NEXT:    smov w11, v0.h[1]
+; CHECK-SD-NEXT:    msub w9, w9, w14, w13
+; CHECK-SD-NEXT:    add w10, w10, w12
+; CHECK-SD-NEXT:    smov w12, v0.h[2]
+; CHECK-SD-NEXT:    msub w10, w10, w14, w11
+; CHECK-SD-NEXT:    lsr x11, x8, #63
 ; CHECK-SD-NEXT:    asr x8, x8, #37
-; CHECK-SD-NEXT:    add w10, w10, w10, lsr #31
-; CHECK-SD-NEXT:    msub w9, w9, w12, w11
-; CHECK-SD-NEXT:    smov w11, v0.h[2]
-; CHECK-SD-NEXT:    add w8, w8, w8, lsr #31
-; CHECK-SD-NEXT:    msub w10, w10, w12, w13
-; CHECK-SD-NEXT:    msub w8, w8, w12, w11
 ; CHECK-SD-NEXT:    fmov s0, w9
+; CHECK-SD-NEXT:    add w8, w8, w11
+; CHECK-SD-NEXT:    msub w8, w8, w14, w12
 ; CHECK-SD-NEXT:    mov v0.h[1], w10
 ; CHECK-SD-NEXT:    mov v0.h[2], w8
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -2407,14 +2422,16 @@ define <2 x i32> @sv2i32_100(<2 x i32> %d, <2 x i32> %e) {
 ; CHECK-SD-LABEL: sv2i32_100:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    mov w8, #34079 // =0x851f
-; CHECK-SD-NEXT:    movi v2.2s, #100
+; CHECK-SD-NEXT:    movi v3.2s, #100
 ; CHECK-SD-NEXT:    movk w8, #20971, lsl #16
 ; CHECK-SD-NEXT:    dup v1.2s, w8
 ; CHECK-SD-NEXT:    smull v1.2d, v0.2s, v1.2s
+; CHECK-SD-NEXT:    ushr v2.2d, v1.2d, #63
 ; CHECK-SD-NEXT:    sshr v1.2d, v1.2d, #37
+; CHECK-SD-NEXT:    xtn v2.2s, v2.2d
 ; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
-; CHECK-SD-NEXT:    usra v1.2s, v1.2s, #31
-; CHECK-SD-NEXT:    mls v0.2s, v1.2s, v2.2s
+; CHECK-SD-NEXT:    add v1.2s, v1.2s, v2.2s
+; CHECK-SD-NEXT:    mls v0.2s, v1.2s, v3.2s
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sv2i32_100:
@@ -2492,19 +2509,22 @@ define <3 x i32> @sv3i32_100(<3 x i32> %d, <3 x i32> %e) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    mov w8, #34079 // =0x851f
 ; CHECK-SD-NEXT:    mov w9, v0.s[2]
-; CHECK-SD-NEXT:    movi v2.2s, #100
+; CHECK-SD-NEXT:    movi v3.2s, #100
 ; CHECK-SD-NEXT:    movk w8, #20971, lsl #16
-; CHECK-SD-NEXT:    mov w10, #100 // =0x64
 ; CHECK-SD-NEXT:    dup v1.2s, w8
 ; CHECK-SD-NEXT:    smull x8, w9, w8
 ; CHECK-SD-NEXT:    smull v1.2d, v0.2s, v1.2s
+; CHECK-SD-NEXT:    lsr x10, x8, #63
 ; CHECK-SD-NEXT:    asr x8, x8, #37
-; CHECK-SD-NEXT:    add w8, w8, w8, lsr #31
+; CHECK-SD-NEXT:    add w8, w8, w10
+; CHECK-SD-NEXT:    mov w10, #100 // =0x64
+; CHECK-SD-NEXT:    ushr v2.2d, v1.2d, #63
 ; CHECK-SD-NEXT:    sshr v1.2d, v1.2d, #37
 ; CHECK-SD-NEXT:    msub w8, w8, w10, w9
+; CHECK-SD-NEXT:    xtn v2.2s, v2.2d
 ; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
-; CHECK-SD-NEXT:    usra v1.2s, v1.2s, #31
-; CHECK-SD-NEXT:    mls v0.2s, v1.2s, v2.2s
+; CHECK-SD-NEXT:    add v1.2s, v1.2s, v2.2s
+; CHECK-SD-NEXT:    mls v0.2s, v1.2s, v3.2s
 ; CHECK-SD-NEXT:    mov v0.s[2], w8
 ; CHECK-SD-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/AArch64/srem-lkk.ll b/llvm/test/CodeGen/AArch64/srem-lkk.ll
index 1223ae3a15e7b..d9f91449dffb8 100644
--- a/llvm/test/CodeGen/AArch64/srem-lkk.ll
+++ b/llvm/test/CodeGen/AArch64/srem-lkk.ll
@@ -23,11 +23,12 @@ define i32 @fold_srem_positive_even(i32 %x) {
 ; CHECK-LABEL: fold_srem_positive_even:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #36849 // =0x8ff1
-; CHECK-NEXT:    mov w9, #1060 // =0x424
 ; CHECK-NEXT:    movk w8, #15827, lsl #16
 ; CHECK-NEXT:    smull x8, w0, w8
+; CHECK-NEXT:    lsr x9, x8, #63
 ; CHECK-NEXT:    asr x8, x8, #40
-; CHECK-NEXT:    add w8, w8, w8, lsr #31
+; CHECK-NEXT:    add w8, w8, w9
+; CHECK-NEXT:    mov w9, #1060 // =0x424
 ; CHECK-NEXT:    msub w0, w8, w9, w0
 ; CHECK-NEXT:    ret
   %1 = srem i32 %x, 1060
@@ -39,11 +40,12 @@ define i32 @fold_srem_negative_odd(i32 %x) {
 ; CHECK-LABEL: fold_srem_negative_odd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #65445 // =0xffa5
-; CHECK-NEXT:    mov w9, #-723 // =0xfffffd2d
 ; CHECK-NEXT:    movk w8, #42330, lsl #16
 ; CHECK-NEXT:    smull x8, w0, w8
+; CHECK-NEXT:    lsr x9, x8, #63
 ; CHECK-NEXT:    asr x8, x8, #40
-; CHECK-NEXT:    add w8, w8, w8, lsr #31
+; CHECK-NEXT:    add w8, w8, w9
+; CHECK-NEXT:    mov w9, #-723 // =0xfffffd2d
 ; CHECK-NEXT:    msub w0, w8, w9, w0
 ; CHECK-NEXT:    ret
   %1 = srem i32 %x, -723
@@ -55,11 +57,12 @@ define i32 @fold_srem_negative_even(i32 %x) {
 ; CHECK-LABEL: fold_srem_negative_even:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #62439 // =0xf3e7
-; CHECK-NEXT:    mov w9, #-22981 // =0xffffa63b
 ; CHECK-NEXT:    movk w8, #64805, lsl #16
 ; CHECK-NEXT:    smull x8, w0, w8
+; CHECK-NEXT:    lsr x9, x8, #63
 ; CHECK-NEXT:    asr x8, x8, #40
-; CHECK-NEXT:    add w8, w8, w8, lsr #31
+; CHECK-NEXT:    add w8, w8, w9
+; CHECK-NEXT:    mov w9, #-22981 // =0xffffa63b
 ; CHECK-NEXT:    msub w0, w8, w9, w0
 ; CHECK-NEXT:    ret
   %1 = srem i32 %x, -22981
diff --git a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll
index b165ac0d56d20..a74f0c86fe185 100644
--- a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll
+++ b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll
@@ -263,14 +263,16 @@ define <2 x i32> @fold_srem_v2i32(<2 x i32> %x) {
 ; CHECK-LABEL: fold_srem_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #26215 // =0x6667
-; CHECK-NEXT:    movi v2.2s, #10
+; CHECK-NEXT:    movi v3.2s, #10
 ; CHECK-NEXT:    movk w8, #26214, lsl #16
 ; CHECK-NEXT:    dup v1.2s, w8
 ; CHECK-NEXT:    smull v1.2d, v0.2s, v1.2s
+; CHECK-NEXT:    ushr v2.2d, v1.2d, #63
 ; CHECK-NEXT:    sshr v1.2d, v1.2d, #34
+; CHECK-NEXT:    xtn v2.2s, v2.2d
 ; CHECK-NEXT:    xtn v1.2s, v1.2d
-; CHECK-NEXT:    usra v1.2s, v1.2s, #31
-; CHECK-NEXT:    mls v0.2s, v1.2s, v2.2s
+; CHECK-NEXT:    add v1.2s, v1.2s, v2.2s
+; CHECK-NEXT:    mls v0.2s, v1.2s, v3.2s
 ; CHECK-NEXT:    ret
   %1 = srem <2 x i32> %x, <i32 10, i32 10>
   ret <2 x i32> %1
diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll
index 4533e14c672e7..d691b1c278a48 100644
--- a/llvm/test/CodeGen/RISCV/mul.ll
+++ b/llvm/test/CodeGen/RISCV/mul.ll
@@ -1829,67 +1829,53 @@ define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind {
 ; RV32I-NEXT:    sw s5, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s8, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s9, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a3
-; RV32I-NEXT:    mv s3, a2
-; RV32I-NEXT:    mv s0, a1
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s0, a3
+; RV32I-NEXT:    mv s1, a2
+; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s3, a0
 ; RV32I-NEXT:    srai s4, a3, 31
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __muldi3
 ; RV32I-NEXT:    mv s5, a1
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    li a1, 0
-; RV32I-NEXT:    mv a2, s3
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __muldi3
 ; RV32I-NEXT:    add s5, a0, s5
 ; RV32I-NEXT:    sltu a0, s5, a0
-; RV32I-NEXT:    add s7, a1, a0
-; RV32I-NEXT:    mv a0, s1
+; RV32I-NEXT:    add s6, a1, a0
+; RV32I-NEXT:    mv a0, s3
 ; RV32I-NEXT:    li a1, 0
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s0
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __muldi3
 ; RV32I-NEXT:    add s5, a0, s5
 ; RV32I-NEXT:    sltu a0, s5, a0
 ; RV32I-NEXT:    add a0, a1, a0
-; RV32I-NEXT:    add s8, s7, a0
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    add s5, s6, a0
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    li a1, 0
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s0
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __muldi3
-; RV32I-NEXT:    mv s5, a0
-; RV32I-NEXT:    mv s6, a1
-; RV32I-NEXT:    add s9, a0, s8
-; RV32I-NEXT:    mv a0, s3
-; RV32I-NEXT:    mv a1, s2
-; RV32I-NEXT:    li a2, 0
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    call __muldi3
-; RV32I-NEXT:    mv s2, a0
-; RV32I-NEXT:    mv s3, a1
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    add s7, a0, s5
 ; RV32I-NEXT:    mv a0, s4
 ; RV32I-NEXT:    mv a1, s4
-; RV32I-NEXT:    mv a2, s1
-; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    mv a2, s3
+; RV32I-NEXT:    mv a3, s2
 ; RV32I-NEXT:    call __muldi3
-; RV32I-NEXT:    add s2, a0, s2
-; RV32I-NEXT:    sltu a3, s9, s5
-; RV32I-NEXT:    sltu a4, s8, s7
-; RV32I-NEXT:    add a1, a1, s3
-; RV32I-NEXT:    add a2, s9, s2
-; RV32I-NEXT:    add a4, s6, a4
-; RV32I-NEXT:    sltu a0, s2, a0
-; RV32I-NEXT:    sltu a5, a2, s9
-; RV32I-NEXT:    add a3, a4, a3
-; RV32I-NEXT:    add a0, a1, a0
-; RV32I-NEXT:    add a0, a3, a0
-; RV32I-NEXT:    add a1, a0, a5
-; RV32I-NEXT:    mv a0, a2
+; RV32I-NEXT:    add a0, s7, a0
+; RV32I-NEXT:    sltu a2, s7, s0
+; RV32I-NEXT:    sltu a3, s5, s6
+; RV32I-NEXT:    sltu a4, a0, s7
+; RV32I-NEXT:    add a3, s1, a3
+; RV32I-NEXT:    add a2, a3, a2
+; RV32I-NEXT:    add a1, a2, a1
+; RV32I-NEXT:    add a1, a1, a4
 ; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
@@ -1899,8 +1885,6 @@ define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind {
 ; RV32I-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s9, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 48
 ; RV32I-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll
index 9b5fa1c2bc811..94080c02ded80 100644
--- a/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll
@@ -10,11 +10,11 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; THUMBV6-NEXT:    sub sp, #60
 ; THUMBV6-NEXT:    mov r6, r3
 ; THUMBV6-NEXT:    mov r1, r2
-; THUMBV6-NEXT:    str r2, [sp, #52] @ 4-byte Spill
+; THUMBV6-NEXT:    str r2, [sp, #36] @ 4-byte Spill
 ; THUMBV6-NEXT:    mov r4, r0
-; THUMBV6-NEXT:    str r0, [sp, #40] @ 4-byte Spill
+; THUMBV6-NEXT:    str r0, [sp, #48] @ 4-byte Spill
 ; THUMBV6-NEXT:    ldr r2, [sp, #88]
-; THUMBV6-NEXT:    str r2, [sp, #48] @ 4-byte Spill
+; THUMBV6-NEXT:    str r2, [sp, #56] @ 4-byte Spill
 ; THUMBV6-NEXT:    movs r5, #0
 ; THUMBV6-NEXT:    mov r0, r1
 ; THUMBV6-NEXT:    mov r1, r5
@@ -23,21 +23,21 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; THUMBV6-NEXT:    str r1, [sp, #28] @ 4-byte Spill
 ; THUMBV6-NEXT:    str r0, [r4]
 ; THUMBV6-NEXT:    ldr r2, [sp, #96]
-; THUMBV6-NEXT:    str r2, [sp, #36] @ 4-byte Spill
+; THUMBV6-NEXT:    str r2, [sp, #40] @ 4-byte Spill
 ; THUMBV6-NEXT:    mov r4, r6
-; THUMBV6-NEXT:    str r6, [sp, #56] @ 4-byte Spill
+; THUMBV6-NEXT:    str r6, [sp, #44] @ 4-byte Spill
 ; THUMBV6-NEXT:    mov r0, r6
 ; THUMBV6-NEXT:    mov r1, r5
 ; THUMBV6-NEXT:    mov r3, r5
 ; THUMBV6-NEXT:    bl __aeabi_lmul
-; THUMBV6-NEXT:    str r0, [sp, #44] @ 4-byte Spill
+; THUMBV6-NEXT:    str r0, [sp, #52] @ 4-byte Spill
 ; THUMBV6-NEXT:    mov r7, r1
 ; THUMBV6-NEXT:    subs r0, r1, #1
 ; THUMBV6-NEXT:    sbcs r7, r0
 ; THUMBV6-NEXT:    ldr r0, [sp, #100]
 ; THUMBV6-NEXT:    str r0, [sp, #32] @ 4-byte Spill
 ; THUMBV6-NEXT:    mov r1, r5
-; THUMBV6-NEXT:    ldr r6, [sp, #52] @ 4-byte Reload
+; THUMBV6-NEXT:    ldr r6, [sp, #36] @ 4-byte Reload
 ; THUMBV6-NEXT:    mov r2, r6
 ; THUMBV6-NEXT:    mov r3, r5
 ; THUMBV6-NEXT:    bl __aeabi_lmul
@@ -53,10 +53,10 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; THUMBV6-NEXT:    ands r4, r3
 ; THUMBV6-NEXT:    orrs r4, r1
 ; THUMBV6-NEXT:    orrs r4, r7
-; THUMBV6-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; THUMBV6-NEXT:    ldr r0, [sp, #52] @ 4-byte Reload
 ; THUMBV6-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
 ; THUMBV6-NEXT:    adds r7, r1, r0
-; THUMBV6-NEXT:    ldr r0, [sp, #36] @ 4-byte Reload
+; THUMBV6-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
 ; THUMBV6-NEXT:    mov r1, r5
 ; THUMBV6-NEXT:    mov r2, r6
 ; THUMBV6-NEXT:    mov r3, r5
@@ -69,7 +69,7 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; THUMBV6-NEXT:    orrs r0, r4
 ; THUMBV6-NEXT:    str r0, [sp, #16] @ 4-byte Spill
 ; THUMBV6-NEXT:    ldr r0, [sp, #92]
-; THUMBV6-NEXT:    str r0, [sp, #44] @ 4-byte Spill
+; THUMBV6-NEXT:    str r0, [sp, #52] @ 4-byte Spill
 ; THUMBV6-NEXT:    ldr r7, [sp, #80]
 ; THUMBV6-NEXT:    mov r1, r5
 ; THUMBV6-NEXT:    mov r2, r7
@@ -82,13 +82,13 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; THUMBV6-NEXT:    ldr r6, [sp, #84]
 ; THUMBV6-NEXT:    mov r0, r6
 ; THUMBV6-NEXT:    mov r1, r5
-; THUMBV6-NEXT:    ldr r2, [sp, #48] @ 4-byte Reload
+; THUMBV6-NEXT:    ldr r2, [sp, #56] @ 4-byte Reload
 ; THUMBV6-NEXT:    mov r3, r5
 ; THUMBV6-NEXT:    bl __aeabi_lmul
 ; THUMBV6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
 ; THUMBV6-NEXT:    subs r2, r1, #1
 ; THUMBV6-NEXT:    sbcs r1, r2
-; THUMBV6-NEXT:    ldr r3, [sp, #44] @ 4-byte Reload
+; THUMBV6-NEXT:    ldr r3, [sp, #52] @ 4-byte Reload
 ; THUMBV6-NEXT:    subs r2, r3, #1
 ; THUMBV6-NEXT:    sbcs r3, r2
 ; THUMBV6-NEXT:    str r6, [sp, #8] @ 4-byte Spill
@@ -99,21 +99,17 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; THUMBV6-NEXT:    orrs r6, r4
 ; THUMBV6-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
 ; THUMBV6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
-; THUMBV6-NEXT:    adds r0, r1, r0
-; THUMBV6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; THUMBV6-NEXT:    adds r4, r1, r0
 ; THUMBV6-NEXT:    mov r0, r7
 ; THUMBV6-NEXT:    mov r1, r5
-; THUMBV6-NEXT:    ldr r4, [sp, #48] @ 4-byte Reload
-; THUMBV6-NEXT:    mov r2, r4
+; THUMBV6-NEXT:    ldr r2, [sp, #56] @ 4-byte Reload
 ; THUMBV6-NEXT:    mov r3, r5
 ; THUMBV6-NEXT:    bl __aeabi_lmul
-; THUMBV6-NEXT:    str r0, [sp, #12] @ 4-byte Spill
-; THUMBV6-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; THUMBV6-NEXT:    adds r0, r1, r0
+; THUMBV6-NEXT:    adds r4, r1, r4
 ; THUMBV6-NEXT:    mov r1, r5
 ; THUMBV6-NEXT:    adcs r1, r5
 ; THUMBV6-NEXT:    orrs r1, r6
-; THUMBV6-NEXT:    ldr r3, [sp, #36] @ 4-byte Reload
+; THUMBV6-NEXT:    ldr r3, [sp, #40] @ 4-byte Reload
 ; THUMBV6-NEXT:    ldr r2, [sp, #32] @ 4-byte Reload
 ; THUMBV6-NEXT:    orrs r3, r2
 ; THUMBV6-NEXT:    subs r2, r3, #1
@@ -127,68 +123,44 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; THUMBV6-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
 ; THUMBV6-NEXT:    orrs r7, r1
 ; THUMBV6-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
-; THUMBV6-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
-; THUMBV6-NEXT:    adds r1, r2, r1
-; THUMBV6-NEXT:    str r1, [sp, #32] @ 4-byte Spill
-; THUMBV6-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
-; THUMBV6-NEXT:    adcs r0, r1
-; THUMBV6-NEXT:    str r0, [sp, #36] @ 4-byte Spill
-; THUMBV6-NEXT:    ldr r0, [sp, #56] @ 4-byte Reload
+; THUMBV6-NEXT:    adds r0, r0, r1
+; THUMBV6-NEXT:    str r0, [sp, #32] @ 4-byte Spill
+; THUMBV6-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
+; THUMBV6-NEXT:    adcs r4, r0
+; THUMBV6-NEXT:    str r4, [sp, #40] @ 4-byte Spill
+; THUMBV6-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
 ; THUMBV6-NEXT:    mov r1, r5
-; THUMBV6-NEXT:    mov r2, r4
+; THUMBV6-NEXT:    ldr r2, [sp, #56] @ 4-byte Reload
 ; THUMBV6-NEXT:    mov r3, r5
 ; THUMBV6-NEXT:    bl __aeabi_lmul
 ; THUMBV6-NEXT:    mov r4, r1
 ; THUMBV6-NEXT:    ldr r1, [sp, #28] @ 4-byte Reload
 ; THUMBV6-NEXT:    adds r6, r0, r1
 ; THUMBV6-NEXT:    adcs r4, r5
-; THUMBV6-NEXT:    ldr r0, [sp, #52] @ 4-byte Reload
+; THUMBV6-NEXT:    ldr r0, [sp, #36] @ 4-byte Reload
 ; THUMBV6-NEXT:    mov r1, r5
-; THUMBV6-NEXT:    ldr r2, [sp, #44] @ 4-byte Reload
+; THUMBV6-NEXT:    ldr r2, [sp, #52] @ 4-byte Reload
 ; THUMBV6-NEXT:    mov r3, r5
 ; THUMBV6-NEXT:    bl __aeabi_lmul
 ; THUMBV6-NEXT:    adds r0, r0, r6
-; THUMBV6-NEXT:    ldr r2, [sp, #40] @ 4-byte Reload
+; THUMBV6-NEXT:    ldr r2, [sp, #48] @ 4-byte Reload
 ; THUMBV6-NEXT:    str r0, [r2, #4]
 ; THUMBV6-NEXT:    adcs r1, r5
-; THUMBV6-NEXT:    adds r0, r4, r1
-; THUMBV6-NEXT:    str r0, [sp, #28] @ 4-byte Spill
+; THUMBV6-NEXT:    adds r4, r4, r1
 ; THUMBV6-NEXT:    mov r6, r5
 ; THUMBV6-NEXT:    adcs r6, r5
-; THUMBV6-NEXT:    ldr r0, [sp, #56] @ 4-byte Reload
+; THUMBV6-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
 ; THUMBV6-NEXT:    mov r1, r5
-; THUMBV6-NEXT:    ldr r4, [sp, #44] @ 4-byte Reload
-; THUMBV6-NEXT:    mov r2, r4
+; THUMBV6-NEXT:    ldr r2, [sp, #52] @ 4-byte Reload
 ; THUMBV6-NEXT:    mov r3, r5
 ; THUMBV6-NEXT:    bl __aeabi_lmul
-; THUMBV6-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
-; THUMBV6-NEXT:    adds r0, r0, r2
-; THUMBV6-NEXT:    str r0, [sp, #28] @ 4-byte Spill
+; THUMBV6-NEXT:    adds r0, r0, r4
 ; THUMBV6-NEXT:    adcs r1, r6
-; THUMBV6-NEXT:    str r1, [sp, #24] @ 4-byte Spill
-; THUMBV6-NEXT:    ldr r0, [sp, #48] @ 4-byte Reload
-; THUMBV6-NEXT:    mov r1, r4
-; THUMBV6-NEXT:    mov r2, r5
-; THUMBV6-NEXT:    mov r3, r5
-; THUMBV6-NEXT:    bl __aeabi_lmul
-; THUMBV6-NEXT:    mov r6, r0
-; THUMBV6-NEXT:    mov r4, r1
-; THUMBV6-NEXT:    ldr r0, [sp, #52] @ 4-byte Reload
-; THUMBV6-NEXT:    ldr r1, [sp, #56] @ 4-byte Reload
-; THUMBV6-NEXT:    mov r2, r5
-; THUMBV6-NEXT:    mov r3, r5
-; THUMBV6-NEXT:    bl __aeabi_lmul
-; THUMBV6-NEXT:    adds r0, r0, r6
-; THUMBV6-NEXT:    adcs r1, r4
-; THUMBV6-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
-; THUMBV6-NEXT:    adds r0, r2, r0
-; THUMBV6-NEXT:    ldr r2, [sp, #24] @ 4-byte Reload
-; THUMBV6-NEXT:    adcs r1, r2
 ; THUMBV6-NEXT:    ldr r2, [sp, #32] @ 4-byte Reload
 ; THUMBV6-NEXT:    adds r0, r0, r2
-; THUMBV6-NEXT:    ldr r2, [sp, #40] @ 4-byte Reload
+; THUMBV6-NEXT:    ldr r2, [sp, #48] @ 4-byte Reload
 ; THUMBV6-NEXT:    str r0, [r2, #8]
-; THUMBV6-NEXT:    ldr r0, [sp, #36] @ 4-byte Reload
+; THUMBV6-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
 ; THUMBV6-NEXT:    adcs r1, r0
 ; THUMBV6-NEXT:    str r1, [r2, #12]
 ; THUMBV6-NEXT:    adcs r5, r5



More information about the llvm-commits mailing list