[llvm] [DAG] Fold mul 0 -> 0 when expanding mul into parts. (PR #168780)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 20 10:08:15 PST 2025
https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/168780
>From 2e3cc1e33afcce97b00fda31fabdc6af97e143e9 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 20 Nov 2025 18:08:00 +0000
Subject: [PATCH] [DAG] Optimize Mul x, 0 -> 0 in getNode.
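Folding mul x, 0 -> 0 in getNode regresses codegen in some tests (e.g. the
AArch64 sdiv/srem-by-constant tests no longer select the shifted-operand add
or usra forms), but those can be fixed with some better tablegen patterns.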
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +
llvm/test/CodeGen/AArch64/combine-sdiv.ll | 3 +-
llvm/test/CodeGen/AArch64/rem-by-const.ll | 130 ++++++++++--------
llvm/test/CodeGen/AArch64/srem-lkk.ll | 15 +-
llvm/test/CodeGen/AArch64/srem-vector-lkk.ll | 8 +-
llvm/test/CodeGen/RISCV/mul.ll | 66 ++++-----
.../Thumb/umulo-128-legalisation-lowering.ll | 92 +++++--------
7 files changed, 150 insertions(+), 166 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 8827bff111c22..e7e761a5eb8e6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7615,6 +7615,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
N1.getValueType() == VT && "Binary operator types must match!");
if (VT.getScalarType() == MVT::i1)
return getNode(ISD::AND, DL, VT, N1, N2);
+ if (N2CV && N2CV->isZero())
+ return N2;
if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) {
const APInt &MulImm = N1->getConstantOperandAPInt(0);
const APInt &N2CImm = N2C->getAPIntValue();
diff --git a/llvm/test/CodeGen/AArch64/combine-sdiv.ll b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
index cca190f08df2b..557627bf8eaf1 100644
--- a/llvm/test/CodeGen/AArch64/combine-sdiv.ll
+++ b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
@@ -1674,8 +1674,9 @@ define i32 @combine_i32_sdiv_const100(i32 %x) {
; CHECK-SD-NEXT: mov w8, #34079 // =0x851f
; CHECK-SD-NEXT: movk w8, #20971, lsl #16
; CHECK-SD-NEXT: smull x8, w0, w8
+; CHECK-SD-NEXT: lsr x9, x8, #63
; CHECK-SD-NEXT: asr x8, x8, #37
-; CHECK-SD-NEXT: add w0, w8, w8, lsr #31
+; CHECK-SD-NEXT: add w0, w8, w9
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: combine_i32_sdiv_const100:
diff --git a/llvm/test/CodeGen/AArch64/rem-by-const.ll b/llvm/test/CodeGen/AArch64/rem-by-const.ll
index 87b11086e28d5..d988afd24d15a 100644
--- a/llvm/test/CodeGen/AArch64/rem-by-const.ll
+++ b/llvm/test/CodeGen/AArch64/rem-by-const.ll
@@ -279,11 +279,12 @@ define i32 @si32_100(i32 %a, i32 %b) {
; CHECK-SD-LABEL: si32_100:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov w8, #34079 // =0x851f
-; CHECK-SD-NEXT: mov w9, #100 // =0x64
; CHECK-SD-NEXT: movk w8, #20971, lsl #16
; CHECK-SD-NEXT: smull x8, w0, w8
+; CHECK-SD-NEXT: lsr x9, x8, #63
; CHECK-SD-NEXT: asr x8, x8, #37
-; CHECK-SD-NEXT: add w8, w8, w8, lsr #31
+; CHECK-SD-NEXT: add w8, w8, w9
+; CHECK-SD-NEXT: mov w9, #100 // =0x64
; CHECK-SD-NEXT: msub w0, w8, w9, w0
; CHECK-SD-NEXT: ret
;
@@ -723,17 +724,19 @@ entry:
define <2 x i8> @sv2i8_100(<2 x i8> %d, <2 x i8> %e) {
; CHECK-SD-LABEL: sv2i8_100:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24
; CHECK-SD-NEXT: mov w8, #34079 // =0x851f
-; CHECK-SD-NEXT: movi v2.2s, #100
+; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24
+; CHECK-SD-NEXT: movi v3.2s, #100
; CHECK-SD-NEXT: movk w8, #20971, lsl #16
; CHECK-SD-NEXT: dup v1.2s, w8
; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #24
; CHECK-SD-NEXT: smull v1.2d, v0.2s, v1.2s
+; CHECK-SD-NEXT: ushr v2.2d, v1.2d, #63
; CHECK-SD-NEXT: sshr v1.2d, v1.2d, #37
+; CHECK-SD-NEXT: xtn v2.2s, v2.2d
; CHECK-SD-NEXT: xtn v1.2s, v1.2d
-; CHECK-SD-NEXT: usra v1.2s, v1.2s, #31
-; CHECK-SD-NEXT: mls v0.2s, v1.2s, v2.2s
+; CHECK-SD-NEXT: add v1.2s, v1.2s, v2.2s
+; CHECK-SD-NEXT: mls v0.2s, v1.2s, v3.2s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv2i8_100:
@@ -856,22 +859,25 @@ define <3 x i8> @sv3i8_100(<3 x i8> %d, <3 x i8> %e) {
; CHECK-SD-NEXT: sxtb x10, w1
; CHECK-SD-NEXT: movk w9, #20971, lsl #16
; CHECK-SD-NEXT: sxtb x11, w2
-; CHECK-SD-NEXT: sxtb w12, w0
+; CHECK-SD-NEXT: mov w12, #100 // =0x64
; CHECK-SD-NEXT: smull x8, w8, w9
; CHECK-SD-NEXT: smull x10, w10, w9
; CHECK-SD-NEXT: smull x9, w11, w9
-; CHECK-SD-NEXT: mov w11, #100 // =0x64
+; CHECK-SD-NEXT: lsr x11, x8, #63
; CHECK-SD-NEXT: asr x8, x8, #37
+; CHECK-SD-NEXT: lsr x13, x10, #63
; CHECK-SD-NEXT: asr x10, x10, #37
+; CHECK-SD-NEXT: add w8, w8, w11
+; CHECK-SD-NEXT: lsr x11, x9, #63
; CHECK-SD-NEXT: asr x9, x9, #37
-; CHECK-SD-NEXT: add w8, w8, w8, lsr #31
-; CHECK-SD-NEXT: add w10, w10, w10, lsr #31
-; CHECK-SD-NEXT: add w9, w9, w9, lsr #31
-; CHECK-SD-NEXT: msub w0, w8, w11, w12
+; CHECK-SD-NEXT: add w10, w10, w13
+; CHECK-SD-NEXT: sxtb w13, w0
+; CHECK-SD-NEXT: msub w0, w8, w12, w13
; CHECK-SD-NEXT: sxtb w8, w1
-; CHECK-SD-NEXT: msub w1, w10, w11, w8
+; CHECK-SD-NEXT: add w9, w9, w11
+; CHECK-SD-NEXT: msub w1, w10, w12, w8
; CHECK-SD-NEXT: sxtb w8, w2
-; CHECK-SD-NEXT: msub w2, w9, w11, w8
+; CHECK-SD-NEXT: msub w2, w9, w12, w8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv3i8_100:
@@ -989,33 +995,37 @@ define <4 x i8> @sv4i8_100(<4 x i8> %d, <4 x i8> %e) {
; CHECK-SD-NEXT: mov w14, #100 // =0x64
; CHECK-SD-NEXT: movk w8, #20971, lsl #16
; CHECK-SD-NEXT: sshr v1.4h, v0.4h, #8
-; CHECK-SD-NEXT: smov x9, v1.h[0]
-; CHECK-SD-NEXT: smov x10, v1.h[1]
+; CHECK-SD-NEXT: smov x10, v1.h[0]
+; CHECK-SD-NEXT: smov x9, v1.h[1]
; CHECK-SD-NEXT: smov x11, v1.h[2]
-; CHECK-SD-NEXT: smov w12, v1.h[0]
-; CHECK-SD-NEXT: smov x13, v1.h[3]
+; CHECK-SD-NEXT: smov w16, v1.h[0]
; CHECK-SD-NEXT: smov w15, v1.h[1]
-; CHECK-SD-NEXT: smull x9, w9, w8
; CHECK-SD-NEXT: smull x10, w10, w8
+; CHECK-SD-NEXT: smull x9, w9, w8
; CHECK-SD-NEXT: smull x11, w11, w8
-; CHECK-SD-NEXT: asr x9, x9, #37
-; CHECK-SD-NEXT: smull x8, w13, w8
+; CHECK-SD-NEXT: lsr x13, x10, #63
; CHECK-SD-NEXT: asr x10, x10, #37
-; CHECK-SD-NEXT: add w9, w9, w9, lsr #31
+; CHECK-SD-NEXT: lsr x12, x9, #63
+; CHECK-SD-NEXT: asr x9, x9, #37
+; CHECK-SD-NEXT: add w10, w10, w13
+; CHECK-SD-NEXT: smov x13, v1.h[3]
+; CHECK-SD-NEXT: msub w10, w10, w14, w16
+; CHECK-SD-NEXT: add w9, w9, w12
+; CHECK-SD-NEXT: lsr x12, x11, #63
+; CHECK-SD-NEXT: msub w9, w9, w14, w15
; CHECK-SD-NEXT: asr x11, x11, #37
-; CHECK-SD-NEXT: add w10, w10, w10, lsr #31
+; CHECK-SD-NEXT: fmov s0, w10
+; CHECK-SD-NEXT: smull x8, w13, w8
+; CHECK-SD-NEXT: smov w10, v1.h[2]
+; CHECK-SD-NEXT: mov v0.h[1], w9
+; CHECK-SD-NEXT: add w9, w11, w12
+; CHECK-SD-NEXT: smov w11, v1.h[3]
+; CHECK-SD-NEXT: msub w9, w9, w14, w10
+; CHECK-SD-NEXT: lsr x10, x8, #63
; CHECK-SD-NEXT: asr x8, x8, #37
-; CHECK-SD-NEXT: msub w9, w9, w14, w12
-; CHECK-SD-NEXT: msub w10, w10, w14, w15
-; CHECK-SD-NEXT: add w8, w8, w8, lsr #31
-; CHECK-SD-NEXT: fmov s0, w9
-; CHECK-SD-NEXT: add w9, w11, w11, lsr #31
-; CHECK-SD-NEXT: smov w11, v1.h[2]
-; CHECK-SD-NEXT: msub w9, w9, w14, w11
-; CHECK-SD-NEXT: mov v0.h[1], w10
-; CHECK-SD-NEXT: smov w10, v1.h[3]
-; CHECK-SD-NEXT: msub w8, w8, w14, w10
+; CHECK-SD-NEXT: add w8, w8, w10
; CHECK-SD-NEXT: mov v0.h[2], w9
+; CHECK-SD-NEXT: msub w8, w8, w14, w11
; CHECK-SD-NEXT: mov v0.h[3], w8
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
@@ -1716,17 +1726,19 @@ entry:
define <2 x i16> @sv2i16_100(<2 x i16> %d, <2 x i16> %e) {
; CHECK-SD-LABEL: sv2i16_100:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16
; CHECK-SD-NEXT: mov w8, #34079 // =0x851f
-; CHECK-SD-NEXT: movi v2.2s, #100
+; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16
+; CHECK-SD-NEXT: movi v3.2s, #100
; CHECK-SD-NEXT: movk w8, #20971, lsl #16
; CHECK-SD-NEXT: dup v1.2s, w8
; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #16
; CHECK-SD-NEXT: smull v1.2d, v0.2s, v1.2s
+; CHECK-SD-NEXT: ushr v2.2d, v1.2d, #63
; CHECK-SD-NEXT: sshr v1.2d, v1.2d, #37
+; CHECK-SD-NEXT: xtn v2.2s, v2.2d
; CHECK-SD-NEXT: xtn v1.2s, v1.2d
-; CHECK-SD-NEXT: usra v1.2s, v1.2s, #31
-; CHECK-SD-NEXT: mls v0.2s, v1.2s, v2.2s
+; CHECK-SD-NEXT: add v1.2s, v1.2s, v2.2s
+; CHECK-SD-NEXT: mls v0.2s, v1.2s, v3.2s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv2i16_100:
@@ -1839,23 +1851,26 @@ define <3 x i16> @sv3i16_100(<3 x i16> %d, <3 x i16> %e) {
; CHECK-SD-NEXT: smov x10, v0.h[1]
; CHECK-SD-NEXT: movk w8, #20971, lsl #16
; CHECK-SD-NEXT: smov x11, v0.h[2]
-; CHECK-SD-NEXT: mov w12, #100 // =0x64
-; CHECK-SD-NEXT: smov w13, v0.h[1]
+; CHECK-SD-NEXT: smov w13, v0.h[0]
+; CHECK-SD-NEXT: mov w14, #100 // =0x64
; CHECK-SD-NEXT: smull x9, w9, w8
; CHECK-SD-NEXT: smull x10, w10, w8
; CHECK-SD-NEXT: smull x8, w11, w8
-; CHECK-SD-NEXT: smov w11, v0.h[0]
+; CHECK-SD-NEXT: lsr x11, x9, #63
; CHECK-SD-NEXT: asr x9, x9, #37
+; CHECK-SD-NEXT: lsr x12, x10, #63
; CHECK-SD-NEXT: asr x10, x10, #37
-; CHECK-SD-NEXT: add w9, w9, w9, lsr #31
+; CHECK-SD-NEXT: add w9, w9, w11
+; CHECK-SD-NEXT: smov w11, v0.h[1]
+; CHECK-SD-NEXT: msub w9, w9, w14, w13
+; CHECK-SD-NEXT: add w10, w10, w12
+; CHECK-SD-NEXT: smov w12, v0.h[2]
+; CHECK-SD-NEXT: msub w10, w10, w14, w11
+; CHECK-SD-NEXT: lsr x11, x8, #63
; CHECK-SD-NEXT: asr x8, x8, #37
-; CHECK-SD-NEXT: add w10, w10, w10, lsr #31
-; CHECK-SD-NEXT: msub w9, w9, w12, w11
-; CHECK-SD-NEXT: smov w11, v0.h[2]
-; CHECK-SD-NEXT: add w8, w8, w8, lsr #31
-; CHECK-SD-NEXT: msub w10, w10, w12, w13
-; CHECK-SD-NEXT: msub w8, w8, w12, w11
; CHECK-SD-NEXT: fmov s0, w9
+; CHECK-SD-NEXT: add w8, w8, w11
+; CHECK-SD-NEXT: msub w8, w8, w14, w12
; CHECK-SD-NEXT: mov v0.h[1], w10
; CHECK-SD-NEXT: mov v0.h[2], w8
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
@@ -2407,14 +2422,16 @@ define <2 x i32> @sv2i32_100(<2 x i32> %d, <2 x i32> %e) {
; CHECK-SD-LABEL: sv2i32_100:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov w8, #34079 // =0x851f
-; CHECK-SD-NEXT: movi v2.2s, #100
+; CHECK-SD-NEXT: movi v3.2s, #100
; CHECK-SD-NEXT: movk w8, #20971, lsl #16
; CHECK-SD-NEXT: dup v1.2s, w8
; CHECK-SD-NEXT: smull v1.2d, v0.2s, v1.2s
+; CHECK-SD-NEXT: ushr v2.2d, v1.2d, #63
; CHECK-SD-NEXT: sshr v1.2d, v1.2d, #37
+; CHECK-SD-NEXT: xtn v2.2s, v2.2d
; CHECK-SD-NEXT: xtn v1.2s, v1.2d
-; CHECK-SD-NEXT: usra v1.2s, v1.2s, #31
-; CHECK-SD-NEXT: mls v0.2s, v1.2s, v2.2s
+; CHECK-SD-NEXT: add v1.2s, v1.2s, v2.2s
+; CHECK-SD-NEXT: mls v0.2s, v1.2s, v3.2s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv2i32_100:
@@ -2492,19 +2509,22 @@ define <3 x i32> @sv3i32_100(<3 x i32> %d, <3 x i32> %e) {
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov w8, #34079 // =0x851f
; CHECK-SD-NEXT: mov w9, v0.s[2]
-; CHECK-SD-NEXT: movi v2.2s, #100
+; CHECK-SD-NEXT: movi v3.2s, #100
; CHECK-SD-NEXT: movk w8, #20971, lsl #16
-; CHECK-SD-NEXT: mov w10, #100 // =0x64
; CHECK-SD-NEXT: dup v1.2s, w8
; CHECK-SD-NEXT: smull x8, w9, w8
; CHECK-SD-NEXT: smull v1.2d, v0.2s, v1.2s
+; CHECK-SD-NEXT: lsr x10, x8, #63
; CHECK-SD-NEXT: asr x8, x8, #37
-; CHECK-SD-NEXT: add w8, w8, w8, lsr #31
+; CHECK-SD-NEXT: add w8, w8, w10
+; CHECK-SD-NEXT: mov w10, #100 // =0x64
+; CHECK-SD-NEXT: ushr v2.2d, v1.2d, #63
; CHECK-SD-NEXT: sshr v1.2d, v1.2d, #37
; CHECK-SD-NEXT: msub w8, w8, w10, w9
+; CHECK-SD-NEXT: xtn v2.2s, v2.2d
; CHECK-SD-NEXT: xtn v1.2s, v1.2d
-; CHECK-SD-NEXT: usra v1.2s, v1.2s, #31
-; CHECK-SD-NEXT: mls v0.2s, v1.2s, v2.2s
+; CHECK-SD-NEXT: add v1.2s, v1.2s, v2.2s
+; CHECK-SD-NEXT: mls v0.2s, v1.2s, v3.2s
; CHECK-SD-NEXT: mov v0.s[2], w8
; CHECK-SD-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/srem-lkk.ll b/llvm/test/CodeGen/AArch64/srem-lkk.ll
index 1223ae3a15e7b..d9f91449dffb8 100644
--- a/llvm/test/CodeGen/AArch64/srem-lkk.ll
+++ b/llvm/test/CodeGen/AArch64/srem-lkk.ll
@@ -23,11 +23,12 @@ define i32 @fold_srem_positive_even(i32 %x) {
; CHECK-LABEL: fold_srem_positive_even:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #36849 // =0x8ff1
-; CHECK-NEXT: mov w9, #1060 // =0x424
; CHECK-NEXT: movk w8, #15827, lsl #16
; CHECK-NEXT: smull x8, w0, w8
+; CHECK-NEXT: lsr x9, x8, #63
; CHECK-NEXT: asr x8, x8, #40
-; CHECK-NEXT: add w8, w8, w8, lsr #31
+; CHECK-NEXT: add w8, w8, w9
+; CHECK-NEXT: mov w9, #1060 // =0x424
; CHECK-NEXT: msub w0, w8, w9, w0
; CHECK-NEXT: ret
%1 = srem i32 %x, 1060
@@ -39,11 +40,12 @@ define i32 @fold_srem_negative_odd(i32 %x) {
; CHECK-LABEL: fold_srem_negative_odd:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #65445 // =0xffa5
-; CHECK-NEXT: mov w9, #-723 // =0xfffffd2d
; CHECK-NEXT: movk w8, #42330, lsl #16
; CHECK-NEXT: smull x8, w0, w8
+; CHECK-NEXT: lsr x9, x8, #63
; CHECK-NEXT: asr x8, x8, #40
-; CHECK-NEXT: add w8, w8, w8, lsr #31
+; CHECK-NEXT: add w8, w8, w9
+; CHECK-NEXT: mov w9, #-723 // =0xfffffd2d
; CHECK-NEXT: msub w0, w8, w9, w0
; CHECK-NEXT: ret
%1 = srem i32 %x, -723
@@ -55,11 +57,12 @@ define i32 @fold_srem_negative_even(i32 %x) {
; CHECK-LABEL: fold_srem_negative_even:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #62439 // =0xf3e7
-; CHECK-NEXT: mov w9, #-22981 // =0xffffa63b
; CHECK-NEXT: movk w8, #64805, lsl #16
; CHECK-NEXT: smull x8, w0, w8
+; CHECK-NEXT: lsr x9, x8, #63
; CHECK-NEXT: asr x8, x8, #40
-; CHECK-NEXT: add w8, w8, w8, lsr #31
+; CHECK-NEXT: add w8, w8, w9
+; CHECK-NEXT: mov w9, #-22981 // =0xffffa63b
; CHECK-NEXT: msub w0, w8, w9, w0
; CHECK-NEXT: ret
%1 = srem i32 %x, -22981
diff --git a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll
index b165ac0d56d20..a74f0c86fe185 100644
--- a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll
+++ b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll
@@ -263,14 +263,16 @@ define <2 x i32> @fold_srem_v2i32(<2 x i32> %x) {
; CHECK-LABEL: fold_srem_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #26215 // =0x6667
-; CHECK-NEXT: movi v2.2s, #10
+; CHECK-NEXT: movi v3.2s, #10
; CHECK-NEXT: movk w8, #26214, lsl #16
; CHECK-NEXT: dup v1.2s, w8
; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
+; CHECK-NEXT: ushr v2.2d, v1.2d, #63
; CHECK-NEXT: sshr v1.2d, v1.2d, #34
+; CHECK-NEXT: xtn v2.2s, v2.2d
; CHECK-NEXT: xtn v1.2s, v1.2d
-; CHECK-NEXT: usra v1.2s, v1.2s, #31
-; CHECK-NEXT: mls v0.2s, v1.2s, v2.2s
+; CHECK-NEXT: add v1.2s, v1.2s, v2.2s
+; CHECK-NEXT: mls v0.2s, v1.2s, v3.2s
; CHECK-NEXT: ret
%1 = srem <2 x i32> %x, <i32 10, i32 10>
ret <2 x i32> %1
diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll
index 4533e14c672e7..d691b1c278a48 100644
--- a/llvm/test/CodeGen/RISCV/mul.ll
+++ b/llvm/test/CodeGen/RISCV/mul.ll
@@ -1829,67 +1829,53 @@ define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind {
; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s9, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: mv s3, a2
-; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s2, a1
+; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: srai s4, a3, 31
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __muldi3
; RV32I-NEXT: mv s5, a1
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: mv a2, s3
+; RV32I-NEXT: mv a2, s1
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __muldi3
; RV32I-NEXT: add s5, a0, s5
; RV32I-NEXT: sltu a0, s5, a0
-; RV32I-NEXT: add s7, a1, a0
-; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: add s6, a1, a0
+; RV32I-NEXT: mv a0, s3
; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: mv a2, s2
+; RV32I-NEXT: mv a2, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __muldi3
; RV32I-NEXT: add s5, a0, s5
; RV32I-NEXT: sltu a0, s5, a0
; RV32I-NEXT: add a0, a1, a0
-; RV32I-NEXT: add s8, s7, a0
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: add s5, s6, a0
+; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: mv a2, s2
+; RV32I-NEXT: mv a2, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __muldi3
-; RV32I-NEXT: mv s5, a0
-; RV32I-NEXT: mv s6, a1
-; RV32I-NEXT: add s9, a0, s8
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: mv a1, s2
-; RV32I-NEXT: li a2, 0
-; RV32I-NEXT: li a3, 0
-; RV32I-NEXT: call __muldi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: mv s3, a1
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: add s7, a0, s5
; RV32I-NEXT: mv a0, s4
; RV32I-NEXT: mv a1, s4
-; RV32I-NEXT: mv a2, s1
-; RV32I-NEXT: mv a3, s0
+; RV32I-NEXT: mv a2, s3
+; RV32I-NEXT: mv a3, s2
; RV32I-NEXT: call __muldi3
-; RV32I-NEXT: add s2, a0, s2
-; RV32I-NEXT: sltu a3, s9, s5
-; RV32I-NEXT: sltu a4, s8, s7
-; RV32I-NEXT: add a1, a1, s3
-; RV32I-NEXT: add a2, s9, s2
-; RV32I-NEXT: add a4, s6, a4
-; RV32I-NEXT: sltu a0, s2, a0
-; RV32I-NEXT: sltu a5, a2, s9
-; RV32I-NEXT: add a3, a4, a3
-; RV32I-NEXT: add a0, a1, a0
-; RV32I-NEXT: add a0, a3, a0
-; RV32I-NEXT: add a1, a0, a5
-; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: add a0, s7, a0
+; RV32I-NEXT: sltu a2, s7, s0
+; RV32I-NEXT: sltu a3, s5, s6
+; RV32I-NEXT: sltu a4, a0, s7
+; RV32I-NEXT: add a3, s1, a3
+; RV32I-NEXT: add a2, a3, a2
+; RV32I-NEXT: add a1, a2, a1
+; RV32I-NEXT: add a1, a1, a4
; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
@@ -1899,8 +1885,6 @@ define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind {
; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s9, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
diff --git a/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll
index 9b5fa1c2bc811..94080c02ded80 100644
--- a/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll
@@ -10,11 +10,11 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV6-NEXT: sub sp, #60
; THUMBV6-NEXT: mov r6, r3
; THUMBV6-NEXT: mov r1, r2
-; THUMBV6-NEXT: str r2, [sp, #52] @ 4-byte Spill
+; THUMBV6-NEXT: str r2, [sp, #36] @ 4-byte Spill
; THUMBV6-NEXT: mov r4, r0
-; THUMBV6-NEXT: str r0, [sp, #40] @ 4-byte Spill
+; THUMBV6-NEXT: str r0, [sp, #48] @ 4-byte Spill
; THUMBV6-NEXT: ldr r2, [sp, #88]
-; THUMBV6-NEXT: str r2, [sp, #48] @ 4-byte Spill
+; THUMBV6-NEXT: str r2, [sp, #56] @ 4-byte Spill
; THUMBV6-NEXT: movs r5, #0
; THUMBV6-NEXT: mov r0, r1
; THUMBV6-NEXT: mov r1, r5
@@ -23,21 +23,21 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV6-NEXT: str r1, [sp, #28] @ 4-byte Spill
; THUMBV6-NEXT: str r0, [r4]
; THUMBV6-NEXT: ldr r2, [sp, #96]
-; THUMBV6-NEXT: str r2, [sp, #36] @ 4-byte Spill
+; THUMBV6-NEXT: str r2, [sp, #40] @ 4-byte Spill
; THUMBV6-NEXT: mov r4, r6
-; THUMBV6-NEXT: str r6, [sp, #56] @ 4-byte Spill
+; THUMBV6-NEXT: str r6, [sp, #44] @ 4-byte Spill
; THUMBV6-NEXT: mov r0, r6
; THUMBV6-NEXT: mov r1, r5
; THUMBV6-NEXT: mov r3, r5
; THUMBV6-NEXT: bl __aeabi_lmul
-; THUMBV6-NEXT: str r0, [sp, #44] @ 4-byte Spill
+; THUMBV6-NEXT: str r0, [sp, #52] @ 4-byte Spill
; THUMBV6-NEXT: mov r7, r1
; THUMBV6-NEXT: subs r0, r1, #1
; THUMBV6-NEXT: sbcs r7, r0
; THUMBV6-NEXT: ldr r0, [sp, #100]
; THUMBV6-NEXT: str r0, [sp, #32] @ 4-byte Spill
; THUMBV6-NEXT: mov r1, r5
-; THUMBV6-NEXT: ldr r6, [sp, #52] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r6, [sp, #36] @ 4-byte Reload
; THUMBV6-NEXT: mov r2, r6
; THUMBV6-NEXT: mov r3, r5
; THUMBV6-NEXT: bl __aeabi_lmul
@@ -53,10 +53,10 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV6-NEXT: ands r4, r3
; THUMBV6-NEXT: orrs r4, r1
; THUMBV6-NEXT: orrs r4, r7
-; THUMBV6-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r0, [sp, #52] @ 4-byte Reload
; THUMBV6-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
; THUMBV6-NEXT: adds r7, r1, r0
-; THUMBV6-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
; THUMBV6-NEXT: mov r1, r5
; THUMBV6-NEXT: mov r2, r6
; THUMBV6-NEXT: mov r3, r5
@@ -69,7 +69,7 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV6-NEXT: orrs r0, r4
; THUMBV6-NEXT: str r0, [sp, #16] @ 4-byte Spill
; THUMBV6-NEXT: ldr r0, [sp, #92]
-; THUMBV6-NEXT: str r0, [sp, #44] @ 4-byte Spill
+; THUMBV6-NEXT: str r0, [sp, #52] @ 4-byte Spill
; THUMBV6-NEXT: ldr r7, [sp, #80]
; THUMBV6-NEXT: mov r1, r5
; THUMBV6-NEXT: mov r2, r7
@@ -82,13 +82,13 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV6-NEXT: ldr r6, [sp, #84]
; THUMBV6-NEXT: mov r0, r6
; THUMBV6-NEXT: mov r1, r5
-; THUMBV6-NEXT: ldr r2, [sp, #48] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r2, [sp, #56] @ 4-byte Reload
; THUMBV6-NEXT: mov r3, r5
; THUMBV6-NEXT: bl __aeabi_lmul
; THUMBV6-NEXT: str r0, [sp, #4] @ 4-byte Spill
; THUMBV6-NEXT: subs r2, r1, #1
; THUMBV6-NEXT: sbcs r1, r2
-; THUMBV6-NEXT: ldr r3, [sp, #44] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r3, [sp, #52] @ 4-byte Reload
; THUMBV6-NEXT: subs r2, r3, #1
; THUMBV6-NEXT: sbcs r3, r2
; THUMBV6-NEXT: str r6, [sp, #8] @ 4-byte Spill
@@ -99,21 +99,17 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV6-NEXT: orrs r6, r4
; THUMBV6-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; THUMBV6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
-; THUMBV6-NEXT: adds r0, r1, r0
-; THUMBV6-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; THUMBV6-NEXT: adds r4, r1, r0
; THUMBV6-NEXT: mov r0, r7
; THUMBV6-NEXT: mov r1, r5
-; THUMBV6-NEXT: ldr r4, [sp, #48] @ 4-byte Reload
-; THUMBV6-NEXT: mov r2, r4
+; THUMBV6-NEXT: ldr r2, [sp, #56] @ 4-byte Reload
; THUMBV6-NEXT: mov r3, r5
; THUMBV6-NEXT: bl __aeabi_lmul
-; THUMBV6-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; THUMBV6-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
-; THUMBV6-NEXT: adds r0, r1, r0
+; THUMBV6-NEXT: adds r4, r1, r4
; THUMBV6-NEXT: mov r1, r5
; THUMBV6-NEXT: adcs r1, r5
; THUMBV6-NEXT: orrs r1, r6
-; THUMBV6-NEXT: ldr r3, [sp, #36] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r3, [sp, #40] @ 4-byte Reload
; THUMBV6-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
; THUMBV6-NEXT: orrs r3, r2
; THUMBV6-NEXT: subs r2, r3, #1
@@ -127,68 +123,44 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV6-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
; THUMBV6-NEXT: orrs r7, r1
; THUMBV6-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
-; THUMBV6-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
-; THUMBV6-NEXT: adds r1, r2, r1
-; THUMBV6-NEXT: str r1, [sp, #32] @ 4-byte Spill
-; THUMBV6-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
-; THUMBV6-NEXT: adcs r0, r1
-; THUMBV6-NEXT: str r0, [sp, #36] @ 4-byte Spill
-; THUMBV6-NEXT: ldr r0, [sp, #56] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r0, r1
+; THUMBV6-NEXT: str r0, [sp, #32] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
+; THUMBV6-NEXT: adcs r4, r0
+; THUMBV6-NEXT: str r4, [sp, #40] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
; THUMBV6-NEXT: mov r1, r5
-; THUMBV6-NEXT: mov r2, r4
+; THUMBV6-NEXT: ldr r2, [sp, #56] @ 4-byte Reload
; THUMBV6-NEXT: mov r3, r5
; THUMBV6-NEXT: bl __aeabi_lmul
; THUMBV6-NEXT: mov r4, r1
; THUMBV6-NEXT: ldr r1, [sp, #28] @ 4-byte Reload
; THUMBV6-NEXT: adds r6, r0, r1
; THUMBV6-NEXT: adcs r4, r5
-; THUMBV6-NEXT: ldr r0, [sp, #52] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
; THUMBV6-NEXT: mov r1, r5
-; THUMBV6-NEXT: ldr r2, [sp, #44] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r2, [sp, #52] @ 4-byte Reload
; THUMBV6-NEXT: mov r3, r5
; THUMBV6-NEXT: bl __aeabi_lmul
; THUMBV6-NEXT: adds r0, r0, r6
-; THUMBV6-NEXT: ldr r2, [sp, #40] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r2, [sp, #48] @ 4-byte Reload
; THUMBV6-NEXT: str r0, [r2, #4]
; THUMBV6-NEXT: adcs r1, r5
-; THUMBV6-NEXT: adds r0, r4, r1
-; THUMBV6-NEXT: str r0, [sp, #28] @ 4-byte Spill
+; THUMBV6-NEXT: adds r4, r4, r1
; THUMBV6-NEXT: mov r6, r5
; THUMBV6-NEXT: adcs r6, r5
-; THUMBV6-NEXT: ldr r0, [sp, #56] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
; THUMBV6-NEXT: mov r1, r5
-; THUMBV6-NEXT: ldr r4, [sp, #44] @ 4-byte Reload
-; THUMBV6-NEXT: mov r2, r4
+; THUMBV6-NEXT: ldr r2, [sp, #52] @ 4-byte Reload
; THUMBV6-NEXT: mov r3, r5
; THUMBV6-NEXT: bl __aeabi_lmul
-; THUMBV6-NEXT: ldr r2, [sp, #28] @ 4-byte Reload
-; THUMBV6-NEXT: adds r0, r0, r2
-; THUMBV6-NEXT: str r0, [sp, #28] @ 4-byte Spill
+; THUMBV6-NEXT: adds r0, r0, r4
; THUMBV6-NEXT: adcs r1, r6
-; THUMBV6-NEXT: str r1, [sp, #24] @ 4-byte Spill
-; THUMBV6-NEXT: ldr r0, [sp, #48] @ 4-byte Reload
-; THUMBV6-NEXT: mov r1, r4
-; THUMBV6-NEXT: mov r2, r5
-; THUMBV6-NEXT: mov r3, r5
-; THUMBV6-NEXT: bl __aeabi_lmul
-; THUMBV6-NEXT: mov r6, r0
-; THUMBV6-NEXT: mov r4, r1
-; THUMBV6-NEXT: ldr r0, [sp, #52] @ 4-byte Reload
-; THUMBV6-NEXT: ldr r1, [sp, #56] @ 4-byte Reload
-; THUMBV6-NEXT: mov r2, r5
-; THUMBV6-NEXT: mov r3, r5
-; THUMBV6-NEXT: bl __aeabi_lmul
-; THUMBV6-NEXT: adds r0, r0, r6
-; THUMBV6-NEXT: adcs r1, r4
-; THUMBV6-NEXT: ldr r2, [sp, #28] @ 4-byte Reload
-; THUMBV6-NEXT: adds r0, r2, r0
-; THUMBV6-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
-; THUMBV6-NEXT: adcs r1, r2
; THUMBV6-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
; THUMBV6-NEXT: adds r0, r0, r2
-; THUMBV6-NEXT: ldr r2, [sp, #40] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r2, [sp, #48] @ 4-byte Reload
; THUMBV6-NEXT: str r0, [r2, #8]
-; THUMBV6-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
; THUMBV6-NEXT: adcs r1, r0
; THUMBV6-NEXT: str r1, [r2, #12]
; THUMBV6-NEXT: adcs r5, r5
More information about the llvm-commits mailing list