[llvm] [DAG] Fold mul 0 -> 0 when expanding mul into parts. (PR #168780)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 19 13:49:31 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
If the upper bits are zero, but we expand the multiply and then immediately convert it into a libcall, there is no opportunity to optimize away the mul. Fold it manually to make sure extending multiplies optimise cleanly.
---
Full diff: https://github.com/llvm/llvm-project/pull/168780.diff
3 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+8-4)
- (modified) llvm/test/CodeGen/RISCV/mul.ll (+25-41)
- (modified) llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll (+32-60)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index bb64f4ee70280..f1924a8900044 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -11095,10 +11095,14 @@ void TargetLowering::forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl,
// If HiLHS and HiRHS are set, multiply them by the opposite low part and add
// the products to Hi.
if (HiLHS) {
- Hi = DAG.getNode(ISD::ADD, dl, VT, Hi,
- DAG.getNode(ISD::ADD, dl, VT,
- DAG.getNode(ISD::MUL, dl, VT, HiRHS, LHS),
- DAG.getNode(ISD::MUL, dl, VT, RHS, HiLHS)));
+ Hi = DAG.getNode(
+ ISD::ADD, dl, VT, Hi,
+ DAG.getNode(
+ ISD::ADD, dl, VT,
+ isNullConstant(HiRHS) ? HiRHS
+ : DAG.getNode(ISD::MUL, dl, VT, HiRHS, LHS),
+ isNullConstant(HiLHS) ? HiLHS
+ : DAG.getNode(ISD::MUL, dl, VT, RHS, HiLHS)));
}
}
diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll
index 4533e14c672e7..d691b1c278a48 100644
--- a/llvm/test/CodeGen/RISCV/mul.ll
+++ b/llvm/test/CodeGen/RISCV/mul.ll
@@ -1829,67 +1829,53 @@ define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind {
; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s9, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s2, a3
-; RV32I-NEXT: mv s3, a2
-; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: mv s0, a3
+; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s2, a1
+; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: srai s4, a3, 31
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __muldi3
; RV32I-NEXT: mv s5, a1
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: mv a2, s3
+; RV32I-NEXT: mv a2, s1
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __muldi3
; RV32I-NEXT: add s5, a0, s5
; RV32I-NEXT: sltu a0, s5, a0
-; RV32I-NEXT: add s7, a1, a0
-; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: add s6, a1, a0
+; RV32I-NEXT: mv a0, s3
; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: mv a2, s2
+; RV32I-NEXT: mv a2, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __muldi3
; RV32I-NEXT: add s5, a0, s5
; RV32I-NEXT: sltu a0, s5, a0
; RV32I-NEXT: add a0, a1, a0
-; RV32I-NEXT: add s8, s7, a0
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: add s5, s6, a0
+; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: mv a2, s2
+; RV32I-NEXT: mv a2, s0
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __muldi3
-; RV32I-NEXT: mv s5, a0
-; RV32I-NEXT: mv s6, a1
-; RV32I-NEXT: add s9, a0, s8
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: mv a1, s2
-; RV32I-NEXT: li a2, 0
-; RV32I-NEXT: li a3, 0
-; RV32I-NEXT: call __muldi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: mv s3, a1
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: add s7, a0, s5
; RV32I-NEXT: mv a0, s4
; RV32I-NEXT: mv a1, s4
-; RV32I-NEXT: mv a2, s1
-; RV32I-NEXT: mv a3, s0
+; RV32I-NEXT: mv a2, s3
+; RV32I-NEXT: mv a3, s2
; RV32I-NEXT: call __muldi3
-; RV32I-NEXT: add s2, a0, s2
-; RV32I-NEXT: sltu a3, s9, s5
-; RV32I-NEXT: sltu a4, s8, s7
-; RV32I-NEXT: add a1, a1, s3
-; RV32I-NEXT: add a2, s9, s2
-; RV32I-NEXT: add a4, s6, a4
-; RV32I-NEXT: sltu a0, s2, a0
-; RV32I-NEXT: sltu a5, a2, s9
-; RV32I-NEXT: add a3, a4, a3
-; RV32I-NEXT: add a0, a1, a0
-; RV32I-NEXT: add a0, a3, a0
-; RV32I-NEXT: add a1, a0, a5
-; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: add a0, s7, a0
+; RV32I-NEXT: sltu a2, s7, s0
+; RV32I-NEXT: sltu a3, s5, s6
+; RV32I-NEXT: sltu a4, a0, s7
+; RV32I-NEXT: add a3, s1, a3
+; RV32I-NEXT: add a2, a3, a2
+; RV32I-NEXT: add a1, a2, a1
+; RV32I-NEXT: add a1, a1, a4
; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
@@ -1899,8 +1885,6 @@ define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind {
; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s9, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
diff --git a/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll
index 9b5fa1c2bc811..94080c02ded80 100644
--- a/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll
@@ -10,11 +10,11 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV6-NEXT: sub sp, #60
; THUMBV6-NEXT: mov r6, r3
; THUMBV6-NEXT: mov r1, r2
-; THUMBV6-NEXT: str r2, [sp, #52] @ 4-byte Spill
+; THUMBV6-NEXT: str r2, [sp, #36] @ 4-byte Spill
; THUMBV6-NEXT: mov r4, r0
-; THUMBV6-NEXT: str r0, [sp, #40] @ 4-byte Spill
+; THUMBV6-NEXT: str r0, [sp, #48] @ 4-byte Spill
; THUMBV6-NEXT: ldr r2, [sp, #88]
-; THUMBV6-NEXT: str r2, [sp, #48] @ 4-byte Spill
+; THUMBV6-NEXT: str r2, [sp, #56] @ 4-byte Spill
; THUMBV6-NEXT: movs r5, #0
; THUMBV6-NEXT: mov r0, r1
; THUMBV6-NEXT: mov r1, r5
@@ -23,21 +23,21 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV6-NEXT: str r1, [sp, #28] @ 4-byte Spill
; THUMBV6-NEXT: str r0, [r4]
; THUMBV6-NEXT: ldr r2, [sp, #96]
-; THUMBV6-NEXT: str r2, [sp, #36] @ 4-byte Spill
+; THUMBV6-NEXT: str r2, [sp, #40] @ 4-byte Spill
; THUMBV6-NEXT: mov r4, r6
-; THUMBV6-NEXT: str r6, [sp, #56] @ 4-byte Spill
+; THUMBV6-NEXT: str r6, [sp, #44] @ 4-byte Spill
; THUMBV6-NEXT: mov r0, r6
; THUMBV6-NEXT: mov r1, r5
; THUMBV6-NEXT: mov r3, r5
; THUMBV6-NEXT: bl __aeabi_lmul
-; THUMBV6-NEXT: str r0, [sp, #44] @ 4-byte Spill
+; THUMBV6-NEXT: str r0, [sp, #52] @ 4-byte Spill
; THUMBV6-NEXT: mov r7, r1
; THUMBV6-NEXT: subs r0, r1, #1
; THUMBV6-NEXT: sbcs r7, r0
; THUMBV6-NEXT: ldr r0, [sp, #100]
; THUMBV6-NEXT: str r0, [sp, #32] @ 4-byte Spill
; THUMBV6-NEXT: mov r1, r5
-; THUMBV6-NEXT: ldr r6, [sp, #52] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r6, [sp, #36] @ 4-byte Reload
; THUMBV6-NEXT: mov r2, r6
; THUMBV6-NEXT: mov r3, r5
; THUMBV6-NEXT: bl __aeabi_lmul
@@ -53,10 +53,10 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV6-NEXT: ands r4, r3
; THUMBV6-NEXT: orrs r4, r1
; THUMBV6-NEXT: orrs r4, r7
-; THUMBV6-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r0, [sp, #52] @ 4-byte Reload
; THUMBV6-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
; THUMBV6-NEXT: adds r7, r1, r0
-; THUMBV6-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
; THUMBV6-NEXT: mov r1, r5
; THUMBV6-NEXT: mov r2, r6
; THUMBV6-NEXT: mov r3, r5
@@ -69,7 +69,7 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV6-NEXT: orrs r0, r4
; THUMBV6-NEXT: str r0, [sp, #16] @ 4-byte Spill
; THUMBV6-NEXT: ldr r0, [sp, #92]
-; THUMBV6-NEXT: str r0, [sp, #44] @ 4-byte Spill
+; THUMBV6-NEXT: str r0, [sp, #52] @ 4-byte Spill
; THUMBV6-NEXT: ldr r7, [sp, #80]
; THUMBV6-NEXT: mov r1, r5
; THUMBV6-NEXT: mov r2, r7
@@ -82,13 +82,13 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV6-NEXT: ldr r6, [sp, #84]
; THUMBV6-NEXT: mov r0, r6
; THUMBV6-NEXT: mov r1, r5
-; THUMBV6-NEXT: ldr r2, [sp, #48] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r2, [sp, #56] @ 4-byte Reload
; THUMBV6-NEXT: mov r3, r5
; THUMBV6-NEXT: bl __aeabi_lmul
; THUMBV6-NEXT: str r0, [sp, #4] @ 4-byte Spill
; THUMBV6-NEXT: subs r2, r1, #1
; THUMBV6-NEXT: sbcs r1, r2
-; THUMBV6-NEXT: ldr r3, [sp, #44] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r3, [sp, #52] @ 4-byte Reload
; THUMBV6-NEXT: subs r2, r3, #1
; THUMBV6-NEXT: sbcs r3, r2
; THUMBV6-NEXT: str r6, [sp, #8] @ 4-byte Spill
@@ -99,21 +99,17 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV6-NEXT: orrs r6, r4
; THUMBV6-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; THUMBV6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
-; THUMBV6-NEXT: adds r0, r1, r0
-; THUMBV6-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; THUMBV6-NEXT: adds r4, r1, r0
; THUMBV6-NEXT: mov r0, r7
; THUMBV6-NEXT: mov r1, r5
-; THUMBV6-NEXT: ldr r4, [sp, #48] @ 4-byte Reload
-; THUMBV6-NEXT: mov r2, r4
+; THUMBV6-NEXT: ldr r2, [sp, #56] @ 4-byte Reload
; THUMBV6-NEXT: mov r3, r5
; THUMBV6-NEXT: bl __aeabi_lmul
-; THUMBV6-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; THUMBV6-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
-; THUMBV6-NEXT: adds r0, r1, r0
+; THUMBV6-NEXT: adds r4, r1, r4
; THUMBV6-NEXT: mov r1, r5
; THUMBV6-NEXT: adcs r1, r5
; THUMBV6-NEXT: orrs r1, r6
-; THUMBV6-NEXT: ldr r3, [sp, #36] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r3, [sp, #40] @ 4-byte Reload
; THUMBV6-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
; THUMBV6-NEXT: orrs r3, r2
; THUMBV6-NEXT: subs r2, r3, #1
@@ -127,68 +123,44 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV6-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
; THUMBV6-NEXT: orrs r7, r1
; THUMBV6-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
-; THUMBV6-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
-; THUMBV6-NEXT: adds r1, r2, r1
-; THUMBV6-NEXT: str r1, [sp, #32] @ 4-byte Spill
-; THUMBV6-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
-; THUMBV6-NEXT: adcs r0, r1
-; THUMBV6-NEXT: str r0, [sp, #36] @ 4-byte Spill
-; THUMBV6-NEXT: ldr r0, [sp, #56] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r0, r1
+; THUMBV6-NEXT: str r0, [sp, #32] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
+; THUMBV6-NEXT: adcs r4, r0
+; THUMBV6-NEXT: str r4, [sp, #40] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
; THUMBV6-NEXT: mov r1, r5
-; THUMBV6-NEXT: mov r2, r4
+; THUMBV6-NEXT: ldr r2, [sp, #56] @ 4-byte Reload
; THUMBV6-NEXT: mov r3, r5
; THUMBV6-NEXT: bl __aeabi_lmul
; THUMBV6-NEXT: mov r4, r1
; THUMBV6-NEXT: ldr r1, [sp, #28] @ 4-byte Reload
; THUMBV6-NEXT: adds r6, r0, r1
; THUMBV6-NEXT: adcs r4, r5
-; THUMBV6-NEXT: ldr r0, [sp, #52] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
; THUMBV6-NEXT: mov r1, r5
-; THUMBV6-NEXT: ldr r2, [sp, #44] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r2, [sp, #52] @ 4-byte Reload
; THUMBV6-NEXT: mov r3, r5
; THUMBV6-NEXT: bl __aeabi_lmul
; THUMBV6-NEXT: adds r0, r0, r6
-; THUMBV6-NEXT: ldr r2, [sp, #40] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r2, [sp, #48] @ 4-byte Reload
; THUMBV6-NEXT: str r0, [r2, #4]
; THUMBV6-NEXT: adcs r1, r5
-; THUMBV6-NEXT: adds r0, r4, r1
-; THUMBV6-NEXT: str r0, [sp, #28] @ 4-byte Spill
+; THUMBV6-NEXT: adds r4, r4, r1
; THUMBV6-NEXT: mov r6, r5
; THUMBV6-NEXT: adcs r6, r5
-; THUMBV6-NEXT: ldr r0, [sp, #56] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
; THUMBV6-NEXT: mov r1, r5
-; THUMBV6-NEXT: ldr r4, [sp, #44] @ 4-byte Reload
-; THUMBV6-NEXT: mov r2, r4
+; THUMBV6-NEXT: ldr r2, [sp, #52] @ 4-byte Reload
; THUMBV6-NEXT: mov r3, r5
; THUMBV6-NEXT: bl __aeabi_lmul
-; THUMBV6-NEXT: ldr r2, [sp, #28] @ 4-byte Reload
-; THUMBV6-NEXT: adds r0, r0, r2
-; THUMBV6-NEXT: str r0, [sp, #28] @ 4-byte Spill
+; THUMBV6-NEXT: adds r0, r0, r4
; THUMBV6-NEXT: adcs r1, r6
-; THUMBV6-NEXT: str r1, [sp, #24] @ 4-byte Spill
-; THUMBV6-NEXT: ldr r0, [sp, #48] @ 4-byte Reload
-; THUMBV6-NEXT: mov r1, r4
-; THUMBV6-NEXT: mov r2, r5
-; THUMBV6-NEXT: mov r3, r5
-; THUMBV6-NEXT: bl __aeabi_lmul
-; THUMBV6-NEXT: mov r6, r0
-; THUMBV6-NEXT: mov r4, r1
-; THUMBV6-NEXT: ldr r0, [sp, #52] @ 4-byte Reload
-; THUMBV6-NEXT: ldr r1, [sp, #56] @ 4-byte Reload
-; THUMBV6-NEXT: mov r2, r5
-; THUMBV6-NEXT: mov r3, r5
-; THUMBV6-NEXT: bl __aeabi_lmul
-; THUMBV6-NEXT: adds r0, r0, r6
-; THUMBV6-NEXT: adcs r1, r4
-; THUMBV6-NEXT: ldr r2, [sp, #28] @ 4-byte Reload
-; THUMBV6-NEXT: adds r0, r2, r0
-; THUMBV6-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
-; THUMBV6-NEXT: adcs r1, r2
; THUMBV6-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
; THUMBV6-NEXT: adds r0, r0, r2
-; THUMBV6-NEXT: ldr r2, [sp, #40] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r2, [sp, #48] @ 4-byte Reload
; THUMBV6-NEXT: str r0, [r2, #8]
-; THUMBV6-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
; THUMBV6-NEXT: adcs r1, r0
; THUMBV6-NEXT: str r1, [r2, #12]
; THUMBV6-NEXT: adcs r5, r5
``````````
</details>
https://github.com/llvm/llvm-project/pull/168780
More information about the llvm-commits
mailing list