[llvm] Add divmod functions to RuntimeLibcalls.def (PR #68462)
Angelo Bulfone via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 6 19:28:24 PDT 2023
https://github.com/boomshroom created https://github.com/llvm/llvm-project/pull/68462
Partially addresses #46350 by adding `__{,u}divmod{s,d,t}i4` entries to `RuntimeLibcalls.def`. These functions exist in `compiler-rt` and appear to exist in `libgcc` as well, so it should be safe to emit calls to them. `__{,u}divmod{q,h}i4` are not included because they have no generic implementations in `compiler-rt`; they are only enabled for specific targets that provide optimized implementations.
`__{,u}divmodti4` do not appear to be built for 32-bit platforms, but neither are `__{,u}{div,mod}ti3`, which nonetheless already have `RuntimeLibcalls.def` entries.
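For reference, here is a minimal sketch of the interface these new entries point at. The fixed-width types and the `combine_srem_sdiv` helper below are illustrative assumptions; the actual compiler-rt declarations use its `si_int`/`di_int` typedefs and the `COMPILER_RT_ABI` attribute.

```c
#include <stdint.h>

/* divmod-style builtins: the quotient is returned and the remainder is
 * written through the third argument, so one call covers both results. */
int32_t  __divmodsi4(int32_t a, int32_t b, int32_t *rem);
uint32_t __udivmodsi4(uint32_t a, uint32_t b, uint32_t *rem);
int64_t  __divmoddi4(int64_t a, int64_t b, int64_t *rem);
uint64_t __udivmoddi4(uint64_t a, uint64_t b, uint64_t *rem);

/* Illustration of the lowering this enables: x/95 + x%95 becomes a
 * single runtime call instead of a __divsi3/__modsi3 pair. */
static int32_t combine_srem_sdiv(int32_t x) {
    int32_t rem;
    int32_t quot = __divmodsi4(x, 95, &rem);
    return quot + rem;
}
```

This is what the updated RISC-V tests below check: the separate `__modsi3`/`__divsi3` (and `__moddi3`/`__divdi3`, plus the unsigned variants) calls collapse into one divmod call, with the remainder read back from a stack slot passed in `a2`.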
From e3a64b04f024a314ca807e0dfe11e4c6beddf12c Mon Sep 17 00:00:00 2001
From: Angelo Bulfone <mbulfone at gmail.com>
Date: Fri, 6 Oct 2023 18:58:28 -0700
Subject: [PATCH] Add divmod functions to RuntimeLibcalls.def
---
llvm/include/llvm/IR/RuntimeLibcalls.def | 12 +-
llvm/test/CodeGen/RISCV/srem-lkk.ll | 40 ++----
llvm/test/CodeGen/RISCV/srem-vector-lkk.ll | 148 ++++++++-------------
llvm/test/CodeGen/RISCV/urem-lkk.ll | 40 ++----
llvm/test/CodeGen/RISCV/urem-vector-lkk.ll | 148 ++++++++-------------
5 files changed, 148 insertions(+), 240 deletions(-)
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index 6ec98e278988428..7a461e86820aa1c 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -72,14 +72,14 @@ HANDLE_LIBCALL(UREM_I64, "__umoddi3")
HANDLE_LIBCALL(UREM_I128, "__umodti3")
HANDLE_LIBCALL(SDIVREM_I8, nullptr)
HANDLE_LIBCALL(SDIVREM_I16, nullptr)
-HANDLE_LIBCALL(SDIVREM_I32, nullptr)
-HANDLE_LIBCALL(SDIVREM_I64, nullptr)
-HANDLE_LIBCALL(SDIVREM_I128, nullptr)
+HANDLE_LIBCALL(SDIVREM_I32, "__divmodsi4")
+HANDLE_LIBCALL(SDIVREM_I64, "__divmoddi4")
+HANDLE_LIBCALL(SDIVREM_I128, "__divmodti4")
HANDLE_LIBCALL(UDIVREM_I8, nullptr)
HANDLE_LIBCALL(UDIVREM_I16, nullptr)
-HANDLE_LIBCALL(UDIVREM_I32, nullptr)
-HANDLE_LIBCALL(UDIVREM_I64, nullptr)
-HANDLE_LIBCALL(UDIVREM_I128, nullptr)
+HANDLE_LIBCALL(UDIVREM_I32, "__udivmodsi4")
+HANDLE_LIBCALL(UDIVREM_I64, "__udivmoddi4")
+HANDLE_LIBCALL(UDIVREM_I128, "__udivmodti4")
HANDLE_LIBCALL(NEG_I32, "__negsi2")
HANDLE_LIBCALL(NEG_I64, "__negdi2")
HANDLE_LIBCALL(CTLZ_I32, "__clzsi2")
diff --git a/llvm/test/CodeGen/RISCV/srem-lkk.ll b/llvm/test/CodeGen/RISCV/srem-lkk.ll
index 24e740fd143d131..a933f7a45eb9c10 100644
--- a/llvm/test/CodeGen/RISCV/srem-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/srem-lkk.ll
@@ -213,19 +213,12 @@ define i32 @combine_srem_sdiv(i32 %x) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: call __modsi3@plt
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __divsi3@plt
-; RV32I-NEXT: add a0, s1, a0
+; RV32I-NEXT: addi a2, sp, 8
+; RV32I-NEXT: call __divmodsi4@plt
+; RV32I-NEXT: lw a1, 8(sp)
+; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
@@ -246,23 +239,16 @@ define i32 @combine_srem_sdiv(i32 %x) nounwind {
;
; RV64I-LABEL: combine_srem_sdiv:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sext.w s0, a0
-; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __moddi3@plt
-; RV64I-NEXT: mv s1, a0
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __divdi3@plt
-; RV64I-NEXT: addw a0, s1, a0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: mv a2, sp
+; RV64I-NEXT: call __divmoddi4@plt
+; RV64I-NEXT: ld a1, 0(sp)
+; RV64I-NEXT: addw a0, a1, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IM-LABEL: combine_srem_sdiv:
diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
index b5f1efa4b160ba9..08c167a80255c65 100644
--- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
@@ -382,64 +382,48 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind {
; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lh s1, 0(a1)
-; RV32I-NEXT: lh s2, 4(a1)
-; RV32I-NEXT: lh s3, 8(a1)
-; RV32I-NEXT: lh s4, 12(a1)
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __modsi3@plt
-; RV32I-NEXT: mv s5, a0
-; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __modsi3@plt
-; RV32I-NEXT: mv s6, a0
-; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __modsi3@plt
-; RV32I-NEXT: mv s7, a0
-; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __modsi3@plt
-; RV32I-NEXT: mv s8, a0
+; RV32I-NEXT: lh s0, 0(a1)
+; RV32I-NEXT: lh s1, 4(a1)
+; RV32I-NEXT: lh s2, 8(a1)
+; RV32I-NEXT: lh a3, 12(a1)
+; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __divsi3@plt
+; RV32I-NEXT: addi a2, sp, 12
+; RV32I-NEXT: mv a0, a3
+; RV32I-NEXT: call __divmodsi4@plt
; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __divsi3@plt
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: li a1, 95
+; RV32I-NEXT: addi a2, sp, 16
; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __divsi3@plt
+; RV32I-NEXT: call __divmodsi4@plt
; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: li a1, 95
+; RV32I-NEXT: addi a2, sp, 20
; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __divsi3@plt
-; RV32I-NEXT: add a0, s8, a0
-; RV32I-NEXT: add s2, s7, s2
-; RV32I-NEXT: add s3, s6, s3
-; RV32I-NEXT: add s4, s5, s4
-; RV32I-NEXT: sh s4, 6(s0)
-; RV32I-NEXT: sh s3, 4(s0)
-; RV32I-NEXT: sh s2, 2(s0)
-; RV32I-NEXT: sh a0, 0(s0)
+; RV32I-NEXT: call __divmodsi4@plt
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: li a1, 95
+; RV32I-NEXT: addi a2, sp, 8
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: call __divmodsi4@plt
+; RV32I-NEXT: lw a1, 8(sp)
+; RV32I-NEXT: lw a2, 20(sp)
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: add a2, a2, s1
+; RV32I-NEXT: add a3, a3, s2
+; RV32I-NEXT: add a4, a4, s4
+; RV32I-NEXT: sh a4, 6(s3)
+; RV32I-NEXT: sh a3, 4(s3)
+; RV32I-NEXT: sh a2, 2(s3)
+; RV32I-NEXT: sh a0, 0(s3)
; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
@@ -499,64 +483,48 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind {
; RV64I-NEXT: sd s2, 48(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s4, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s5, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s6, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s7, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s8, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lh s1, 0(a1)
-; RV64I-NEXT: lh s2, 8(a1)
-; RV64I-NEXT: lh s3, 16(a1)
-; RV64I-NEXT: lh s4, 24(a1)
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __moddi3@plt
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s3
-; RV64I-NEXT: call __moddi3@plt
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __moddi3@plt
-; RV64I-NEXT: mv s7, a0
-; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s1
-; RV64I-NEXT: call __moddi3@plt
-; RV64I-NEXT: mv s8, a0
+; RV64I-NEXT: lh s0, 0(a1)
+; RV64I-NEXT: lh s1, 8(a1)
+; RV64I-NEXT: lh s2, 16(a1)
+; RV64I-NEXT: lh a3, 24(a1)
+; RV64I-NEXT: mv s3, a0
; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __divdi3@plt
+; RV64I-NEXT: addi a2, sp, 8
+; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: call __divmoddi4@plt
; RV64I-NEXT: mv s4, a0
; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s3
-; RV64I-NEXT: call __divdi3@plt
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: li a1, 95
+; RV64I-NEXT: addi a2, sp, 16
; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __divdi3@plt
+; RV64I-NEXT: call __divmoddi4@plt
; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: li a1, 95
+; RV64I-NEXT: addi a2, sp, 24
; RV64I-NEXT: mv a0, s1
-; RV64I-NEXT: call __divdi3@plt
-; RV64I-NEXT: add a0, s8, a0
-; RV64I-NEXT: add s2, s7, s2
-; RV64I-NEXT: add s3, s6, s3
-; RV64I-NEXT: add s4, s5, s4
-; RV64I-NEXT: sh s4, 6(s0)
-; RV64I-NEXT: sh s3, 4(s0)
-; RV64I-NEXT: sh s2, 2(s0)
-; RV64I-NEXT: sh a0, 0(s0)
+; RV64I-NEXT: call __divmoddi4@plt
+; RV64I-NEXT: mv s1, a0
+; RV64I-NEXT: li a1, 95
+; RV64I-NEXT: mv a2, sp
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: call __divmoddi4@plt
+; RV64I-NEXT: ld a1, 0(sp)
+; RV64I-NEXT: ld a2, 24(sp)
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: ld a4, 8(sp)
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: add a2, a2, s1
+; RV64I-NEXT: add a3, a3, s2
+; RV64I-NEXT: add a4, a4, s4
+; RV64I-NEXT: sh a4, 6(s3)
+; RV64I-NEXT: sh a3, 4(s3)
+; RV64I-NEXT: sh a2, 2(s3)
+; RV64I-NEXT: sh a0, 0(s3)
; RV64I-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 48(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s3, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s4, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s5, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s6, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s7, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s8, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 80
; RV64I-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/urem-lkk.ll b/llvm/test/CodeGen/RISCV/urem-lkk.ll
index 3d181c3a30d0947..b7f20274492868e 100644
--- a/llvm/test/CodeGen/RISCV/urem-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/urem-lkk.ll
@@ -113,19 +113,12 @@ define i32 @combine_urem_udiv(i32 %x) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: call __umodsi3@plt
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __udivsi3@plt
-; RV32I-NEXT: add a0, s1, a0
+; RV32I-NEXT: addi a2, sp, 8
+; RV32I-NEXT: call __udivmodsi4@plt
+; RV32I-NEXT: lw a1, 8(sp)
+; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
@@ -146,24 +139,17 @@ define i32 @combine_urem_udiv(i32 %x) nounwind {
;
; RV64I-LABEL: combine_urem_udiv:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli s0, a0, 32
-; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __umoddi3@plt
-; RV64I-NEXT: mv s1, a0
+; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __udivdi3@plt
-; RV64I-NEXT: add a0, s1, a0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: mv a2, sp
+; RV64I-NEXT: call __udivmoddi4@plt
+; RV64I-NEXT: ld a1, 0(sp)
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IM-LABEL: combine_urem_udiv:
diff --git a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
index a38ae17f19df385..dac99ae96414079 100644
--- a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
@@ -323,64 +323,48 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind {
; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lhu s1, 0(a1)
-; RV32I-NEXT: lhu s2, 4(a1)
-; RV32I-NEXT: lhu s3, 8(a1)
-; RV32I-NEXT: lhu s4, 12(a1)
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __umodsi3@plt
-; RV32I-NEXT: mv s5, a0
-; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __umodsi3@plt
-; RV32I-NEXT: mv s6, a0
-; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __umodsi3@plt
-; RV32I-NEXT: mv s7, a0
-; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __umodsi3@plt
-; RV32I-NEXT: mv s8, a0
+; RV32I-NEXT: lhu s0, 0(a1)
+; RV32I-NEXT: lhu s1, 4(a1)
+; RV32I-NEXT: lhu s2, 8(a1)
+; RV32I-NEXT: lhu a3, 12(a1)
+; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __udivsi3@plt
+; RV32I-NEXT: addi a2, sp, 12
+; RV32I-NEXT: mv a0, a3
+; RV32I-NEXT: call __udivmodsi4@plt
; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __udivsi3@plt
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: li a1, 95
+; RV32I-NEXT: addi a2, sp, 16
; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __udivsi3@plt
+; RV32I-NEXT: call __udivmodsi4@plt
; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: li a1, 95
+; RV32I-NEXT: addi a2, sp, 20
; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __udivsi3@plt
-; RV32I-NEXT: add a0, s8, a0
-; RV32I-NEXT: add s2, s7, s2
-; RV32I-NEXT: add s3, s6, s3
-; RV32I-NEXT: add s4, s5, s4
-; RV32I-NEXT: sh s4, 6(s0)
-; RV32I-NEXT: sh s3, 4(s0)
-; RV32I-NEXT: sh s2, 2(s0)
-; RV32I-NEXT: sh a0, 0(s0)
+; RV32I-NEXT: call __udivmodsi4@plt
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: li a1, 95
+; RV32I-NEXT: addi a2, sp, 8
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: call __udivmodsi4@plt
+; RV32I-NEXT: lw a1, 8(sp)
+; RV32I-NEXT: lw a2, 20(sp)
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: add a2, a2, s1
+; RV32I-NEXT: add a3, a3, s2
+; RV32I-NEXT: add a4, a4, s4
+; RV32I-NEXT: sh a4, 6(s3)
+; RV32I-NEXT: sh a3, 4(s3)
+; RV32I-NEXT: sh a2, 2(s3)
+; RV32I-NEXT: sh a0, 0(s3)
; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 48
; RV32I-NEXT: ret
;
@@ -424,64 +408,48 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind {
; RV64I-NEXT: sd s2, 48(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s4, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s5, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s6, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s7, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s8, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lhu s1, 0(a1)
-; RV64I-NEXT: lhu s2, 8(a1)
-; RV64I-NEXT: lhu s3, 16(a1)
-; RV64I-NEXT: lhu s4, 24(a1)
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __umoddi3@plt
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s3
-; RV64I-NEXT: call __umoddi3@plt
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __umoddi3@plt
-; RV64I-NEXT: mv s7, a0
-; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s1
-; RV64I-NEXT: call __umoddi3@plt
-; RV64I-NEXT: mv s8, a0
+; RV64I-NEXT: lhu s0, 0(a1)
+; RV64I-NEXT: lhu s1, 8(a1)
+; RV64I-NEXT: lhu s2, 16(a1)
+; RV64I-NEXT: lhu a3, 24(a1)
+; RV64I-NEXT: mv s3, a0
; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __udivdi3@plt
+; RV64I-NEXT: addi a2, sp, 8
+; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: call __udivmoddi4@plt
; RV64I-NEXT: mv s4, a0
; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s3
-; RV64I-NEXT: call __udivdi3@plt
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: li a1, 95
+; RV64I-NEXT: addi a2, sp, 16
; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __udivdi3@plt
+; RV64I-NEXT: call __udivmoddi4@plt
; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: li a1, 95
+; RV64I-NEXT: addi a2, sp, 24
; RV64I-NEXT: mv a0, s1
-; RV64I-NEXT: call __udivdi3@plt
-; RV64I-NEXT: add a0, s8, a0
-; RV64I-NEXT: add s2, s7, s2
-; RV64I-NEXT: add s3, s6, s3
-; RV64I-NEXT: add s4, s5, s4
-; RV64I-NEXT: sh s4, 6(s0)
-; RV64I-NEXT: sh s3, 4(s0)
-; RV64I-NEXT: sh s2, 2(s0)
-; RV64I-NEXT: sh a0, 0(s0)
+; RV64I-NEXT: call __udivmoddi4@plt
+; RV64I-NEXT: mv s1, a0
+; RV64I-NEXT: li a1, 95
+; RV64I-NEXT: mv a2, sp
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: call __udivmoddi4@plt
+; RV64I-NEXT: ld a1, 0(sp)
+; RV64I-NEXT: ld a2, 24(sp)
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: ld a4, 8(sp)
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: add a2, a2, s1
+; RV64I-NEXT: add a3, a3, s2
+; RV64I-NEXT: add a4, a4, s4
+; RV64I-NEXT: sh a4, 6(s3)
+; RV64I-NEXT: sh a3, 4(s3)
+; RV64I-NEXT: sh a2, 2(s3)
+; RV64I-NEXT: sh a0, 0(s3)
; RV64I-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 48(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s3, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s4, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s5, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s6, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s7, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s8, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 80
; RV64I-NEXT: ret
;