[llvm] [GlobalISel] Handle div-by-pow2 (PR #83155)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 27 09:10:19 PST 2024
shiltian wrote:
I'm still working on it, but I noticed that the code generated by GlobalISel is much larger than SelectionDAG's.
```
define i128 @v_sdiv_i128_v_pow2k(i128 %lhs) {
%div = sdiv i128 %lhs, 8589934592
ret i128 %div
}
```
GlobalISel:
```
.text
.section .AMDGPU.config,"",@progbits
.long 47176
.long 0
.long 47180
.long 0
.long 47200
.long 0
.long 4
.long 0
.long 8
.long 0
.text
.globl v_sdiv_i128_v_pow2k ; -- Begin function v_sdiv_i128_v_pow2k
.p2align 2
.type v_sdiv_i128_v_pow2k,@function
v_sdiv_i128_v_pow2k: ; @v_sdiv_i128_v_pow2k
; %bb.0:
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
v_ashrrev_i32_e32 v4, 31, v3
s_mov_b32 s4, 0
s_mov_b32 s5, 2
s_ff1_i32_b64 s5, s[4:5]
v_mov_b32_e32 v5, v4
s_sub_i32 s4, 0x80, s5
s_sub_i32 s7, 64, s4
s_sub_i32 s6, s4, 64
s_cmp_lt_u32 s4, 64
v_lshlrev_b64 v[6:7], s4, v[4:5]
v_lshrrev_b64 v[8:9], s7, v[4:5]
s_cselect_b32 s8, 1, 0
s_cmp_eq_u32 s4, 0
v_lshlrev_b64 v[10:11], s6, v[4:5]
s_cselect_b32 s4, 1, 0
s_and_b32 s7, 1, s8
v_or_b32_e32 v5, v8, v6
v_cmp_ne_u32_e64 vcc_lo, 0, s7
v_or_b32_e32 v8, v9, v7
s_and_b32 s4, 1, s4
s_sub_i32 s6, 64, s5
v_cmp_ne_u32_e64 s4, 0, s4
v_cndmask_b32_e32 v5, v10, v5, vcc_lo
v_cndmask_b32_e32 v6, 0, v6, vcc_lo
v_cndmask_b32_e32 v8, v11, v8, vcc_lo
v_cndmask_b32_e32 v7, 0, v7, vcc_lo
v_cndmask_b32_e64 v5, v5, v4, s4
v_add_co_u32 v0, vcc_lo, v0, v6
v_cndmask_b32_e64 v4, v8, v4, s4
v_add_co_ci_u32_e32 v1, vcc_lo, v1, v7, vcc_lo
v_add_co_ci_u32_e32 v2, vcc_lo, v2, v5, vcc_lo
v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo
s_sub_i32 s4, s5, 64
v_lshrrev_b64 v[4:5], s5, v[0:1]
s_cmp_lt_u32 s5, 64
v_lshlrev_b64 v[6:7], s6, v[2:3]
s_cselect_b32 s6, 1, 0
s_cmp_eq_u32 s5, 0
v_ashrrev_i64 v[8:9], s4, v[2:3]
s_cselect_b32 s7, 1, 0
s_and_b32 s4, 1, s6
v_or_b32_e32 v6, v4, v6
v_or_b32_e32 v7, v5, v7
v_cmp_ne_u32_e64 vcc_lo, 0, s4
s_and_b32 s4, 1, s7
v_ashrrev_i64 v[4:5], s5, v[2:3]
v_cmp_ne_u32_e64 s4, 0, s4
v_ashrrev_i32_e32 v3, 31, v3
v_cndmask_b32_e32 v2, v8, v6, vcc_lo
v_cndmask_b32_e32 v6, v9, v7, vcc_lo
v_cndmask_b32_e64 v0, v2, v0, s4
v_cndmask_b32_e64 v1, v6, v1, s4
v_cndmask_b32_e32 v2, v3, v4, vcc_lo
v_cndmask_b32_e32 v3, v3, v5, vcc_lo
s_setpc_b64 s[30:31]
.Lfunc_end0:
.size v_sdiv_i128_v_pow2k, .Lfunc_end0-v_sdiv_i128_v_pow2k
; -- End function
.section .AMDGPU.csdata,"",@progbits
; Function info:
; codeLenInByte = 292
; NumSgprs: 34
; NumVgprs: 12
; ScratchSize: 0
; MemoryBound: 0
.section ".note.GNU-stack","",@progbits
.amd_amdgpu_isa "amdgcn-unknown-linux-gnu-gfx1030"
```
SelectionDAG:
```
.text
.section .AMDGPU.config,"",@progbits
.long 47176
.long 0
.long 47180
.long 0
.long 47200
.long 0
.long 4
.long 0
.long 8
.long 0
.text
.globl v_sdiv_i128_v_pow2k ; -- Begin function v_sdiv_i128_v_pow2k
.p2align 2
.type v_sdiv_i128_v_pow2k,@function
v_sdiv_i128_v_pow2k: ; @v_sdiv_i128_v_pow2k
; %bb.0:
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
v_ashrrev_i32_e32 v4, 31, v3
v_mov_b32_e32 v5, v4
v_lshrrev_b64 v[4:5], 31, v[4:5]
v_add_co_u32 v0, vcc_lo, v0, v4
v_add_co_ci_u32_e32 v4, vcc_lo, v1, v5, vcc_lo
v_add_co_ci_u32_e32 v2, vcc_lo, 0, v2, vcc_lo
v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
v_lshrrev_b32_e32 v4, 1, v4
v_lshlrev_b64 v[0:1], 31, v[2:3]
v_ashrrev_i64 v[2:3], 33, v[2:3]
v_or_b32_e32 v0, v4, v0
s_setpc_b64 s[30:31]
.Lfunc_end0:
.size v_sdiv_i128_v_pow2k, .Lfunc_end0-v_sdiv_i128_v_pow2k
; -- End function
.section .AMDGPU.csdata,"",@progbits
; Function info:
; codeLenInByte = 68
; NumSgprs: 34
; NumVgprs: 6
; ScratchSize: 0
; MemoryBound: 0
.section ".note.GNU-stack","",@progbits
.amd_amdgpu_isa "amdgcn-unknown-linux-gnu-gfx1030"
```
What could cause this? @arsenm
https://github.com/llvm/llvm-project/pull/83155
More information about the llvm-commits
mailing list