[llvm] GlobalISel lane masks merging (PR #73337)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 14 21:47:52 PST 2023
================
@@ -96,30 +102,41 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, i32 %x.size, ptr ad
; GFX10-NEXT: v_mov_b32_e32 v4, s1
; GFX10-NEXT: v_mov_b32_e32 v3, s0
; GFX10-NEXT: v_mov_b32_e32 v5, s4
+; GFX10-NEXT: ; implicit-def: $sgpr0
+; GFX10-NEXT: ; implicit-def: $sgpr1
; GFX10-NEXT: s_branch .LBB2_3
; GFX10-NEXT: .LBB2_1: ; %loop.body
; GFX10-NEXT: ; in Loop: Header=BB2_3 Depth=1
; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v0, v6
; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v1, v7, vcc_lo
; GFX10-NEXT: v_add_nc_u32_e32 v9, 1, v5
-; GFX10-NEXT: v_cmp_lt_u32_e64 s0, v5, v2
-; GFX10-NEXT: s_mov_b32 s1, 0
+; GFX10-NEXT: v_cmp_lt_u32_e32 vcc_lo, v5, v2
+; GFX10-NEXT: s_andn2_b32 s1, s1, exec_lo
; GFX10-NEXT: global_load_dword v8, v[6:7], off
+; GFX10-NEXT: s_mov_b32 s5, 0
; GFX10-NEXT: v_mov_b32_e32 v5, v9
+; GFX10-NEXT: s_and_b32 s6, exec_lo, vcc_lo
+; GFX10-NEXT: s_or_b32 s1, s1, s6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_add_nc_u32_e32 v8, 1, v8
; GFX10-NEXT: global_store_dword v[6:7], v8, off
; GFX10-NEXT: .LBB2_2: ; %Flow
; GFX10-NEXT: ; in Loop: Header=BB2_3 Depth=1
-; GFX10-NEXT: s_and_b32 s0, exec_lo, s0
-; GFX10-NEXT: s_or_b32 s4, s0, s4
-; GFX10-NEXT: s_and_b32 s0, 1, s1
-; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
+; GFX10-NEXT: s_and_b32 s5, 1, s5
+; GFX10-NEXT: s_and_b32 s6, exec_lo, s1
+; GFX10-NEXT: v_cmp_ne_u32_e64 s5, 0, s5
+; GFX10-NEXT: s_or_b32 s4, s6, s4
+; GFX10-NEXT: s_andn2_b32 s0, s0, exec_lo
+; GFX10-NEXT: s_and_b32 s5, exec_lo, s5
+; GFX10-NEXT: s_or_b32 s0, s0, s5
----------------
ruiling wrote:
Why does the change regress code generation here?
https://github.com/llvm/llvm-project/pull/73337
More information about the llvm-commits mailing list