[llvm-branch-commits] [llvm] [AMDGPU] DPP wave reduction for long types - 2 (PR #189225)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Apr 14 02:21:17 PDT 2026
================
@@ -1009,6 +1009,943 @@ entry:
ret void
}
+define void @divergent_value_dpp_i64(ptr addrspace(1) %out, i64 %in) {
+; GFX8DAGISEL-LABEL: divergent_value_dpp_i64:
+; GFX8DAGISEL: ; %bb.0: ; %entry
+; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8DAGISEL-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8DAGISEL-NEXT: buffer_store_dword v4, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX8DAGISEL-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX8DAGISEL-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GFX8DAGISEL-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX8DAGISEL-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX8DAGISEL-NEXT: v_cndmask_b32_e64 v4, 0, v2, s[4:5]
+; GFX8DAGISEL-NEXT: v_cndmask_b32_e64 v5, 0, v3, s[4:5]
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v6, v4
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v7, v5
+; GFX8DAGISEL-NEXT: s_nop 0
+; GFX8DAGISEL-NEXT: v_mov_b32_dpp v6, v6 row_shr:1 row_mask:0xf bank_mask:0xf
+; GFX8DAGISEL-NEXT: v_mov_b32_dpp v7, v7 row_shr:1 row_mask:0xf bank_mask:0xf
+; GFX8DAGISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
+; GFX8DAGISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v6, v4
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v7, v5
+; GFX8DAGISEL-NEXT: s_nop 0
+; GFX8DAGISEL-NEXT: v_mov_b32_dpp v6, v6 row_shr:2 row_mask:0xf bank_mask:0xf
+; GFX8DAGISEL-NEXT: v_mov_b32_dpp v7, v7 row_shr:2 row_mask:0xf bank_mask:0xf
----------------
easyonaadit wrote:
I haven't fully tracked this down yet, but it is being done by the two-address instruction pass. I'll have to look into it further.
https://github.com/llvm/llvm-project/pull/189225
More information about the llvm-branch-commits mailing list