[llvm] [clang] [clang-tools-extra] [AMDGPU][GFX12] Add 16 bit atomic fadd instructions (PR #75917)
Matt Arsenault via cfe-commits
cfe-commits at lists.llvm.org
Mon Jan 15 08:13:15 PST 2024
================
@@ -27,34 +27,23 @@ main_body:
ret float %out0
}
-define amdgpu_ps float @atomic_pk_add_bf16_1d_v2(<8 x i32> inreg %rsrc, <2 x i16> %data, i32 %s) {
+define amdgpu_ps float @atomic_pk_add_bf16_1d_v2(<8 x i32> inreg %rsrc, <2 x bfloat> %data, i32 %s) {
; GFX12-LABEL: atomic_pk_add_bf16_1d_v2:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_atomic_pk_add_bf16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
+; GFX12-NEXT: v_mov_b32_e32 v1, 0
+; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: s_waitcnt vmcnt(0)
+; GFX12-NEXT: flat_store_b32 v[1:2], v0
+; GFX12-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: ; return to shader part epilog
main_body:
- %out = call <2 x i16> @llvm.amdgcn.image.atomic.pk.add.bf16.1d.v2i16.v2i16(<2 x i16> %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
- %out_i32 = bitcast <2 x i16> %out to i32
- %out_float = bitcast i32 %out_i32 to float
- ret float %out_float
-}
-
-define amdgpu_ps float @atomic_pk_add_bf16_1d_v4(<8 x i32> inreg %rsrc, <4 x i16> %data, i32 %s) {
-; GFX12-LABEL: atomic_pk_add_bf16_1d_v4:
-; GFX12: ; %bb.0: ; %main_body
-; GFX12-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
-; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: ; return to shader part epilog
-main_body:
- %out = call <4 x i16> @llvm.amdgcn.image.atomic.pk.add.bf16.1d.v4i16.v4i16(<4 x i16> %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
----------------
arsenm wrote:
Is it #77448?
https://github.com/llvm/llvm-project/pull/75917
More information about the cfe-commits mailing list