[llvm] [AMDGPU] Fix kernarg preloading crash with some types and alignments (PR #91625)
Austin Kerbow via llvm-commits
llvm-commits at lists.llvm.org
Thu May 16 12:40:22 PDT 2024
================
@@ -2317,3 +1408,804 @@ define amdgpu_kernel void @f64_kernel_preload_arg(ptr addrspace(1) %out, double
store double %in, ptr addrspace(1) %out
ret void
}
+
+define amdgpu_kernel void @half_kernel_preload_arg(ptr addrspace(1) %out, half %in) {
+; GFX940-NO-PRELOAD-LABEL: half_kernel_preload_arg:
+; GFX940-NO-PRELOAD: ; %bb.0:
+; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
+; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s4
+; GFX940-NO-PRELOAD-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1
+; GFX940-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-2-LABEL: half_kernel_preload_arg:
+; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
+; GFX940-PRELOAD-2-NEXT: ; %bb.0:
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s4
+; GFX940-PRELOAD-2-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-8-LABEL: half_kernel_preload_arg:
+; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
+; GFX940-PRELOAD-8-NEXT: ; %bb.0:
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s4
+; GFX940-PRELOAD-8-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-8-NEXT: s_endpgm
+;
+; GFX90a-NO-PRELOAD-LABEL: half_kernel_preload_arg:
+; GFX90a-NO-PRELOAD: ; %bb.0:
+; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s2
+; GFX90a-NO-PRELOAD-NEXT: global_store_short v0, v1, s[0:1]
+; GFX90a-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-2-LABEL: half_kernel_preload_arg:
+; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
+; GFX90a-PRELOAD-2-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s8
+; GFX90a-PRELOAD-2-NEXT: global_store_short v0, v1, s[6:7]
+; GFX90a-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-8-LABEL: half_kernel_preload_arg:
+; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
+; GFX90a-PRELOAD-8-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s8
+; GFX90a-PRELOAD-8-NEXT: global_store_short v0, v1, s[6:7]
+; GFX90a-PRELOAD-8-NEXT: s_endpgm
+ store half %in, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @bfloat_kernel_preload_arg(ptr addrspace(1) %out, bfloat %in) {
----------------
kerbowa wrote:
There were already tests for vectors of half/i16, it was only raw half type when dword aligned that was crashing.
https://github.com/llvm/llvm-project/pull/91625
More information about the llvm-commits
mailing list