[llvm] [AMDGPU] Fix kernarg preloading crash with some types and alignments (PR #91625)

Austin Kerbow via llvm-commits llvm-commits at lists.llvm.org
Thu May 16 12:40:22 PDT 2024


================
@@ -2317,3 +1408,804 @@ define amdgpu_kernel void @f64_kernel_preload_arg(ptr addrspace(1) %out, double
   store double %in, ptr addrspace(1) %out
   ret void
 }
+
+define amdgpu_kernel void @half_kernel_preload_arg(ptr addrspace(1) %out, half %in) {
+; GFX940-NO-PRELOAD-LABEL: half_kernel_preload_arg:
+; GFX940-NO-PRELOAD:       ; %bb.0:
+; GFX940-NO-PRELOAD-NEXT:    s_load_dword s4, s[0:1], 0x8
+; GFX940-NO-PRELOAD-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; GFX940-NO-PRELOAD-NEXT:    v_mov_b32_e32 v0, 0
+; GFX940-NO-PRELOAD-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-NO-PRELOAD-NEXT:    v_mov_b32_e32 v1, s4
+; GFX940-NO-PRELOAD-NEXT:    global_store_short v0, v1, s[2:3] sc0 sc1
+; GFX940-NO-PRELOAD-NEXT:    s_endpgm
+;
+; GFX940-PRELOAD-2-LABEL: half_kernel_preload_arg:
+; GFX940-PRELOAD-2:         s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT:    .fill 63, 4, 0xbf800000 ; s_nop 0
+; GFX940-PRELOAD-2-NEXT:  ; %bb.0:
+; GFX940-PRELOAD-2-NEXT:    v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-2-NEXT:    v_mov_b32_e32 v1, s4
+; GFX940-PRELOAD-2-NEXT:    global_store_short v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-2-NEXT:    s_endpgm
+;
+; GFX940-PRELOAD-8-LABEL: half_kernel_preload_arg:
+; GFX940-PRELOAD-8:         s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-8-NEXT:    .fill 63, 4, 0xbf800000 ; s_nop 0
+; GFX940-PRELOAD-8-NEXT:  ; %bb.0:
+; GFX940-PRELOAD-8-NEXT:    v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-8-NEXT:    v_mov_b32_e32 v1, s4
+; GFX940-PRELOAD-8-NEXT:    global_store_short v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-8-NEXT:    s_endpgm
+;
+; GFX90a-NO-PRELOAD-LABEL: half_kernel_preload_arg:
+; GFX90a-NO-PRELOAD:       ; %bb.0:
+; GFX90a-NO-PRELOAD-NEXT:    s_load_dword s2, s[4:5], 0x8
+; GFX90a-NO-PRELOAD-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX90a-NO-PRELOAD-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90a-NO-PRELOAD-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90a-NO-PRELOAD-NEXT:    v_mov_b32_e32 v1, s2
+; GFX90a-NO-PRELOAD-NEXT:    global_store_short v0, v1, s[0:1]
+; GFX90a-NO-PRELOAD-NEXT:    s_endpgm
+;
+; GFX90a-PRELOAD-2-LABEL: half_kernel_preload_arg:
+; GFX90a-PRELOAD-2:         s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-2-NEXT:    .fill 63, 4, 0xbf800000 ; s_nop 0
+; GFX90a-PRELOAD-2-NEXT:  ; %bb.0:
+; GFX90a-PRELOAD-2-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-2-NEXT:    v_mov_b32_e32 v1, s8
+; GFX90a-PRELOAD-2-NEXT:    global_store_short v0, v1, s[6:7]
+; GFX90a-PRELOAD-2-NEXT:    s_endpgm
+;
+; GFX90a-PRELOAD-8-LABEL: half_kernel_preload_arg:
+; GFX90a-PRELOAD-8:         s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-8-NEXT:    .fill 63, 4, 0xbf800000 ; s_nop 0
+; GFX90a-PRELOAD-8-NEXT:  ; %bb.0:
+; GFX90a-PRELOAD-8-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-8-NEXT:    v_mov_b32_e32 v1, s8
+; GFX90a-PRELOAD-8-NEXT:    global_store_short v0, v1, s[6:7]
+; GFX90a-PRELOAD-8-NEXT:    s_endpgm
+  store half %in, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @bfloat_kernel_preload_arg(ptr addrspace(1) %out, bfloat %in) {
----------------
kerbowa wrote:

There were already tests for vectors of half/i16, it was only raw half type when dword aligned that was crashing.

https://github.com/llvm/llvm-project/pull/91625


More information about the llvm-commits mailing list