[llvm] [AMDGPU][NFC] Fix preload-kernargs.ll test after attributor move (PR #98840)

via llvm-commits llvm-commits at lists.llvm.org
Sun Jul 14 15:00:27 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Austin Kerbow (kerbowa)

<details>
<summary>Changes</summary>

The previous update was applied to a stale version of the test, which was missing functions and still contained extra RUN lines that had since been removed.

---

Patch is 200.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/98840.diff


1 Files Affected:

- (modified) llvm/test/CodeGen/AMDGPU/preload-kernargs.ll (+1721-1373) 


``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
index a547c258e3921..f5c097f010eb8 100644
--- a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
+++ b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
@@ -1,18 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-NO-PRELOAD %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-PRELOAD-1 %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=2 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-PRELOAD-2 %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=4 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-PRELOAD-4 %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=8 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-PRELOAD-8 %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=amdgpu-attributor < %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -verify-machineinstrs | FileCheck -check-prefixes=GFX940-NO-PRELOAD %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=amdgpu-attributor -amdgpu-kernarg-preload-count=2 < %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -verify-machineinstrs | FileCheck -check-prefixes=GFX940-PRELOAD-2 %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=amdgpu-attributor -amdgpu-kernarg-preload-count=8 < %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -verify-machineinstrs | FileCheck -check-prefixes=GFX940-PRELOAD-8 %s
 
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-NO-PRELOAD %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-1 %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=2 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-2 %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=4 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-4 %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=8 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-8 %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor < %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -verify-machineinstrs | FileCheck -check-prefixes=GFX90a-NO-PRELOAD %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor -amdgpu-kernarg-preload-count=2 < %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -verify-machineinstrs | FileCheck -check-prefixes=GFX90a-PRELOAD-2 %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor -amdgpu-kernarg-preload-count=8 < %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -verify-machineinstrs | FileCheck -check-prefixes=GFX90a-PRELOAD-8 %s
 
-define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) #0 {
-; GFX940-NO-PRELOAD-LABEL: ptr1_i8:
+define amdgpu_kernel void @ptr1_i8_kernel_preload_arg(ptr addrspace(1) %out, i8 %arg0) {
+; GFX940-NO-PRELOAD-LABEL: ptr1_i8_kernel_preload_arg:
 ; GFX940-NO-PRELOAD:       ; %bb.0:
 ; GFX940-NO-PRELOAD-NEXT:    s_load_dword s4, s[0:1], 0x8
 ; GFX940-NO-PRELOAD-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
@@ -23,51 +19,27 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) #0 {
 ; GFX940-NO-PRELOAD-NEXT:    global_store_dword v0, v1, s[2:3] sc0 sc1
 ; GFX940-NO-PRELOAD-NEXT:    s_endpgm
 ;
-; GFX940-PRELOAD-1-LABEL: ptr1_i8:
-; GFX940-PRELOAD-1:       ; %bb.0:
-; GFX940-PRELOAD-1-NEXT:    s_load_dword s4, s[0:1], 0x8
-; GFX940-PRELOAD-1-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
-; GFX940-PRELOAD-1-NEXT:    v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-1-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-PRELOAD-1-NEXT:    s_and_b32 s0, s4, 0xff
-; GFX940-PRELOAD-1-NEXT:    v_mov_b32_e32 v1, s0
-; GFX940-PRELOAD-1-NEXT:    global_store_dword v0, v1, s[2:3] sc0 sc1
-; GFX940-PRELOAD-1-NEXT:    s_endpgm
-;
-; GFX940-PRELOAD-2-LABEL: ptr1_i8:
-; GFX940-PRELOAD-2:       ; %bb.0:
-; GFX940-PRELOAD-2-NEXT:    s_load_dword s4, s[0:1], 0x8
-; GFX940-PRELOAD-2-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
-; GFX940-PRELOAD-2-NEXT:    v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-2-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-2-LABEL: ptr1_i8_kernel_preload_arg:
+; GFX940-PRELOAD-2:         s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT:    .fill 63, 4, 0xbf800000 ; s_nop 0
+; GFX940-PRELOAD-2-NEXT:  ; %bb.0:
 ; GFX940-PRELOAD-2-NEXT:    s_and_b32 s0, s4, 0xff
+; GFX940-PRELOAD-2-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX940-PRELOAD-2-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX940-PRELOAD-2-NEXT:    global_store_dword v0, v1, s[2:3] sc0 sc1
 ; GFX940-PRELOAD-2-NEXT:    s_endpgm
 ;
-; GFX940-PRELOAD-4-LABEL: ptr1_i8:
-; GFX940-PRELOAD-4:       ; %bb.0:
-; GFX940-PRELOAD-4-NEXT:    s_load_dword s4, s[0:1], 0x8
-; GFX940-PRELOAD-4-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
-; GFX940-PRELOAD-4-NEXT:    v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-4-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-PRELOAD-4-NEXT:    s_and_b32 s0, s4, 0xff
-; GFX940-PRELOAD-4-NEXT:    v_mov_b32_e32 v1, s0
-; GFX940-PRELOAD-4-NEXT:    global_store_dword v0, v1, s[2:3] sc0 sc1
-; GFX940-PRELOAD-4-NEXT:    s_endpgm
-;
-; GFX940-PRELOAD-8-LABEL: ptr1_i8:
-; GFX940-PRELOAD-8:       ; %bb.0:
-; GFX940-PRELOAD-8-NEXT:    s_load_dword s4, s[0:1], 0x8
-; GFX940-PRELOAD-8-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
-; GFX940-PRELOAD-8-NEXT:    v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-8-LABEL: ptr1_i8_kernel_preload_arg:
+; GFX940-PRELOAD-8:         s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-8-NEXT:    .fill 63, 4, 0xbf800000 ; s_nop 0
+; GFX940-PRELOAD-8-NEXT:  ; %bb.0:
 ; GFX940-PRELOAD-8-NEXT:    s_and_b32 s0, s4, 0xff
+; GFX940-PRELOAD-8-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX940-PRELOAD-8-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX940-PRELOAD-8-NEXT:    global_store_dword v0, v1, s[2:3] sc0 sc1
 ; GFX940-PRELOAD-8-NEXT:    s_endpgm
 ;
-; GFX90a-NO-PRELOAD-LABEL: ptr1_i8:
+; GFX90a-NO-PRELOAD-LABEL: ptr1_i8_kernel_preload_arg:
 ; GFX90a-NO-PRELOAD:       ; %bb.0:
 ; GFX90a-NO-PRELOAD-NEXT:    s_load_dword s2, s[4:5], 0x8
 ; GFX90a-NO-PRELOAD-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -78,56 +50,32 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) #0 {
 ; GFX90a-NO-PRELOAD-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX90a-NO-PRELOAD-NEXT:    s_endpgm
 ;
-; GFX90a-PRELOAD-1-LABEL: ptr1_i8:
-; GFX90a-PRELOAD-1:       ; %bb.0:
-; GFX90a-PRELOAD-1-NEXT:    s_load_dword s2, s[4:5], 0x8
-; GFX90a-PRELOAD-1-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX90a-PRELOAD-1-NEXT:    v_mov_b32_e32 v0, 0
-; GFX90a-PRELOAD-1-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX90a-PRELOAD-1-NEXT:    s_and_b32 s2, s2, 0xff
-; GFX90a-PRELOAD-1-NEXT:    v_mov_b32_e32 v1, s2
-; GFX90a-PRELOAD-1-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX90a-PRELOAD-1-NEXT:    s_endpgm
-;
-; GFX90a-PRELOAD-2-LABEL: ptr1_i8:
-; GFX90a-PRELOAD-2:       ; %bb.0:
-; GFX90a-PRELOAD-2-NEXT:    s_load_dword s2, s[4:5], 0x8
-; GFX90a-PRELOAD-2-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX90a-PRELOAD-2-LABEL: ptr1_i8_kernel_preload_arg:
+; GFX90a-PRELOAD-2:         s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-2-NEXT:    .fill 63, 4, 0xbf800000 ; s_nop 0
+; GFX90a-PRELOAD-2-NEXT:  ; %bb.0:
+; GFX90a-PRELOAD-2-NEXT:    s_and_b32 s0, s8, 0xff
 ; GFX90a-PRELOAD-2-NEXT:    v_mov_b32_e32 v0, 0
-; GFX90a-PRELOAD-2-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX90a-PRELOAD-2-NEXT:    s_and_b32 s2, s2, 0xff
-; GFX90a-PRELOAD-2-NEXT:    v_mov_b32_e32 v1, s2
-; GFX90a-PRELOAD-2-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX90a-PRELOAD-2-NEXT:    v_mov_b32_e32 v1, s0
+; GFX90a-PRELOAD-2-NEXT:    global_store_dword v0, v1, s[6:7]
 ; GFX90a-PRELOAD-2-NEXT:    s_endpgm
 ;
-; GFX90a-PRELOAD-4-LABEL: ptr1_i8:
-; GFX90a-PRELOAD-4:       ; %bb.0:
-; GFX90a-PRELOAD-4-NEXT:    s_load_dword s2, s[4:5], 0x8
-; GFX90a-PRELOAD-4-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX90a-PRELOAD-4-NEXT:    v_mov_b32_e32 v0, 0
-; GFX90a-PRELOAD-4-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX90a-PRELOAD-4-NEXT:    s_and_b32 s2, s2, 0xff
-; GFX90a-PRELOAD-4-NEXT:    v_mov_b32_e32 v1, s2
-; GFX90a-PRELOAD-4-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX90a-PRELOAD-4-NEXT:    s_endpgm
-;
-; GFX90a-PRELOAD-8-LABEL: ptr1_i8:
-; GFX90a-PRELOAD-8:       ; %bb.0:
-; GFX90a-PRELOAD-8-NEXT:    s_load_dword s2, s[4:5], 0x8
-; GFX90a-PRELOAD-8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX90a-PRELOAD-8-LABEL: ptr1_i8_kernel_preload_arg:
+; GFX90a-PRELOAD-8:         s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-8-NEXT:    .fill 63, 4, 0xbf800000 ; s_nop 0
+; GFX90a-PRELOAD-8-NEXT:  ; %bb.0:
+; GFX90a-PRELOAD-8-NEXT:    s_and_b32 s0, s8, 0xff
 ; GFX90a-PRELOAD-8-NEXT:    v_mov_b32_e32 v0, 0
-; GFX90a-PRELOAD-8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX90a-PRELOAD-8-NEXT:    s_and_b32 s2, s2, 0xff
-; GFX90a-PRELOAD-8-NEXT:    v_mov_b32_e32 v1, s2
-; GFX90a-PRELOAD-8-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX90a-PRELOAD-8-NEXT:    v_mov_b32_e32 v1, s0
+; GFX90a-PRELOAD-8-NEXT:    global_store_dword v0, v1, s[6:7]
 ; GFX90a-PRELOAD-8-NEXT:    s_endpgm
   %ext = zext i8 %arg0 to i32
   store i32 %ext, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @ptr1_i8_zext_arg(ptr addrspace(1) %out, i8 zeroext %arg0) #0 {
-; GFX940-NO-PRELOAD-LABEL: ptr1_i8_zext_arg:
+define amdgpu_kernel void @ptr1_i8_zext_kernel_preload_arg(ptr addrspace(1) %out, i8 zeroext %arg0) {
+; GFX940-NO-PRELOAD-LABEL: ptr1_i8_zext_kernel_preload_arg:
 ; GFX940-NO-PRELOAD:       ; %bb.0:
 ; GFX940-NO-PRELOAD-NEXT:    s_load_dword s4, s[0:1], 0x8
 ; GFX940-NO-PRELOAD-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
@@ -138,51 +86,29 @@ define amdgpu_kernel void @ptr1_i8_zext_arg(ptr addrspace(1) %out, i8 zeroext %a
 ; GFX940-NO-PRELOAD-NEXT:    global_store_dword v0, v1, s[2:3] sc0 sc1
 ; GFX940-NO-PRELOAD-NEXT:    s_endpgm
 ;
-; GFX940-PRELOAD-1-LABEL: ptr1_i8_zext_arg:
-; GFX940-PRELOAD-1:       ; %bb.0:
-; GFX940-PRELOAD-1-NEXT:    s_load_dword s4, s[0:1], 0x8
-; GFX940-PRELOAD-1-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
-; GFX940-PRELOAD-1-NEXT:    v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-1-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-PRELOAD-1-NEXT:    s_and_b32 s0, s4, 0xff
-; GFX940-PRELOAD-1-NEXT:    v_mov_b32_e32 v1, s0
-; GFX940-PRELOAD-1-NEXT:    global_store_dword v0, v1, s[2:3] sc0 sc1
-; GFX940-PRELOAD-1-NEXT:    s_endpgm
-;
-; GFX940-PRELOAD-2-LABEL: ptr1_i8_zext_arg:
-; GFX940-PRELOAD-2:       ; %bb.0:
-; GFX940-PRELOAD-2-NEXT:    s_load_dword s4, s[0:1], 0x8
-; GFX940-PRELOAD-2-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; GFX940-PRELOAD-2-LABEL: ptr1_i8_zext_kernel_preload_arg:
+; GFX940-PRELOAD-2:         s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT:    .fill 63, 4, 0xbf800000 ; s_nop 0
+; GFX940-PRELOAD-2-NEXT:  ; %bb.0:
+; GFX940-PRELOAD-2-NEXT:    s_mov_b32 s0, 0xffff
+; GFX940-PRELOAD-2-NEXT:    v_mov_b32_e32 v1, s4
 ; GFX940-PRELOAD-2-NEXT:    v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-2-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-PRELOAD-2-NEXT:    s_and_b32 s0, s4, 0xff
-; GFX940-PRELOAD-2-NEXT:    v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-2-NEXT:    v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
 ; GFX940-PRELOAD-2-NEXT:    global_store_dword v0, v1, s[2:3] sc0 sc1
 ; GFX940-PRELOAD-2-NEXT:    s_endpgm
 ;
-; GFX940-PRELOAD-4-LABEL: ptr1_i8_zext_arg:
-; GFX940-PRELOAD-4:       ; %bb.0:
-; GFX940-PRELOAD-4-NEXT:    s_load_dword s4, s[0:1], 0x8
-; GFX940-PRELOAD-4-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
-; GFX940-PRELOAD-4-NEXT:    v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-4-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-PRELOAD-4-NEXT:    s_and_b32 s0, s4, 0xff
-; GFX940-PRELOAD-4-NEXT:    v_mov_b32_e32 v1, s0
-; GFX940-PRELOAD-4-NEXT:    global_store_dword v0, v1, s[2:3] sc0 sc1
-; GFX940-PRELOAD-4-NEXT:    s_endpgm
-;
-; GFX940-PRELOAD-8-LABEL: ptr1_i8_zext_arg:
-; GFX940-PRELOAD-8:       ; %bb.0:
-; GFX940-PRELOAD-8-NEXT:    s_load_dword s4, s[0:1], 0x8
-; GFX940-PRELOAD-8-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; GFX940-PRELOAD-8-LABEL: ptr1_i8_zext_kernel_preload_arg:
+; GFX940-PRELOAD-8:         s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-8-NEXT:    .fill 63, 4, 0xbf800000 ; s_nop 0
+; GFX940-PRELOAD-8-NEXT:  ; %bb.0:
+; GFX940-PRELOAD-8-NEXT:    s_mov_b32 s0, 0xffff
+; GFX940-PRELOAD-8-NEXT:    v_mov_b32_e32 v1, s4
 ; GFX940-PRELOAD-8-NEXT:    v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-PRELOAD-8-NEXT:    s_and_b32 s0, s4, 0xff
-; GFX940-PRELOAD-8-NEXT:    v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-8-NEXT:    v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
 ; GFX940-PRELOAD-8-NEXT:    global_store_dword v0, v1, s[2:3] sc0 sc1
 ; GFX940-PRELOAD-8-NEXT:    s_endpgm
 ;
-; GFX90a-NO-PRELOAD-LABEL: ptr1_i8_zext_arg:
+; GFX90a-NO-PRELOAD-LABEL: ptr1_i8_zext_kernel_preload_arg:
 ; GFX90a-NO-PRELOAD:       ; %bb.0:
 ; GFX90a-NO-PRELOAD-NEXT:    s_load_dword s2, s[4:5], 0x8
 ; GFX90a-NO-PRELOAD-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -193,56 +119,34 @@ define amdgpu_kernel void @ptr1_i8_zext_arg(ptr addrspace(1) %out, i8 zeroext %a
 ; GFX90a-NO-PRELOAD-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX90a-NO-PRELOAD-NEXT:    s_endpgm
 ;
-; GFX90a-PRELOAD-1-LABEL: ptr1_i8_zext_arg:
-; GFX90a-PRELOAD-1:       ; %bb.0:
-; GFX90a-PRELOAD-1-NEXT:    s_load_dword s2, s[4:5], 0x8
-; GFX90a-PRELOAD-1-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX90a-PRELOAD-1-NEXT:    v_mov_b32_e32 v0, 0
-; GFX90a-PRELOAD-1-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX90a-PRELOAD-1-NEXT:    s_and_b32 s2, s2, 0xff
-; GFX90a-PRELOAD-1-NEXT:    v_mov_b32_e32 v1, s2
-; GFX90a-PRELOAD-1-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX90a-PRELOAD-1-NEXT:    s_endpgm
-;
-; GFX90a-PRELOAD-2-LABEL: ptr1_i8_zext_arg:
-; GFX90a-PRELOAD-2:       ; %bb.0:
-; GFX90a-PRELOAD-2-NEXT:    s_load_dword s2, s[4:5], 0x8
-; GFX90a-PRELOAD-2-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX90a-PRELOAD-2-LABEL: ptr1_i8_zext_kernel_preload_arg:
+; GFX90a-PRELOAD-2:         s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-2-NEXT:    .fill 63, 4, 0xbf800000 ; s_nop 0
+; GFX90a-PRELOAD-2-NEXT:  ; %bb.0:
+; GFX90a-PRELOAD-2-NEXT:    s_mov_b32 s0, 0xffff
+; GFX90a-PRELOAD-2-NEXT:    v_mov_b32_e32 v1, s8
 ; GFX90a-PRELOAD-2-NEXT:    v_mov_b32_e32 v0, 0
-; GFX90a-PRELOAD-2-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX90a-PRELOAD-2-NEXT:    s_and_b32 s2, s2, 0xff
-; GFX90a-PRELOAD-2-NEXT:    v_mov_b32_e32 v1, s2
-; GFX90a-PRELOAD-2-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX90a-PRELOAD-2-NEXT:    v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX90a-PRELOAD-2-NEXT:    global_store_dword v0, v1, s[6:7]
 ; GFX90a-PRELOAD-2-NEXT:    s_endpgm
 ;
-; GFX90a-PRELOAD-4-LABEL: ptr1_i8_zext_arg:
-; GFX90a-PRELOAD-4:       ; %bb.0:
-; GFX90a-PRELOAD-4-NEXT:    s_load_dword s2, s[4:5], 0x8
-; GFX90a-PRELOAD-4-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX90a-PRELOAD-4-NEXT:    v_mov_b32_e32 v0, 0
-; GFX90a-PRELOAD-4-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX90a-PRELOAD-4-NEXT:    s_and_b32 s2, s2, 0xff
-; GFX90a-PRELOAD-4-NEXT:    v_mov_b32_e32 v1, s2
-; GFX90a-PRELOAD-4-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX90a-PRELOAD-4-NEXT:    s_endpgm
-;
-; GFX90a-PRELOAD-8-LABEL: ptr1_i8_zext_arg:
-; GFX90a-PRELOAD-8:       ; %bb.0:
-; GFX90a-PRELOAD-8-NEXT:    s_load_dword s2, s[4:5], 0x8
-; GFX90a-PRELOAD-8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX90a-PRELOAD-8-LABEL: ptr1_i8_zext_kernel_preload_arg:
+; GFX90a-PRELOAD-8:         s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-8-NEXT:    .fill 63, 4, 0xbf800000 ; s_nop 0
+; GFX90a-PRELOAD-8-NEXT:  ; %bb.0:
+; GFX90a-PRELOAD-8-NEXT:    s_mov_b32 s0, 0xffff
+; GFX90a-PRELOAD-8-NEXT:    v_mov_b32_e32 v1, s8
 ; GFX90a-PRELOAD-8-NEXT:    v_mov_b32_e32 v0, 0
-; GFX90a-PRELOAD-8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX90a-PRELOAD-8-NEXT:    s_and_b32 s2, s2, 0xff
-; GFX90a-PRELOAD-8-NEXT:    v_mov_b32_e32 v1, s2
-; GFX90a-PRELOAD-8-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX90a-PRELOAD-8-NEXT:    v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX90a-PRELOAD-8-NEXT:    global_store_dword v0, v1, s[6:7]
 ; GFX90a-PRELOAD-8-NEXT:    s_endpgm
   %ext = zext i8 %arg0 to i32
   store i32 %ext, ptr addrspace(1) %out, align 4
   ret void
 }
 
-define amdgpu_kernel void @ptr1_i16_preload_arg(ptr addrspace(1) %out, i16 %arg0) #0 {
-; GFX940-NO-PRELOAD-LABEL: ptr1_i16_preload_arg:
+define amdgpu_kernel void @ptr1_i16_kernel_preload_arg(ptr addrspace(1) %out, i16 %arg0) {
+; GFX940-NO-PRELOAD-LABEL: ptr1_i16_kernel_preload_arg:
 ; GFX940-NO-PRELOAD:       ; %bb.0:
 ; GFX940-NO-PRELOAD-NEXT:    s_load_dword s4, s[0:1], 0x8
 ; GFX940-NO-PRELOAD-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
@@ -253,51 +157,27 @@ define amdgpu_kernel void @ptr1_i16_preload_arg(ptr addrspace(1) %out, i16 %arg0
 ; GFX940-NO-PRELOAD-NEXT:    global_store_dword v0, v1, s[2:3] sc0 sc1
 ; GFX940-NO-PRELOAD-NEXT:    s_endpgm
 ;
-; GFX940-PRELOAD-1-LABEL: ptr1_i16_preload_arg:
-; GFX940-PRELOAD-1:       ; %bb.0:
-; GFX940-PRELOAD-1-NEXT:    s_load_dword s4, s[0:1], 0x8
-; GFX940-PRELOAD-1-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
-; GFX940-PRELOAD-1-NEXT:    v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-1-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-PRELOAD-1-NEXT:    s_and_b32 s0, s4, 0xffff
-; GFX940-PRELOAD-1-NEXT:    v_mov_b32_e32 v1, s0
-; GFX940-PRELOAD-1-NEXT:    global_store_dword v0, v1, s[2:3] sc0 sc1
-; GFX940-PRELOAD-1-NEXT:    s_endpgm
-;
-; GFX940-PRELOAD-2-LABEL: ptr1_i16_preload_arg:
-; GFX940-PRELOAD-2:       ; %bb.0:
-; GFX940-PRELOAD-2-NEXT:    s_load_dword s4, s[0:1], 0x8
-; GFX940-PRELOAD-2-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
-; GFX940-PRELOAD-2-NEXT:    v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-2-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-2-LABEL: ptr1_i16_kernel_preload_arg:
+; GFX940-PRELOAD-2:         s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT:    .fill 63, 4, 0xbf800000 ; s_nop 0
+; GFX940-PRELOAD-2-NEXT:  ; %bb.0:
 ; GFX940-PRELOAD-2-NEXT:    s_and_b32 s0, s4, 0xffff
+; GFX940-PRELOAD-2-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX940-PRELOAD-2-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX940-PRELOAD-2-NEXT:    global_store_dword v0, v1, s[2:3] sc0 sc1
 ; GFX940-PRELOAD-2-NEXT:    s_endpgm
 ;
-; GFX940-PRELOAD-4-LABEL: ptr1_i16_preload_arg:
-; GFX940-PRELOAD-4:       ; %bb.0:
-; GFX940-PRELOAD-4-NEXT:    s_load_dword s4, s[0:1], 0x8
-; GFX940-PRELOAD-4-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
-; GFX940-PRELOAD-4-NEXT:    v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-4-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-PRELOAD-4-NEXT:    s_and_b32 s0, s4, 0xffff
-; GFX940-PRELOAD-4-NEXT:    v_mov_b32_e32 v1, s0
-; GFX940-PRELOAD-4-NEXT:    global_store_dword v0, v1, s[2:3] sc0 sc1
-; GFX940-PRELOAD-4-NEXT:    s_endpgm
-;
-; GFX940-PRELOAD-8-LABEL: ptr1_i16_preload_arg:
-; GFX940-PRELOAD-8:       ; %bb.0:
-; GFX940-PRELOAD-8-NEXT:    s_load_dword s4, s[0:1], 0x8
-; GFX940-PRELOAD-8-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
-; GFX940-PRELOAD-8-NEXT:    v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-8-L...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/98840


More information about the llvm-commits mailing list