[llvm] [AMDGPU][NFC] Fix preload-kernargs.ll test after attributor move (PR #98840)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 14 15:00:27 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Austin Kerbow (kerbowa)
<details>
<summary>Changes</summary>
The earlier update was applied to a stale version of the test, which was missing functions and still contained extra RUN lines that had since been removed.
---
Patch is 200.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/98840.diff
1 Files Affected:
- (modified) llvm/test/CodeGen/AMDGPU/preload-kernargs.ll (+1721-1373)
``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
index a547c258e3921..f5c097f010eb8 100644
--- a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
+++ b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
@@ -1,18 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-NO-PRELOAD %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-PRELOAD-1 %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=2 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-PRELOAD-2 %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=4 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-PRELOAD-4 %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=8 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-PRELOAD-8 %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=amdgpu-attributor < %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -verify-machineinstrs | FileCheck -check-prefixes=GFX940-NO-PRELOAD %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=amdgpu-attributor -amdgpu-kernarg-preload-count=2 < %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -verify-machineinstrs | FileCheck -check-prefixes=GFX940-PRELOAD-2 %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=amdgpu-attributor -amdgpu-kernarg-preload-count=8 < %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -verify-machineinstrs | FileCheck -check-prefixes=GFX940-PRELOAD-8 %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-NO-PRELOAD %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-1 %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=2 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-2 %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=4 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-4 %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=8 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-8 %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor < %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -verify-machineinstrs | FileCheck -check-prefixes=GFX90a-NO-PRELOAD %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor -amdgpu-kernarg-preload-count=2 < %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -verify-machineinstrs | FileCheck -check-prefixes=GFX90a-PRELOAD-2 %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor -amdgpu-kernarg-preload-count=8 < %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -verify-machineinstrs | FileCheck -check-prefixes=GFX90a-PRELOAD-8 %s
-define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) #0 {
-; GFX940-NO-PRELOAD-LABEL: ptr1_i8:
+define amdgpu_kernel void @ptr1_i8_kernel_preload_arg(ptr addrspace(1) %out, i8 %arg0) {
+; GFX940-NO-PRELOAD-LABEL: ptr1_i8_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
@@ -23,51 +19,27 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) #0 {
; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
; GFX940-NO-PRELOAD-NEXT: s_endpgm
;
-; GFX940-PRELOAD-1-LABEL: ptr1_i8:
-; GFX940-PRELOAD-1: ; %bb.0:
-; GFX940-PRELOAD-1-NEXT: s_load_dword s4, s[0:1], 0x8
-; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
-; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-PRELOAD-1-NEXT: s_and_b32 s0, s4, 0xff
-; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
-; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; GFX940-PRELOAD-1-NEXT: s_endpgm
-;
-; GFX940-PRELOAD-2-LABEL: ptr1_i8:
-; GFX940-PRELOAD-2: ; %bb.0:
-; GFX940-PRELOAD-2-NEXT: s_load_dword s4, s[0:1], 0x8
-; GFX940-PRELOAD-2-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
-; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-2-LABEL: ptr1_i8_kernel_preload_arg:
+; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
+; GFX940-PRELOAD-2-NEXT: ; %bb.0:
; GFX940-PRELOAD-2-NEXT: s_and_b32 s0, s4, 0xff
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0
; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
; GFX940-PRELOAD-2-NEXT: s_endpgm
;
-; GFX940-PRELOAD-4-LABEL: ptr1_i8:
-; GFX940-PRELOAD-4: ; %bb.0:
-; GFX940-PRELOAD-4-NEXT: s_load_dword s4, s[0:1], 0x8
-; GFX940-PRELOAD-4-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
-; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-PRELOAD-4-NEXT: s_and_b32 s0, s4, 0xff
-; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0
-; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; GFX940-PRELOAD-4-NEXT: s_endpgm
-;
-; GFX940-PRELOAD-8-LABEL: ptr1_i8:
-; GFX940-PRELOAD-8: ; %bb.0:
-; GFX940-PRELOAD-8-NEXT: s_load_dword s4, s[0:1], 0x8
-; GFX940-PRELOAD-8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
-; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-8-LABEL: ptr1_i8_kernel_preload_arg:
+; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
+; GFX940-PRELOAD-8-NEXT: ; %bb.0:
; GFX940-PRELOAD-8-NEXT: s_and_b32 s0, s4, 0xff
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0
; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
; GFX940-PRELOAD-8-NEXT: s_endpgm
;
-; GFX90a-NO-PRELOAD-LABEL: ptr1_i8:
+; GFX90a-NO-PRELOAD-LABEL: ptr1_i8_kernel_preload_arg:
; GFX90a-NO-PRELOAD: ; %bb.0:
; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -78,56 +50,32 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) #0 {
; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[0:1]
; GFX90a-NO-PRELOAD-NEXT: s_endpgm
;
-; GFX90a-PRELOAD-1-LABEL: ptr1_i8:
-; GFX90a-PRELOAD-1: ; %bb.0:
-; GFX90a-PRELOAD-1-NEXT: s_load_dword s2, s[4:5], 0x8
-; GFX90a-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
-; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; GFX90a-PRELOAD-1-NEXT: s_and_b32 s2, s2, 0xff
-; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s2
-; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX90a-PRELOAD-1-NEXT: s_endpgm
-;
-; GFX90a-PRELOAD-2-LABEL: ptr1_i8:
-; GFX90a-PRELOAD-2: ; %bb.0:
-; GFX90a-PRELOAD-2-NEXT: s_load_dword s2, s[4:5], 0x8
-; GFX90a-PRELOAD-2-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX90a-PRELOAD-2-LABEL: ptr1_i8_kernel_preload_arg:
+; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
+; GFX90a-PRELOAD-2-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-2-NEXT: s_and_b32 s0, s8, 0xff
; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
-; GFX90a-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0)
-; GFX90a-PRELOAD-2-NEXT: s_and_b32 s2, s2, 0xff
-; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s2
-; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0
+; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7]
; GFX90a-PRELOAD-2-NEXT: s_endpgm
;
-; GFX90a-PRELOAD-4-LABEL: ptr1_i8:
-; GFX90a-PRELOAD-4: ; %bb.0:
-; GFX90a-PRELOAD-4-NEXT: s_load_dword s2, s[4:5], 0x8
-; GFX90a-PRELOAD-4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
-; GFX90a-PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0)
-; GFX90a-PRELOAD-4-NEXT: s_and_b32 s2, s2, 0xff
-; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s2
-; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX90a-PRELOAD-4-NEXT: s_endpgm
-;
-; GFX90a-PRELOAD-8-LABEL: ptr1_i8:
-; GFX90a-PRELOAD-8: ; %bb.0:
-; GFX90a-PRELOAD-8-NEXT: s_load_dword s2, s[4:5], 0x8
-; GFX90a-PRELOAD-8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX90a-PRELOAD-8-LABEL: ptr1_i8_kernel_preload_arg:
+; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
+; GFX90a-PRELOAD-8-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-8-NEXT: s_and_b32 s0, s8, 0xff
; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
-; GFX90a-PRELOAD-8-NEXT: s_waitcnt lgkmcnt(0)
-; GFX90a-PRELOAD-8-NEXT: s_and_b32 s2, s2, 0xff
-; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s2
-; GFX90a-PRELOAD-8-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0
+; GFX90a-PRELOAD-8-NEXT: global_store_dword v0, v1, s[6:7]
; GFX90a-PRELOAD-8-NEXT: s_endpgm
%ext = zext i8 %arg0 to i32
store i32 %ext, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @ptr1_i8_zext_arg(ptr addrspace(1) %out, i8 zeroext %arg0) #0 {
-; GFX940-NO-PRELOAD-LABEL: ptr1_i8_zext_arg:
+define amdgpu_kernel void @ptr1_i8_zext_kernel_preload_arg(ptr addrspace(1) %out, i8 zeroext %arg0) {
+; GFX940-NO-PRELOAD-LABEL: ptr1_i8_zext_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
@@ -138,51 +86,29 @@ define amdgpu_kernel void @ptr1_i8_zext_arg(ptr addrspace(1) %out, i8 zeroext %a
; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
; GFX940-NO-PRELOAD-NEXT: s_endpgm
;
-; GFX940-PRELOAD-1-LABEL: ptr1_i8_zext_arg:
-; GFX940-PRELOAD-1: ; %bb.0:
-; GFX940-PRELOAD-1-NEXT: s_load_dword s4, s[0:1], 0x8
-; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
-; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-PRELOAD-1-NEXT: s_and_b32 s0, s4, 0xff
-; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
-; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; GFX940-PRELOAD-1-NEXT: s_endpgm
-;
-; GFX940-PRELOAD-2-LABEL: ptr1_i8_zext_arg:
-; GFX940-PRELOAD-2: ; %bb.0:
-; GFX940-PRELOAD-2-NEXT: s_load_dword s4, s[0:1], 0x8
-; GFX940-PRELOAD-2-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
+; GFX940-PRELOAD-2-LABEL: ptr1_i8_zext_kernel_preload_arg:
+; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
+; GFX940-PRELOAD-2-NEXT: ; %bb.0:
+; GFX940-PRELOAD-2-NEXT: s_mov_b32 s0, 0xffff
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s4
; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-PRELOAD-2-NEXT: s_and_b32 s0, s4, 0xff
-; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-2-NEXT: v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
; GFX940-PRELOAD-2-NEXT: s_endpgm
;
-; GFX940-PRELOAD-4-LABEL: ptr1_i8_zext_arg:
-; GFX940-PRELOAD-4: ; %bb.0:
-; GFX940-PRELOAD-4-NEXT: s_load_dword s4, s[0:1], 0x8
-; GFX940-PRELOAD-4-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
-; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-PRELOAD-4-NEXT: s_and_b32 s0, s4, 0xff
-; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0
-; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; GFX940-PRELOAD-4-NEXT: s_endpgm
-;
-; GFX940-PRELOAD-8-LABEL: ptr1_i8_zext_arg:
-; GFX940-PRELOAD-8: ; %bb.0:
-; GFX940-PRELOAD-8-NEXT: s_load_dword s4, s[0:1], 0x8
-; GFX940-PRELOAD-8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
+; GFX940-PRELOAD-8-LABEL: ptr1_i8_zext_kernel_preload_arg:
+; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
+; GFX940-PRELOAD-8-NEXT: ; %bb.0:
+; GFX940-PRELOAD-8-NEXT: s_mov_b32 s0, 0xffff
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s4
; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-8-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-PRELOAD-8-NEXT: s_and_b32 s0, s4, 0xff
-; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-8-NEXT: v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
; GFX940-PRELOAD-8-NEXT: s_endpgm
;
-; GFX90a-NO-PRELOAD-LABEL: ptr1_i8_zext_arg:
+; GFX90a-NO-PRELOAD-LABEL: ptr1_i8_zext_kernel_preload_arg:
; GFX90a-NO-PRELOAD: ; %bb.0:
; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -193,56 +119,34 @@ define amdgpu_kernel void @ptr1_i8_zext_arg(ptr addrspace(1) %out, i8 zeroext %a
; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[0:1]
; GFX90a-NO-PRELOAD-NEXT: s_endpgm
;
-; GFX90a-PRELOAD-1-LABEL: ptr1_i8_zext_arg:
-; GFX90a-PRELOAD-1: ; %bb.0:
-; GFX90a-PRELOAD-1-NEXT: s_load_dword s2, s[4:5], 0x8
-; GFX90a-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
-; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; GFX90a-PRELOAD-1-NEXT: s_and_b32 s2, s2, 0xff
-; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s2
-; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX90a-PRELOAD-1-NEXT: s_endpgm
-;
-; GFX90a-PRELOAD-2-LABEL: ptr1_i8_zext_arg:
-; GFX90a-PRELOAD-2: ; %bb.0:
-; GFX90a-PRELOAD-2-NEXT: s_load_dword s2, s[4:5], 0x8
-; GFX90a-PRELOAD-2-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX90a-PRELOAD-2-LABEL: ptr1_i8_zext_kernel_preload_arg:
+; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
+; GFX90a-PRELOAD-2-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-2-NEXT: s_mov_b32 s0, 0xffff
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s8
; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
-; GFX90a-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0)
-; GFX90a-PRELOAD-2-NEXT: s_and_b32 s2, s2, 0xff
-; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s2
-; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX90a-PRELOAD-2-NEXT: v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7]
; GFX90a-PRELOAD-2-NEXT: s_endpgm
;
-; GFX90a-PRELOAD-4-LABEL: ptr1_i8_zext_arg:
-; GFX90a-PRELOAD-4: ; %bb.0:
-; GFX90a-PRELOAD-4-NEXT: s_load_dword s2, s[4:5], 0x8
-; GFX90a-PRELOAD-4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
-; GFX90a-PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0)
-; GFX90a-PRELOAD-4-NEXT: s_and_b32 s2, s2, 0xff
-; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s2
-; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX90a-PRELOAD-4-NEXT: s_endpgm
-;
-; GFX90a-PRELOAD-8-LABEL: ptr1_i8_zext_arg:
-; GFX90a-PRELOAD-8: ; %bb.0:
-; GFX90a-PRELOAD-8-NEXT: s_load_dword s2, s[4:5], 0x8
-; GFX90a-PRELOAD-8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX90a-PRELOAD-8-LABEL: ptr1_i8_zext_kernel_preload_arg:
+; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
+; GFX90a-PRELOAD-8-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-8-NEXT: s_mov_b32 s0, 0xffff
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s8
; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
-; GFX90a-PRELOAD-8-NEXT: s_waitcnt lgkmcnt(0)
-; GFX90a-PRELOAD-8-NEXT: s_and_b32 s2, s2, 0xff
-; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s2
-; GFX90a-PRELOAD-8-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX90a-PRELOAD-8-NEXT: v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX90a-PRELOAD-8-NEXT: global_store_dword v0, v1, s[6:7]
; GFX90a-PRELOAD-8-NEXT: s_endpgm
%ext = zext i8 %arg0 to i32
store i32 %ext, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @ptr1_i16_preload_arg(ptr addrspace(1) %out, i16 %arg0) #0 {
-; GFX940-NO-PRELOAD-LABEL: ptr1_i16_preload_arg:
+define amdgpu_kernel void @ptr1_i16_kernel_preload_arg(ptr addrspace(1) %out, i16 %arg0) {
+; GFX940-NO-PRELOAD-LABEL: ptr1_i16_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
@@ -253,51 +157,27 @@ define amdgpu_kernel void @ptr1_i16_preload_arg(ptr addrspace(1) %out, i16 %arg0
; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
; GFX940-NO-PRELOAD-NEXT: s_endpgm
;
-; GFX940-PRELOAD-1-LABEL: ptr1_i16_preload_arg:
-; GFX940-PRELOAD-1: ; %bb.0:
-; GFX940-PRELOAD-1-NEXT: s_load_dword s4, s[0:1], 0x8
-; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
-; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-PRELOAD-1-NEXT: s_and_b32 s0, s4, 0xffff
-; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
-; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; GFX940-PRELOAD-1-NEXT: s_endpgm
-;
-; GFX940-PRELOAD-2-LABEL: ptr1_i16_preload_arg:
-; GFX940-PRELOAD-2: ; %bb.0:
-; GFX940-PRELOAD-2-NEXT: s_load_dword s4, s[0:1], 0x8
-; GFX940-PRELOAD-2-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
-; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-2-LABEL: ptr1_i16_kernel_preload_arg:
+; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
+; GFX940-PRELOAD-2-NEXT: ; %bb.0:
; GFX940-PRELOAD-2-NEXT: s_and_b32 s0, s4, 0xffff
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0
; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
; GFX940-PRELOAD-2-NEXT: s_endpgm
;
-; GFX940-PRELOAD-4-LABEL: ptr1_i16_preload_arg:
-; GFX940-PRELOAD-4: ; %bb.0:
-; GFX940-PRELOAD-4-NEXT: s_load_dword s4, s[0:1], 0x8
-; GFX940-PRELOAD-4-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
-; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-PRELOAD-4-NEXT: s_and_b32 s0, s4, 0xffff
-; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0
-; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; GFX940-PRELOAD-4-NEXT: s_endpgm
-;
-; GFX940-PRELOAD-8-LABEL: ptr1_i16_preload_arg:
-; GFX940-PRELOAD-8: ; %bb.0:
-; GFX940-PRELOAD-8-NEXT: s_load_dword s4, s[0:1], 0x8
-; GFX940-PRELOAD-8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
-; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-8-L...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/98840
More information about the llvm-commits
mailing list