[llvm] [AMDGPU] Use directive for kernarg preload header padding (PR #86004)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 20 14:06:34 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Austin Kerbow (kerbowa)
<details>
<summary>Changes</summary>
---
Patch is 406.56 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/86004.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp (+10-12)
- (modified) llvm/test/CodeGen/AMDGPU/preload-kernarg-header.ll (+7-4)
- (modified) llvm/test/CodeGen/AMDGPU/preload-kernargs.ll (+309-8741)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 4742b0b3e52ecf..697b986bca4995 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -283,6 +283,16 @@ bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
return true;
}
+bool AMDGPUTargetAsmStreamer::EmitKernargPreloadHeader(
+ const MCSubtargetInfo &STI, bool TrapEnabled) {
+ const char *TrapInstr = TrapEnabled ? "\ts_trap 2" : "\ts_endpgm";
+ OS << TrapInstr
+ << " ; Kernarg preload header. Trap with incompatible firmware that "
+ "doesn't support preloading kernel arguments.\n";
+ OS << "\t.fill 63, 4, 0xbf800000 ; s_nop 0\n";
+ return true;
+}
+
bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
const uint32_t Encoded_s_code_end = 0xbf9f0000;
const uint32_t Encoded_s_nop = 0xbf800000;
@@ -781,18 +791,6 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc,
return true;
}
-bool AMDGPUTargetAsmStreamer::EmitKernargPreloadHeader(
- const MCSubtargetInfo &STI, bool TrapEnabled) {
- const char *TrapInstr = TrapEnabled ? "\ts_trap 2" : "\ts_endpgm";
- OS << TrapInstr
- << " ; Trap with incompatible firmware that doesn't "
- "support preloading kernel arguments.\n";
- for (int i = 0; i < 63; ++i) {
- OS << "\ts_nop 0\n";
- }
- return true;
-}
-
bool AMDGPUTargetELFStreamer::EmitKernargPreloadHeader(
const MCSubtargetInfo &STI, bool TrapEnabled) {
const uint32_t Encoded_s_nop = 0xbf800000;
diff --git a/llvm/test/CodeGen/AMDGPU/preload-kernarg-header.ll b/llvm/test/CodeGen/AMDGPU/preload-kernarg-header.ll
index a70488a00db739..a030f86da1b67d 100644
--- a/llvm/test/CodeGen/AMDGPU/preload-kernarg-header.ll
+++ b/llvm/test/CodeGen/AMDGPU/preload-kernarg-header.ll
@@ -1,17 +1,20 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,HSA %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx940 --disassemble - | FileCheck -check-prefixes=GCN,HSA %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx940 --disassemble - | FileCheck -check-prefixes=GCN,NON-HSA %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,HSA,ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx940 --disassemble - | FileCheck -check-prefixes=GCN,HSA,OBJ %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx940 --disassemble - | FileCheck -check-prefixes=GCN,NON-HSA,OBJ %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -asm-verbose=0 < %s | llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=obj | llvm-objdump --arch=amdgcn --mcpu=gfx940 --disassemble - | FileCheck -check-prefixes=GCN,HSA,OBJ %s
; GCN: preload_kernarg_header
; HSA: s_trap 2
; NON-HSA: s_endpgm
-; GCN-COUNT-63: s_nop 0
+; ASM: .fill 63, 4, 0xbf800000 ; s_nop 0
+; OBJ-COUNT-63: s_nop 0
define amdgpu_kernel void @preload_kernarg_header(ptr %arg) {
store ptr %arg, ptr %arg
ret void
}
; GCN: non_kernel_function
+; GCN-NOT: s_trap 2
; GCN-NOT: s_nop 0
; GCN: flat_store
define void @non_kernel_function(ptr %arg) {
diff --git a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
index d20c3a4007ffdd..f0e709b5a17279 100644
--- a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
+++ b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
@@ -24,70 +24,8 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) {
; GFX940-NO-PRELOAD-NEXT: s_endpgm
;
; GFX940-PRELOAD-1-LABEL: ptr1_i8:
-; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
-; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
; GFX940-PRELOAD-1-NEXT: ; %bb.0:
; GFX940-PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8
; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
@@ -98,70 +36,8 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) {
; GFX940-PRELOAD-1-NEXT: s_endpgm
;
; GFX940-PRELOAD-2-LABEL: ptr1_i8:
-; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
-; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
; GFX940-PRELOAD-2-NEXT: ; %bb.0:
; GFX940-PRELOAD-2-NEXT: s_and_b32 s0, s4, 0xff
; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
@@ -170,70 +46,8 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) {
; GFX940-PRELOAD-2-NEXT: s_endpgm
;
; GFX940-PRELOAD-4-LABEL: ptr1_i8:
-; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
-; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
; GFX940-PRELOAD-4-NEXT: ; %bb.0:
; GFX940-PRELOAD-4-NEXT: s_and_b32 s0, s4, 0xff
; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
@@ -242,70 +56,8 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) {
; GFX940-PRELOAD-4-NEXT: s_endpgm
;
; GFX940-PRELOAD-8-LABEL: ptr1_i8:
-; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
-; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
; GFX940-PRELOAD-8-NEXT: ; %bb.0:
; GFX940-PRELOAD-8-NEXT: s_and_b32 s0, s4, 0xff
; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
@@ -325,70 +77,8 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) {
; GFX90a-NO-PRELOAD-NEXT: s_endpgm
;
; GFX90a-PRELOAD-1-LABEL: ptr1_i8:
-; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
-; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
; GFX90a-PRELOAD-1-NEXT: ; %bb.0:
; GFX90a-PRELOAD-1-NEXT: s_load_dword s0, s[4:5], 0x8
; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
@@ -399,70 +89,8 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) {
; GFX90a-PRELOAD-1-NEXT: s_endpgm
;
; GFX90a-PRELOAD-2-LABEL: ptr1_i8:
-; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOAD-2-NEXT: s_nop 0
-; GFX90a-PRELOA...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/86004
More information about the llvm-commits mailing list