[llvm] [AMDGPU] Fix kernarg preloading crash with some types and alignments (PR #91625)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 9 09:58:42 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Austin Kerbow (kerbowa)
<details>
<summary>Changes</summary>
Lowering of preloaded arguments would fail with half/bfloat if they were dword-aligned in the kernarg segment and not part of a vector. Added more tests with different alignments and types.
---
Patch is 149.41 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/91625.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+14-6)
- (modified) llvm/test/CodeGen/AMDGPU/preload-kernargs.ll (+906-1014)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 33bdd6195a040..07874f2109472 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2981,12 +2981,20 @@ SDValue SITargetLowering::LowerFormalArguments(
DL, Elts);
}
- SDValue CMemVT;
- if (VT.isScalarInteger() && VT.bitsLT(NewArg.getSimpleValueType()))
- CMemVT = DAG.getNode(ISD::TRUNCATE, DL, MemVT, NewArg);
- else
- CMemVT = DAG.getBitcast(MemVT, NewArg);
- NewArg = convertArgType(DAG, VT, MemVT, DL, CMemVT,
+ // If the argument was preloaded to multiple consecutive 32-bit
+ // registers because of misalignment between addressable SGPR tuples
+ // and the argument size, we can still assume that because of kernarg
+ // segment alignment restrictions that NewArg's size is the same as
+ // MemVT and just do a bitcast. If MemVT is less than 32-bits we add a
+ // truncate since we cannot preload to less than a single SGPR and the
+ // MemVT may be smaller.
+ EVT MemVTInt =
+ EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
+ if (MemVT.bitsLT(NewArg.getSimpleValueType()))
+ NewArg = DAG.getNode(ISD::TRUNCATE, DL, MemVTInt, NewArg);
+
+ NewArg = DAG.getBitcast(MemVT, NewArg);
+ NewArg = convertArgType(DAG, VT, MemVT, DL, NewArg,
Ins[i].Flags.isSExt(), &Ins[i]);
NewArg = DAG.getMergeValues({NewArg, Chain}, DL);
}
diff --git a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
index f0e709b5a1727..979bba938816c 100644
--- a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
+++ b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
@@ -1,18 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-NO-PRELOAD %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-PRELOAD-1 %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=2 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-PRELOAD-2 %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=4 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-PRELOAD-4 %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=8 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-PRELOAD-8 %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-NO-PRELOAD %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-1 %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=2 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-2 %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=4 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-4 %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=8 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-8 %s
-define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) {
-; GFX940-NO-PRELOAD-LABEL: ptr1_i8:
+define amdgpu_kernel void @ptr1_i8_kernel_preload_arg(ptr addrspace(1) %out, i8 %arg0) {
+; GFX940-NO-PRELOAD-LABEL: ptr1_i8_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
@@ -23,19 +19,7 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) {
; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
; GFX940-NO-PRELOAD-NEXT: s_endpgm
;
-; GFX940-PRELOAD-1-LABEL: ptr1_i8:
-; GFX940-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX940-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
-; GFX940-PRELOAD-1-NEXT: ; %bb.0:
-; GFX940-PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8
-; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xff
-; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
-; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; GFX940-PRELOAD-1-NEXT: s_endpgm
-;
-; GFX940-PRELOAD-2-LABEL: ptr1_i8:
+; GFX940-PRELOAD-2-LABEL: ptr1_i8_kernel_preload_arg:
; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
; GFX940-PRELOAD-2-NEXT: ; %bb.0:
@@ -45,17 +29,7 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) {
; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
; GFX940-PRELOAD-2-NEXT: s_endpgm
;
-; GFX940-PRELOAD-4-LABEL: ptr1_i8:
-; GFX940-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX940-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
-; GFX940-PRELOAD-4-NEXT: ; %bb.0:
-; GFX940-PRELOAD-4-NEXT: s_and_b32 s0, s4, 0xff
-; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0
-; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; GFX940-PRELOAD-4-NEXT: s_endpgm
-;
-; GFX940-PRELOAD-8-LABEL: ptr1_i8:
+; GFX940-PRELOAD-8-LABEL: ptr1_i8_kernel_preload_arg:
; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
; GFX940-PRELOAD-8-NEXT: ; %bb.0:
@@ -65,7 +39,7 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) {
; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
; GFX940-PRELOAD-8-NEXT: s_endpgm
;
-; GFX90a-NO-PRELOAD-LABEL: ptr1_i8:
+; GFX90a-NO-PRELOAD-LABEL: ptr1_i8_kernel_preload_arg:
; GFX90a-NO-PRELOAD: ; %bb.0:
; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -76,19 +50,7 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) {
; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[0:1]
; GFX90a-NO-PRELOAD-NEXT: s_endpgm
;
-; GFX90a-PRELOAD-1-LABEL: ptr1_i8:
-; GFX90a-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX90a-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
-; GFX90a-PRELOAD-1-NEXT: ; %bb.0:
-; GFX90a-PRELOAD-1-NEXT: s_load_dword s0, s[4:5], 0x8
-; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
-; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; GFX90a-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xff
-; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
-; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v1, s[6:7]
-; GFX90a-PRELOAD-1-NEXT: s_endpgm
-;
-; GFX90a-PRELOAD-2-LABEL: ptr1_i8:
+; GFX90a-PRELOAD-2-LABEL: ptr1_i8_kernel_preload_arg:
; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
; GFX90a-PRELOAD-2-NEXT: ; %bb.0:
@@ -98,17 +60,7 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) {
; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7]
; GFX90a-PRELOAD-2-NEXT: s_endpgm
;
-; GFX90a-PRELOAD-4-LABEL: ptr1_i8:
-; GFX90a-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX90a-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
-; GFX90a-PRELOAD-4-NEXT: ; %bb.0:
-; GFX90a-PRELOAD-4-NEXT: s_and_b32 s0, s8, 0xff
-; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
-; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0
-; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[6:7]
-; GFX90a-PRELOAD-4-NEXT: s_endpgm
-;
-; GFX90a-PRELOAD-8-LABEL: ptr1_i8:
+; GFX90a-PRELOAD-8-LABEL: ptr1_i8_kernel_preload_arg:
; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
; GFX90a-PRELOAD-8-NEXT: ; %bb.0:
@@ -122,8 +74,8 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) {
ret void
}
-define amdgpu_kernel void @ptr1_i8_zext_arg(ptr addrspace(1) %out, i8 zeroext %arg0) {
-; GFX940-NO-PRELOAD-LABEL: ptr1_i8_zext_arg:
+define amdgpu_kernel void @ptr1_i8_zext_kernel_preload_arg(ptr addrspace(1) %out, i8 zeroext %arg0) {
+; GFX940-NO-PRELOAD-LABEL: ptr1_i8_zext_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
@@ -134,19 +86,7 @@ define amdgpu_kernel void @ptr1_i8_zext_arg(ptr addrspace(1) %out, i8 zeroext %a
; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
; GFX940-NO-PRELOAD-NEXT: s_endpgm
;
-; GFX940-PRELOAD-1-LABEL: ptr1_i8_zext_arg:
-; GFX940-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX940-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
-; GFX940-PRELOAD-1-NEXT: ; %bb.0:
-; GFX940-PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8
-; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xff
-; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
-; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; GFX940-PRELOAD-1-NEXT: s_endpgm
-;
-; GFX940-PRELOAD-2-LABEL: ptr1_i8_zext_arg:
+; GFX940-PRELOAD-2-LABEL: ptr1_i8_zext_kernel_preload_arg:
; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
; GFX940-PRELOAD-2-NEXT: ; %bb.0:
@@ -157,18 +97,7 @@ define amdgpu_kernel void @ptr1_i8_zext_arg(ptr addrspace(1) %out, i8 zeroext %a
; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
; GFX940-PRELOAD-2-NEXT: s_endpgm
;
-; GFX940-PRELOAD-4-LABEL: ptr1_i8_zext_arg:
-; GFX940-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX940-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
-; GFX940-PRELOAD-4-NEXT: ; %bb.0:
-; GFX940-PRELOAD-4-NEXT: s_mov_b32 s0, 0xffff
-; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s4
-; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-4-NEXT: v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; GFX940-PRELOAD-4-NEXT: s_endpgm
-;
-; GFX940-PRELOAD-8-LABEL: ptr1_i8_zext_arg:
+; GFX940-PRELOAD-8-LABEL: ptr1_i8_zext_kernel_preload_arg:
; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
; GFX940-PRELOAD-8-NEXT: ; %bb.0:
@@ -179,7 +108,7 @@ define amdgpu_kernel void @ptr1_i8_zext_arg(ptr addrspace(1) %out, i8 zeroext %a
; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
; GFX940-PRELOAD-8-NEXT: s_endpgm
;
-; GFX90a-NO-PRELOAD-LABEL: ptr1_i8_zext_arg:
+; GFX90a-NO-PRELOAD-LABEL: ptr1_i8_zext_kernel_preload_arg:
; GFX90a-NO-PRELOAD: ; %bb.0:
; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -190,19 +119,7 @@ define amdgpu_kernel void @ptr1_i8_zext_arg(ptr addrspace(1) %out, i8 zeroext %a
; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[0:1]
; GFX90a-NO-PRELOAD-NEXT: s_endpgm
;
-; GFX90a-PRELOAD-1-LABEL: ptr1_i8_zext_arg:
-; GFX90a-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX90a-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
-; GFX90a-PRELOAD-1-NEXT: ; %bb.0:
-; GFX90a-PRELOAD-1-NEXT: s_load_dword s0, s[4:5], 0x8
-; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
-; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; GFX90a-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xff
-; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
-; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v1, s[6:7]
-; GFX90a-PRELOAD-1-NEXT: s_endpgm
-;
-; GFX90a-PRELOAD-2-LABEL: ptr1_i8_zext_arg:
+; GFX90a-PRELOAD-2-LABEL: ptr1_i8_zext_kernel_preload_arg:
; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
; GFX90a-PRELOAD-2-NEXT: ; %bb.0:
@@ -213,18 +130,7 @@ define amdgpu_kernel void @ptr1_i8_zext_arg(ptr addrspace(1) %out, i8 zeroext %a
; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7]
; GFX90a-PRELOAD-2-NEXT: s_endpgm
;
-; GFX90a-PRELOAD-4-LABEL: ptr1_i8_zext_arg:
-; GFX90a-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX90a-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
-; GFX90a-PRELOAD-4-NEXT: ; %bb.0:
-; GFX90a-PRELOAD-4-NEXT: s_mov_b32 s0, 0xffff
-; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s8
-; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
-; GFX90a-PRELOAD-4-NEXT: v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[6:7]
-; GFX90a-PRELOAD-4-NEXT: s_endpgm
-;
-; GFX90a-PRELOAD-8-LABEL: ptr1_i8_zext_arg:
+; GFX90a-PRELOAD-8-LABEL: ptr1_i8_zext_kernel_preload_arg:
; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
; GFX90a-PRELOAD-8-NEXT: ; %bb.0:
@@ -239,8 +145,8 @@ define amdgpu_kernel void @ptr1_i8_zext_arg(ptr addrspace(1) %out, i8 zeroext %a
ret void
}
-define amdgpu_kernel void @ptr1_i16_preload_arg(ptr addrspace(1) %out, i16 %arg0) {
-; GFX940-NO-PRELOAD-LABEL: ptr1_i16_preload_arg:
+define amdgpu_kernel void @ptr1_i16_kernel_preload_arg(ptr addrspace(1) %out, i16 %arg0) {
+; GFX940-NO-PRELOAD-LABEL: ptr1_i16_kernel_preload_arg:
; GFX940-NO-PRELOAD: ; %bb.0:
; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
@@ -251,19 +157,7 @@ define amdgpu_kernel void @ptr1_i16_preload_arg(ptr addrspace(1) %out, i16 %arg0
; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
; GFX940-NO-PRELOAD-NEXT: s_endpgm
;
-; GFX940-PRELOAD-1-LABEL: ptr1_i16_preload_arg:
-; GFX940-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX940-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
-; GFX940-PRELOAD-1-NEXT: ; %bb.0:
-; GFX940-PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8
-; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xffff
-; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
-; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; GFX940-PRELOAD-1-NEXT: s_endpgm
-;
-; GFX940-PRELOAD-2-LABEL: ptr1_i16_preload_arg:
+; GFX940-PRELOAD-2-LABEL: ptr1_i16_kernel_preload_arg:
; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
; GFX940-PRELOAD-2-NEXT: ; %bb.0:
@@ -273,17 +167,7 @@ define amdgpu_kernel void @ptr1_i16_preload_arg(ptr addrspace(1) %out, i16 %arg0
; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
; GFX940-PRELOAD-2-NEXT: s_endpgm
;
-; GFX940-PRELOAD-4-LABEL: ptr1_i16_preload_arg:
-; GFX940-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX940-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
-; GFX940-PRELOAD-4-NEXT: ; %bb.0:
-; GFX940-PRELOAD-4-NEXT: s_and_b32 s0, s4, 0xffff
-; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
-; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0
-; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; GFX940-PRELOAD-4-NEXT: s_endpgm
-;
-; GFX940-PRELOAD-8-LABEL: ptr1_i16_preload_arg:
+; GFX940-PRELOAD-8-LABEL: ptr1_i16_kernel_preload_arg:
; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
; GFX940-PRELOAD-8-NEXT: ; %bb.0:
@@ -293,7 +177,7 @@ define amdgpu_kernel void @ptr1_i16_preload_arg(ptr addrspace(1) %out, i16 %arg0
; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
; GFX940-PRELOAD-8-NEXT: s_endpgm
;
-; GFX90a-NO-PRELOAD-LABEL: ptr1_i16_preload_arg:
+; GFX90a-NO-PRELOAD-LABEL: ptr1_i16_kernel_preload_arg:
; GFX90a-NO-PRELOAD: ; %bb.0:
; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -304,19 +188,7 @@ define amdgpu_kernel void @ptr1_i16_preload_arg(ptr addrspace(1) %out, i16 %arg0
; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[0:1]
; GFX90a-NO-PRELOAD-NEXT: s_endpgm
;
-; GFX90a-PRELOAD-1-LABEL: ptr1_i16_preload_arg:
-; GFX90a-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX90a-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
-; GFX90a-PRELOAD-1-NEXT: ; %bb.0:
-; GFX90a-PRELOAD-1-NEXT: s_load_dword s0, s[4:5], 0x8
-; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
-; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; GFX90a-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xffff
-; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
-; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v1, s[6:7]
-; GFX90a-PRELOAD-1-NEXT: s_endpgm
-;
-; GFX90a-PRELOAD-2-LABEL: ptr1_i16_preload_arg:
+; GFX90a-PRELOAD-2-LABEL: ptr1_i16_kernel_preload_arg:
; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
; GFX90a-PRELOAD-2-NEXT: ; %bb.0:
@@ -326,17 +198,7 @@ define amdgpu_kernel void @ptr1_i16_preload_arg(ptr addrspace(1) %out, i16 %arg0
; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7]
; GFX90a-PRELOAD-2-NEXT: s_endpgm
;
-; GFX90a-PRELOAD-4-LABEL: ptr1_i16_preload_arg:
-; GFX90a-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
-; GFX90a-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
-; GFX90a-PRELOAD-4-NEXT: ; %bb.0:
-; GFX90a-PRELOAD-4-NEXT: s_and_b32 s0, s8, 0xffff
-; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
-; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0
-; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[6:7]
-; GFX90a-PRELOAD-4-NEXT: s_endpgm
-;
-; GFX90a-PRELOAD-8-LABEL: ptr1_i16_preload_arg:
+; GFX90a-PRELOAD-8-LABEL: ptr1_i16_kernel_preload_arg:
; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading k...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/91625
More information about the llvm-commits mailing list