[llvm] [AMDGPU] Enable kernel arg preloading with gfx90a (PR #81180)
Austin Kerbow via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 11 09:49:11 PST 2024
https://github.com/kerbowa updated https://github.com/llvm/llvm-project/pull/81180
From 86e67e2a81d423edf1e99af494519c74c0a94a17 Mon Sep 17 00:00:00 2001
From: Austin Kerbow <Austin.Kerbow at amd.com>
Date: Thu, 8 Feb 2024 11:21:10 -0800
Subject: [PATCH] [AMDGPU] Enable kernel arg preloading with gfx90a
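Add a trap instruction to the beginning of the kernel prologue to handle
cases where preloading is attempted on hardware loaded with incompatible
firmware. The trap takes the place of the first of the 64 s_nop
instructions in the preload header, so the header size is unchanged:
s_trap 2 is emitted when targeting AMDHSA and s_endpgm otherwise. The
backwards-compatibility check that previously kept subtargets without
GFX940 instructions from preloading is removed.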
---
llvm/docs/AMDGPUUsage.rst | 5 +-
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 3 +-
.../AMDGPU/AMDGPULowerKernelArguments.cpp | 1 -
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 6 -
.../MCTargetDesc/AMDGPUTargetStreamer.cpp | 16 +-
.../MCTargetDesc/AMDGPUTargetStreamer.h | 9 +-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 3 +-
.../CodeGen/AMDGPU/preload-kernarg-header.ll | 9 +-
llvm/test/CodeGen/AMDGPU/preload-kernargs.ll | 15978 +++++++++++-----
9 files changed, 10685 insertions(+), 5345 deletions(-)
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 6b2417143ca06c..bee237ad77691a 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -5366,7 +5366,10 @@ additional 256 bytes to the kernel_code_entry_byte_offset. This addition
facilitates the incorporation of a prologue to the kernel entry to handle cases
where code designed for kernarg preloading is executed on hardware equipped with
incompatible firmware. If hardware has compatible firmware the 256 bytes at the
-start of the kernel entry will be skipped.
+start of the kernel entry will be skipped. Additionally, the compiler backend
+may insert a trap instruction at the start of the kernel prologue to manage
+situations where kernarg preloading is attempted on hardware with incompatible
+firmware.
.. _amdgpu-amdhsa-kernel-prolog:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index db81e1ee9e3899..886d855e227a2d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -197,7 +197,8 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
if (MFI.getNumKernargPreloadedSGPRs() > 0) {
assert(AMDGPU::hasKernargPreload(STM));
- getTargetStreamer()->EmitKernargPreloadHeader(*getGlobalSTI());
+ getTargetStreamer()->EmitKernargPreloadHeader(*getGlobalSTI(),
+ STM.isAmdHsaOS());
}
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index 015c71080d6701..bc58407a73294c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -145,7 +145,6 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
// Try to preload this argument into user SGPRs.
if (Arg.hasInRegAttr() && InPreloadSequence && ST.hasKernargPreload() &&
- !ST.needsKernargPreloadBackwardsCompatibility() &&
!Arg.getType()->isAggregateType())
if (PreloadInfo.tryAllocPreloadSGPRs(AllocSize, EltOffset,
LastExplicitArgOffset))
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 4f8eeaaf500b4d..ba633fa9e9cb4b 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1254,12 +1254,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// \returns true if preloading kernel arguments is supported.
bool hasKernargPreload() const { return KernargPreload; }
- // \returns true if we need to generate backwards compatible code when
- // preloading kernel arguments.
- bool needsKernargPreloadBackwardsCompatibility() const {
- return hasKernargPreload() && !hasGFX940Insts();
- }
-
// \returns true if the target has split barriers feature
bool hasSplitBarriers() const { return getGeneration() >= GFX12; }
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 5e9b1674d87dcb..61f4a94019efb0 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -756,18 +756,26 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc,
}
bool AMDGPUTargetAsmStreamer::EmitKernargPreloadHeader(
- const MCSubtargetInfo &STI) {
- for (int i = 0; i < 64; ++i) {
+ const MCSubtargetInfo &STI, bool TrapEnabled) {
+ const char *TrapInstr = TrapEnabled ? "\ts_trap 2" : "\ts_endpgm";
+ OS << TrapInstr
+ << " ; Trap with incompatible firmware that doesn't "
+ "support preloading kernel arguments.\n";
+ for (int i = 0; i < 63; ++i) {
OS << "\ts_nop 0\n";
}
return true;
}
bool AMDGPUTargetELFStreamer::EmitKernargPreloadHeader(
- const MCSubtargetInfo &STI) {
+ const MCSubtargetInfo &STI, bool TrapEnabled) {
const uint32_t Encoded_s_nop = 0xbf800000;
+ const uint32_t Encoded_s_trap = 0xbf920002;
+ const uint32_t Encoded_s_endpgm = 0xbf810000;
+ const uint32_t TrapInstr = TrapEnabled ? Encoded_s_trap : Encoded_s_endpgm;
MCStreamer &OS = getStreamer();
- for (int i = 0; i < 64; ++i) {
+ OS.emitInt32(TrapInstr);
+ for (int i = 0; i < 63; ++i) {
OS.emitInt32(Encoded_s_nop);
}
return true;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index ad5f27a33fcbd1..5aa80ff578c6b6 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -89,7 +89,8 @@ class AMDGPUTargetStreamer : public MCTargetStreamer {
virtual bool EmitCodeEnd(const MCSubtargetInfo &STI) { return true; }
/// \returns True on success, false on failure.
- virtual bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI) {
+ virtual bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI,
+ bool TrapEnabled) {
return true;
}
@@ -146,7 +147,8 @@ class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {
bool EmitCodeEnd(const MCSubtargetInfo &STI) override;
/// \returns True on success, false on failure.
- bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI) override;
+ bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI,
+ bool TrapEnabled) override;
void EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
@@ -200,7 +202,8 @@ class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
bool EmitCodeEnd(const MCSubtargetInfo &STI) override;
/// \returns True on success, false on failure.
- bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI) override;
+ bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI,
+ bool TrapEnabled) override;
void EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index a64a9e608f2173..83221f7ead37e1 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2826,8 +2826,7 @@ SDValue SITargetLowering::LowerFormalArguments(
if (IsEntryFunc) {
allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
allocateHSAUserSGPRs(CCInfo, MF, *TRI, *Info);
- if (IsKernel && Subtarget->hasKernargPreload() &&
- !Subtarget->needsKernargPreloadBackwardsCompatibility())
+ if (IsKernel && Subtarget->hasKernargPreload())
allocatePreloadKernArgSGPRs(CCInfo, ArgLocs, Ins, MF, *TRI, *Info);
allocateLDSKernelId(CCInfo, MF, *TRI, *Info);
diff --git a/llvm/test/CodeGen/AMDGPU/preload-kernarg-header.ll b/llvm/test/CodeGen/AMDGPU/preload-kernarg-header.ll
index 75feac35dacd84..a70488a00db739 100644
--- a/llvm/test/CodeGen/AMDGPU/preload-kernarg-header.ll
+++ b/llvm/test/CodeGen/AMDGPU/preload-kernarg-header.ll
@@ -1,8 +1,11 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx940 --disassemble - | FileCheck -check-prefixes=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,HSA %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx940 --disassemble - | FileCheck -check-prefixes=GCN,HSA %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx940 --disassemble - | FileCheck -check-prefixes=GCN,NON-HSA %s
; GCN: preload_kernarg_header
-; GCN-COUNT-64: s_nop 0
+; HSA: s_trap 2
+; NON-HSA: s_endpgm
+; GCN-COUNT-63: s_nop 0
define amdgpu_kernel void @preload_kernarg_header(ptr %arg) {
store ptr %arg, ptr %arg
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
index 57980214e58e2b..d20c3a4007ffdd 100644
--- a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
+++ b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
@@ -1,1856 +1,3681 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=NO-PRELOAD %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=PRELOAD-1 %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=2 -verify-machineinstrs < %s | FileCheck -check-prefixes=PRELOAD-2 %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=4 -verify-machineinstrs < %s | FileCheck -check-prefixes=PRELOAD-4 %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=8 -verify-machineinstrs < %s | FileCheck -check-prefixes=PRELOAD-8 %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-NO-PRELOAD %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-PRELOAD-1 %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=2 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-PRELOAD-2 %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=4 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-PRELOAD-4 %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=8 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-PRELOAD-8 %s
+
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-NO-PRELOAD %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-1 %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=2 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-2 %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=4 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-4 %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=8 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-8 %s
define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) {
-; NO-PRELOAD-LABEL: ptr1_i8:
-; NO-PRELOAD: ; %bb.0:
-; NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
-; NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
-; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
-; NO-PRELOAD-NEXT: s_and_b32 s0, s4, 0xff
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s0
-; NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; NO-PRELOAD-NEXT: s_endpgm
-;
-; PRELOAD-1-LABEL: ptr1_i8:
-; PRELOAD-1: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: ; %bb.0:
-; PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8
-; PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xff
-; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
-; PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-1-NEXT: s_endpgm
-;
-; PRELOAD-2-LABEL: ptr1_i8:
-; PRELOAD-2: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: ; %bb.0:
-; PRELOAD-2-NEXT: s_and_b32 s0, s4, 0xff
-; PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0
-; PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-2-NEXT: s_endpgm
-;
-; PRELOAD-4-LABEL: ptr1_i8:
-; PRELOAD-4: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: ; %bb.0:
-; PRELOAD-4-NEXT: s_and_b32 s0, s4, 0xff
-; PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0
-; PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-4-NEXT: s_endpgm
-;
-; PRELOAD-8-LABEL: ptr1_i8:
-; PRELOAD-8: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: ; %bb.0:
-; PRELOAD-8-NEXT: s_and_b32 s0, s4, 0xff
-; PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0
-; PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-8-NEXT: s_endpgm
+; GFX940-NO-PRELOAD-LABEL: ptr1_i8:
+; GFX940-NO-PRELOAD: ; %bb.0:
+; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
+; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NO-PRELOAD-NEXT: s_and_b32 s0, s4, 0xff
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-1-LABEL: ptr1_i8:
+; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: ; %bb.0:
+; GFX940-PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xff
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-2-LABEL: ptr1_i8:
+; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: ; %bb.0:
+; GFX940-PRELOAD-2-NEXT: s_and_b32 s0, s4, 0xff
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-4-LABEL: ptr1_i8:
+; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: ; %bb.0:
+; GFX940-PRELOAD-4-NEXT: s_and_b32 s0, s4, 0xff
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-8-LABEL: ptr1_i8:
+; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: ; %bb.0:
+; GFX940-PRELOAD-8-NEXT: s_and_b32 s0, s4, 0xff
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-8-NEXT: s_endpgm
+;
+; GFX90a-NO-PRELOAD-LABEL: ptr1_i8:
+; GFX90a-NO-PRELOAD: ; %bb.0:
+; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-NO-PRELOAD-NEXT: s_and_b32 s2, s2, 0xff
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s2
+; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX90a-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-1-LABEL: ptr1_i8:
+; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-1-NEXT: s_load_dword s0, s[4:5], 0x8
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xff
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
+; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-2-LABEL: ptr1_i8:
+; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-2-NEXT: s_and_b32 s0, s8, 0xff
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0
+; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-4-LABEL: ptr1_i8:
+; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-4-NEXT: s_and_b32 s0, s8, 0xff
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0
+; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-8-LABEL: ptr1_i8:
+; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-8-NEXT: s_and_b32 s0, s8, 0xff
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0
+; GFX90a-PRELOAD-8-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-8-NEXT: s_endpgm
%ext = zext i8 %arg0 to i32
store i32 %ext, ptr addrspace(1) %out
ret void
}
define amdgpu_kernel void @ptr1_i8_zext_arg(ptr addrspace(1) %out, i8 zeroext %arg0) {
-; NO-PRELOAD-LABEL: ptr1_i8_zext_arg:
-; NO-PRELOAD: ; %bb.0:
-; NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
-; NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
-; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
-; NO-PRELOAD-NEXT: s_and_b32 s0, s4, 0xff
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s0
-; NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; NO-PRELOAD-NEXT: s_endpgm
-;
-; PRELOAD-1-LABEL: ptr1_i8_zext_arg:
-; PRELOAD-1: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: ; %bb.0:
-; PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8
-; PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xff
-; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
-; PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-1-NEXT: s_endpgm
-;
-; PRELOAD-2-LABEL: ptr1_i8_zext_arg:
-; PRELOAD-2: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: ; %bb.0:
-; PRELOAD-2-NEXT: s_mov_b32 s0, 0xffff
-; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s4
-; PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-2-NEXT: v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-2-NEXT: s_endpgm
-;
-; PRELOAD-4-LABEL: ptr1_i8_zext_arg:
-; PRELOAD-4: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: ; %bb.0:
-; PRELOAD-4-NEXT: s_mov_b32 s0, 0xffff
-; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s4
-; PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-4-NEXT: v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-4-NEXT: s_endpgm
-;
-; PRELOAD-8-LABEL: ptr1_i8_zext_arg:
-; PRELOAD-8: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: ; %bb.0:
-; PRELOAD-8-NEXT: s_mov_b32 s0, 0xffff
-; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s4
-; PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-8-NEXT: v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-8-NEXT: s_endpgm
+; GFX940-NO-PRELOAD-LABEL: ptr1_i8_zext_arg:
+; GFX940-NO-PRELOAD: ; %bb.0:
+; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
+; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NO-PRELOAD-NEXT: s_and_b32 s0, s4, 0xff
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-1-LABEL: ptr1_i8_zext_arg:
+; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: ; %bb.0:
+; GFX940-PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xff
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-2-LABEL: ptr1_i8_zext_arg:
+; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: ; %bb.0:
+; GFX940-PRELOAD-2-NEXT: s_mov_b32 s0, 0xffff
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s4
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-2-NEXT: v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-4-LABEL: ptr1_i8_zext_arg:
+; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: ; %bb.0:
+; GFX940-PRELOAD-4-NEXT: s_mov_b32 s0, 0xffff
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s4
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-4-NEXT: v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-8-LABEL: ptr1_i8_zext_arg:
+; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: ; %bb.0:
+; GFX940-PRELOAD-8-NEXT: s_mov_b32 s0, 0xffff
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s4
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-8-NEXT: v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-8-NEXT: s_endpgm
+;
+; GFX90a-NO-PRELOAD-LABEL: ptr1_i8_zext_arg:
+; GFX90a-NO-PRELOAD: ; %bb.0:
+; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-NO-PRELOAD-NEXT: s_and_b32 s2, s2, 0xff
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s2
+; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX90a-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-1-LABEL: ptr1_i8_zext_arg:
+; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-1-NEXT: s_load_dword s0, s[4:5], 0x8
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xff
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
+; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-2-LABEL: ptr1_i8_zext_arg:
+; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-2-NEXT: s_mov_b32 s0, 0xffff
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s8
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-2-NEXT: v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-4-LABEL: ptr1_i8_zext_arg:
+; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-4-NEXT: s_mov_b32 s0, 0xffff
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s8
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-4-NEXT: v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-8-LABEL: ptr1_i8_zext_arg:
+; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-8-NEXT: s_mov_b32 s0, 0xffff
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s8
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-8-NEXT: v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX90a-PRELOAD-8-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-8-NEXT: s_endpgm
%ext = zext i8 %arg0 to i32
store i32 %ext, ptr addrspace(1) %out, align 4
ret void
}
define amdgpu_kernel void @ptr1_i16_preload_arg(ptr addrspace(1) %out, i16 %arg0) {
-; NO-PRELOAD-LABEL: ptr1_i16_preload_arg:
-; NO-PRELOAD: ; %bb.0:
-; NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
-; NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
-; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
-; NO-PRELOAD-NEXT: s_and_b32 s0, s4, 0xffff
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s0
-; NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; NO-PRELOAD-NEXT: s_endpgm
-;
-; PRELOAD-1-LABEL: ptr1_i16_preload_arg:
-; PRELOAD-1: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: ; %bb.0:
-; PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8
-; PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xffff
-; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
-; PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-1-NEXT: s_endpgm
-;
-; PRELOAD-2-LABEL: ptr1_i16_preload_arg:
-; PRELOAD-2: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: ; %bb.0:
-; PRELOAD-2-NEXT: s_and_b32 s0, s4, 0xffff
-; PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0
-; PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-2-NEXT: s_endpgm
-;
-; PRELOAD-4-LABEL: ptr1_i16_preload_arg:
-; PRELOAD-4: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: ; %bb.0:
-; PRELOAD-4-NEXT: s_and_b32 s0, s4, 0xffff
-; PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0
-; PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-4-NEXT: s_endpgm
-;
-; PRELOAD-8-LABEL: ptr1_i16_preload_arg:
-; PRELOAD-8: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: ; %bb.0:
-; PRELOAD-8-NEXT: s_and_b32 s0, s4, 0xffff
-; PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0
-; PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-8-NEXT: s_endpgm
+; GFX940-NO-PRELOAD-LABEL: ptr1_i16_preload_arg:
+; GFX940-NO-PRELOAD: ; %bb.0:
+; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
+; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NO-PRELOAD-NEXT: s_and_b32 s0, s4, 0xffff
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-1-LABEL: ptr1_i16_preload_arg:
+; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: ; %bb.0:
+; GFX940-PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xffff
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-2-LABEL: ptr1_i16_preload_arg:
+; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: ; %bb.0:
+; GFX940-PRELOAD-2-NEXT: s_and_b32 s0, s4, 0xffff
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-4-LABEL: ptr1_i16_preload_arg:
+; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: ; %bb.0:
+; GFX940-PRELOAD-4-NEXT: s_and_b32 s0, s4, 0xffff
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-8-LABEL: ptr1_i16_preload_arg:
+; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: ; %bb.0:
+; GFX940-PRELOAD-8-NEXT: s_and_b32 s0, s4, 0xffff
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-8-NEXT: s_endpgm
+;
+; GFX90a-NO-PRELOAD-LABEL: ptr1_i16_preload_arg:
+; GFX90a-NO-PRELOAD: ; %bb.0:
+; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-NO-PRELOAD-NEXT: s_and_b32 s2, s2, 0xffff
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s2
+; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX90a-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-1-LABEL: ptr1_i16_preload_arg:
+; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-1-NEXT: s_load_dword s0, s[4:5], 0x8
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xffff
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
+; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-2-LABEL: ptr1_i16_preload_arg:
+; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-2-NEXT: s_and_b32 s0, s8, 0xffff
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0
+; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-4-LABEL: ptr1_i16_preload_arg:
+; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-4-NEXT: s_and_b32 s0, s8, 0xffff
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0
+; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-8-LABEL: ptr1_i16_preload_arg:
+; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-8-NEXT: s_and_b32 s0, s8, 0xffff
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0
+; GFX90a-PRELOAD-8-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-8-NEXT: s_endpgm
%ext = zext i16 %arg0 to i32
store i32 %ext, ptr addrspace(1) %out, align 4
ret void
}
define amdgpu_kernel void @ptr1_i32_preload_arg(ptr addrspace(1) %out, i32 %arg0) {
-; NO-PRELOAD-LABEL: ptr1_i32_preload_arg:
-; NO-PRELOAD: ; %bb.0:
-; NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
-; NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
-; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s4
-; NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; NO-PRELOAD-NEXT: s_endpgm
-;
-; PRELOAD-1-LABEL: ptr1_i32_preload_arg:
-; PRELOAD-1: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: ; %bb.0:
-; PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8
-; PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
-; PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-1-NEXT: s_endpgm
-;
-; PRELOAD-2-LABEL: ptr1_i32_preload_arg:
-; PRELOAD-2: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: ; %bb.0:
-; PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s4
-; PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-2-NEXT: s_endpgm
-;
-; PRELOAD-4-LABEL: ptr1_i32_preload_arg:
-; PRELOAD-4: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: ; %bb.0:
-; PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s4
-; PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-4-NEXT: s_endpgm
-;
-; PRELOAD-8-LABEL: ptr1_i32_preload_arg:
-; PRELOAD-8: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: ; %bb.0:
-; PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s4
-; PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-8-NEXT: s_endpgm
+; GFX940-NO-PRELOAD-LABEL: ptr1_i32_preload_arg:
+; GFX940-NO-PRELOAD: ; %bb.0:
+; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
+; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s4
+; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-1-LABEL: ptr1_i32_preload_arg:
+; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: ; %bb.0:
+; GFX940-PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-2-LABEL: ptr1_i32_preload_arg:
+; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: ; %bb.0:
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s4
+; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-4-LABEL: ptr1_i32_preload_arg:
+; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: ; %bb.0:
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s4
+; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-8-LABEL: ptr1_i32_preload_arg:
+; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: ; %bb.0:
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s4
+; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-8-NEXT: s_endpgm
+;
+; GFX90a-NO-PRELOAD-LABEL: ptr1_i32_preload_arg:
+; GFX90a-NO-PRELOAD: ; %bb.0:
+; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s2
+; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX90a-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-1-LABEL: ptr1_i32_preload_arg:
+; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-1-NEXT: s_load_dword s0, s[4:5], 0x8
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
+; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-2-LABEL: ptr1_i32_preload_arg:
+; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s8
+; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-4-LABEL: ptr1_i32_preload_arg:
+; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s8
+; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-8-LABEL: ptr1_i32_preload_arg:
+; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s8
+; GFX90a-PRELOAD-8-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-8-NEXT: s_endpgm
store i32 %arg0, ptr addrspace(1) %out
ret void
}
-; Check alignment on the second preloaded arg.
define amdgpu_kernel void @i32_ptr1_i32_preload_arg(i32 %arg0, ptr addrspace(1) %out, i32 %arg1) {
-; NO-PRELOAD-LABEL: i32_ptr1_i32_preload_arg:
-; NO-PRELOAD: ; %bb.0:
-; NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x10
-; NO-PRELOAD-NEXT: s_load_dword s5, s[0:1], 0x0
-; NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
-; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
-; NO-PRELOAD-NEXT: s_add_i32 s0, s5, s4
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s0
-; NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; NO-PRELOAD-NEXT: s_endpgm
-;
-; PRELOAD-1-LABEL: i32_ptr1_i32_preload_arg:
-; PRELOAD-1: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: ; %bb.0:
-; PRELOAD-1-NEXT: s_load_dword s3, s[0:1], 0x10
-; PRELOAD-1-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8
-; PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-1-NEXT: s_add_i32 s0, s2, s3
-; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
-; PRELOAD-1-NEXT: global_store_dword v0, v1, s[4:5] sc0 sc1
-; PRELOAD-1-NEXT: s_endpgm
-;
-; PRELOAD-2-LABEL: i32_ptr1_i32_preload_arg:
-; PRELOAD-2: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: ; %bb.0:
-; PRELOAD-2-NEXT: s_load_dword s0, s[0:1], 0x10
-; PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-2-NEXT: s_add_i32 s0, s2, s0
-; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0
-; PRELOAD-2-NEXT: global_store_dword v0, v1, s[4:5] sc0 sc1
-; PRELOAD-2-NEXT: s_endpgm
-;
-; PRELOAD-4-LABEL: i32_ptr1_i32_preload_arg:
-; PRELOAD-4: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: ; %bb.0:
-; PRELOAD-4-NEXT: s_add_i32 s0, s2, s6
-; PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0
-; PRELOAD-4-NEXT: global_store_dword v0, v1, s[4:5] sc0 sc1
-; PRELOAD-4-NEXT: s_endpgm
-;
-; PRELOAD-8-LABEL: i32_ptr1_i32_preload_arg:
-; PRELOAD-8: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: ; %bb.0:
-; PRELOAD-8-NEXT: s_add_i32 s0, s2, s6
-; PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0
-; PRELOAD-8-NEXT: global_store_dword v0, v1, s[4:5] sc0 sc1
-; PRELOAD-8-NEXT: s_endpgm
+; GFX940-NO-PRELOAD-LABEL: i32_ptr1_i32_preload_arg:
+; GFX940-NO-PRELOAD: ; %bb.0:
+; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x10
+; GFX940-NO-PRELOAD-NEXT: s_load_dword s5, s[0:1], 0x0
+; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NO-PRELOAD-NEXT: s_add_i32 s0, s5, s4
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-1-LABEL: i32_ptr1_i32_preload_arg:
+; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: ; %bb.0:
+; GFX940-PRELOAD-1-NEXT: s_load_dword s3, s[0:1], 0x10
+; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-1-NEXT: s_add_i32 s0, s2, s3
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[4:5] sc0 sc1
+; GFX940-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-2-LABEL: i32_ptr1_i32_preload_arg:
+; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: ; %bb.0:
+; GFX940-PRELOAD-2-NEXT: s_load_dword s0, s[0:1], 0x10
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-2-NEXT: s_add_i32 s0, s2, s0
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[4:5] sc0 sc1
+; GFX940-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-4-LABEL: i32_ptr1_i32_preload_arg:
+; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: ; %bb.0:
+; GFX940-PRELOAD-4-NEXT: s_add_i32 s0, s2, s6
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[4:5] sc0 sc1
+; GFX940-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-8-LABEL: i32_ptr1_i32_preload_arg:
+; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: ; %bb.0:
+; GFX940-PRELOAD-8-NEXT: s_add_i32 s0, s2, s6
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[4:5] sc0 sc1
+; GFX940-PRELOAD-8-NEXT: s_endpgm
+;
+; GFX90a-NO-PRELOAD-LABEL: i32_ptr1_i32_preload_arg:
+; GFX90a-NO-PRELOAD: ; %bb.0:
+; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x10
+; GFX90a-NO-PRELOAD-NEXT: s_load_dword s3, s[4:5], 0x0
+; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-NO-PRELOAD-NEXT: s_add_i32 s2, s3, s2
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s2
+; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX90a-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-1-LABEL: i32_ptr1_i32_preload_arg:
+; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-1-NEXT: s_load_dword s2, s[4:5], 0x10
+; GFX90a-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-1-NEXT: s_add_i32 s2, s6, s2
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s2
+; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX90a-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-2-LABEL: i32_ptr1_i32_preload_arg:
+; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-2-NEXT: s_load_dword s0, s[4:5], 0x10
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-2-NEXT: s_add_i32 s0, s6, s0
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0
+; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[8:9]
+; GFX90a-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-4-LABEL: i32_ptr1_i32_preload_arg:
+; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-4-NEXT: s_add_i32 s0, s6, s10
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0
+; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[8:9]
+; GFX90a-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-8-LABEL: i32_ptr1_i32_preload_arg:
+; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-8-NEXT: s_add_i32 s0, s6, s10
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0
+; GFX90a-PRELOAD-8-NEXT: global_store_dword v0, v1, s[8:9]
+; GFX90a-PRELOAD-8-NEXT: s_endpgm
%add = add i32 %arg0, %arg1
store i32 %add, ptr addrspace(1) %out
ret void
}
define amdgpu_kernel void @ptr1_i16_i16_preload_arg(ptr addrspace(1) %out, i16 %arg0, i16 %arg1) {
-; NO-PRELOAD-LABEL: ptr1_i16_i16_preload_arg:
-; NO-PRELOAD: ; %bb.0:
-; NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
-; NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
-; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
-; NO-PRELOAD-NEXT: s_lshr_b32 s0, s4, 16
-; NO-PRELOAD-NEXT: s_and_b32 s1, s4, 0xffff
-; NO-PRELOAD-NEXT: s_add_i32 s0, s1, s0
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s0
-; NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; NO-PRELOAD-NEXT: s_endpgm
-;
-; PRELOAD-1-LABEL: ptr1_i16_i16_preload_arg:
-; PRELOAD-1: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: ; %bb.0:
-; PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8
-; PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-1-NEXT: s_lshr_b32 s1, s0, 16
-; PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xffff
-; PRELOAD-1-NEXT: s_add_i32 s0, s0, s1
-; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
-; PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-1-NEXT: s_endpgm
-;
-; PRELOAD-2-LABEL: ptr1_i16_i16_preload_arg:
-; PRELOAD-2: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: ; %bb.0:
-; PRELOAD-2-NEXT: s_load_dword s0, s[0:1], 0x8
-; PRELOAD-2-NEXT: s_and_b32 s1, s4, 0xffff
-; PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-2-NEXT: s_lshr_b32 s0, s0, 16
-; PRELOAD-2-NEXT: s_add_i32 s0, s1, s0
-; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0
-; PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-2-NEXT: s_endpgm
-;
-; PRELOAD-4-LABEL: ptr1_i16_i16_preload_arg:
-; PRELOAD-4: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: ; %bb.0:
-; PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 16
-; PRELOAD-4-NEXT: s_and_b32 s1, s4, 0xffff
-; PRELOAD-4-NEXT: s_add_i32 s0, s1, s0
-; PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0
-; PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-4-NEXT: s_endpgm
-;
-; PRELOAD-8-LABEL: ptr1_i16_i16_preload_arg:
-; PRELOAD-8: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: ; %bb.0:
-; PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 16
-; PRELOAD-8-NEXT: s_and_b32 s1, s4, 0xffff
-; PRELOAD-8-NEXT: s_add_i32 s0, s1, s0
-; PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0
-; PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-8-NEXT: s_endpgm
+; GFX940-NO-PRELOAD-LABEL: ptr1_i16_i16_preload_arg:
+; GFX940-NO-PRELOAD: ; %bb.0:
+; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
+; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NO-PRELOAD-NEXT: s_lshr_b32 s0, s4, 16
+; GFX940-NO-PRELOAD-NEXT: s_and_b32 s1, s4, 0xffff
+; GFX940-NO-PRELOAD-NEXT: s_add_i32 s0, s1, s0
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-1-LABEL: ptr1_i16_i16_preload_arg:
+; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: ; %bb.0:
+; GFX940-PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-1-NEXT: s_lshr_b32 s1, s0, 16
+; GFX940-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xffff
+; GFX940-PRELOAD-1-NEXT: s_add_i32 s0, s0, s1
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-2-LABEL: ptr1_i16_i16_preload_arg:
+; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: ; %bb.0:
+; GFX940-PRELOAD-2-NEXT: s_load_dword s0, s[0:1], 0x8
+; GFX940-PRELOAD-2-NEXT: s_and_b32 s1, s4, 0xffff
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-2-NEXT: s_lshr_b32 s0, s0, 16
+; GFX940-PRELOAD-2-NEXT: s_add_i32 s0, s1, s0
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-4-LABEL: ptr1_i16_i16_preload_arg:
+; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: ; %bb.0:
+; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 16
+; GFX940-PRELOAD-4-NEXT: s_and_b32 s1, s4, 0xffff
+; GFX940-PRELOAD-4-NEXT: s_add_i32 s0, s1, s0
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-8-LABEL: ptr1_i16_i16_preload_arg:
+; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: ; %bb.0:
+; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 16
+; GFX940-PRELOAD-8-NEXT: s_and_b32 s1, s4, 0xffff
+; GFX940-PRELOAD-8-NEXT: s_add_i32 s0, s1, s0
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-8-NEXT: s_endpgm
+;
+; GFX90a-NO-PRELOAD-LABEL: ptr1_i16_i16_preload_arg:
+; GFX90a-NO-PRELOAD: ; %bb.0:
+; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-NO-PRELOAD-NEXT: s_lshr_b32 s3, s2, 16
+; GFX90a-NO-PRELOAD-NEXT: s_and_b32 s2, s2, 0xffff
+; GFX90a-NO-PRELOAD-NEXT: s_add_i32 s2, s2, s3
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s2
+; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX90a-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-1-LABEL: ptr1_i16_i16_preload_arg:
+; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-1-NEXT: s_load_dword s0, s[4:5], 0x8
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-1-NEXT: s_lshr_b32 s1, s0, 16
+; GFX90a-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xffff
+; GFX90a-PRELOAD-1-NEXT: s_add_i32 s0, s0, s1
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
+; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-2-LABEL: ptr1_i16_i16_preload_arg:
+; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-2-NEXT: s_load_dword s0, s[4:5], 0x8
+; GFX90a-PRELOAD-2-NEXT: s_and_b32 s1, s8, 0xffff
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-2-NEXT: s_lshr_b32 s0, s0, 16
+; GFX90a-PRELOAD-2-NEXT: s_add_i32 s0, s1, s0
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0
+; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-4-LABEL: ptr1_i16_i16_preload_arg:
+; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s8, 16
+; GFX90a-PRELOAD-4-NEXT: s_and_b32 s1, s8, 0xffff
+; GFX90a-PRELOAD-4-NEXT: s_add_i32 s0, s1, s0
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0
+; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-8-LABEL: ptr1_i16_i16_preload_arg:
+; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s8, 16
+; GFX90a-PRELOAD-8-NEXT: s_and_b32 s1, s8, 0xffff
+; GFX90a-PRELOAD-8-NEXT: s_add_i32 s0, s1, s0
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0
+; GFX90a-PRELOAD-8-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-8-NEXT: s_endpgm
%ext = zext i16 %arg0 to i32
%ext1 = zext i16 %arg1 to i32
%add = add i32 %ext, %ext1
@@ -1859,3563 +3684,7068 @@ define amdgpu_kernel void @ptr1_i16_i16_preload_arg(ptr addrspace(1) %out, i16 %
}
define amdgpu_kernel void @ptr1_v2i8_preload_arg(ptr addrspace(1) %out, <2 x i8> %in) {
-; NO-PRELOAD-LABEL: ptr1_v2i8_preload_arg:
-; NO-PRELOAD: ; %bb.0:
-; NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
-; NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
-; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s4
-; NO-PRELOAD-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1
-; NO-PRELOAD-NEXT: s_endpgm
-;
-; PRELOAD-1-LABEL: ptr1_v2i8_preload_arg:
-; PRELOAD-1: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: ; %bb.0:
-; PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8
-; PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
-; PRELOAD-1-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1
-; PRELOAD-1-NEXT: s_endpgm
-;
-; PRELOAD-2-LABEL: ptr1_v2i8_preload_arg:
-; PRELOAD-2: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: ; %bb.0:
-; PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 8
-; PRELOAD-2-NEXT: v_lshlrev_b16_e64 v0, 8, s0
-; PRELOAD-2-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; PRELOAD-2-NEXT: v_mov_b32_e32 v1, 0
-; PRELOAD-2-NEXT: global_store_short v1, v0, s[2:3] sc0 sc1
-; PRELOAD-2-NEXT: s_endpgm
-;
-; PRELOAD-4-LABEL: ptr1_v2i8_preload_arg:
-; PRELOAD-4: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: ; %bb.0:
-; PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 8
-; PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0
-; PRELOAD-4-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; PRELOAD-4-NEXT: v_mov_b32_e32 v1, 0
-; PRELOAD-4-NEXT: global_store_short v1, v0, s[2:3] sc0 sc1
-; PRELOAD-4-NEXT: s_endpgm
-;
-; PRELOAD-8-LABEL: ptr1_v2i8_preload_arg:
-; PRELOAD-8: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: ; %bb.0:
-; PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 8
-; PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0
-; PRELOAD-8-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; PRELOAD-8-NEXT: v_mov_b32_e32 v1, 0
-; PRELOAD-8-NEXT: global_store_short v1, v0, s[2:3] sc0 sc1
-; PRELOAD-8-NEXT: s_endpgm
+; GFX940-NO-PRELOAD-LABEL: ptr1_v2i8_preload_arg:
+; GFX940-NO-PRELOAD: ; %bb.0:
+; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
+; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s4
+; GFX940-NO-PRELOAD-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1
+; GFX940-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-1-LABEL: ptr1_v2i8_preload_arg:
+; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: ; %bb.0:
+; GFX940-PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-1-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-2-LABEL: ptr1_v2i8_preload_arg:
+; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: ; %bb.0:
+; GFX940-PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 8
+; GFX940-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v0, 8, s0
+; GFX940-PRELOAD-2-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, 0
+; GFX940-PRELOAD-2-NEXT: global_store_short v1, v0, s[2:3] sc0 sc1
+; GFX940-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-4-LABEL: ptr1_v2i8_preload_arg:
+; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: ; %bb.0:
+; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 8
+; GFX940-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0
+; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, 0
+; GFX940-PRELOAD-4-NEXT: global_store_short v1, v0, s[2:3] sc0 sc1
+; GFX940-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-8-LABEL: ptr1_v2i8_preload_arg:
+; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: ; %bb.0:
+; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 8
+; GFX940-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0
+; GFX940-PRELOAD-8-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, 0
+; GFX940-PRELOAD-8-NEXT: global_store_short v1, v0, s[2:3] sc0 sc1
+; GFX940-PRELOAD-8-NEXT: s_endpgm
+;
+; GFX90a-NO-PRELOAD-LABEL: ptr1_v2i8_preload_arg:
+; GFX90a-NO-PRELOAD: ; %bb.0:
+; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s2
+; GFX90a-NO-PRELOAD-NEXT: global_store_short v0, v1, s[0:1]
+; GFX90a-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-1-LABEL: ptr1_v2i8_preload_arg:
+; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-1-NEXT: s_load_dword s0, s[4:5], 0x8
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
+; GFX90a-PRELOAD-1-NEXT: global_store_short v0, v1, s[6:7]
+; GFX90a-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-2-LABEL: ptr1_v2i8_preload_arg:
+; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-2-NEXT: s_lshr_b32 s0, s8, 8
+; GFX90a-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v0, 8, s0
+; GFX90a-PRELOAD-2-NEXT: v_or_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, 0
+; GFX90a-PRELOAD-2-NEXT: global_store_short v1, v0, s[6:7]
+; GFX90a-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-4-LABEL: ptr1_v2i8_preload_arg:
+; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s8, 8
+; GFX90a-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0
+; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, 0
+; GFX90a-PRELOAD-4-NEXT: global_store_short v1, v0, s[6:7]
+; GFX90a-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-8-LABEL: ptr1_v2i8_preload_arg:
+; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s8, 8
+; GFX90a-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0
+; GFX90a-PRELOAD-8-NEXT: v_or_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, 0
+; GFX90a-PRELOAD-8-NEXT: global_store_short v1, v0, s[6:7]
+; GFX90a-PRELOAD-8-NEXT: s_endpgm
store <2 x i8> %in, ptr addrspace(1) %out
ret void
}
-; Don't try to preload byref args.
define amdgpu_kernel void @byref_preload_arg(ptr addrspace(1) %out, ptr addrspace(4) byref(i32) align(256) %in.byref, i32 %after.offset) {
-; NO-PRELOAD-LABEL: byref_preload_arg:
-; NO-PRELOAD: ; %bb.0:
-; NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x100
-; NO-PRELOAD-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
-; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s2
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s3
-; NO-PRELOAD-NEXT: global_store_dword v0, v1, s[4:5] sc0 sc1
-; NO-PRELOAD-NEXT: s_waitcnt vmcnt(0)
-; NO-PRELOAD-NEXT: global_store_dword v0, v2, s[4:5] sc0 sc1
-; NO-PRELOAD-NEXT: s_waitcnt vmcnt(0)
-; NO-PRELOAD-NEXT: s_endpgm
-;
-; PRELOAD-1-LABEL: byref_preload_arg:
-; PRELOAD-1: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: ; %bb.0:
-; PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x100
-; PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
-; PRELOAD-1-NEXT: v_mov_b32_e32 v2, s1
-; PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-1-NEXT: s_waitcnt vmcnt(0)
-; PRELOAD-1-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1
-; PRELOAD-1-NEXT: s_waitcnt vmcnt(0)
-; PRELOAD-1-NEXT: s_endpgm
-;
-; PRELOAD-2-LABEL: byref_preload_arg:
-; PRELOAD-2: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: ; %bb.0:
-; PRELOAD-2-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x100
-; PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0
-; PRELOAD-2-NEXT: v_mov_b32_e32 v2, s1
-; PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-2-NEXT: s_waitcnt vmcnt(0)
-; PRELOAD-2-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1
-; PRELOAD-2-NEXT: s_waitcnt vmcnt(0)
-; PRELOAD-2-NEXT: s_endpgm
-;
-; PRELOAD-4-LABEL: byref_preload_arg:
-; PRELOAD-4: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: ; %bb.0:
-; PRELOAD-4-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x100
-; PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0
-; PRELOAD-4-NEXT: v_mov_b32_e32 v2, s1
-; PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-4-NEXT: s_waitcnt vmcnt(0)
-; PRELOAD-4-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1
-; PRELOAD-4-NEXT: s_waitcnt vmcnt(0)
-; PRELOAD-4-NEXT: s_endpgm
-;
-; PRELOAD-8-LABEL: byref_preload_arg:
-; PRELOAD-8: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: ; %bb.0:
-; PRELOAD-8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x100
-; PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-8-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0
-; PRELOAD-8-NEXT: v_mov_b32_e32 v2, s1
-; PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-8-NEXT: s_waitcnt vmcnt(0)
-; PRELOAD-8-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1
-; PRELOAD-8-NEXT: s_waitcnt vmcnt(0)
-; PRELOAD-8-NEXT: s_endpgm
+; GFX940-NO-PRELOAD-LABEL: byref_preload_arg:
+; GFX940-NO-PRELOAD: ; %bb.0:
+; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x100
+; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s2
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s3
+; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[4:5] sc0 sc1
+; GFX940-NO-PRELOAD-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v2, s[4:5] sc0 sc1
+; GFX940-NO-PRELOAD-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-1-LABEL: byref_preload_arg:
+; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: ; %bb.0:
+; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x100
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s1
+; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-1-NEXT: s_waitcnt vmcnt(0)
+; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1
+; GFX940-PRELOAD-1-NEXT: s_waitcnt vmcnt(0)
+; GFX940-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-2-LABEL: byref_preload_arg:
+; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: ; %bb.0:
+; GFX940-PRELOAD-2-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x100
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s1
+; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-2-NEXT: s_waitcnt vmcnt(0)
+; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1
+; GFX940-PRELOAD-2-NEXT: s_waitcnt vmcnt(0)
+; GFX940-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-4-LABEL: byref_preload_arg:
+; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: ; %bb.0:
+; GFX940-PRELOAD-4-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x100
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s1
+; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-4-NEXT: s_waitcnt vmcnt(0)
+; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1
+; GFX940-PRELOAD-4-NEXT: s_waitcnt vmcnt(0)
+; GFX940-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-8-LABEL: byref_preload_arg:
+; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: ; %bb.0:
+; GFX940-PRELOAD-8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x100
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s1
+; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-8-NEXT: s_waitcnt vmcnt(0)
+; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1
+; GFX940-PRELOAD-8-NEXT: s_waitcnt vmcnt(0)
+; GFX940-PRELOAD-8-NEXT: s_endpgm
+;
+; GFX90a-NO-PRELOAD-LABEL: byref_preload_arg:
+; GFX90a-NO-PRELOAD: ; %bb.0:
+; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x100
+; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s0
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s1
+; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3]
+; GFX90a-NO-PRELOAD-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v2, s[2:3]
+; GFX90a-NO-PRELOAD-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-1-LABEL: byref_preload_arg:
+; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x100
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s1
+; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-1-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v2, s[6:7]
+; GFX90a-PRELOAD-1-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-2-LABEL: byref_preload_arg:
+; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-2-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x100
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s1
+; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-2-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v2, s[6:7]
+; GFX90a-PRELOAD-2-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-4-LABEL: byref_preload_arg:
+; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x100
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s1
+; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-4-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v2, s[6:7]
+; GFX90a-PRELOAD-4-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-8-LABEL: byref_preload_arg:
+; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x100
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s1
+; GFX90a-PRELOAD-8-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-8-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-PRELOAD-8-NEXT: global_store_dword v0, v2, s[6:7]
+; GFX90a-PRELOAD-8-NEXT: s_waitcnt vmcnt(0)
+; GFX90a-PRELOAD-8-NEXT: s_endpgm
%in = load i32, ptr addrspace(4) %in.byref
store volatile i32 %in, ptr addrspace(1) %out, align 4
store volatile i32 %after.offset, ptr addrspace(1) %out, align 4
ret void
}
-; TODO: Should do partial preload in cases like these where only part of the arg
-; can be preloaded.
define amdgpu_kernel void @v8i32_arg(ptr addrspace(1) nocapture %out, <8 x i32> %in) nounwind {
-; NO-PRELOAD-LABEL: v8i32_arg:
-; NO-PRELOAD: ; %bb.0:
-; NO-PRELOAD-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v4, 0
-; NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s8
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s9
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s10
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v3, s11
-; NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1
-; NO-PRELOAD-NEXT: s_nop 1
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s4
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s5
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s6
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v3, s7
-; NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
-; NO-PRELOAD-NEXT: s_endpgm
-;
-; PRELOAD-1-LABEL: v8i32_arg:
-; PRELOAD-1: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: ; %bb.0:
-; PRELOAD-1-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20
-; PRELOAD-1-NEXT: v_mov_b32_e32 v4, 0
-; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-1-NEXT: v_mov_b32_e32 v0, s8
-; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s9
-; PRELOAD-1-NEXT: v_mov_b32_e32 v2, s10
-; PRELOAD-1-NEXT: v_mov_b32_e32 v3, s11
-; PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1
-; PRELOAD-1-NEXT: s_nop 1
-; PRELOAD-1-NEXT: v_mov_b32_e32 v0, s4
-; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s5
-; PRELOAD-1-NEXT: v_mov_b32_e32 v2, s6
-; PRELOAD-1-NEXT: v_mov_b32_e32 v3, s7
-; PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1
-; PRELOAD-1-NEXT: s_endpgm
-;
-; PRELOAD-2-LABEL: v8i32_arg:
-; PRELOAD-2: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: ; %bb.0:
-; PRELOAD-2-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20
-; PRELOAD-2-NEXT: v_mov_b32_e32 v4, 0
-; PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-2-NEXT: v_mov_b32_e32 v0, s8
-; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s9
-; PRELOAD-2-NEXT: v_mov_b32_e32 v2, s10
-; PRELOAD-2-NEXT: v_mov_b32_e32 v3, s11
-; PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1
-; PRELOAD-2-NEXT: s_nop 1
-; PRELOAD-2-NEXT: v_mov_b32_e32 v0, s4
-; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s5
-; PRELOAD-2-NEXT: v_mov_b32_e32 v2, s6
-; PRELOAD-2-NEXT: v_mov_b32_e32 v3, s7
-; PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1
-; PRELOAD-2-NEXT: s_endpgm
-;
-; PRELOAD-4-LABEL: v8i32_arg:
-; PRELOAD-4: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: ; %bb.0:
-; PRELOAD-4-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20
-; PRELOAD-4-NEXT: v_mov_b32_e32 v4, 0
-; PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-4-NEXT: v_mov_b32_e32 v0, s8
-; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s9
-; PRELOAD-4-NEXT: v_mov_b32_e32 v2, s10
-; PRELOAD-4-NEXT: v_mov_b32_e32 v3, s11
-; PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1
-; PRELOAD-4-NEXT: s_nop 1
-; PRELOAD-4-NEXT: v_mov_b32_e32 v0, s4
-; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s5
-; PRELOAD-4-NEXT: v_mov_b32_e32 v2, s6
-; PRELOAD-4-NEXT: v_mov_b32_e32 v3, s7
-; PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1
-; PRELOAD-4-NEXT: s_endpgm
-;
-; PRELOAD-8-LABEL: v8i32_arg:
-; PRELOAD-8: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: ; %bb.0:
-; PRELOAD-8-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20
-; PRELOAD-8-NEXT: v_mov_b32_e32 v4, 0
-; PRELOAD-8-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-8-NEXT: v_mov_b32_e32 v0, s8
-; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s9
-; PRELOAD-8-NEXT: v_mov_b32_e32 v2, s10
-; PRELOAD-8-NEXT: v_mov_b32_e32 v3, s11
-; PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1
-; PRELOAD-8-NEXT: s_nop 1
-; PRELOAD-8-NEXT: v_mov_b32_e32 v0, s4
-; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s5
-; PRELOAD-8-NEXT: v_mov_b32_e32 v2, s6
-; PRELOAD-8-NEXT: v_mov_b32_e32 v3, s7
-; PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1
-; PRELOAD-8-NEXT: s_endpgm
+; GFX940-NO-PRELOAD-LABEL: v8i32_arg:
+; GFX940-NO-PRELOAD: ; %bb.0:
+; GFX940-NO-PRELOAD-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v4, 0
+; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s8
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s9
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s10
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, s11
+; GFX940-NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1
+; GFX940-NO-PRELOAD-NEXT: s_nop 1
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s4
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s5
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s6
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, s7
+; GFX940-NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
+; GFX940-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-1-LABEL: v8i32_arg:
+; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: ; %bb.0:
+; GFX940-PRELOAD-1-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v4, 0
+; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s8
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s9
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s10
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v3, s11
+; GFX940-PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1
+; GFX940-PRELOAD-1-NEXT: s_nop 1
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s4
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s5
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s6
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v3, s7
+; GFX940-PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1
+; GFX940-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-2-LABEL: v8i32_arg:
+; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: ; %bb.0:
+; GFX940-PRELOAD-2-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v4, 0
+; GFX940-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s8
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s9
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s10
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v3, s11
+; GFX940-PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1
+; GFX940-PRELOAD-2-NEXT: s_nop 1
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s4
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s5
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s6
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v3, s7
+; GFX940-PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1
+; GFX940-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-4-LABEL: v8i32_arg:
+; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: ; %bb.0:
+; GFX940-PRELOAD-4-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v4, 0
+; GFX940-PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s8
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s9
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s10
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v3, s11
+; GFX940-PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1
+; GFX940-PRELOAD-4-NEXT: s_nop 1
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s4
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s5
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s6
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v3, s7
+; GFX940-PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1
+; GFX940-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-8-LABEL: v8i32_arg:
+; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: ; %bb.0:
+; GFX940-PRELOAD-8-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v4, 0
+; GFX940-PRELOAD-8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s8
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s9
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s10
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v3, s11
+; GFX940-PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1
+; GFX940-PRELOAD-8-NEXT: s_nop 1
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s4
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s5
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s6
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v3, s7
+; GFX940-PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1
+; GFX940-PRELOAD-8-NEXT: s_endpgm
+;
+; GFX90a-NO-PRELOAD-LABEL: v8i32_arg:
+; GFX90a-NO-PRELOAD: ; %bb.0:
+; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x20
+; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v4, 0
+; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s12
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s13
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s14
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, s15
+; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
+; GFX90a-NO-PRELOAD-NEXT: s_nop 0
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s8
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s9
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s10
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, s11
+; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX90a-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-1-LABEL: v8i32_arg:
+; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-1-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x20
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v4, 0
+; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s12
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s13
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s14
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v3, s15
+; GFX90a-PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] offset:16
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s8
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s9
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s10
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v3, s11
+; GFX90a-PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
+; GFX90a-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-2-LABEL: v8i32_arg:
+; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-2-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x20
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v4, 0
+; GFX90a-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s12
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s13
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s14
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v3, s15
+; GFX90a-PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] offset:16
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s8
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s9
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s10
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v3, s11
+; GFX90a-PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
+; GFX90a-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-4-LABEL: v8i32_arg:
+; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-4-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x20
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v4, 0
+; GFX90a-PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s12
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s13
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s14
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v3, s15
+; GFX90a-PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] offset:16
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s8
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s9
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s10
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v3, s11
+; GFX90a-PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
+; GFX90a-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-8-LABEL: v8i32_arg:
+; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-8-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x20
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v4, 0
+; GFX90a-PRELOAD-8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s12
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s13
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s14
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v3, s15
+; GFX90a-PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] offset:16
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s8
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s9
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s10
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v3, s11
+; GFX90a-PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
+; GFX90a-PRELOAD-8-NEXT: s_endpgm
store <8 x i32> %in, ptr addrspace(1) %out, align 4
ret void
}
define amdgpu_kernel void @v3i16_preload_arg(ptr addrspace(1) nocapture %out, <3 x i16> %in) nounwind {
-; NO-PRELOAD-LABEL: v3i16_preload_arg:
-; NO-PRELOAD: ; %bb.0:
-; NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
-; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s2
-; NO-PRELOAD-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
-; NO-PRELOAD-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1
-; NO-PRELOAD-NEXT: s_endpgm
-;
-; PRELOAD-1-LABEL: v3i16_preload_arg:
-; PRELOAD-1: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: ; %bb.0:
-; PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8
-; PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s1
-; PRELOAD-1-NEXT: v_mov_b32_e32 v2, s0
-; PRELOAD-1-NEXT: global_store_short v0, v1, s[2:3] offset:4 sc0 sc1
-; PRELOAD-1-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1
-; PRELOAD-1-NEXT: s_endpgm
-;
-; PRELOAD-2-LABEL: v3i16_preload_arg:
-; PRELOAD-2: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: ; %bb.0:
-; PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s5
-; PRELOAD-2-NEXT: global_store_short v0, v1, s[2:3] offset:4 sc0 sc1
-; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s4
-; PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-2-NEXT: s_endpgm
-;
-; PRELOAD-4-LABEL: v3i16_preload_arg:
-; PRELOAD-4: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: ; %bb.0:
-; PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s5
-; PRELOAD-4-NEXT: global_store_short v0, v1, s[2:3] offset:4 sc0 sc1
-; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s4
-; PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-4-NEXT: s_endpgm
-;
-; PRELOAD-8-LABEL: v3i16_preload_arg:
-; PRELOAD-8: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: ; %bb.0:
-; PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s5
-; PRELOAD-8-NEXT: global_store_short v0, v1, s[2:3] offset:4 sc0 sc1
-; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s4
-; PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
-; PRELOAD-8-NEXT: s_endpgm
+; GFX940-NO-PRELOAD-LABEL: v3i16_preload_arg:
+; GFX940-NO-PRELOAD: ; %bb.0:
+; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s2
+; GFX940-NO-PRELOAD-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1
+; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1
+; GFX940-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-1-LABEL: v3i16_preload_arg:
+; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: ; %bb.0:
+; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s1
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s0
+; GFX940-PRELOAD-1-NEXT: global_store_short v0, v1, s[2:3] offset:4 sc0 sc1
+; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1
+; GFX940-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-2-LABEL: v3i16_preload_arg:
+; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: ; %bb.0:
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s5
+; GFX940-PRELOAD-2-NEXT: global_store_short v0, v1, s[2:3] offset:4 sc0 sc1
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s4
+; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-4-LABEL: v3i16_preload_arg:
+; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: ; %bb.0:
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s5
+; GFX940-PRELOAD-4-NEXT: global_store_short v0, v1, s[2:3] offset:4 sc0 sc1
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s4
+; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-8-LABEL: v3i16_preload_arg:
+; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: ; %bb.0:
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s5
+; GFX940-PRELOAD-8-NEXT: global_store_short v0, v1, s[2:3] offset:4 sc0 sc1
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s4
+; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-PRELOAD-8-NEXT: s_endpgm
+;
+; GFX90a-NO-PRELOAD-LABEL: v3i16_preload_arg:
+; GFX90a-NO-PRELOAD: ; %bb.0:
+; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s2
+; GFX90a-NO-PRELOAD-NEXT: global_store_short v0, v1, s[0:1] offset:4
+; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX90a-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-1-LABEL: v3i16_preload_arg:
+; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s1
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s0
+; GFX90a-PRELOAD-1-NEXT: global_store_short v0, v1, s[6:7] offset:4
+; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v2, s[6:7]
+; GFX90a-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-2-LABEL: v3i16_preload_arg:
+; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s9
+; GFX90a-PRELOAD-2-NEXT: global_store_short v0, v1, s[6:7] offset:4
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s8
+; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-4-LABEL: v3i16_preload_arg:
+; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s9
+; GFX90a-PRELOAD-4-NEXT: global_store_short v0, v1, s[6:7] offset:4
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s8
+; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-8-LABEL: v3i16_preload_arg:
+; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s9
+; GFX90a-PRELOAD-8-NEXT: global_store_short v0, v1, s[6:7] offset:4
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s8
+; GFX90a-PRELOAD-8-NEXT: global_store_dword v0, v1, s[6:7]
+; GFX90a-PRELOAD-8-NEXT: s_endpgm
store <3 x i16> %in, ptr addrspace(1) %out, align 4
ret void
}
define amdgpu_kernel void @v3i32_preload_arg(ptr addrspace(1) nocapture %out, <3 x i32> %in) nounwind {
-; NO-PRELOAD-LABEL: v3i32_preload_arg:
-; NO-PRELOAD: ; %bb.0:
-; NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
-; NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v3, 0
-; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s4
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s5
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s6
-; NO-PRELOAD-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1
-; NO-PRELOAD-NEXT: s_endpgm
-;
-; PRELOAD-1-LABEL: v3i32_preload_arg:
-; PRELOAD-1: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: ; %bb.0:
-; PRELOAD-1-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
-; PRELOAD-1-NEXT: v_mov_b32_e32 v3, 0
-; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-1-NEXT: v_mov_b32_e32 v0, s4
-; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s5
-; PRELOAD-1-NEXT: v_mov_b32_e32 v2, s6
-; PRELOAD-1-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1
-; PRELOAD-1-NEXT: s_endpgm
-;
-; PRELOAD-2-LABEL: v3i32_preload_arg:
-; PRELOAD-2: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: ; %bb.0:
-; PRELOAD-2-NEXT: v_mov_b32_e32 v0, s6
-; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s7
-; PRELOAD-2-NEXT: v_mov_b32_e32 v2, s8
-; PRELOAD-2-NEXT: v_mov_b32_e32 v3, 0
-; PRELOAD-2-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1
-; PRELOAD-2-NEXT: s_endpgm
-;
-; PRELOAD-4-LABEL: v3i32_preload_arg:
-; PRELOAD-4: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: ; %bb.0:
-; PRELOAD-4-NEXT: v_mov_b32_e32 v0, s6
-; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s7
-; PRELOAD-4-NEXT: v_mov_b32_e32 v2, s8
-; PRELOAD-4-NEXT: v_mov_b32_e32 v3, 0
-; PRELOAD-4-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1
-; PRELOAD-4-NEXT: s_endpgm
-;
-; PRELOAD-8-LABEL: v3i32_preload_arg:
-; PRELOAD-8: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: ; %bb.0:
-; PRELOAD-8-NEXT: v_mov_b32_e32 v0, s6
-; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s7
-; PRELOAD-8-NEXT: v_mov_b32_e32 v2, s8
-; PRELOAD-8-NEXT: v_mov_b32_e32 v3, 0
-; PRELOAD-8-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1
-; PRELOAD-8-NEXT: s_endpgm
+; GFX940-NO-PRELOAD-LABEL: v3i32_preload_arg:
+; GFX940-NO-PRELOAD: ; %bb.0:
+; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, 0
+; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s4
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s5
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s6
+; GFX940-NO-PRELOAD-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1
+; GFX940-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-1-LABEL: v3i32_preload_arg:
+; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: ; %bb.0:
+; GFX940-PRELOAD-1-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v3, 0
+; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s4
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s5
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s6
+; GFX940-PRELOAD-1-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1
+; GFX940-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-2-LABEL: v3i32_preload_arg:
+; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: ; %bb.0:
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s6
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s7
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s8
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v3, 0
+; GFX940-PRELOAD-2-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1
+; GFX940-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-4-LABEL: v3i32_preload_arg:
+; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: ; %bb.0:
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s6
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s7
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s8
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v3, 0
+; GFX940-PRELOAD-4-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1
+; GFX940-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-8-LABEL: v3i32_preload_arg:
+; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: ; %bb.0:
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s6
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s7
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s8
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v3, 0
+; GFX940-PRELOAD-8-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1
+; GFX940-PRELOAD-8-NEXT: s_endpgm
+;
+; GFX90a-NO-PRELOAD-LABEL: v3i32_preload_arg:
+; GFX90a-NO-PRELOAD: ; %bb.0:
+; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x10
+; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, 0
+; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s0
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s1
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s2
+; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7]
+; GFX90a-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-1-LABEL: v3i32_preload_arg:
+; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-1-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x10
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v3, 0
+; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s0
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s1
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s2
+; GFX90a-PRELOAD-1-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7]
+; GFX90a-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-2-LABEL: v3i32_preload_arg:
+; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s10
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s11
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s12
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v3, 0
+; GFX90a-PRELOAD-2-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7]
+; GFX90a-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-4-LABEL: v3i32_preload_arg:
+; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s10
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s11
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s12
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v3, 0
+; GFX90a-PRELOAD-4-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7]
+; GFX90a-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-8-LABEL: v3i32_preload_arg:
+; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s10
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s11
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s12
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v3, 0
+; GFX90a-PRELOAD-8-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7]
+; GFX90a-PRELOAD-8-NEXT: s_endpgm
store <3 x i32> %in, ptr addrspace(1) %out, align 4
ret void
}
define amdgpu_kernel void @v3f32_preload_arg(ptr addrspace(1) nocapture %out, <3 x float> %in) nounwind {
-; NO-PRELOAD-LABEL: v3f32_preload_arg:
-; NO-PRELOAD: ; %bb.0:
-; NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
-; NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v3, 0
-; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s4
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s5
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s6
-; NO-PRELOAD-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1
-; NO-PRELOAD-NEXT: s_endpgm
-;
-; PRELOAD-1-LABEL: v3f32_preload_arg:
-; PRELOAD-1: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: ; %bb.0:
-; PRELOAD-1-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
-; PRELOAD-1-NEXT: v_mov_b32_e32 v3, 0
-; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-1-NEXT: v_mov_b32_e32 v0, s4
-; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s5
-; PRELOAD-1-NEXT: v_mov_b32_e32 v2, s6
-; PRELOAD-1-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1
-; PRELOAD-1-NEXT: s_endpgm
-;
-; PRELOAD-2-LABEL: v3f32_preload_arg:
-; PRELOAD-2: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: ; %bb.0:
-; PRELOAD-2-NEXT: v_mov_b32_e32 v3, 0
-; PRELOAD-2-NEXT: v_mov_b32_e32 v0, s6
-; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s7
-; PRELOAD-2-NEXT: v_mov_b32_e32 v2, s8
-; PRELOAD-2-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1
-; PRELOAD-2-NEXT: s_endpgm
-;
-; PRELOAD-4-LABEL: v3f32_preload_arg:
-; PRELOAD-4: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: ; %bb.0:
-; PRELOAD-4-NEXT: v_mov_b32_e32 v3, 0
-; PRELOAD-4-NEXT: v_mov_b32_e32 v0, s6
-; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s7
-; PRELOAD-4-NEXT: v_mov_b32_e32 v2, s8
-; PRELOAD-4-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1
-; PRELOAD-4-NEXT: s_endpgm
-;
-; PRELOAD-8-LABEL: v3f32_preload_arg:
-; PRELOAD-8: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: ; %bb.0:
-; PRELOAD-8-NEXT: v_mov_b32_e32 v3, 0
-; PRELOAD-8-NEXT: v_mov_b32_e32 v0, s6
-; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s7
-; PRELOAD-8-NEXT: v_mov_b32_e32 v2, s8
-; PRELOAD-8-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1
-; PRELOAD-8-NEXT: s_endpgm
+; GFX940-NO-PRELOAD-LABEL: v3f32_preload_arg:
+; GFX940-NO-PRELOAD: ; %bb.0:
+; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, 0
+; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s4
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s5
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s6
+; GFX940-NO-PRELOAD-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1
+; GFX940-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-1-LABEL: v3f32_preload_arg:
+; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: ; %bb.0:
+; GFX940-PRELOAD-1-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v3, 0
+; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s4
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s5
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s6
+; GFX940-PRELOAD-1-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1
+; GFX940-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-2-LABEL: v3f32_preload_arg:
+; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: ; %bb.0:
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v3, 0
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s6
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s7
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s8
+; GFX940-PRELOAD-2-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1
+; GFX940-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-4-LABEL: v3f32_preload_arg:
+; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: ; %bb.0:
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v3, 0
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s6
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s7
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s8
+; GFX940-PRELOAD-4-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1
+; GFX940-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-8-LABEL: v3f32_preload_arg:
+; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: ; %bb.0:
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v3, 0
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s6
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s7
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s8
+; GFX940-PRELOAD-8-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1
+; GFX940-PRELOAD-8-NEXT: s_endpgm
+;
+; GFX90a-NO-PRELOAD-LABEL: v3f32_preload_arg:
+; GFX90a-NO-PRELOAD: ; %bb.0:
+; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x10
+; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, 0
+; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s0
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s1
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s2
+; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7]
+; GFX90a-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-1-LABEL: v3f32_preload_arg:
+; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-1-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x10
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v3, 0
+; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s0
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s1
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s2
+; GFX90a-PRELOAD-1-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7]
+; GFX90a-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-2-LABEL: v3f32_preload_arg:
+; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v3, 0
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s10
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s11
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s12
+; GFX90a-PRELOAD-2-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7]
+; GFX90a-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-4-LABEL: v3f32_preload_arg:
+; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v3, 0
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s10
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s11
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s12
+; GFX90a-PRELOAD-4-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7]
+; GFX90a-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-8-LABEL: v3f32_preload_arg:
+; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v3, 0
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s10
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s11
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s12
+; GFX90a-PRELOAD-8-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7]
+; GFX90a-PRELOAD-8-NEXT: s_endpgm
store <3 x float> %in, ptr addrspace(1) %out, align 4
ret void
}
define amdgpu_kernel void @v5i8_preload_arg(ptr addrspace(1) nocapture %out, <5 x i8> %in) nounwind {
-; NO-PRELOAD-LABEL: v5i8_preload_arg:
-; NO-PRELOAD: ; %bb.0:
-; NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
-; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s2
-; NO-PRELOAD-NEXT: global_store_byte v0, v1, s[0:1] offset:4 sc0 sc1
-; NO-PRELOAD-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1
-; NO-PRELOAD-NEXT: s_endpgm
-;
-; PRELOAD-1-LABEL: v5i8_preload_arg:
-; PRELOAD-1: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: ; %bb.0:
-; PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8
-; PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
-; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s1
-; PRELOAD-1-NEXT: v_mov_b32_e32 v2, s0
-; PRELOAD-1-NEXT: global_store_byte v0, v1, s[2:3] offset:4 sc0 sc1
-; PRELOAD-1-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1
-; PRELOAD-1-NEXT: s_endpgm
-;
-; PRELOAD-2-LABEL: v5i8_preload_arg:
-; PRELOAD-2: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: ; %bb.0:
-; PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 8
-; PRELOAD-2-NEXT: v_lshlrev_b16_e64 v0, 8, s0
-; PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 24
-; PRELOAD-2-NEXT: v_lshlrev_b16_e64 v1, 8, s0
-; PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 16
-; PRELOAD-2-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; PRELOAD-2-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; PRELOAD-2-NEXT: v_mov_b32_e32 v2, s5
-; PRELOAD-2-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; PRELOAD-2-NEXT: v_mov_b32_e32 v1, 0
-; PRELOAD-2-NEXT: global_store_byte v1, v2, s[2:3] offset:4 sc0 sc1
-; PRELOAD-2-NEXT: global_store_dword v1, v0, s[2:3] sc0 sc1
-; PRELOAD-2-NEXT: s_endpgm
-;
-; PRELOAD-4-LABEL: v5i8_preload_arg:
-; PRELOAD-4: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: ; %bb.0:
-; PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 8
-; PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0
-; PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 24
-; PRELOAD-4-NEXT: v_lshlrev_b16_e64 v1, 8, s0
-; PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 16
-; PRELOAD-4-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; PRELOAD-4-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; PRELOAD-4-NEXT: v_mov_b32_e32 v2, s5
-; PRELOAD-4-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; PRELOAD-4-NEXT: v_mov_b32_e32 v1, 0
-; PRELOAD-4-NEXT: global_store_byte v1, v2, s[2:3] offset:4 sc0 sc1
-; PRELOAD-4-NEXT: global_store_dword v1, v0, s[2:3] sc0 sc1
-; PRELOAD-4-NEXT: s_endpgm
-;
-; PRELOAD-8-LABEL: v5i8_preload_arg:
-; PRELOAD-8: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: ; %bb.0:
-; PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 8
-; PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0
-; PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 24
-; PRELOAD-8-NEXT: v_lshlrev_b16_e64 v1, 8, s0
-; PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 16
-; PRELOAD-8-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; PRELOAD-8-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; PRELOAD-8-NEXT: v_mov_b32_e32 v2, s5
-; PRELOAD-8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; PRELOAD-8-NEXT: v_mov_b32_e32 v1, 0
-; PRELOAD-8-NEXT: global_store_byte v1, v2, s[2:3] offset:4 sc0 sc1
-; PRELOAD-8-NEXT: global_store_dword v1, v0, s[2:3] sc0 sc1
-; PRELOAD-8-NEXT: s_endpgm
+; GFX940-NO-PRELOAD-LABEL: v5i8_preload_arg:
+; GFX940-NO-PRELOAD: ; %bb.0:
+; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s2
+; GFX940-NO-PRELOAD-NEXT: global_store_byte v0, v1, s[0:1] offset:4 sc0 sc1
+; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1
+; GFX940-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-1-LABEL: v5i8_preload_arg:
+; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: ; %bb.0:
+; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
+; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s1
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s0
+; GFX940-PRELOAD-1-NEXT: global_store_byte v0, v1, s[2:3] offset:4 sc0 sc1
+; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1
+; GFX940-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-2-LABEL: v5i8_preload_arg:
+; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: ; %bb.0:
+; GFX940-PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 8
+; GFX940-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v0, 8, s0
+; GFX940-PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 24
+; GFX940-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v1, 8, s0
+; GFX940-PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 16
+; GFX940-PRELOAD-2-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX940-PRELOAD-2-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s5
+; GFX940-PRELOAD-2-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, 0
+; GFX940-PRELOAD-2-NEXT: global_store_byte v1, v2, s[2:3] offset:4 sc0 sc1
+; GFX940-PRELOAD-2-NEXT: global_store_dword v1, v0, s[2:3] sc0 sc1
+; GFX940-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-4-LABEL: v5i8_preload_arg:
+; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: ; %bb.0:
+; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 8
+; GFX940-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0
+; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 24
+; GFX940-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v1, 8, s0
+; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 16
+; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s5
+; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, 0
+; GFX940-PRELOAD-4-NEXT: global_store_byte v1, v2, s[2:3] offset:4 sc0 sc1
+; GFX940-PRELOAD-4-NEXT: global_store_dword v1, v0, s[2:3] sc0 sc1
+; GFX940-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-8-LABEL: v5i8_preload_arg:
+; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: ; %bb.0:
+; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 8
+; GFX940-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0
+; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 24
+; GFX940-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v1, 8, s0
+; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 16
+; GFX940-PRELOAD-8-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX940-PRELOAD-8-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s5
+; GFX940-PRELOAD-8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, 0
+; GFX940-PRELOAD-8-NEXT: global_store_byte v1, v2, s[2:3] offset:4 sc0 sc1
+; GFX940-PRELOAD-8-NEXT: global_store_dword v1, v0, s[2:3] sc0 sc1
+; GFX940-PRELOAD-8-NEXT: s_endpgm
+;
+; GFX90a-NO-PRELOAD-LABEL: v5i8_preload_arg:
+; GFX90a-NO-PRELOAD: ; %bb.0:
+; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s2
+; GFX90a-NO-PRELOAD-NEXT: global_store_byte v0, v1, s[0:1] offset:4
+; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX90a-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-1-LABEL: v5i8_preload_arg:
+; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0
+; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s1
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s0
+; GFX90a-PRELOAD-1-NEXT: global_store_byte v0, v1, s[6:7] offset:4
+; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v2, s[6:7]
+; GFX90a-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-2-LABEL: v5i8_preload_arg:
+; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-2-NEXT: s_lshr_b32 s0, s8, 8
+; GFX90a-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v0, 8, s0
+; GFX90a-PRELOAD-2-NEXT: s_lshr_b32 s0, s8, 24
+; GFX90a-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v1, 8, s0
+; GFX90a-PRELOAD-2-NEXT: s_lshr_b32 s0, s8, 16
+; GFX90a-PRELOAD-2-NEXT: v_or_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX90a-PRELOAD-2-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX90a-PRELOAD-2-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, 0
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s9
+; GFX90a-PRELOAD-2-NEXT: global_store_byte v1, v2, s[6:7] offset:4
+; GFX90a-PRELOAD-2-NEXT: global_store_dword v1, v0, s[6:7]
+; GFX90a-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-4-LABEL: v5i8_preload_arg:
+; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s8, 8
+; GFX90a-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0
+; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s8, 24
+; GFX90a-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v1, 8, s0
+; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s8, 16
+; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, 0
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s9
+; GFX90a-PRELOAD-4-NEXT: global_store_byte v1, v2, s[6:7] offset:4
+; GFX90a-PRELOAD-4-NEXT: global_store_dword v1, v0, s[6:7]
+; GFX90a-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-8-LABEL: v5i8_preload_arg:
+; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s8, 8
+; GFX90a-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0
+; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s8, 24
+; GFX90a-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v1, 8, s0
+; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s8, 16
+; GFX90a-PRELOAD-8-NEXT: v_or_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX90a-PRELOAD-8-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX90a-PRELOAD-8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, 0
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s9
+; GFX90a-PRELOAD-8-NEXT: global_store_byte v1, v2, s[6:7] offset:4
+; GFX90a-PRELOAD-8-NEXT: global_store_dword v1, v0, s[6:7]
+; GFX90a-PRELOAD-8-NEXT: s_endpgm
store <5 x i8> %in, ptr addrspace(1) %out, align 4
ret void
}
define amdgpu_kernel void @v5f64_arg(ptr addrspace(1) nocapture %out, <5 x double> %in) nounwind {
-; NO-PRELOAD-LABEL: v5f64_arg:
-; NO-PRELOAD: ; %bb.0:
-; NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x60
-; NO-PRELOAD-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x40
-; NO-PRELOAD-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x0
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v4, 0
-; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
-; NO-PRELOAD-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s8
-; NO-PRELOAD-NEXT: global_store_dwordx2 v4, v[2:3], s[12:13] offset:32 sc0 sc1
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s9
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s10
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v3, s11
-; NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[12:13] offset:16 sc0 sc1
-; NO-PRELOAD-NEXT: s_nop 1
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s4
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s5
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s6
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v3, s7
-; NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[12:13] sc0 sc1
-; NO-PRELOAD-NEXT: s_endpgm
-;
-; PRELOAD-1-LABEL: v5f64_arg:
-; PRELOAD-1: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: ; %bb.0:
-; PRELOAD-1-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x60
-; PRELOAD-1-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x40
-; PRELOAD-1-NEXT: v_mov_b32_e32 v4, 0
-; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-1-NEXT: v_mov_b64_e32 v[2:3], s[12:13]
-; PRELOAD-1-NEXT: v_mov_b32_e32 v0, s8
-; PRELOAD-1-NEXT: global_store_dwordx2 v4, v[2:3], s[2:3] offset:32 sc0 sc1
-; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s9
-; PRELOAD-1-NEXT: v_mov_b32_e32 v2, s10
-; PRELOAD-1-NEXT: v_mov_b32_e32 v3, s11
-; PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1
-; PRELOAD-1-NEXT: s_nop 1
-; PRELOAD-1-NEXT: v_mov_b32_e32 v0, s4
-; PRELOAD-1-NEXT: v_mov_b32_e32 v1, s5
-; PRELOAD-1-NEXT: v_mov_b32_e32 v2, s6
-; PRELOAD-1-NEXT: v_mov_b32_e32 v3, s7
-; PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1
-; PRELOAD-1-NEXT: s_endpgm
-;
-; PRELOAD-2-LABEL: v5f64_arg:
-; PRELOAD-2: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: ; %bb.0:
-; PRELOAD-2-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x60
-; PRELOAD-2-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x40
-; PRELOAD-2-NEXT: v_mov_b32_e32 v4, 0
-; PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-2-NEXT: v_mov_b64_e32 v[2:3], s[12:13]
-; PRELOAD-2-NEXT: v_mov_b32_e32 v0, s8
-; PRELOAD-2-NEXT: global_store_dwordx2 v4, v[2:3], s[2:3] offset:32 sc0 sc1
-; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s9
-; PRELOAD-2-NEXT: v_mov_b32_e32 v2, s10
-; PRELOAD-2-NEXT: v_mov_b32_e32 v3, s11
-; PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1
-; PRELOAD-2-NEXT: s_nop 1
-; PRELOAD-2-NEXT: v_mov_b32_e32 v0, s4
-; PRELOAD-2-NEXT: v_mov_b32_e32 v1, s5
-; PRELOAD-2-NEXT: v_mov_b32_e32 v2, s6
-; PRELOAD-2-NEXT: v_mov_b32_e32 v3, s7
-; PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1
-; PRELOAD-2-NEXT: s_endpgm
-;
-; PRELOAD-4-LABEL: v5f64_arg:
-; PRELOAD-4: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: ; %bb.0:
-; PRELOAD-4-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x60
-; PRELOAD-4-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x40
-; PRELOAD-4-NEXT: v_mov_b32_e32 v4, 0
-; PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-4-NEXT: v_mov_b64_e32 v[2:3], s[12:13]
-; PRELOAD-4-NEXT: v_mov_b32_e32 v0, s8
-; PRELOAD-4-NEXT: global_store_dwordx2 v4, v[2:3], s[2:3] offset:32 sc0 sc1
-; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s9
-; PRELOAD-4-NEXT: v_mov_b32_e32 v2, s10
-; PRELOAD-4-NEXT: v_mov_b32_e32 v3, s11
-; PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1
-; PRELOAD-4-NEXT: s_nop 1
-; PRELOAD-4-NEXT: v_mov_b32_e32 v0, s4
-; PRELOAD-4-NEXT: v_mov_b32_e32 v1, s5
-; PRELOAD-4-NEXT: v_mov_b32_e32 v2, s6
-; PRELOAD-4-NEXT: v_mov_b32_e32 v3, s7
-; PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1
-; PRELOAD-4-NEXT: s_endpgm
-;
-; PRELOAD-8-LABEL: v5f64_arg:
-; PRELOAD-8: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: ; %bb.0:
-; PRELOAD-8-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x60
-; PRELOAD-8-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x40
-; PRELOAD-8-NEXT: v_mov_b32_e32 v4, 0
-; PRELOAD-8-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-8-NEXT: v_mov_b64_e32 v[2:3], s[12:13]
-; PRELOAD-8-NEXT: v_mov_b32_e32 v0, s8
-; PRELOAD-8-NEXT: global_store_dwordx2 v4, v[2:3], s[2:3] offset:32 sc0 sc1
-; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s9
-; PRELOAD-8-NEXT: v_mov_b32_e32 v2, s10
-; PRELOAD-8-NEXT: v_mov_b32_e32 v3, s11
-; PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1
-; PRELOAD-8-NEXT: s_nop 1
-; PRELOAD-8-NEXT: v_mov_b32_e32 v0, s4
-; PRELOAD-8-NEXT: v_mov_b32_e32 v1, s5
-; PRELOAD-8-NEXT: v_mov_b32_e32 v2, s6
-; PRELOAD-8-NEXT: v_mov_b32_e32 v3, s7
-; PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1
-; PRELOAD-8-NEXT: s_endpgm
+; GFX940-NO-PRELOAD-LABEL: v5f64_arg:
+; GFX940-NO-PRELOAD: ; %bb.0:
+; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x60
+; GFX940-NO-PRELOAD-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x40
+; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x0
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v4, 0
+; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NO-PRELOAD-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s8
+; GFX940-NO-PRELOAD-NEXT: global_store_dwordx2 v4, v[2:3], s[12:13] offset:32 sc0 sc1
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s9
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s10
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, s11
+; GFX940-NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[12:13] offset:16 sc0 sc1
+; GFX940-NO-PRELOAD-NEXT: s_nop 1
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s4
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s5
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s6
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, s7
+; GFX940-NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[12:13] sc0 sc1
+; GFX940-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-1-LABEL: v5f64_arg:
+; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: ; %bb.0:
+; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x60
+; GFX940-PRELOAD-1-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x40
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v4, 0
+; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-1-NEXT: v_mov_b64_e32 v[2:3], s[12:13]
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s8
+; GFX940-PRELOAD-1-NEXT: global_store_dwordx2 v4, v[2:3], s[2:3] offset:32 sc0 sc1
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s9
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s10
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v3, s11
+; GFX940-PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1
+; GFX940-PRELOAD-1-NEXT: s_nop 1
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s4
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s5
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s6
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v3, s7
+; GFX940-PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1
+; GFX940-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-2-LABEL: v5f64_arg:
+; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: ; %bb.0:
+; GFX940-PRELOAD-2-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x60
+; GFX940-PRELOAD-2-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x40
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v4, 0
+; GFX940-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-2-NEXT: v_mov_b64_e32 v[2:3], s[12:13]
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s8
+; GFX940-PRELOAD-2-NEXT: global_store_dwordx2 v4, v[2:3], s[2:3] offset:32 sc0 sc1
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s9
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s10
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v3, s11
+; GFX940-PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1
+; GFX940-PRELOAD-2-NEXT: s_nop 1
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s4
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s5
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s6
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v3, s7
+; GFX940-PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1
+; GFX940-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-4-LABEL: v5f64_arg:
+; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: ; %bb.0:
+; GFX940-PRELOAD-4-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x60
+; GFX940-PRELOAD-4-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x40
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v4, 0
+; GFX940-PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-4-NEXT: v_mov_b64_e32 v[2:3], s[12:13]
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s8
+; GFX940-PRELOAD-4-NEXT: global_store_dwordx2 v4, v[2:3], s[2:3] offset:32 sc0 sc1
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s9
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s10
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v3, s11
+; GFX940-PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1
+; GFX940-PRELOAD-4-NEXT: s_nop 1
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s4
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s5
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s6
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v3, s7
+; GFX940-PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1
+; GFX940-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-8-LABEL: v5f64_arg:
+; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: ; %bb.0:
+; GFX940-PRELOAD-8-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x60
+; GFX940-PRELOAD-8-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x40
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v4, 0
+; GFX940-PRELOAD-8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-8-NEXT: v_mov_b64_e32 v[2:3], s[12:13]
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s8
+; GFX940-PRELOAD-8-NEXT: global_store_dwordx2 v4, v[2:3], s[2:3] offset:32 sc0 sc1
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s9
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s10
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v3, s11
+; GFX940-PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1
+; GFX940-PRELOAD-8-NEXT: s_nop 1
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s4
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s5
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s6
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v3, s7
+; GFX940-PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1
+; GFX940-PRELOAD-8-NEXT: s_endpgm
+;
+; GFX90a-NO-PRELOAD-LABEL: v5f64_arg:
+; GFX90a-NO-PRELOAD: ; %bb.0:
+; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x60
+; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40
+; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v4, 0
+; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-NO-PRELOAD-NEXT: v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1]
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s12
+; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx2 v4, v[2:3], s[2:3] offset:32
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s13
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s14
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, s15
+; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16
+; GFX90a-NO-PRELOAD-NEXT: s_nop 0
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s8
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s9
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s10
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, s11
+; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3]
+; GFX90a-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-1-LABEL: v5f64_arg:
+; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x60
+; GFX90a-PRELOAD-1-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v4, 0
+; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-1-NEXT: v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1]
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s12
+; GFX90a-PRELOAD-1-NEXT: global_store_dwordx2 v4, v[2:3], s[6:7] offset:32
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s13
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s14
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v3, s15
+; GFX90a-PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] offset:16
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s8
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s9
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s10
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v3, s11
+; GFX90a-PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
+; GFX90a-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-2-LABEL: v5f64_arg:
+; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-2-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x60
+; GFX90a-PRELOAD-2-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v4, 0
+; GFX90a-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-2-NEXT: v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1]
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s12
+; GFX90a-PRELOAD-2-NEXT: global_store_dwordx2 v4, v[2:3], s[6:7] offset:32
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s13
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s14
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v3, s15
+; GFX90a-PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] offset:16
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s8
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s9
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s10
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v3, s11
+; GFX90a-PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
+; GFX90a-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-4-LABEL: v5f64_arg:
+; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x60
+; GFX90a-PRELOAD-4-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v4, 0
+; GFX90a-PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-4-NEXT: v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1]
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s12
+; GFX90a-PRELOAD-4-NEXT: global_store_dwordx2 v4, v[2:3], s[6:7] offset:32
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s13
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s14
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v3, s15
+; GFX90a-PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] offset:16
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s8
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s9
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s10
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v3, s11
+; GFX90a-PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
+; GFX90a-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-8-LABEL: v5f64_arg:
+; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x60
+; GFX90a-PRELOAD-8-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v4, 0
+; GFX90a-PRELOAD-8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-8-NEXT: v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1]
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s12
+; GFX90a-PRELOAD-8-NEXT: global_store_dwordx2 v4, v[2:3], s[6:7] offset:32
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s13
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s14
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v3, s15
+; GFX90a-PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] offset:16
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s8
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s9
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s10
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v3, s11
+; GFX90a-PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
+; GFX90a-PRELOAD-8-NEXT: s_endpgm
store <5 x double> %in, ptr addrspace(1) %out, align 8
ret void
}
define amdgpu_kernel void @v8i8_preload_arg(ptr addrspace(1) %out, <8 x i8> %in) {
-; NO-PRELOAD-LABEL: v8i8_preload_arg:
-; NO-PRELOAD: ; %bb.0:
-; NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v2, 0
-; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
-; NO-PRELOAD-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
-; NO-PRELOAD-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
-; NO-PRELOAD-NEXT: s_endpgm
-;
-; PRELOAD-1-LABEL: v8i8_preload_arg:
-; PRELOAD-1: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: ; %bb.0:
-; PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8
-; PRELOAD-1-NEXT: v_mov_b32_e32 v2, 0
-; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-1-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
-; PRELOAD-1-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
-; PRELOAD-1-NEXT: s_endpgm
-;
-; PRELOAD-2-LABEL: v8i8_preload_arg:
-; PRELOAD-2: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: ; %bb.0:
-; PRELOAD-2-NEXT: s_lshr_b32 s0, s5, 8
-; PRELOAD-2-NEXT: v_lshlrev_b16_e64 v0, 8, s0
-; PRELOAD-2-NEXT: s_lshr_b32 s0, s5, 24
-; PRELOAD-2-NEXT: v_lshlrev_b16_e64 v1, 8, s0
-; PRELOAD-2-NEXT: s_lshr_b32 s0, s5, 16
-; PRELOAD-2-NEXT: v_or_b32_sdwa v0, s5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; PRELOAD-2-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 8
-; PRELOAD-2-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; PRELOAD-2-NEXT: v_lshlrev_b16_e64 v0, 8, s0
-; PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 24
-; PRELOAD-2-NEXT: v_lshlrev_b16_e64 v2, 8, s0
-; PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 16
-; PRELOAD-2-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; PRELOAD-2-NEXT: v_or_b32_sdwa v2, s0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; PRELOAD-2-NEXT: v_mov_b32_e32 v2, 0
-; PRELOAD-2-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
-; PRELOAD-2-NEXT: s_endpgm
-;
-; PRELOAD-4-LABEL: v8i8_preload_arg:
-; PRELOAD-4: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: ; %bb.0:
-; PRELOAD-4-NEXT: s_lshr_b32 s0, s5, 8
-; PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0
-; PRELOAD-4-NEXT: s_lshr_b32 s0, s5, 24
-; PRELOAD-4-NEXT: v_lshlrev_b16_e64 v1, 8, s0
-; PRELOAD-4-NEXT: s_lshr_b32 s0, s5, 16
-; PRELOAD-4-NEXT: v_or_b32_sdwa v0, s5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; PRELOAD-4-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 8
-; PRELOAD-4-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0
-; PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 24
-; PRELOAD-4-NEXT: v_lshlrev_b16_e64 v2, 8, s0
-; PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 16
-; PRELOAD-4-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; PRELOAD-4-NEXT: v_or_b32_sdwa v2, s0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; PRELOAD-4-NEXT: v_mov_b32_e32 v2, 0
-; PRELOAD-4-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
-; PRELOAD-4-NEXT: s_endpgm
-;
-; PRELOAD-8-LABEL: v8i8_preload_arg:
-; PRELOAD-8: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: ; %bb.0:
-; PRELOAD-8-NEXT: s_lshr_b32 s0, s5, 8
-; PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0
-; PRELOAD-8-NEXT: s_lshr_b32 s0, s5, 24
-; PRELOAD-8-NEXT: v_lshlrev_b16_e64 v1, 8, s0
-; PRELOAD-8-NEXT: s_lshr_b32 s0, s5, 16
-; PRELOAD-8-NEXT: v_or_b32_sdwa v0, s5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; PRELOAD-8-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 8
-; PRELOAD-8-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0
-; PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 24
-; PRELOAD-8-NEXT: v_lshlrev_b16_e64 v2, 8, s0
-; PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 16
-; PRELOAD-8-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; PRELOAD-8-NEXT: v_or_b32_sdwa v2, s0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; PRELOAD-8-NEXT: v_mov_b32_e32 v2, 0
-; PRELOAD-8-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
-; PRELOAD-8-NEXT: s_endpgm
+; GFX940-NO-PRELOAD-LABEL: v8i8_preload_arg:
+; GFX940-NO-PRELOAD: ; %bb.0:
+; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NO-PRELOAD-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
+; GFX940-NO-PRELOAD-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
+; GFX940-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-1-LABEL: v8i8_preload_arg:
+; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: ; %bb.0:
+; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-1-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX940-PRELOAD-1-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
+; GFX940-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-2-LABEL: v8i8_preload_arg:
+; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: ; %bb.0:
+; GFX940-PRELOAD-2-NEXT: s_lshr_b32 s0, s5, 8
+; GFX940-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v0, 8, s0
+; GFX940-PRELOAD-2-NEXT: s_lshr_b32 s0, s5, 24
+; GFX940-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v1, 8, s0
+; GFX940-PRELOAD-2-NEXT: s_lshr_b32 s0, s5, 16
+; GFX940-PRELOAD-2-NEXT: v_or_b32_sdwa v0, s5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX940-PRELOAD-2-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX940-PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 8
+; GFX940-PRELOAD-2-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX940-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v0, 8, s0
+; GFX940-PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 24
+; GFX940-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v2, 8, s0
+; GFX940-PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 16
+; GFX940-PRELOAD-2-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX940-PRELOAD-2-NEXT: v_or_b32_sdwa v2, s0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-PRELOAD-2-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
+; GFX940-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-4-LABEL: v8i8_preload_arg:
+; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: ; %bb.0:
+; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s5, 8
+; GFX940-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0
+; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s5, 24
+; GFX940-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v1, 8, s0
+; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s5, 16
+; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v0, s5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 8
+; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX940-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0
+; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 24
+; GFX940-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v2, 8, s0
+; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 16
+; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v2, s0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-PRELOAD-4-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
+; GFX940-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-8-LABEL: v8i8_preload_arg:
+; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: ; %bb.0:
+; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s5, 8
+; GFX940-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0
+; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s5, 24
+; GFX940-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v1, 8, s0
+; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s5, 16
+; GFX940-PRELOAD-8-NEXT: v_or_b32_sdwa v0, s5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX940-PRELOAD-8-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 8
+; GFX940-PRELOAD-8-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX940-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0
+; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 24
+; GFX940-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v2, 8, s0
+; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 16
+; GFX940-PRELOAD-8-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX940-PRELOAD-8-NEXT: v_or_b32_sdwa v2, s0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-PRELOAD-8-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
+; GFX940-PRELOAD-8-NEXT: s_endpgm
+;
+; GFX90a-NO-PRELOAD-LABEL: v8i8_preload_arg:
+; GFX90a-NO-PRELOAD: ; %bb.0:
+; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, 0
+; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-NO-PRELOAD-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
+; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX90a-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-1-LABEL: v8i8_preload_arg:
+; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, 0
+; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-1-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
+; GFX90a-PRELOAD-1-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
+; GFX90a-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-2-LABEL: v8i8_preload_arg:
+; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-2-NEXT: s_lshr_b32 s0, s9, 8
+; GFX90a-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v0, 8, s0
+; GFX90a-PRELOAD-2-NEXT: s_lshr_b32 s0, s9, 24
+; GFX90a-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v1, 8, s0
+; GFX90a-PRELOAD-2-NEXT: s_lshr_b32 s0, s9, 16
+; GFX90a-PRELOAD-2-NEXT: v_or_b32_sdwa v0, s9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX90a-PRELOAD-2-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX90a-PRELOAD-2-NEXT: s_lshr_b32 s0, s8, 8
+; GFX90a-PRELOAD-2-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX90a-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v0, 8, s0
+; GFX90a-PRELOAD-2-NEXT: s_lshr_b32 s0, s8, 24
+; GFX90a-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v2, 8, s0
+; GFX90a-PRELOAD-2-NEXT: s_lshr_b32 s0, s8, 16
+; GFX90a-PRELOAD-2-NEXT: v_or_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX90a-PRELOAD-2-NEXT: v_or_b32_sdwa v2, s0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX90a-PRELOAD-2-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, 0
+; GFX90a-PRELOAD-2-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
+; GFX90a-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-4-LABEL: v8i8_preload_arg:
+; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s9, 8
+; GFX90a-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0
+; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s9, 24
+; GFX90a-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v1, 8, s0
+; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s9, 16
+; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v0, s9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s8, 8
+; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX90a-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0
+; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s8, 24
+; GFX90a-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v2, 8, s0
+; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s8, 16
+; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v2, s0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, 0
+; GFX90a-PRELOAD-4-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
+; GFX90a-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-8-LABEL: v8i8_preload_arg:
+; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s9, 8
+; GFX90a-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0
+; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s9, 24
+; GFX90a-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v1, 8, s0
+; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s9, 16
+; GFX90a-PRELOAD-8-NEXT: v_or_b32_sdwa v0, s9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX90a-PRELOAD-8-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s8, 8
+; GFX90a-PRELOAD-8-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX90a-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0
+; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s8, 24
+; GFX90a-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v2, 8, s0
+; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s8, 16
+; GFX90a-PRELOAD-8-NEXT: v_or_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX90a-PRELOAD-8-NEXT: v_or_b32_sdwa v2, s0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX90a-PRELOAD-8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, 0
+; GFX90a-PRELOAD-8-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
+; GFX90a-PRELOAD-8-NEXT: s_endpgm
store <8 x i8> %in, ptr addrspace(1) %out
ret void
}
define amdgpu_kernel void @i64_kernel_preload_arg(ptr addrspace(1) %out, i64 %a) {
-; NO-PRELOAD-LABEL: i64_kernel_preload_arg:
-; NO-PRELOAD: ; %bb.0:
-; NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v2, 0
-; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s2
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3
-; NO-PRELOAD-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
-; NO-PRELOAD-NEXT: s_endpgm
-;
-; PRELOAD-1-LABEL: i64_kernel_preload_arg:
-; PRELOAD-1: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: ; %bb.0:
-; PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8
-; PRELOAD-1-NEXT: v_mov_b32_e32 v2, 0
-; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-1-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
-; PRELOAD-1-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
-; PRELOAD-1-NEXT: s_endpgm
-;
-; PRELOAD-2-LABEL: i64_kernel_preload_arg:
-; PRELOAD-2: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: ; %bb.0:
-; PRELOAD-2-NEXT: v_mov_b32_e32 v2, 0
-; PRELOAD-2-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
-; PRELOAD-2-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
-; PRELOAD-2-NEXT: s_endpgm
-;
-; PRELOAD-4-LABEL: i64_kernel_preload_arg:
-; PRELOAD-4: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: ; %bb.0:
-; PRELOAD-4-NEXT: v_mov_b32_e32 v2, 0
-; PRELOAD-4-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
-; PRELOAD-4-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
-; PRELOAD-4-NEXT: s_endpgm
-;
-; PRELOAD-8-LABEL: i64_kernel_preload_arg:
-; PRELOAD-8: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: ; %bb.0:
-; PRELOAD-8-NEXT: v_mov_b32_e32 v2, 0
-; PRELOAD-8-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
-; PRELOAD-8-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
-; PRELOAD-8-NEXT: s_endpgm
+; GFX940-NO-PRELOAD-LABEL: i64_kernel_preload_arg:
+; GFX940-NO-PRELOAD: ; %bb.0:
+; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s2
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3
+; GFX940-NO-PRELOAD-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
+; GFX940-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-1-LABEL: i64_kernel_preload_arg:
+; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: ; %bb.0:
+; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-1-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX940-PRELOAD-1-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
+; GFX940-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-2-LABEL: i64_kernel_preload_arg:
+; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: ; %bb.0:
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-PRELOAD-2-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
+; GFX940-PRELOAD-2-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
+; GFX940-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-4-LABEL: i64_kernel_preload_arg:
+; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: ; %bb.0:
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-PRELOAD-4-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
+; GFX940-PRELOAD-4-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
+; GFX940-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-8-LABEL: i64_kernel_preload_arg:
+; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: ; %bb.0:
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-PRELOAD-8-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
+; GFX940-PRELOAD-8-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
+; GFX940-PRELOAD-8-NEXT: s_endpgm
+;
+; GFX90a-NO-PRELOAD-LABEL: i64_kernel_preload_arg:
+; GFX90a-NO-PRELOAD: ; %bb.0:
+; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, 0
+; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s2
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3
+; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX90a-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-1-LABEL: i64_kernel_preload_arg:
+; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, 0
+; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-1-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
+; GFX90a-PRELOAD-1-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
+; GFX90a-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-2-LABEL: i64_kernel_preload_arg:
+; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, 0
+; GFX90a-PRELOAD-2-NEXT: v_pk_mov_b32 v[0:1], s[8:9], s[8:9] op_sel:[0,1]
+; GFX90a-PRELOAD-2-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
+; GFX90a-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-4-LABEL: i64_kernel_preload_arg:
+; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, 0
+; GFX90a-PRELOAD-4-NEXT: v_pk_mov_b32 v[0:1], s[8:9], s[8:9] op_sel:[0,1]
+; GFX90a-PRELOAD-4-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
+; GFX90a-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-8-LABEL: i64_kernel_preload_arg:
+; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, 0
+; GFX90a-PRELOAD-8-NEXT: v_pk_mov_b32 v[0:1], s[8:9], s[8:9] op_sel:[0,1]
+; GFX90a-PRELOAD-8-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
+; GFX90a-PRELOAD-8-NEXT: s_endpgm
store i64 %a, ptr addrspace(1) %out, align 8
ret void
}
define amdgpu_kernel void @f64_kernel_preload_arg(ptr addrspace(1) %out, double %in) {
-; NO-PRELOAD-LABEL: f64_kernel_preload_arg:
-; NO-PRELOAD: ; %bb.0:
-; NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v2, 0
-; NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s2
-; NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3
-; NO-PRELOAD-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
-; NO-PRELOAD-NEXT: s_endpgm
-;
-; PRELOAD-1-LABEL: f64_kernel_preload_arg:
-; PRELOAD-1: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: s_nop 0
-; PRELOAD-1-NEXT: ; %bb.0:
-; PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8
-; PRELOAD-1-NEXT: v_mov_b32_e32 v2, 0
-; PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
-; PRELOAD-1-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
-; PRELOAD-1-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
-; PRELOAD-1-NEXT: s_endpgm
-;
-; PRELOAD-2-LABEL: f64_kernel_preload_arg:
-; PRELOAD-2: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: s_nop 0
-; PRELOAD-2-NEXT: ; %bb.0:
-; PRELOAD-2-NEXT: v_mov_b32_e32 v2, 0
-; PRELOAD-2-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
-; PRELOAD-2-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
-; PRELOAD-2-NEXT: s_endpgm
-;
-; PRELOAD-4-LABEL: f64_kernel_preload_arg:
-; PRELOAD-4: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: s_nop 0
-; PRELOAD-4-NEXT: ; %bb.0:
-; PRELOAD-4-NEXT: v_mov_b32_e32 v2, 0
-; PRELOAD-4-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
-; PRELOAD-4-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
-; PRELOAD-4-NEXT: s_endpgm
-;
-; PRELOAD-8-LABEL: f64_kernel_preload_arg:
-; PRELOAD-8: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: s_nop 0
-; PRELOAD-8-NEXT: ; %bb.0:
-; PRELOAD-8-NEXT: v_mov_b32_e32 v2, 0
-; PRELOAD-8-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
-; PRELOAD-8-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
-; PRELOAD-8-NEXT: s_endpgm
+; GFX940-NO-PRELOAD-LABEL: f64_kernel_preload_arg:
+; GFX940-NO-PRELOAD: ; %bb.0:
+; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s2
+; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3
+; GFX940-NO-PRELOAD-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
+; GFX940-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-1-LABEL: f64_kernel_preload_arg:
+; GFX940-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: s_nop 0
+; GFX940-PRELOAD-1-NEXT: ; %bb.0:
+; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8
+; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-PRELOAD-1-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX940-PRELOAD-1-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
+; GFX940-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-2-LABEL: f64_kernel_preload_arg:
+; GFX940-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: s_nop 0
+; GFX940-PRELOAD-2-NEXT: ; %bb.0:
+; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-PRELOAD-2-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
+; GFX940-PRELOAD-2-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
+; GFX940-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-4-LABEL: f64_kernel_preload_arg:
+; GFX940-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: s_nop 0
+; GFX940-PRELOAD-4-NEXT: ; %bb.0:
+; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-PRELOAD-4-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
+; GFX940-PRELOAD-4-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
+; GFX940-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX940-PRELOAD-8-LABEL: f64_kernel_preload_arg:
+; GFX940-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: s_nop 0
+; GFX940-PRELOAD-8-NEXT: ; %bb.0:
+; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, 0
+; GFX940-PRELOAD-8-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
+; GFX940-PRELOAD-8-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
+; GFX940-PRELOAD-8-NEXT: s_endpgm
+;
+; GFX90a-NO-PRELOAD-LABEL: f64_kernel_preload_arg:
+; GFX90a-NO-PRELOAD: ; %bb.0:
+; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, 0
+; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s2
+; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3
+; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX90a-NO-PRELOAD-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-1-LABEL: f64_kernel_preload_arg:
+; GFX90a-PRELOAD-1: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: s_nop 0
+; GFX90a-PRELOAD-1-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
+; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, 0
+; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90a-PRELOAD-1-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
+; GFX90a-PRELOAD-1-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
+; GFX90a-PRELOAD-1-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-2-LABEL: f64_kernel_preload_arg:
+; GFX90a-PRELOAD-2: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: s_nop 0
+; GFX90a-PRELOAD-2-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, 0
+; GFX90a-PRELOAD-2-NEXT: v_pk_mov_b32 v[0:1], s[8:9], s[8:9] op_sel:[0,1]
+; GFX90a-PRELOAD-2-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
+; GFX90a-PRELOAD-2-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-4-LABEL: f64_kernel_preload_arg:
+; GFX90a-PRELOAD-4: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: s_nop 0
+; GFX90a-PRELOAD-4-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, 0
+; GFX90a-PRELOAD-4-NEXT: v_pk_mov_b32 v[0:1], s[8:9], s[8:9] op_sel:[0,1]
+; GFX90a-PRELOAD-4-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
+; GFX90a-PRELOAD-4-NEXT: s_endpgm
+;
+; GFX90a-PRELOAD-8-LABEL: f64_kernel_preload_arg:
+; GFX90a-PRELOAD-8: s_trap 2 ; Trap with incompatible firmware that doesn't support preloading kernel arguments.
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: s_nop 0
+; GFX90a-PRELOAD-8-NEXT: ; %bb.0:
+; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, 0
+; GFX90a-PRELOAD-8-NEXT: v_pk_mov_b32 v[0:1], s[8:9], s[8:9] op_sel:[0,1]
+; GFX90a-PRELOAD-8-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7]
+; GFX90a-PRELOAD-8-NEXT: s_endpgm
store double %in, ptr addrspace(1) %out
ret void
}
More information about the llvm-commits
mailing list