[llvm] 864a2b2 - [AMDGPU] Reserve extra SGPR blocks with XNACK "any" TID Setting
Austin Kerbow via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 17 21:11:56 PDT 2023
Author: Austin Kerbow
Date: 2023-03-17T20:26:23-07:00
New Revision: 864a2b25beac507cc76b50030757283aae434c0c
URL: https://github.com/llvm/llvm-project/commit/864a2b25beac507cc76b50030757283aae434c0c
DIFF: https://github.com/llvm/llvm-project/commit/864a2b25beac507cc76b50030757283aae434c0c.diff
LOG: [AMDGPU] Reserve extra SGPR blocks with XNACK "any" TID Setting
ASMPrinter was relying on feature bits to set up extra SGPRs in the kernel
descriptor for the xnack_mask. This was broken for the dynamic XNACK "any" TID
setting, which could cause user SGPRs to be clobbered if the number of SGPRs
reserved was near a granulated block boundary.
When XNACK was enabled this worked correctly in the ASMParser which meant some
kernels were only failing without "-save-temps".
Fixes: SWDEV-382764
Reviewed By: kzhuravl
Differential Revision: https://reviews.llvm.org/D145401
Added:
llvm/test/CodeGen/AMDGPU/tid-kd-xnack-any.ll
llvm/test/CodeGen/AMDGPU/tid-kd-xnack-off.ll
llvm/test/CodeGen/AMDGPU/tid-kd-xnack-on.ll
Modified:
clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
llvm/test/CodeGen/AMDGPU/occupancy-levels.ll
llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
llvm/test/CodeGen/AMDGPU/trap-abis.ll
Removed:
################################################################################
diff --git a/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl b/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
index cf0c15b6319f1..9403d12afa05a 100644
--- a/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
+++ b/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
@@ -2,7 +2,7 @@
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx908 -Rpass-analysis=kernel-resource-usage -S -O0 -verify %s -o /dev/null
// expected-remark@+9 {{Function Name: foo}}
-// expected-remark@+8 {{ SGPRs: 9}}
+// expected-remark@+8 {{ SGPRs: 13}}
// expected-remark@+7 {{ VGPRs: 10}}
// expected-remark@+6 {{ AGPRs: 12}}
// expected-remark@+5 {{ ScratchSize [bytes/lane]: 0}}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 0883e7a5ed3a3..82c57dfcef0d8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -251,9 +251,9 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
STM, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
CurrentProgramInfo.NumVGPRsForWavesPerEU,
CurrentProgramInfo.NumSGPRsForWavesPerEU -
- IsaInfo::getNumExtraSGPRs(&STM,
- CurrentProgramInfo.VCCUsed,
- CurrentProgramInfo.FlatUsed),
+ IsaInfo::getNumExtraSGPRs(
+ &STM, CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
+ getTargetStreamer()->getTargetID()->isXnackOnOrAny()),
CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
CodeObjectVersion);
@@ -721,7 +721,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
// duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
// unified.
unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
- &STM, ProgInfo.VCCUsed, ProgInfo.FlatUsed);
+ &STM, ProgInfo.VCCUsed, ProgInfo.FlatUsed,
+ getTargetStreamer()->getTargetID()->isXnackOnOrAny());
// Check the addressable register limit before we add ExtraSGPRs.
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
index 8e558b539fa72..e639fce9d690e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
@@ -3061,7 +3061,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel
; GPRIDX-NEXT: gds_segment_byte_size = 0
; GPRIDX-NEXT: kernarg_segment_byte_size = 12
; GPRIDX-NEXT: workgroup_fbarrier_count = 0
-; GPRIDX-NEXT: wavefront_sgpr_count = 9
+; GPRIDX-NEXT: wavefront_sgpr_count = 13
; GPRIDX-NEXT: workitem_vgpr_count = 3
; GPRIDX-NEXT: reserved_vgpr_first = 0
; GPRIDX-NEXT: reserved_vgpr_count = 0
@@ -3913,7 +3913,7 @@ define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(ptr addrspace(1) %out, i32 %s
; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256
; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0
; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0
-; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 0
+; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 1
; GPRIDX-NEXT: priority = 0
; GPRIDX-NEXT: float_mode = 240
; GPRIDX-NEXT: priv = 0
@@ -3956,7 +3956,7 @@ define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(ptr addrspace(1) %out, i32 %s
; GPRIDX-NEXT: gds_segment_byte_size = 0
; GPRIDX-NEXT: kernarg_segment_byte_size = 12
; GPRIDX-NEXT: workgroup_fbarrier_count = 0
-; GPRIDX-NEXT: wavefront_sgpr_count = 6
+; GPRIDX-NEXT: wavefront_sgpr_count = 10
; GPRIDX-NEXT: workitem_vgpr_count = 2
; GPRIDX-NEXT: reserved_vgpr_first = 0
; GPRIDX-NEXT: reserved_vgpr_count = 0
@@ -4259,7 +4259,7 @@ define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(ptr addrspace(1) %out, i32 %s
; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256
; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0
; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0
-; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 0
+; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 1
; GPRIDX-NEXT: priority = 0
; GPRIDX-NEXT: float_mode = 240
; GPRIDX-NEXT: priv = 0
@@ -4302,7 +4302,7 @@ define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(ptr addrspace(1) %out, i32 %s
; GPRIDX-NEXT: gds_segment_byte_size = 0
; GPRIDX-NEXT: kernarg_segment_byte_size = 12
; GPRIDX-NEXT: workgroup_fbarrier_count = 0
-; GPRIDX-NEXT: wavefront_sgpr_count = 7
+; GPRIDX-NEXT: wavefront_sgpr_count = 11
; GPRIDX-NEXT: workitem_vgpr_count = 3
; GPRIDX-NEXT: reserved_vgpr_first = 0
; GPRIDX-NEXT: reserved_vgpr_count = 0
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
index 14db2ab9c419c..824adbecfc3ae 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
@@ -1,6 +1,6 @@
-; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX8 -enable-var-scope %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s
-; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL,GFX9 -enable-var-scope %s
+; RUN: llc -mtriple=amdgcn--amdpal -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX8 -enable-var-scope %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s
+; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mattr=-xnack -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL,GFX9 -enable-var-scope %s
declare amdgpu_gfx float @extern_func(float) #0
declare amdgpu_gfx float @extern_func_many_args(<64 x float>) #0
diff --git a/llvm/test/CodeGen/AMDGPU/occupancy-levels.ll b/llvm/test/CodeGen/AMDGPU/occupancy-levels.ll
index 3527329d1ee3b..5f3509c2517f7 100644
--- a/llvm/test/CodeGen/AMDGPU/occupancy-levels.ll
+++ b/llvm/test/CodeGen/AMDGPU/occupancy-levels.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck --check-prefixes=GCN,GFX9 %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10W32,GFX1010,GFX1010W32 %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10W64,GFX1010,GFX1010W64 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-xnack < %s | FileCheck --check-prefixes=GCN,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10W32,GFX1010,GFX1010W32 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-xnack -mattr=+wavefrontsize64 < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10W64,GFX1010,GFX1010W64 %s
; RUN: llc -march=amdgcn -mcpu=gfx1030 < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10W32,GFX1030,GFX1030W32 %s
; RUN: llc -march=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10W64,GFX1030,GFX1030W64 %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GCN,GFX1100,GFX1100W32 %s
diff --git a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
index 50a1d48b71304..2616b04332419 100644
--- a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
+++ b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
@@ -2,7 +2,7 @@
; RUN: FileCheck -check-prefix=REMARK %s < %t
; STDERR: remark: foo.cl:27:0: Function Name: test_kernel
-; STDERR-NEXT: remark: foo.cl:27:0: SGPRs: 24
+; STDERR-NEXT: remark: foo.cl:27:0: SGPRs: 28
; STDERR-NEXT: remark: foo.cl:27:0: VGPRs: 9
; STDERR-NEXT: remark: foo.cl:27:0: AGPRs: 43
; STDERR-NEXT: remark: foo.cl:27:0: ScratchSize [bytes/lane]: 0
@@ -27,7 +27,7 @@
; REMARK-NEXT: Function: test_kernel
; REMARK-NEXT: Args:
; REMARK-NEXT: - String: ' SGPRs: '
-; REMARK-NEXT: - NumSGPR: '24'
+; REMARK-NEXT: - NumSGPR: '28'
; REMARK-NEXT: ...
; REMARK-NEXT: --- !Analysis
; REMARK-NEXT: Pass: kernel-resource-usage
@@ -120,7 +120,7 @@ define void @test_func() !dbg !6 {
}
; STDERR: remark: foo.cl:8:0: Function Name: empty_kernel
-; STDERR-NEXT: remark: foo.cl:8:0: SGPRs: 0
+; STDERR-NEXT: remark: foo.cl:8:0: SGPRs: 4
; STDERR-NEXT: remark: foo.cl:8:0: VGPRs: 0
; STDERR-NEXT: remark: foo.cl:8:0: AGPRs: 0
; STDERR-NEXT: remark: foo.cl:8:0: ScratchSize [bytes/lane]: 0
diff --git a/llvm/test/CodeGen/AMDGPU/tid-kd-xnack-any.ll b/llvm/test/CodeGen/AMDGPU/tid-kd-xnack-any.ll
new file mode 100644
index 0000000000000..2d2d64910c4fb
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/tid-kd-xnack-any.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefixes=ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --filetype=obj < %s | llvm-objdump -s -j .rodata - | FileCheck --check-prefixes=OBJ %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --filetype=obj < %s | llvm-readelf --notes - | FileCheck --check-prefixes=ELF %s
+
+; TODO: Update to check for granulated sgpr count directive once one is added.
+
+define amdgpu_kernel void @kern() {
+; ASM-LABEL: kern:
+; ASM: .amdhsa_next_free_sgpr 5
+; ASM: .amdhsa_reserve_xnack_mask 1
+
+; Verify that an extra SGPR block is reserved with XNACK "any" tid setting.
+; OBJ: Contents of section .rodata:
+; OBJ-NEXT: 0000 00000000 00000000 00000000 00000000 ................
+; OBJ-NEXT: 0010 00000000 00000000 00000000 00000000 ................
+; OBJ-NEXT: 0020 00000000 00000000 00000000 00000000 ................
+; OBJ-NEXT: 0030 4000af00 88000000 01000000 00000000 @...............
+
+; ELF: AMDGPU Metadata
+; ELF: .sgpr_count: 9
+entry:
+ tail call void asm sideeffect "", "~{s[0:4]}"()
+ ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/tid-kd-xnack-off.ll b/llvm/test/CodeGen/AMDGPU/tid-kd-xnack-off.ll
new file mode 100644
index 0000000000000..e676f4f8de74d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/tid-kd-xnack-off.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=-xnack < %s | FileCheck --check-prefixes=ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=-xnack --filetype=obj < %s | llvm-objdump -s -j .rodata - | FileCheck --check-prefixes=OBJ %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=-xnack --filetype=obj < %s | llvm-readelf --notes - | FileCheck --check-prefixes=ELF %s
+
+; TODO: Update to check for granulated sgpr count directive once one is added.
+
+define amdgpu_kernel void @kern() {
+; ASM-LABEL: kern:
+; ASM: .amdhsa_next_free_sgpr 5
+; ASM: .amdhsa_reserve_xnack_mask 0
+
+; Verify that an extra SGPR block is not reserved with XNACK "off" tid setting.
+; OBJ: Contents of section .rodata:
+; OBJ-NEXT: 0000 00000000 00000000 00000000 00000000 ................
+; OBJ-NEXT: 0010 00000000 00000000 00000000 00000000 ................
+; OBJ-NEXT: 0020 00000000 00000000 00000000 00000000 ................
+; OBJ-NEXT: 0030 0000af00 88000000 01000000 00000000 ................
+
+; ELF: AMDGPU Metadata
+; ELF: .sgpr_count: 5
+entry:
+ tail call void asm sideeffect "", "~{s[0:4]}"()
+ ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/tid-kd-xnack-on.ll b/llvm/test/CodeGen/AMDGPU/tid-kd-xnack-on.ll
new file mode 100644
index 0000000000000..705bedf450975
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/tid-kd-xnack-on.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack < %s | FileCheck --check-prefixes=ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack --filetype=obj < %s | llvm-objdump -s -j .rodata - | FileCheck --check-prefixes=OBJ %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack --filetype=obj < %s | llvm-readelf --notes - | FileCheck --check-prefixes=ELF %s
+
+; TODO: Update to check for granulated sgpr count directive once one is added.
+
+define amdgpu_kernel void @kern() {
+; ASM-LABEL: kern:
+; ASM: .amdhsa_next_free_sgpr 5
+; ASM: .amdhsa_reserve_xnack_mask 1
+
+; Verify that an extra SGPR block is reserved with XNACK "on" tid setting.
+; OBJ: Contents of section .rodata:
+; OBJ-NEXT: 0000 00000000 00000000 00000000 00000000 ................
+; OBJ-NEXT: 0010 00000000 00000000 00000000 00000000 ................
+; OBJ-NEXT: 0020 00000000 00000000 00000000 00000000 ................
+; OBJ-NEXT: 0030 4000af00 88000000 01000000 00000000 @...............
+
+; ELF: AMDGPU Metadata
+; ELF: .sgpr_count: 9
+entry:
+ tail call void asm sideeffect "", "~{s[0:4]}"()
+ ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/trap-abis.ll b/llvm/test/CodeGen/AMDGPU/trap-abis.ll
index 8cdd8ad002c69..c9987ac7831a4 100644
--- a/llvm/test/CodeGen/AMDGPU/trap-abis.ll
+++ b/llvm/test/CodeGen/AMDGPU/trap-abis.ll
@@ -17,7 +17,75 @@ declare void @llvm.debugtrap() #1
define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) {
; NOHSA-TRAP-GFX900-V2-LABEL: trap:
-; NOHSA-TRAP-GFX900-V2: ; %bb.0:
+; NOHSA-TRAP-GFX900-V2: .amd_kernel_code_t
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_code_version_major = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_code_version_minor = 2
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_kind = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_major = 9
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256
+; NOHSA-TRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: priority = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: float_mode = 240
+; NOHSA-TRAP-GFX900-V2-NEXT: priv = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: debug_mode = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_ieee_mode = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_wgp_mode = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_mem_ordered = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_fwd_progress = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: user_sgpr_count = 4
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_trap_handler = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_exception_msb = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: granulated_lds_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_exception = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: private_element_size = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: is_ptr64 = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: is_debug_enabled = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: is_xnack_enabled = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 44
+; NOHSA-TRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 8
+; NOHSA-TRAP-GFX900-V2-NEXT: workitem_vgpr_count = 2
+; NOHSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4
+; NOHSA-TRAP-GFX900-V2-NEXT: group_segment_alignment = 4
+; NOHSA-TRAP-GFX900-V2-NEXT: private_segment_alignment = 4
+; NOHSA-TRAP-GFX900-V2-NEXT: wavefront_size = 6
+; NOHSA-TRAP-GFX900-V2-NEXT: call_convention = -1
+; NOHSA-TRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t
+; NOHSA-TRAP-GFX900-V2-NEXT: ; %bb.0:
; NOHSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0
; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1
@@ -161,7 +229,7 @@ define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) {
; HSA-TRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256
; HSA-TRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0
; HSA-TRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0
-; HSA-TRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 0
+; HSA-TRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 1
; HSA-TRAP-GFX900-V2-NEXT: priority = 0
; HSA-TRAP-GFX900-V2-NEXT: float_mode = 240
; HSA-TRAP-GFX900-V2-NEXT: priv = 0
@@ -204,7 +272,7 @@ define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) {
; HSA-TRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0
; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8
; HSA-TRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0
-; HSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 8
+; HSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 12
; HSA-TRAP-GFX900-V2-NEXT: workitem_vgpr_count = 2
; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0
; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0
@@ -261,7 +329,7 @@ define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) {
; HSA-NOTRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256
; HSA-NOTRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0
; HSA-NOTRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0
-; HSA-NOTRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 1
; HSA-NOTRAP-GFX900-V2-NEXT: priority = 0
; HSA-NOTRAP-GFX900-V2-NEXT: float_mode = 240
; HSA-NOTRAP-GFX900-V2-NEXT: priv = 0
@@ -304,7 +372,7 @@ define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) {
; HSA-NOTRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0
; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8
; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0
-; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 8
+; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 12
; HSA-NOTRAP-GFX900-V2-NEXT: workitem_vgpr_count = 2
; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0
; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0
@@ -356,7 +424,75 @@ define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) {
define amdgpu_kernel void @non_entry_trap(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
; NOHSA-TRAP-GFX900-V2-LABEL: non_entry_trap:
-; NOHSA-TRAP-GFX900-V2: ; %bb.0: ; %entry
+; NOHSA-TRAP-GFX900-V2: .amd_kernel_code_t
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_code_version_major = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_code_version_minor = 2
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_kind = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_major = 9
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256
+; NOHSA-TRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: priority = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: float_mode = 240
+; NOHSA-TRAP-GFX900-V2-NEXT: priv = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: debug_mode = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_ieee_mode = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_wgp_mode = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_mem_ordered = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_fwd_progress = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: user_sgpr_count = 4
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_trap_handler = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_exception_msb = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: granulated_lds_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_exception = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: private_element_size = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: is_ptr64 = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: is_debug_enabled = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: is_xnack_enabled = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 44
+; NOHSA-TRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 8
+; NOHSA-TRAP-GFX900-V2-NEXT: workitem_vgpr_count = 2
+; NOHSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4
+; NOHSA-TRAP-GFX900-V2-NEXT: group_segment_alignment = 4
+; NOHSA-TRAP-GFX900-V2-NEXT: private_segment_alignment = 4
+; NOHSA-TRAP-GFX900-V2-NEXT: wavefront_size = 6
+; NOHSA-TRAP-GFX900-V2-NEXT: call_convention = -1
+; NOHSA-TRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t
+; NOHSA-TRAP-GFX900-V2-NEXT: ; %bb.0: ; %entry
; NOHSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0
; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0)
@@ -591,7 +727,7 @@ define amdgpu_kernel void @non_entry_trap(ptr addrspace(1) nocapture readonly %a
; HSA-TRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0
; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8
; HSA-TRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0
-; HSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 10
+; HSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 12
; HSA-TRAP-GFX900-V2-NEXT: workitem_vgpr_count = 2
; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0
; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0
@@ -712,7 +848,7 @@ define amdgpu_kernel void @non_entry_trap(ptr addrspace(1) nocapture readonly %a
; HSA-NOTRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0
; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8
; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0
-; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 10
+; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 12
; HSA-NOTRAP-GFX900-V2-NEXT: workitem_vgpr_count = 2
; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0
; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0
@@ -792,7 +928,75 @@ ret:
define amdgpu_kernel void @debugtrap(ptr addrspace(1) nocapture readonly %arg0) {
; NOHSA-TRAP-GFX900-V2-LABEL: debugtrap:
-; NOHSA-TRAP-GFX900-V2: ; %bb.0:
+; NOHSA-TRAP-GFX900-V2: .amd_kernel_code_t
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_code_version_major = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_code_version_minor = 2
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_kind = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_major = 9
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_minor = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: amd_machine_version_stepping = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256
+; NOHSA-TRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: priority = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: float_mode = 240
+; NOHSA-TRAP-GFX900-V2-NEXT: priv = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: debug_mode = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_ieee_mode = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_wgp_mode = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_mem_ordered = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_fwd_progress = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: user_sgpr_count = 2
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_trap_handler = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_exception_msb = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: granulated_lds_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_exception = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: private_element_size = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: is_ptr64 = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: is_debug_enabled = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: is_xnack_enabled = 1
+; NOHSA-TRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 44
+; NOHSA-TRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 6
+; NOHSA-TRAP-GFX900-V2-NEXT: workitem_vgpr_count = 3
+; NOHSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4
+; NOHSA-TRAP-GFX900-V2-NEXT: group_segment_alignment = 4
+; NOHSA-TRAP-GFX900-V2-NEXT: private_segment_alignment = 4
+; NOHSA-TRAP-GFX900-V2-NEXT: wavefront_size = 6
+; NOHSA-TRAP-GFX900-V2-NEXT: call_convention = -1
+; NOHSA-TRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0
+; NOHSA-TRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t
+; NOHSA-TRAP-GFX900-V2-NEXT: ; %bb.0:
; NOHSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0
; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1
@@ -954,7 +1158,7 @@ define amdgpu_kernel void @debugtrap(ptr addrspace(1) nocapture readonly %arg0)
; HSA-TRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256
; HSA-TRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0
; HSA-TRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0
-; HSA-TRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 0
+; HSA-TRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 1
; HSA-TRAP-GFX900-V2-NEXT: priority = 0
; HSA-TRAP-GFX900-V2-NEXT: float_mode = 240
; HSA-TRAP-GFX900-V2-NEXT: priv = 0
@@ -997,7 +1201,7 @@ define amdgpu_kernel void @debugtrap(ptr addrspace(1) nocapture readonly %arg0)
; HSA-TRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0
; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8
; HSA-TRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0
-; HSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 6
+; HSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 10
; HSA-TRAP-GFX900-V2-NEXT: workitem_vgpr_count = 3
; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0
; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0
@@ -1064,7 +1268,7 @@ define amdgpu_kernel void @debugtrap(ptr addrspace(1) nocapture readonly %arg0)
; HSA-NOTRAP-GFX900-V2-NEXT: kernel_code_entry_byte_offset = 256
; HSA-NOTRAP-GFX900-V2-NEXT: kernel_code_prefetch_byte_size = 0
; HSA-NOTRAP-GFX900-V2-NEXT: granulated_workitem_vgpr_count = 0
-; HSA-NOTRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 0
+; HSA-NOTRAP-GFX900-V2-NEXT: granulated_wavefront_sgpr_count = 1
; HSA-NOTRAP-GFX900-V2-NEXT: priority = 0
; HSA-NOTRAP-GFX900-V2-NEXT: float_mode = 240
; HSA-NOTRAP-GFX900-V2-NEXT: priv = 0
@@ -1107,7 +1311,7 @@ define amdgpu_kernel void @debugtrap(ptr addrspace(1) nocapture readonly %arg0)
; HSA-NOTRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0
; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8
; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0
-; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 6
+; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 10
; HSA-NOTRAP-GFX900-V2-NEXT: workitem_vgpr_count = 3
; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0
; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0
More information about the llvm-commits
mailing list