[llvm] [AMDGPU][GlobalISel] Add register bank legalization for G_FADD (PR #163407)
Abhinav Garg via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 30 03:13:24 PDT 2025
https://github.com/abhigargrepo updated https://github.com/llvm/llvm-project/pull/163407
>From 2529613139078311d312eeedb6af3cef4379d517 Mon Sep 17 00:00:00 2001
From: Abhinav Garg <abhigarg at amd.com>
Date: Tue, 14 Oct 2025 02:56:44 -0700
Subject: [PATCH 1/2] Add register bank legalization for G_FADD
---
.../AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 3 +
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 11 +-
.../AMDGPU/AMDGPURegBankLegalizeRules.h | 4 +
llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll | 246 ++++++++++++++++++
4 files changed, 263 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index 540756653dd22..198ee6b73b0b5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -837,6 +837,7 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
return LLT::scalar(32);
case Sgpr64:
case Vgpr64:
+ case UniInVgprS64:
return LLT::scalar(64);
case Sgpr128:
case Vgpr128:
@@ -960,6 +961,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
case UniInVcc:
case UniInVgprS16:
case UniInVgprS32:
+ case UniInVgprS64:
case UniInVgprV2S16:
case UniInVgprV4S32:
case UniInVgprB32:
@@ -1092,6 +1094,7 @@ void RegBankLegalizeHelper::applyMappingDst(
break;
}
case UniInVgprS32:
+ case UniInVgprS64:
case UniInVgprV2S16:
case UniInVgprV4S32: {
assert(Ty == getTyFromID(MethodIDs[OpIdx]));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index bfe2c80c810ef..9cf0c52717318 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -904,9 +904,18 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
bool hasSALUFloat = ST->hasSALUFloatInsts();
addRulesForGOpcs({G_FADD}, Standard)
+ .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
+ .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
+ .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
.Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
.Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat)
- .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});
+ .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
+ .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
+ .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
+ .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
+ .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
+ .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32, VgprV2S32}}})
+ .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32, VgprV2S32}}});
addRulesForGOpcs({G_FPTOUI})
.Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
index 93e0efda77fdd..1cf9ae2e226ca 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
@@ -92,8 +92,10 @@ enum UniformityLLTOpPredicateID {
V4S32,
UniV2S16,
+ UniV2S32,
DivV2S16,
+ DivV2S32,
// B types
B32,
@@ -178,7 +180,9 @@ enum RegBankLLTMappingApplyID {
UniInVcc,
UniInVgprS16,
UniInVgprS32,
+ UniInVgprS64,
UniInVgprV2S16,
+ UniInVgprV2S32,
UniInVgprV4S32,
UniInVgprB32,
UniInVgprB64,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll
new file mode 100644
index 0000000000000..ec221496f450c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll
@@ -0,0 +1,246 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-GISEL,GFX12-GISEL-FAKE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
+
+define amdgpu_ps half @fadd_s16_uniform(half inreg %a, half inreg %b) {
+; GFX11-SDAG-FAKE16-LABEL: fadd_s16_uniform:
+; GFX11-SDAG-FAKE16: ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT: v_add_f16_e64 v0, s0, s1
+; GFX11-SDAG-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX11-SDAG-TRUE16-LABEL: fadd_s16_uniform:
+; GFX11-SDAG-TRUE16: ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e64 v0.l, s0, s1
+; GFX11-SDAG-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-GISEL-FAKE16-LABEL: fadd_s16_uniform:
+; GFX11-GISEL-FAKE16: ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e64 v0, s0, s1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-GISEL-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX11-GISEL-TRUE16-LABEL: fadd_s16_uniform:
+; GFX11-GISEL-TRUE16: ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT: v_add_f16_e64 v0.l, s0, s1
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-GISEL-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-LABEL: fadd_s16_uniform:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_add_f16 s0, s0, s1
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-NEXT: ; return to shader part epilog
+ %fadd = fadd half %a, %b
+ ret half %fadd
+}
+
+define amdgpu_ps half @fadd_s16_div(half %a, half %b) {
+; GFX11-SDAG-FAKE16-LABEL: fadd_s16_div:
+; GFX11-SDAG-FAKE16: ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
+; GFX11-SDAG-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX11-SDAG-TRUE16-LABEL: fadd_s16_div:
+; GFX11-SDAG-TRUE16: ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX11-GISEL-FAKE16-LABEL: fadd_s16_div:
+; GFX11-GISEL-FAKE16: ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX11-GISEL-TRUE16-LABEL: fadd_s16_div:
+; GFX11-GISEL-TRUE16: ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-SDAG-FAKE16-LABEL: fadd_s16_div:
+; GFX12-SDAG-FAKE16: ; %bb.0:
+; GFX12-SDAG-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
+; GFX12-SDAG-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-SDAG-TRUE16-LABEL: fadd_s16_div:
+; GFX12-SDAG-TRUE16: ; %bb.0:
+; GFX12-SDAG-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
+; GFX12-SDAG-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-GISEL-FAKE16-LABEL: fadd_s16_div:
+; GFX12-GISEL-FAKE16: ; %bb.0:
+; GFX12-GISEL-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
+; GFX12-GISEL-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-GISEL-TRUE16-LABEL: fadd_s16_div:
+; GFX12-GISEL-TRUE16: ; %bb.0:
+; GFX12-GISEL-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
+; GFX12-GISEL-TRUE16-NEXT: ; return to shader part epilog
+ %fadd = fadd half %a, %b
+ ret half %fadd
+}
+
+define amdgpu_ps float @fadd_s32_uniform(float inreg %a, float inreg %b) {
+; GFX11-LABEL: fadd_s32_uniform:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f32_e64 v0, s0, s1
+; GFX11-NEXT: ; return to shader part epilog
+;
+; GFX12-LABEL: fadd_s32_uniform:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_add_f32 s0, s0, s1
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-NEXT: ; return to shader part epilog
+ %fadd = fadd float %a, %b
+ ret float %fadd
+}
+
+define amdgpu_ps float @fadd_s32_div(float %a, float %b) {
+; GCN-LABEL: fadd_s32_div:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_add_f32_e32 v0, v0, v1
+; GCN-NEXT: ; return to shader part epilog
+ %fadd = fadd float %a, %b
+ ret float %fadd
+}
+
+define amdgpu_ps double @fadd_s64_uniform(double inreg %a, double inreg %b) {
+; GFX11-LABEL: fadd_s64_uniform:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f64 v[0:1], s[0:1], s[2:3]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-NEXT: v_readfirstlane_b32 s1, v1
+; GFX11-NEXT: ; return to shader part epilog
+;
+; GFX12-LABEL: fadd_s64_uniform:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: v_add_f64_e64 v[0:1], s[0:1], s[2:3]
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-NEXT: v_readfirstlane_b32 s0, v0
+; GFX12-NEXT: v_readfirstlane_b32 s1, v1
+; GFX12-NEXT: s_wait_alu 0xf1ff
+; GFX12-NEXT: ; return to shader part epilog
+ %fadd = fadd double %a, %b
+ ret double %fadd
+}
+
+define amdgpu_ps double @fadd_s64_div(double %a, double %b) {
+; GFX11-LABEL: fadd_s64_div:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-NEXT: v_readfirstlane_b32 s1, v1
+; GFX11-NEXT: ; return to shader part epilog
+;
+; GFX12-LABEL: fadd_s64_div:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: v_add_f64_e32 v[0:1], v[0:1], v[2:3]
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-NEXT: v_readfirstlane_b32 s0, v0
+; GFX12-NEXT: v_readfirstlane_b32 s1, v1
+; GFX12-NEXT: ; return to shader part epilog
+ %fadd = fadd double %a, %b
+ ret double %fadd
+}
+
+define <2 x half> @fadd_v2s16_uniform(<2 x half> inreg %a, <2 x half> inreg %b) {
+; GFX11-LABEL: fadd_v2s16_uniform:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_pk_add_f16 v0, s0, s1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: fadd_v2s16_uniform:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_pk_add_f16 v0, s0, s1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %fadd = fadd <2 x half> %a, %b
+ ret <2 x half> %fadd
+}
+
+define <2 x half> @fadd_v2s16_div(<2 x half> %a, <2 x half> %b) {
+; GFX11-LABEL: fadd_v2s16_div:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_pk_add_f16 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: fadd_v2s16_div:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_pk_add_f16 v0, v0, v1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %fadd = fadd <2 x half> %a, %b
+ ret <2 x half> %fadd
+}
+
+define <2 x float> @fadd_v2s32_uniform(<2 x float> inreg %a, <2 x float> inreg %b) {
+; GFX11-LABEL: fadd_v2s32_uniform:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_f32_e64 v0, s0, s2
+; GFX11-NEXT: v_add_f32_e64 v1, s1, s3
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: fadd_v2s32_uniform:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_add_f32 s0, s0, s2
+; GFX12-NEXT: s_add_f32 s1, s1, s3
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_2)
+; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %fadd = fadd <2 x float> %a, %b
+ ret <2 x float> %fadd
+}
+
+define <2 x float> @fadd_v2s32_div(<2 x float> %a, <2 x float> %b) {
+; GFX11-LABEL: fadd_v2s32_div:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: fadd_v2s32_div:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %fadd = fadd <2 x float> %a, %b
+ ret <2 x float> %fadd
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11-GISEL: {{.*}}
+; GFX11-SDAG: {{.*}}
+; GFX12-GISEL: {{.*}}
+; GFX12-SDAG: {{.*}}
>From d398f3ae0ad166b5467aec7502782f51dbed21c1 Mon Sep 17 00:00:00 2001
From: Abhinav Garg <abhigarg at amd.com>
Date: Thu, 30 Oct 2025 10:02:54 +0000
Subject: [PATCH 2/2] Address review comments: Scalarize v2s16 for uniform
operation
---
.../CodeGen/GlobalISel/MachineIRBuilder.cpp | 73 +-
.../AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 20 +
.../AMDGPU/AMDGPURegBankLegalizeHelper.h | 2 +
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 4 +-
.../AMDGPU/AMDGPURegBankLegalizeRules.h | 1 +
llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll | 203 +-
llvm/test/CodeGen/AMDGPU/lds-size.ll | 85 +-
llvm/test/CodeGen/AMDGPU/trap.ll | 2543 ++++++++++++++++-
8 files changed, 2672 insertions(+), 259 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 27df7e369436a..807a320d91e72 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -99,7 +99,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
"Expected inlined-at fields to agree");
auto MIB = buildInstrNoInsert(TargetOpcode::DBG_VALUE);
- auto *NumericConstant = [&] () -> const Constant* {
+ auto *NumericConstant = [&]() -> const Constant * {
if (const auto *CE = dyn_cast<ConstantExpr>(&C))
if (CE->getOpcode() == Instruction::IntToPtr)
return CE->getOperand(0);
@@ -203,7 +203,8 @@ MachineIRBuilder::buildPtrAdd(const DstOp &Res, const SrcOp &Op0,
const SrcOp &Op1, std::optional<unsigned> Flags) {
assert(Res.getLLTTy(*getMRI()).isPointerOrPointerVector() &&
Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch");
- assert(Op1.getLLTTy(*getMRI()).getScalarType().isScalar() && "invalid offset type");
+ assert(Op1.getLLTTy(*getMRI()).getScalarType().isScalar() &&
+ "invalid offset type");
return buildInstr(TargetOpcode::G_PTR_ADD, {Res}, {Op0, Op1}, Flags);
}
@@ -314,8 +315,7 @@ MachineInstrBuilder MachineIRBuilder::buildBrIndirect(Register Tgt) {
return buildInstr(TargetOpcode::G_BRINDIRECT).addUse(Tgt);
}
-MachineInstrBuilder MachineIRBuilder::buildBrJT(Register TablePtr,
- unsigned JTI,
+MachineInstrBuilder MachineIRBuilder::buildBrJT(Register TablePtr, unsigned JTI,
Register IndexReg) {
assert(getMRI()->getType(TablePtr).isPointer() &&
"Table reg must be a pointer");
@@ -343,8 +343,8 @@ MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
if (Ty.isFixedVector()) {
auto Const = buildInstr(TargetOpcode::G_CONSTANT)
- .addDef(getMRI()->createGenericVirtualRegister(EltTy))
- .addCImm(&Val);
+ .addDef(getMRI()->createGenericVirtualRegister(EltTy))
+ .addCImm(&Val);
return buildSplatBuildVector(Res, Const);
}
@@ -369,8 +369,8 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
LLT Ty = Res.getLLTTy(*getMRI());
LLT EltTy = Ty.getScalarType();
- assert(APFloat::getSizeInBits(Val.getValueAPF().getSemantics())
- == EltTy.getSizeInBits() &&
+ assert(APFloat::getSizeInBits(Val.getValueAPF().getSemantics()) ==
+ EltTy.getSizeInBits() &&
"creating fconstant with the wrong size");
assert(!Ty.isPointer() && "invalid operand type");
@@ -380,8 +380,8 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
if (Ty.isFixedVector()) {
auto Const = buildInstr(TargetOpcode::G_FCONSTANT)
- .addDef(getMRI()->createGenericVirtualRegister(EltTy))
- .addFPImm(&Val);
+ .addDef(getMRI()->createGenericVirtualRegister(EltTy))
+ .addFPImm(&Val);
return buildSplatBuildVector(Res, Const);
}
@@ -403,8 +403,8 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
double Val) {
LLT DstTy = Res.getLLTTy(*getMRI());
auto &Ctx = getMF().getFunction().getContext();
- auto *CFP =
- ConstantFP::get(Ctx, getAPFloatFromSize(Val, DstTy.getScalarSizeInBits()));
+ auto *CFP = ConstantFP::get(
+ Ctx, getAPFloatFromSize(Val, DstTy.getScalarSizeInBits()));
return buildFConstant(Res, *CFP);
}
@@ -466,9 +466,10 @@ MachineInstrBuilder MachineIRBuilder::buildLoadInstr(unsigned Opcode,
return MIB;
}
-MachineInstrBuilder MachineIRBuilder::buildLoadFromOffset(
- const DstOp &Dst, const SrcOp &BasePtr,
- MachineMemOperand &BaseMMO, int64_t Offset) {
+MachineInstrBuilder
+MachineIRBuilder::buildLoadFromOffset(const DstOp &Dst, const SrcOp &BasePtr,
+ MachineMemOperand &BaseMMO,
+ int64_t Offset) {
LLT LoadTy = Dst.getLLTTy(*getMRI());
MachineMemOperand *OffsetMMO =
getMF().getMachineMemOperand(&BaseMMO, Offset, LoadTy);
@@ -539,9 +540,9 @@ unsigned MachineIRBuilder::getBoolExtOp(bool IsVec, bool IsFP) const {
}
MachineInstrBuilder MachineIRBuilder::buildBoolExt(const DstOp &Res,
- const SrcOp &Op,
- bool IsFP) {
- unsigned ExtOp = getBoolExtOp(getMRI()->getType(Op.getReg()).isVector(), IsFP);
+ const SrcOp &Op, bool IsFP) {
+ unsigned ExtOp =
+ getBoolExtOp(getMRI()->getType(Op.getReg()).isVector(), IsFP);
return buildInstr(ExtOp, Res, Op);
}
@@ -709,9 +710,9 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<LLT> Res,
return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
}
-MachineInstrBuilder MachineIRBuilder::buildUnmerge(LLT Res,
- const SrcOp &Op) {
- unsigned NumReg = Op.getLLTTy(*getMRI()).getSizeInBits() / Res.getSizeInBits();
+MachineInstrBuilder MachineIRBuilder::buildUnmerge(LLT Res, const SrcOp &Op) {
+ unsigned NumReg =
+ Op.getLLTTy(*getMRI()).getSizeInBits() / Res.getSizeInBits();
SmallVector<DstOp, 8> TmpVec(NumReg, Res);
return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
}
@@ -1053,10 +1054,11 @@ MachineIRBuilder::buildAtomicCmpXchg(const DstOp &OldValRes, const SrcOp &Addr,
return MIB;
}
-MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(
- unsigned Opcode, const DstOp &OldValRes,
- const SrcOp &Addr, const SrcOp &Val,
- MachineMemOperand &MMO) {
+MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(unsigned Opcode,
+ const DstOp &OldValRes,
+ const SrcOp &Addr,
+ const SrcOp &Val,
+ MachineMemOperand &MMO) {
#ifndef NDEBUG
LLT OldValResTy = OldValRes.getLLTTy(*getMRI());
@@ -1145,16 +1147,15 @@ MachineIRBuilder::buildAtomicRMWUmin(Register OldValRes, Register Addr,
}
MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWFAdd(
- const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val,
- MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWFAdd(const DstOp &OldValRes, const SrcOp &Addr,
+ const SrcOp &Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FADD, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWFSub(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val,
- MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWFSub(const DstOp &OldValRes, const SrcOp &Addr,
+ const SrcOp &Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FSUB, OldValRes, Addr, Val,
MMO);
}
@@ -1189,11 +1190,9 @@ MachineIRBuilder::buildAtomicRMWFMinimum(const DstOp &OldValRes,
Val, MMO);
}
-MachineInstrBuilder
-MachineIRBuilder::buildFence(unsigned Ordering, unsigned Scope) {
- return buildInstr(TargetOpcode::G_FENCE)
- .addImm(Ordering)
- .addImm(Scope);
+MachineInstrBuilder MachineIRBuilder::buildFence(unsigned Ordering,
+ unsigned Scope) {
+ return buildInstr(TargetOpcode::G_FENCE).addImm(Ordering).addImm(Scope);
}
MachineInstrBuilder MachineIRBuilder::buildPrefetch(const SrcOp &Addr,
@@ -1276,6 +1275,7 @@ MachineIRBuilder::buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps,
SrcOps[0].getLLTTy(*getMRI()));
break;
case TargetOpcode::G_ADD:
+ case TargetOpcode::G_FADD:
case TargetOpcode::G_AND:
case TargetOpcode::G_MUL:
case TargetOpcode::G_OR:
@@ -1333,7 +1333,8 @@ MachineIRBuilder::buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps,
assert(DstOps.size() == 1 && "Invalid Dst");
assert(SrcOps.size() == 1 && "Invalid Srcs");
assert(DstOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
- SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() && "invalid bitcast");
+ SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
+ "invalid bitcast");
break;
}
case TargetOpcode::COPY:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index 198ee6b73b0b5..a8c041dad6963 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -616,6 +616,24 @@ void RegBankLegalizeHelper::lowerSplitTo32(MachineInstr &MI) {
MI.eraseFromParent();
}
+// Split a V2S16 two-source op into lo/hi S16 ops in SgprRB, then re-merge.
+void RegBankLegalizeHelper::lowerSplitTo16(MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ assert(MRI.getType(Dst) == V2S16 && "expected a V2S16 destination");
+ auto [Val0_Lo_32, Val0_Hi_32] = unpackAExt(MI.getOperand(1).getReg());
+ auto [Val1_Lo_32, Val1_Hi_32] = unpackAExt(MI.getOperand(2).getReg());
+ unsigned Opc = MI.getOpcode();
+ auto Flags = MI.getFlags();
+ auto Val0_Lo = B.buildTrunc(SgprRB_S16, Val0_Lo_32);
+ auto Val0_Hi = B.buildTrunc(SgprRB_S16, Val0_Hi_32);
+ auto Val1_Lo = B.buildTrunc(SgprRB_S16, Val1_Lo_32);
+ auto Val1_Hi = B.buildTrunc(SgprRB_S16, Val1_Hi_32);
+ auto Lo = B.buildInstr(Opc, {SgprRB_S16}, {Val0_Lo, Val1_Lo}, Flags);
+ auto Hi = B.buildInstr(Opc, {SgprRB_S16}, {Val0_Hi, Val1_Hi}, Flags);
+ B.buildMergeLikeInstr(Dst, {Lo.getReg(0), Hi.getReg(0)});
+ MI.eraseFromParent();
+}
+
void RegBankLegalizeHelper::lowerSplitTo32Select(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(Dst);
@@ -688,6 +706,8 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
return lowerUnpackBitShift(MI);
case UnpackMinMax:
return lowerUnpackMinMax(MI);
+ case ScalarizeToS16:
+ return lowerSplitTo16(MI);
case Ext32To64: {
const RegisterBank *RB = MRI.getRegBank(MI.getOperand(0).getReg());
MachineInstrBuilder Hi;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
index d937815bf4714..df0d7ef4689fe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
@@ -72,6 +72,7 @@ class RegBankLegalizeHelper {
static constexpr LLT P6 = LLT::pointer(6, 32);
MachineRegisterInfo::VRegAttrs SgprRB_S32 = {SgprRB, S32};
+ MachineRegisterInfo::VRegAttrs SgprRB_S16 = {SgprRB, S16};
MachineRegisterInfo::VRegAttrs VgprRB_S32 = {VgprRB, S32};
MachineRegisterInfo::VRegAttrs VccRB_S1 = {VccRB, S1};
@@ -121,6 +122,7 @@ class RegBankLegalizeHelper {
void lowerV_BFE(MachineInstr &MI);
void lowerS_BFE(MachineInstr &MI);
void lowerSplitTo32(MachineInstr &MI);
+ void lowerSplitTo16(MachineInstr &MI);
void lowerSplitTo32Select(MachineInstr &MI);
void lowerSplitTo32SExtInReg(MachineInstr &MI);
void lowerUnpackMinMax(MachineInstr &MI);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 9cf0c52717318..78997b30318e0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -912,7 +912,9 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
.Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
- .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
+ .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}}, !hasSALUFloat)
+ .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, ScalarizeToS16},
+ hasSALUFloat)
.Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
.Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32, VgprV2S32}}})
.Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32, VgprV2S32}}});
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
index 1cf9ae2e226ca..007fedc737512 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
@@ -221,6 +221,7 @@ enum LoweringMethodID {
V_BFE,
VgprToVccCopy,
SplitTo32,
+ ScalarizeToS16,
SplitTo32Select,
SplitTo32SExtInReg,
Ext32To64,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll
index ec221496f450c..b315ac549b1c2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll
@@ -1,39 +1,25 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-GISEL,GFX12-GISEL-FAKE16 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-FAKE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-TRUE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-FAKE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-TRUE16 %s
define amdgpu_ps half @fadd_s16_uniform(half inreg %a, half inreg %b) {
-; GFX11-SDAG-FAKE16-LABEL: fadd_s16_uniform:
-; GFX11-SDAG-FAKE16: ; %bb.0:
-; GFX11-SDAG-FAKE16-NEXT: v_add_f16_e64 v0, s0, s1
-; GFX11-SDAG-FAKE16-NEXT: ; return to shader part epilog
-;
-; GFX11-SDAG-TRUE16-LABEL: fadd_s16_uniform:
-; GFX11-SDAG-TRUE16: ; %bb.0:
-; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e64 v0.l, s0, s1
-; GFX11-SDAG-TRUE16-NEXT: ; return to shader part epilog
-;
-; GFX11-GISEL-FAKE16-LABEL: fadd_s16_uniform:
-; GFX11-GISEL-FAKE16: ; %bb.0:
-; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e64 v0, s0, s1
-; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s0
-; GFX11-GISEL-FAKE16-NEXT: ; return to shader part epilog
-;
-; GFX11-GISEL-TRUE16-LABEL: fadd_s16_uniform:
-; GFX11-GISEL-TRUE16: ; %bb.0:
-; GFX11-GISEL-TRUE16-NEXT: v_add_f16_e64 v0.l, s0, s1
-; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s0
-; GFX11-GISEL-TRUE16-NEXT: ; return to shader part epilog
+; GFX11-FAKE16-LABEL: fadd_s16_uniform:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: v_add_f16_e64 v0, s0, s1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX11-TRUE16-LABEL: fadd_s16_uniform:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: v_add_f16_e64 v0.l, s0, s1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: fadd_s16_uniform:
; GFX12: ; %bb.0:
@@ -41,50 +27,41 @@ define amdgpu_ps half @fadd_s16_uniform(half inreg %a, half inreg %b) {
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
+; -LABEL: fadd_s16_uniform:
+; : ; %bb.0:
+; -NEXT: v_add_f16_e64 v0.l, s0, s1
+; -NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; -NEXT: v_readfirstlane_b32 s0, v0
+; -NEXT: v_mov_b32_e32 v0, s0
+; -NEXT: ; return to shader part epilog
%fadd = fadd half %a, %b
ret half %fadd
}
define amdgpu_ps half @fadd_s16_div(half %a, half %b) {
-; GFX11-SDAG-FAKE16-LABEL: fadd_s16_div:
-; GFX11-SDAG-FAKE16: ; %bb.0:
-; GFX11-SDAG-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
-; GFX11-SDAG-FAKE16-NEXT: ; return to shader part epilog
-;
-; GFX11-SDAG-TRUE16-LABEL: fadd_s16_div:
-; GFX11-SDAG-TRUE16: ; %bb.0:
-; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
-; GFX11-SDAG-TRUE16-NEXT: ; return to shader part epilog
-;
-; GFX11-GISEL-FAKE16-LABEL: fadd_s16_div:
-; GFX11-GISEL-FAKE16: ; %bb.0:
-; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
-; GFX11-GISEL-FAKE16-NEXT: ; return to shader part epilog
-;
-; GFX11-GISEL-TRUE16-LABEL: fadd_s16_div:
-; GFX11-GISEL-TRUE16: ; %bb.0:
-; GFX11-GISEL-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
-; GFX11-GISEL-TRUE16-NEXT: ; return to shader part epilog
-;
-; GFX12-SDAG-FAKE16-LABEL: fadd_s16_div:
-; GFX12-SDAG-FAKE16: ; %bb.0:
-; GFX12-SDAG-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
-; GFX12-SDAG-FAKE16-NEXT: ; return to shader part epilog
-;
-; GFX12-SDAG-TRUE16-LABEL: fadd_s16_div:
-; GFX12-SDAG-TRUE16: ; %bb.0:
-; GFX12-SDAG-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
-; GFX12-SDAG-TRUE16-NEXT: ; return to shader part epilog
-;
-; GFX12-GISEL-FAKE16-LABEL: fadd_s16_div:
-; GFX12-GISEL-FAKE16: ; %bb.0:
-; GFX12-GISEL-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
-; GFX12-GISEL-FAKE16-NEXT: ; return to shader part epilog
-;
-; GFX12-GISEL-TRUE16-LABEL: fadd_s16_div:
-; GFX12-GISEL-TRUE16: ; %bb.0:
-; GFX12-GISEL-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
-; GFX12-GISEL-TRUE16-NEXT: ; return to shader part epilog
+; GFX11-FAKE16-LABEL: fadd_s16_div:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX11-TRUE16-LABEL: fadd_s16_div:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: fadd_s16_div:
+; GFX12-FAKE16: ; %bb.0:
+; GFX12-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
+; GFX12-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: fadd_s16_div:
+; GFX12-TRUE16: ; %bb.0:
+; GFX12-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
+; GFX12-TRUE16-NEXT: ; return to shader part epilog
+; -LABEL: fadd_s16_div:
+; : ; %bb.0:
+; -NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
+; -NEXT: ; return to shader part epilog
%fadd = fadd half %a, %b
ret half %fadd
}
@@ -155,92 +132,58 @@ define amdgpu_ps double @fadd_s64_div(double %a, double %b) {
ret double %fadd
}
-define <2 x half> @fadd_v2s16_uniform(<2 x half> inreg %a, <2 x half> inreg %b) {
+define amdgpu_ps <2 x half> @fadd_v2s16_uniform(<2 x half> inreg %a, <2 x half> inreg %b) {
; GFX11-LABEL: fadd_v2s16_uniform:
; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_add_f16 v0, s0, s1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: fadd_v2s16_uniform:
; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_pk_add_f16 v0, s0, s1
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX12-NEXT: s_lshr_b32 s2, s0, 16
+; GFX12-NEXT: s_lshr_b32 s3, s1, 16
+; GFX12-NEXT: s_add_f16 s0, s0, s1
+; GFX12-NEXT: s_add_f16 s1, s2, s3
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX12-NEXT: s_pack_ll_b32_b16 s0, s0, s1
+; GFX12-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-NEXT: ; return to shader part epilog
%fadd = fadd <2 x half> %a, %b
ret <2 x half> %fadd
}
-define <2 x half> @fadd_v2s16_div(<2 x half> %a, <2 x half> %b) {
-; GFX11-LABEL: fadd_v2s16_div:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_pk_add_f16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: fadd_v2s16_div:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_pk_add_f16 v0, v0, v1
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+define amdgpu_ps <2 x half> @fadd_v2s16_div(<2 x half> %a, <2 x half> %b) {
+; GCN-LABEL: fadd_v2s16_div:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_pk_add_f16 v0, v0, v1
+; GCN-NEXT: ; return to shader part epilog
%fadd = fadd <2 x half> %a, %b
ret <2 x half> %fadd
}
-define <2 x float> @fadd_v2s32_uniform(<2 x float> inreg %a, <2 x float> inreg %b) {
+define amdgpu_ps <2 x float> @fadd_v2s32_uniform(<2 x float> inreg %a, <2 x float> inreg %b) {
; GFX11-LABEL: fadd_v2s32_uniform:
; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_f32_e64 v0, s0, s2
; GFX11-NEXT: v_add_f32_e64 v1, s1, s3
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: fadd_v2s32_uniform:
; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_add_f32 s0, s0, s2
; GFX12-NEXT: s_add_f32 s1, s1, s3
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_2)
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX12-NEXT: ; return to shader part epilog
%fadd = fadd <2 x float> %a, %b
ret <2 x float> %fadd
}
-define <2 x float> @fadd_v2s32_div(<2 x float> %a, <2 x float> %b) {
-; GFX11-LABEL: fadd_v2s32_div:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: fadd_v2s32_div:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+define amdgpu_ps <2 x float> @fadd_v2s32_div(<2 x float> %a, <2 x float> %b) {
+; GCN-LABEL: fadd_v2s32_div:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3
+; GCN-NEXT: ; return to shader part epilog
%fadd = fadd <2 x float> %a, %b
ret <2 x float> %fadd
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX11-GISEL: {{.*}}
-; GFX11-SDAG: {{.*}}
-; GFX12-GISEL: {{.*}}
-; GFX12-SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/lds-size.ll b/llvm/test/CodeGen/AMDGPU/lds-size.ll
index 75732a58eafc4..300d36477e2df 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-size.ll
@@ -1,24 +1,80 @@
-; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=ALL -check-prefix=HSA %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=ALL -check-prefix=HSA %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=ALL -check-prefix=HSA -check-prefix=HSA-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=ALL -check-prefix=HSA -check-prefix=HSA-GISEL %s
; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck -check-prefix=ALL -check-prefix=EG %s
; This test makes sure we do not double count global values when they are
; used in different basic blocks.
-; GCN: .long 47180
-; GCN-NEXT: .long 32900
-
-; EG: .long 166120
-; EG-NEXT: .long 1
-; ALL: {{^}}test:
-
-; HSA-NOT: COMPUTE_PGM_RSRC2.LDS_SIZE
-; HSA: .amdhsa_group_segment_fixed_size 4
-
-; GCN: ; LDSByteSize: 4 bytes/workgroup (compile time only)
@lds = internal unnamed_addr addrspace(3) global i32 poison, align 4
define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %cond) {
+; HSA-SDAG-LABEL: test:
+; HSA-SDAG: ; %bb.0: ; %entry
+; HSA-SDAG-NEXT: s_load_dword s0, s[8:9], 0x2
+; HSA-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-SDAG-NEXT: s_cmp_lg_u32 s0, 0
+; HSA-SDAG-NEXT: s_mov_b32 m0, -1
+; HSA-SDAG-NEXT: s_cbranch_scc0 .LBB0_4
+; HSA-SDAG-NEXT: ; %bb.1: ; %else
+; HSA-SDAG-NEXT: v_mov_b32_e32 v0, 2
+; HSA-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; HSA-SDAG-NEXT: ds_write_b32 v1, v0
+; HSA-SDAG-NEXT: s_mov_b64 vcc, exec
+; HSA-SDAG-NEXT: s_cbranch_execnz .LBB0_3
+; HSA-SDAG-NEXT: .LBB0_2: ; %if
+; HSA-SDAG-NEXT: v_mov_b32_e32 v0, 1
+; HSA-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; HSA-SDAG-NEXT: ds_write_b32 v1, v0
+; HSA-SDAG-NEXT: .LBB0_3: ; %endif
+; HSA-SDAG-NEXT: s_endpgm
+; HSA-SDAG-NEXT: .LBB0_4:
+; HSA-SDAG-NEXT: s_mov_b64 vcc, 0
+; HSA-SDAG-NEXT: s_branch .LBB0_2
+;
+; HSA-GISEL-LABEL: test:
+; HSA-GISEL: ; %bb.0: ; %entry
+; HSA-GISEL-NEXT: s_load_dword s0, s[8:9], 0x2
+; HSA-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-GISEL-NEXT: s_cmp_lg_u32 s0, 0
+; HSA-GISEL-NEXT: s_mov_b32 s0, 1
+; HSA-GISEL-NEXT: s_cbranch_scc0 .LBB0_2
+; HSA-GISEL-NEXT: ; %bb.1: ; %else
+; HSA-GISEL-NEXT: s_mov_b32 s0, 0
+; HSA-GISEL-NEXT: v_mov_b32_e32 v0, 2
+; HSA-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; HSA-GISEL-NEXT: s_mov_b32 m0, -1
+; HSA-GISEL-NEXT: ds_write_b32 v1, v0
+; HSA-GISEL-NEXT: .LBB0_2: ; %Flow
+; HSA-GISEL-NEXT: s_xor_b32 s0, s0, 1
+; HSA-GISEL-NEXT: s_cmp_lg_u32 s0, 0
+; HSA-GISEL-NEXT: s_cbranch_scc1 .LBB0_4
+; HSA-GISEL-NEXT: ; %bb.3: ; %if
+; HSA-GISEL-NEXT: v_mov_b32_e32 v0, 1
+; HSA-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; HSA-GISEL-NEXT: s_mov_b32 m0, -1
+; HSA-GISEL-NEXT: ds_write_b32 v1, v0
+; HSA-GISEL-NEXT: .LBB0_4: ; %endif
+; HSA-GISEL-NEXT: s_endpgm
+;
+; EG-LABEL: test:
+; EG: ; %bb.0: ; %entry
+; EG-NEXT: ALU 13, @0, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MOV T0.Z, literal.x,
+; EG-NEXT: MOV T0.W, literal.y,
+; EG-NEXT: SETNE_INT * T1.W, KC0[2].Z, 0.0,
+; EG-NEXT: 0(0.000000e+00), 1(1.401298e-45)
+; EG-NEXT: PRED_SETNE_INT * Pred,PredicateBit (MASKED), PS, 0.0,
+; EG-NEXT: MOV * T0.W, literal.x, Pred_sel_one
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: LDS_WRITE * T0.Z, T0.W, Pred_sel_one
+; EG-NEXT: MOV * T0.W, T0.Z, Pred_sel_one
+; EG-NEXT: SETE_INT * T0.W, T0.W, 0.0,
+; EG-NEXT: PRED_SETNE_INT * Pred,PredicateBit (MASKED), PV.W, 0.0,
+; EG-NEXT: MOV * T0.W, literal.x, Pred_sel_zero
+; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00)
+; EG-NEXT: LDS_WRITE * T0.Z, T0.W, Pred_sel_zero
+; EG-NEXT: RETURN
entry:
%0 = icmp eq i32 %cond, 0
br i1 %0, label %if, label %else
@@ -37,3 +93,6 @@ endif:
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; ALL: {{.*}}
+; HSA: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/trap.ll b/llvm/test/CodeGen/AMDGPU/trap.ll
index a7affb93c1c6a..dd6ac4311fda0 100644
--- a/llvm/test/CodeGen/AMDGPU/trap.ll
+++ b/llvm/test/CodeGen/AMDGPU/trap.ll
@@ -1,55 +1,456 @@
-; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn--amdhsa < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn--amdhsa < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP-GISEL %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa -mattr=+trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn--amdhsa -mattr=+trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa -mattr=-trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn--amdhsa -mattr=-trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa -mattr=-trap-handler < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn--amdhsa -mattr=-trap-handler < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa -mattr=+trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn--amdhsa -mattr=+trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa -mattr=-trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn--amdhsa -mattr=-trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa -mattr=-trap-handler < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=HSA-WARNING-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn--amdhsa -mattr=-trap-handler < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=HSA-WARNING-GISEL %s
; enable trap handler feature
-; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=TRAP-BIT -check-prefix=MESA-TRAP %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=TRAP-BIT -check-prefix=MESA-TRAP %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=TRAP-BIT %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=TRAP-BIT %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP-SDAG -check-prefix=TRAP-BIT-SDAG -check-prefix=MESA-TRAP-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP-GISEL -check-prefix=TRAP-BIT-GISEL -check-prefix=MESA-TRAP-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=MESA-WARNING-SDAG -check-prefix=TRAP-BIT-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=MESA-WARNING-GISEL -check-prefix=TRAP-BIT-GISEL %s
; disable trap handler feature
-; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=NO-TRAP-BIT -check-prefix=NOMESA-TRAP %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=NO-TRAP-BIT -check-prefix=NOMESA-TRAP %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=NO-TRAP-BIT %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=NO-TRAP-BIT %s
-
-; RUN: llc -global-isel=0 -mtriple=amdgcn < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s
-
-; GCN-WARNING: warning: <unknown>:0:0: in function hsa_debugtrap void (ptr addrspace(1)): debugtrap handler not supported
+; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP-SDAG -check-prefix=NO-TRAP-BIT-SDAG -check-prefix=NOMESA-TRAP-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP-GISEL -check-prefix=NO-TRAP-BIT-GISEL -check-prefix=NOMESA-TRAP-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=MESA-WARNING-SDAG -check-prefix=NO-TRAP-BIT-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=MESA-WARNING-GISEL -check-prefix=NO-TRAP-BIT-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING-GISEL %s
declare void @llvm.trap() #0
declare void @llvm.debugtrap() #1
-; MESA-TRAP: .section .AMDGPU.config
-; MESA-TRAP: .long 47180
-; MESA-TRAP-NEXT: .long 5080
-
-; NOMESA-TRAP: .section .AMDGPU.config
-; NOMESA-TRAP: .long 47180
-; NOMESA-TRAP-NEXT: .long 5016
-
-; GCN-LABEL: {{^}}hsa_trap:
-; HSA-TRAP: s_mov_b64 s[0:1], s[6:7]
-; HSA-TRAP: s_trap 2
-; HSA-TRAP: COMPUTE_PGM_RSRC2:TRAP_HANDLER: 0
-
-; for llvm.trap in hsa path without ABI, direct generate s_endpgm instruction without any warning information
-; NO-HSA-TRAP: s_endpgm
-; NO-HSA-TRAP: COMPUTE_PGM_RSRC2:TRAP_HANDLER: 0
-
-; TRAP-BIT: enable_trap_handler = 1
-; NO-TRAP-BIT: enable_trap_handler = 0
-; NO-MESA-TRAP: s_endpgm
define amdgpu_kernel void @hsa_trap(ptr addrspace(1) nocapture readonly %arg0) {
+; HSA-TRAP-SDAG-LABEL: hsa_trap:
+; HSA-TRAP-SDAG: ; %bb.0:
+; HSA-TRAP-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
+; HSA-TRAP-SDAG-NEXT: s_add_i32 s12, s12, s17
+; HSA-TRAP-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-TRAP-SDAG-NEXT: s_mov_b64 s[0:1], s[6:7]
+; HSA-TRAP-SDAG-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
+; HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v2, 1
+; HSA-TRAP-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v1, s3
+; HSA-TRAP-SDAG-NEXT: flat_store_dword v[0:1], v2
+; HSA-TRAP-SDAG-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-SDAG-NEXT: s_trap 2
+;
+; HSA-TRAP-GISEL-LABEL: hsa_trap:
+; HSA-TRAP-GISEL: ; %bb.0:
+; HSA-TRAP-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; HSA-TRAP-GISEL-NEXT: s_add_i32 s12, s12, s17
+; HSA-TRAP-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-TRAP-GISEL-NEXT: s_mov_b64 s[0:1], s[6:7]
+; HSA-TRAP-GISEL-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
+; HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v2, 1
+; HSA-TRAP-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; HSA-TRAP-GISEL-NEXT: flat_store_dword v[0:1], v2
+; HSA-TRAP-GISEL-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GISEL-NEXT: s_trap 2
+;
+; NO-HSA-TRAP-SDAG-LABEL: hsa_trap:
+; NO-HSA-TRAP-SDAG: ; %bb.0:
+; NO-HSA-TRAP-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
+; NO-HSA-TRAP-SDAG-NEXT: s_add_i32 s12, s12, s17
+; NO-HSA-TRAP-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; NO-HSA-TRAP-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v2, 1
+; NO-HSA-TRAP-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; NO-HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-SDAG-NEXT: flat_store_dword v[0:1], v2
+; NO-HSA-TRAP-SDAG-NEXT: s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-SDAG-NEXT: s_endpgm
+;
+; NO-HSA-TRAP-GISEL-LABEL: hsa_trap:
+; NO-HSA-TRAP-GISEL: ; %bb.0:
+; NO-HSA-TRAP-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; NO-HSA-TRAP-GISEL-NEXT: s_add_i32 s12, s12, s17
+; NO-HSA-TRAP-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; NO-HSA-TRAP-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v2, 1
+; NO-HSA-TRAP-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; NO-HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-GISEL-NEXT: flat_store_dword v[0:1], v2
+; NO-HSA-TRAP-GISEL-NEXT: s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-GISEL-NEXT: s_endpgm
+;
+; HSA-WARNING-SDAG-LABEL: hsa_trap:
+; HSA-WARNING-SDAG: ; %bb.0:
+; HSA-WARNING-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
+; HSA-WARNING-SDAG-NEXT: s_add_i32 s12, s12, s17
+; HSA-WARNING-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-WARNING-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-WARNING-SDAG-NEXT: v_mov_b32_e32 v2, 1
+; HSA-WARNING-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-WARNING-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; HSA-WARNING-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; HSA-WARNING-SDAG-NEXT: flat_store_dword v[0:1], v2
+; HSA-WARNING-SDAG-NEXT: s_waitcnt vmcnt(0)
+; HSA-WARNING-SDAG-NEXT: s_endpgm
+;
+; HSA-WARNING-GISEL-LABEL: hsa_trap:
+; HSA-WARNING-GISEL: ; %bb.0:
+; HSA-WARNING-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; HSA-WARNING-GISEL-NEXT: s_add_i32 s12, s12, s17
+; HSA-WARNING-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-WARNING-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-WARNING-GISEL-NEXT: v_mov_b32_e32 v2, 1
+; HSA-WARNING-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-WARNING-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; HSA-WARNING-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; HSA-WARNING-GISEL-NEXT: flat_store_dword v[0:1], v2
+; HSA-WARNING-GISEL-NEXT: s_waitcnt vmcnt(0)
+; HSA-WARNING-GISEL-NEXT: s_endpgm
+;
+; TRAP-BIT-SDAG-LABEL: hsa_trap:
+; TRAP-BIT-SDAG: .amd_kernel_code_t
+; TRAP-BIT-SDAG-NEXT: amd_code_version_major = 1
+; TRAP-BIT-SDAG-NEXT: amd_code_version_minor = 2
+; TRAP-BIT-SDAG-NEXT: amd_machine_kind = 1
+; TRAP-BIT-SDAG-NEXT: amd_machine_version_major = 6
+; TRAP-BIT-SDAG-NEXT: amd_machine_version_minor = 0
+; TRAP-BIT-SDAG-NEXT: amd_machine_version_stepping = 0
+; TRAP-BIT-SDAG-NEXT: kernel_code_entry_byte_offset = 256
+; TRAP-BIT-SDAG-NEXT: kernel_code_prefetch_byte_size = 0
+; TRAP-BIT-SDAG-NEXT: granulated_workitem_vgpr_count = 0
+; TRAP-BIT-SDAG-NEXT: granulated_wavefront_sgpr_count = 1
+; TRAP-BIT-SDAG-NEXT: priority = 0
+; TRAP-BIT-SDAG-NEXT: float_mode = 240
+; TRAP-BIT-SDAG-NEXT: priv = 0
+; TRAP-BIT-SDAG-NEXT: enable_dx10_clamp = 1
+; TRAP-BIT-SDAG-NEXT: debug_mode = 0
+; TRAP-BIT-SDAG-NEXT: enable_ieee_mode = 1
+; TRAP-BIT-SDAG-NEXT: enable_wgp_mode = 0
+; TRAP-BIT-SDAG-NEXT: enable_mem_ordered = 0
+; TRAP-BIT-SDAG-NEXT: enable_fwd_progress = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; TRAP-BIT-SDAG-NEXT: user_sgpr_count = 12
+; TRAP-BIT-SDAG-NEXT: enable_trap_handler = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_x = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_y = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_z = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_info = 0
+; TRAP-BIT-SDAG-NEXT: enable_vgpr_workitem_id = 2
+; TRAP-BIT-SDAG-NEXT: enable_exception_msb = 0
+; TRAP-BIT-SDAG-NEXT: granulated_lds_size = 0
+; TRAP-BIT-SDAG-NEXT: enable_exception = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_buffer = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_dispatch_ptr = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_queue_ptr = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_dispatch_id = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_flat_scratch_init = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_size = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; TRAP-BIT-SDAG-NEXT: enable_wavefront_size32 = 0
+; TRAP-BIT-SDAG-NEXT: enable_ordered_append_gds = 0
+; TRAP-BIT-SDAG-NEXT: private_element_size = 1
+; TRAP-BIT-SDAG-NEXT: is_ptr64 = 1
+; TRAP-BIT-SDAG-NEXT: is_dynamic_callstack = 0
+; TRAP-BIT-SDAG-NEXT: is_debug_enabled = 0
+; TRAP-BIT-SDAG-NEXT: is_xnack_enabled = 0
+; TRAP-BIT-SDAG-NEXT: workitem_private_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT: workgroup_group_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT: gds_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT: kernarg_segment_byte_size = 24
+; TRAP-BIT-SDAG-NEXT: workgroup_fbarrier_count = 0
+; TRAP-BIT-SDAG-NEXT: wavefront_sgpr_count = 10
+; TRAP-BIT-SDAG-NEXT: workitem_vgpr_count = 1
+; TRAP-BIT-SDAG-NEXT: reserved_vgpr_first = 0
+; TRAP-BIT-SDAG-NEXT: reserved_vgpr_count = 0
+; TRAP-BIT-SDAG-NEXT: reserved_sgpr_first = 0
+; TRAP-BIT-SDAG-NEXT: reserved_sgpr_count = 0
+; TRAP-BIT-SDAG-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; TRAP-BIT-SDAG-NEXT: debug_private_segment_buffer_sgpr = 0
+; TRAP-BIT-SDAG-NEXT: kernarg_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT: group_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT: private_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT: wavefront_size = 6
+; TRAP-BIT-SDAG-NEXT: call_convention = -1
+; TRAP-BIT-SDAG-NEXT: runtime_loader_kernel_symbol = 0
+; TRAP-BIT-SDAG-NEXT: .end_amd_kernel_code_t
+; TRAP-BIT-SDAG-NEXT: ; %bb.0:
+; TRAP-BIT-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; TRAP-BIT-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; TRAP-BIT-SDAG-NEXT: s_mov_b32 s2, -1
+; TRAP-BIT-SDAG-NEXT: v_mov_b32_e32 v0, 1
+; TRAP-BIT-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; TRAP-BIT-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; TRAP-BIT-SDAG-NEXT: s_waitcnt vmcnt(0)
+; TRAP-BIT-SDAG-NEXT: s_endpgm
+;
+; TRAP-BIT-GISEL-LABEL: hsa_trap:
+; TRAP-BIT-GISEL: .amd_kernel_code_t
+; TRAP-BIT-GISEL-NEXT: amd_code_version_major = 1
+; TRAP-BIT-GISEL-NEXT: amd_code_version_minor = 2
+; TRAP-BIT-GISEL-NEXT: amd_machine_kind = 1
+; TRAP-BIT-GISEL-NEXT: amd_machine_version_major = 6
+; TRAP-BIT-GISEL-NEXT: amd_machine_version_minor = 0
+; TRAP-BIT-GISEL-NEXT: amd_machine_version_stepping = 0
+; TRAP-BIT-GISEL-NEXT: kernel_code_entry_byte_offset = 256
+; TRAP-BIT-GISEL-NEXT: kernel_code_prefetch_byte_size = 0
+; TRAP-BIT-GISEL-NEXT: granulated_workitem_vgpr_count = 0
+; TRAP-BIT-GISEL-NEXT: granulated_wavefront_sgpr_count = 1
+; TRAP-BIT-GISEL-NEXT: priority = 0
+; TRAP-BIT-GISEL-NEXT: float_mode = 240
+; TRAP-BIT-GISEL-NEXT: priv = 0
+; TRAP-BIT-GISEL-NEXT: enable_dx10_clamp = 1
+; TRAP-BIT-GISEL-NEXT: debug_mode = 0
+; TRAP-BIT-GISEL-NEXT: enable_ieee_mode = 1
+; TRAP-BIT-GISEL-NEXT: enable_wgp_mode = 0
+; TRAP-BIT-GISEL-NEXT: enable_mem_ordered = 0
+; TRAP-BIT-GISEL-NEXT: enable_fwd_progress = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; TRAP-BIT-GISEL-NEXT: user_sgpr_count = 12
+; TRAP-BIT-GISEL-NEXT: enable_trap_handler = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_x = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_y = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_z = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_info = 0
+; TRAP-BIT-GISEL-NEXT: enable_vgpr_workitem_id = 2
+; TRAP-BIT-GISEL-NEXT: enable_exception_msb = 0
+; TRAP-BIT-GISEL-NEXT: granulated_lds_size = 0
+; TRAP-BIT-GISEL-NEXT: enable_exception = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_buffer = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_dispatch_ptr = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_queue_ptr = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_dispatch_id = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_flat_scratch_init = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_size = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; TRAP-BIT-GISEL-NEXT: enable_wavefront_size32 = 0
+; TRAP-BIT-GISEL-NEXT: enable_ordered_append_gds = 0
+; TRAP-BIT-GISEL-NEXT: private_element_size = 1
+; TRAP-BIT-GISEL-NEXT: is_ptr64 = 1
+; TRAP-BIT-GISEL-NEXT: is_dynamic_callstack = 0
+; TRAP-BIT-GISEL-NEXT: is_debug_enabled = 0
+; TRAP-BIT-GISEL-NEXT: is_xnack_enabled = 0
+; TRAP-BIT-GISEL-NEXT: workitem_private_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT: workgroup_group_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT: gds_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT: kernarg_segment_byte_size = 24
+; TRAP-BIT-GISEL-NEXT: workgroup_fbarrier_count = 0
+; TRAP-BIT-GISEL-NEXT: wavefront_sgpr_count = 10
+; TRAP-BIT-GISEL-NEXT: workitem_vgpr_count = 1
+; TRAP-BIT-GISEL-NEXT: reserved_vgpr_first = 0
+; TRAP-BIT-GISEL-NEXT: reserved_vgpr_count = 0
+; TRAP-BIT-GISEL-NEXT: reserved_sgpr_first = 0
+; TRAP-BIT-GISEL-NEXT: reserved_sgpr_count = 0
+; TRAP-BIT-GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; TRAP-BIT-GISEL-NEXT: debug_private_segment_buffer_sgpr = 0
+; TRAP-BIT-GISEL-NEXT: kernarg_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT: group_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT: private_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT: wavefront_size = 6
+; TRAP-BIT-GISEL-NEXT: call_convention = -1
+; TRAP-BIT-GISEL-NEXT: runtime_loader_kernel_symbol = 0
+; TRAP-BIT-GISEL-NEXT: .end_amd_kernel_code_t
+; TRAP-BIT-GISEL-NEXT: ; %bb.0:
+; TRAP-BIT-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; TRAP-BIT-GISEL-NEXT: v_mov_b32_e32 v0, 1
+; TRAP-BIT-GISEL-NEXT: s_mov_b32 s2, -1
+; TRAP-BIT-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; TRAP-BIT-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; TRAP-BIT-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; TRAP-BIT-GISEL-NEXT: s_waitcnt vmcnt(0)
+; TRAP-BIT-GISEL-NEXT: s_endpgm
+;
+; NO-TRAP-BIT-SDAG-LABEL: hsa_trap:
+; NO-TRAP-BIT-SDAG: .amd_kernel_code_t
+; NO-TRAP-BIT-SDAG-NEXT: amd_code_version_major = 1
+; NO-TRAP-BIT-SDAG-NEXT: amd_code_version_minor = 2
+; NO-TRAP-BIT-SDAG-NEXT: amd_machine_kind = 1
+; NO-TRAP-BIT-SDAG-NEXT: amd_machine_version_major = 6
+; NO-TRAP-BIT-SDAG-NEXT: amd_machine_version_minor = 0
+; NO-TRAP-BIT-SDAG-NEXT: amd_machine_version_stepping = 0
+; NO-TRAP-BIT-SDAG-NEXT: kernel_code_entry_byte_offset = 256
+; NO-TRAP-BIT-SDAG-NEXT: kernel_code_prefetch_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: granulated_workitem_vgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT: granulated_wavefront_sgpr_count = 1
+; NO-TRAP-BIT-SDAG-NEXT: priority = 0
+; NO-TRAP-BIT-SDAG-NEXT: float_mode = 240
+; NO-TRAP-BIT-SDAG-NEXT: priv = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_dx10_clamp = 1
+; NO-TRAP-BIT-SDAG-NEXT: debug_mode = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_ieee_mode = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_wgp_mode = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_mem_ordered = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_fwd_progress = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; NO-TRAP-BIT-SDAG-NEXT: user_sgpr_count = 12
+; NO-TRAP-BIT-SDAG-NEXT: enable_trap_handler = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_x = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_y = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_z = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_info = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_vgpr_workitem_id = 2
+; NO-TRAP-BIT-SDAG-NEXT: enable_exception_msb = 0
+; NO-TRAP-BIT-SDAG-NEXT: granulated_lds_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_exception = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_buffer = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_dispatch_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_queue_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_dispatch_id = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_flat_scratch_init = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_wavefront_size32 = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_ordered_append_gds = 0
+; NO-TRAP-BIT-SDAG-NEXT: private_element_size = 1
+; NO-TRAP-BIT-SDAG-NEXT: is_ptr64 = 1
+; NO-TRAP-BIT-SDAG-NEXT: is_dynamic_callstack = 0
+; NO-TRAP-BIT-SDAG-NEXT: is_debug_enabled = 0
+; NO-TRAP-BIT-SDAG-NEXT: is_xnack_enabled = 0
+; NO-TRAP-BIT-SDAG-NEXT: workitem_private_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: workgroup_group_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: gds_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: kernarg_segment_byte_size = 24
+; NO-TRAP-BIT-SDAG-NEXT: workgroup_fbarrier_count = 0
+; NO-TRAP-BIT-SDAG-NEXT: wavefront_sgpr_count = 10
+; NO-TRAP-BIT-SDAG-NEXT: workitem_vgpr_count = 1
+; NO-TRAP-BIT-SDAG-NEXT: reserved_vgpr_first = 0
+; NO-TRAP-BIT-SDAG-NEXT: reserved_vgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT: reserved_sgpr_first = 0
+; NO-TRAP-BIT-SDAG-NEXT: reserved_sgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; NO-TRAP-BIT-SDAG-NEXT: debug_private_segment_buffer_sgpr = 0
+; NO-TRAP-BIT-SDAG-NEXT: kernarg_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT: group_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT: private_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT: wavefront_size = 6
+; NO-TRAP-BIT-SDAG-NEXT: call_convention = -1
+; NO-TRAP-BIT-SDAG-NEXT: runtime_loader_kernel_symbol = 0
+; NO-TRAP-BIT-SDAG-NEXT: .end_amd_kernel_code_t
+; NO-TRAP-BIT-SDAG-NEXT: ; %bb.0:
+; NO-TRAP-BIT-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-TRAP-BIT-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; NO-TRAP-BIT-SDAG-NEXT: s_mov_b32 s2, -1
+; NO-TRAP-BIT-SDAG-NEXT: v_mov_b32_e32 v0, 1
+; NO-TRAP-BIT-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; NO-TRAP-BIT-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-TRAP-BIT-SDAG-NEXT: s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-SDAG-NEXT: s_endpgm
+;
+; NO-TRAP-BIT-GISEL-LABEL: hsa_trap:
+; NO-TRAP-BIT-GISEL: .amd_kernel_code_t
+; NO-TRAP-BIT-GISEL-NEXT: amd_code_version_major = 1
+; NO-TRAP-BIT-GISEL-NEXT: amd_code_version_minor = 2
+; NO-TRAP-BIT-GISEL-NEXT: amd_machine_kind = 1
+; NO-TRAP-BIT-GISEL-NEXT: amd_machine_version_major = 6
+; NO-TRAP-BIT-GISEL-NEXT: amd_machine_version_minor = 0
+; NO-TRAP-BIT-GISEL-NEXT: amd_machine_version_stepping = 0
+; NO-TRAP-BIT-GISEL-NEXT: kernel_code_entry_byte_offset = 256
+; NO-TRAP-BIT-GISEL-NEXT: kernel_code_prefetch_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: granulated_workitem_vgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT: granulated_wavefront_sgpr_count = 1
+; NO-TRAP-BIT-GISEL-NEXT: priority = 0
+; NO-TRAP-BIT-GISEL-NEXT: float_mode = 240
+; NO-TRAP-BIT-GISEL-NEXT: priv = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_dx10_clamp = 1
+; NO-TRAP-BIT-GISEL-NEXT: debug_mode = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_ieee_mode = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_wgp_mode = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_mem_ordered = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_fwd_progress = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; NO-TRAP-BIT-GISEL-NEXT: user_sgpr_count = 12
+; NO-TRAP-BIT-GISEL-NEXT: enable_trap_handler = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_x = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_y = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_z = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_info = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_vgpr_workitem_id = 2
+; NO-TRAP-BIT-GISEL-NEXT: enable_exception_msb = 0
+; NO-TRAP-BIT-GISEL-NEXT: granulated_lds_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_exception = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_buffer = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_dispatch_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_queue_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_dispatch_id = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_flat_scratch_init = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_wavefront_size32 = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_ordered_append_gds = 0
+; NO-TRAP-BIT-GISEL-NEXT: private_element_size = 1
+; NO-TRAP-BIT-GISEL-NEXT: is_ptr64 = 1
+; NO-TRAP-BIT-GISEL-NEXT: is_dynamic_callstack = 0
+; NO-TRAP-BIT-GISEL-NEXT: is_debug_enabled = 0
+; NO-TRAP-BIT-GISEL-NEXT: is_xnack_enabled = 0
+; NO-TRAP-BIT-GISEL-NEXT: workitem_private_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: workgroup_group_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: gds_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: kernarg_segment_byte_size = 24
+; NO-TRAP-BIT-GISEL-NEXT: workgroup_fbarrier_count = 0
+; NO-TRAP-BIT-GISEL-NEXT: wavefront_sgpr_count = 10
+; NO-TRAP-BIT-GISEL-NEXT: workitem_vgpr_count = 1
+; NO-TRAP-BIT-GISEL-NEXT: reserved_vgpr_first = 0
+; NO-TRAP-BIT-GISEL-NEXT: reserved_vgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT: reserved_sgpr_first = 0
+; NO-TRAP-BIT-GISEL-NEXT: reserved_sgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; NO-TRAP-BIT-GISEL-NEXT: debug_private_segment_buffer_sgpr = 0
+; NO-TRAP-BIT-GISEL-NEXT: kernarg_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT: group_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT: private_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT: wavefront_size = 6
+; NO-TRAP-BIT-GISEL-NEXT: call_convention = -1
+; NO-TRAP-BIT-GISEL-NEXT: runtime_loader_kernel_symbol = 0
+; NO-TRAP-BIT-GISEL-NEXT: .end_amd_kernel_code_t
+; NO-TRAP-BIT-GISEL-NEXT: ; %bb.0:
+; NO-TRAP-BIT-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-TRAP-BIT-GISEL-NEXT: v_mov_b32_e32 v0, 1
+; NO-TRAP-BIT-GISEL-NEXT: s_mov_b32 s2, -1
+; NO-TRAP-BIT-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; NO-TRAP-BIT-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; NO-TRAP-BIT-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-TRAP-BIT-GISEL-NEXT: s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-GISEL-NEXT: s_endpgm
+;
+; GCN-WARNING-SDAG-LABEL: hsa_trap:
+; GCN-WARNING-SDAG: ; %bb.0:
+; GCN-WARNING-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-WARNING-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; GCN-WARNING-SDAG-NEXT: s_mov_b32 s2, -1
+; GCN-WARNING-SDAG-NEXT: v_mov_b32_e32 v0, 1
+; GCN-WARNING-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-WARNING-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-WARNING-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GCN-WARNING-SDAG-NEXT: s_endpgm
+;
+; GCN-WARNING-GISEL-LABEL: hsa_trap:
+; GCN-WARNING-GISEL: ; %bb.0:
+; GCN-WARNING-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-WARNING-GISEL-NEXT: v_mov_b32_e32 v0, 1
+; GCN-WARNING-GISEL-NEXT: s_mov_b32 s2, -1
+; GCN-WARNING-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; GCN-WARNING-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-WARNING-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-WARNING-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GCN-WARNING-GISEL-NEXT: s_endpgm
store volatile i32 1, ptr addrspace(1) %arg0
call void @llvm.trap()
unreachable
@@ -57,26 +458,466 @@ define amdgpu_kernel void @hsa_trap(ptr addrspace(1) nocapture readonly %arg0) {
ret void
}
-; MESA-TRAP: .section .AMDGPU.config
-; MESA-TRAP: .long 47180
-; MESA-TRAP-NEXT: .long 5080
-
-; NOMESA-TRAP: .section .AMDGPU.config
-; NOMESA-TRAP: .long 47180
-; NOMESA-TRAP-NEXT: .long 5016
-
-; GCN-LABEL: {{^}}hsa_debugtrap:
-; HSA-TRAP: s_trap 3
-; HSA-TRAP: flat_store_dword v[0:1], v3
-; HSA-TRAP: COMPUTE_PGM_RSRC2:TRAP_HANDLER: 0
-
-; for llvm.debugtrap in non-hsa path without ABI, generate a warning and a s_endpgm instruction
-; NO-HSA-TRAP: s_endpgm
-
-; TRAP-BIT: enable_trap_handler = 1
-; NO-TRAP-BIT: enable_trap_handler = 0
-; NO-MESA-TRAP: s_endpgm
define amdgpu_kernel void @hsa_debugtrap(ptr addrspace(1) nocapture readonly %arg0) {
+; HSA-TRAP-SDAG-LABEL: hsa_debugtrap:
+; HSA-TRAP-SDAG: ; %bb.0:
+; HSA-TRAP-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
+; HSA-TRAP-SDAG-NEXT: s_add_i32 s12, s12, s17
+; HSA-TRAP-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-TRAP-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v2, 1
+; HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v3, 2
+; HSA-TRAP-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; HSA-TRAP-SDAG-NEXT: flat_store_dword v[0:1], v2
+; HSA-TRAP-SDAG-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-SDAG-NEXT: s_trap 3
+; HSA-TRAP-SDAG-NEXT: flat_store_dword v[0:1], v3
+; HSA-TRAP-SDAG-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-SDAG-NEXT: s_endpgm
+;
+; HSA-TRAP-GISEL-LABEL: hsa_debugtrap:
+; HSA-TRAP-GISEL: ; %bb.0:
+; HSA-TRAP-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; HSA-TRAP-GISEL-NEXT: s_add_i32 s12, s12, s17
+; HSA-TRAP-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-TRAP-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v2, 1
+; HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v3, 2
+; HSA-TRAP-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; HSA-TRAP-GISEL-NEXT: flat_store_dword v[0:1], v2
+; HSA-TRAP-GISEL-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GISEL-NEXT: s_trap 3
+; HSA-TRAP-GISEL-NEXT: flat_store_dword v[0:1], v3
+; HSA-TRAP-GISEL-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GISEL-NEXT: s_endpgm
+;
+; NO-HSA-TRAP-SDAG-LABEL: hsa_debugtrap:
+; NO-HSA-TRAP-SDAG: ; %bb.0:
+; NO-HSA-TRAP-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
+; NO-HSA-TRAP-SDAG-NEXT: s_add_i32 s12, s12, s17
+; NO-HSA-TRAP-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; NO-HSA-TRAP-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v2, 1
+; NO-HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v3, 2
+; NO-HSA-TRAP-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; NO-HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-SDAG-NEXT: flat_store_dword v[0:1], v2
+; NO-HSA-TRAP-SDAG-NEXT: s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-SDAG-NEXT: flat_store_dword v[0:1], v3
+; NO-HSA-TRAP-SDAG-NEXT: s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-SDAG-NEXT: s_endpgm
+;
+; NO-HSA-TRAP-GISEL-LABEL: hsa_debugtrap:
+; NO-HSA-TRAP-GISEL: ; %bb.0:
+; NO-HSA-TRAP-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; NO-HSA-TRAP-GISEL-NEXT: s_add_i32 s12, s12, s17
+; NO-HSA-TRAP-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; NO-HSA-TRAP-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v2, 1
+; NO-HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v3, 2
+; NO-HSA-TRAP-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; NO-HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-GISEL-NEXT: flat_store_dword v[0:1], v2
+; NO-HSA-TRAP-GISEL-NEXT: s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-GISEL-NEXT: flat_store_dword v[0:1], v3
+; NO-HSA-TRAP-GISEL-NEXT: s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-GISEL-NEXT: s_endpgm
+;
+; HSA-WARNING-SDAG-LABEL: hsa_debugtrap:
+; HSA-WARNING-SDAG: ; %bb.0:
+; HSA-WARNING-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
+; HSA-WARNING-SDAG-NEXT: s_add_i32 s12, s12, s17
+; HSA-WARNING-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-WARNING-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-WARNING-SDAG-NEXT: v_mov_b32_e32 v2, 1
+; HSA-WARNING-SDAG-NEXT: v_mov_b32_e32 v3, 2
+; HSA-WARNING-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-WARNING-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; HSA-WARNING-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; HSA-WARNING-SDAG-NEXT: flat_store_dword v[0:1], v2
+; HSA-WARNING-SDAG-NEXT: s_waitcnt vmcnt(0)
+; HSA-WARNING-SDAG-NEXT: flat_store_dword v[0:1], v3
+; HSA-WARNING-SDAG-NEXT: s_waitcnt vmcnt(0)
+; HSA-WARNING-SDAG-NEXT: s_endpgm
+;
+; HSA-WARNING-GISEL-LABEL: hsa_debugtrap:
+; HSA-WARNING-GISEL: ; %bb.0:
+; HSA-WARNING-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; HSA-WARNING-GISEL-NEXT: s_add_i32 s12, s12, s17
+; HSA-WARNING-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-WARNING-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-WARNING-GISEL-NEXT: v_mov_b32_e32 v2, 1
+; HSA-WARNING-GISEL-NEXT: v_mov_b32_e32 v3, 2
+; HSA-WARNING-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-WARNING-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; HSA-WARNING-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; HSA-WARNING-GISEL-NEXT: flat_store_dword v[0:1], v2
+; HSA-WARNING-GISEL-NEXT: s_waitcnt vmcnt(0)
+; HSA-WARNING-GISEL-NEXT: flat_store_dword v[0:1], v3
+; HSA-WARNING-GISEL-NEXT: s_waitcnt vmcnt(0)
+; HSA-WARNING-GISEL-NEXT: s_endpgm
+;
+; TRAP-BIT-SDAG-LABEL: hsa_debugtrap:
+; TRAP-BIT-SDAG: .amd_kernel_code_t
+; TRAP-BIT-SDAG-NEXT: amd_code_version_major = 1
+; TRAP-BIT-SDAG-NEXT: amd_code_version_minor = 2
+; TRAP-BIT-SDAG-NEXT: amd_machine_kind = 1
+; TRAP-BIT-SDAG-NEXT: amd_machine_version_major = 6
+; TRAP-BIT-SDAG-NEXT: amd_machine_version_minor = 0
+; TRAP-BIT-SDAG-NEXT: amd_machine_version_stepping = 0
+; TRAP-BIT-SDAG-NEXT: kernel_code_entry_byte_offset = 256
+; TRAP-BIT-SDAG-NEXT: kernel_code_prefetch_byte_size = 0
+; TRAP-BIT-SDAG-NEXT: granulated_workitem_vgpr_count = 0
+; TRAP-BIT-SDAG-NEXT: granulated_wavefront_sgpr_count = 1
+; TRAP-BIT-SDAG-NEXT: priority = 0
+; TRAP-BIT-SDAG-NEXT: float_mode = 240
+; TRAP-BIT-SDAG-NEXT: priv = 0
+; TRAP-BIT-SDAG-NEXT: enable_dx10_clamp = 1
+; TRAP-BIT-SDAG-NEXT: debug_mode = 0
+; TRAP-BIT-SDAG-NEXT: enable_ieee_mode = 1
+; TRAP-BIT-SDAG-NEXT: enable_wgp_mode = 0
+; TRAP-BIT-SDAG-NEXT: enable_mem_ordered = 0
+; TRAP-BIT-SDAG-NEXT: enable_fwd_progress = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; TRAP-BIT-SDAG-NEXT: user_sgpr_count = 12
+; TRAP-BIT-SDAG-NEXT: enable_trap_handler = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_x = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_y = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_z = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_info = 0
+; TRAP-BIT-SDAG-NEXT: enable_vgpr_workitem_id = 2
+; TRAP-BIT-SDAG-NEXT: enable_exception_msb = 0
+; TRAP-BIT-SDAG-NEXT: granulated_lds_size = 0
+; TRAP-BIT-SDAG-NEXT: enable_exception = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_buffer = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_dispatch_ptr = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_queue_ptr = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_dispatch_id = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_flat_scratch_init = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_size = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; TRAP-BIT-SDAG-NEXT: enable_wavefront_size32 = 0
+; TRAP-BIT-SDAG-NEXT: enable_ordered_append_gds = 0
+; TRAP-BIT-SDAG-NEXT: private_element_size = 1
+; TRAP-BIT-SDAG-NEXT: is_ptr64 = 1
+; TRAP-BIT-SDAG-NEXT: is_dynamic_callstack = 0
+; TRAP-BIT-SDAG-NEXT: is_debug_enabled = 0
+; TRAP-BIT-SDAG-NEXT: is_xnack_enabled = 0
+; TRAP-BIT-SDAG-NEXT: workitem_private_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT: workgroup_group_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT: gds_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT: kernarg_segment_byte_size = 24
+; TRAP-BIT-SDAG-NEXT: workgroup_fbarrier_count = 0
+; TRAP-BIT-SDAG-NEXT: wavefront_sgpr_count = 10
+; TRAP-BIT-SDAG-NEXT: workitem_vgpr_count = 2
+; TRAP-BIT-SDAG-NEXT: reserved_vgpr_first = 0
+; TRAP-BIT-SDAG-NEXT: reserved_vgpr_count = 0
+; TRAP-BIT-SDAG-NEXT: reserved_sgpr_first = 0
+; TRAP-BIT-SDAG-NEXT: reserved_sgpr_count = 0
+; TRAP-BIT-SDAG-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; TRAP-BIT-SDAG-NEXT: debug_private_segment_buffer_sgpr = 0
+; TRAP-BIT-SDAG-NEXT: kernarg_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT: group_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT: private_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT: wavefront_size = 6
+; TRAP-BIT-SDAG-NEXT: call_convention = -1
+; TRAP-BIT-SDAG-NEXT: runtime_loader_kernel_symbol = 0
+; TRAP-BIT-SDAG-NEXT: .end_amd_kernel_code_t
+; TRAP-BIT-SDAG-NEXT: ; %bb.0:
+; TRAP-BIT-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; TRAP-BIT-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; TRAP-BIT-SDAG-NEXT: s_mov_b32 s2, -1
+; TRAP-BIT-SDAG-NEXT: v_mov_b32_e32 v0, 1
+; TRAP-BIT-SDAG-NEXT: v_mov_b32_e32 v1, 2
+; TRAP-BIT-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; TRAP-BIT-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; TRAP-BIT-SDAG-NEXT: s_waitcnt vmcnt(0)
+; TRAP-BIT-SDAG-NEXT: buffer_store_dword v1, off, s[0:3], 0
+; TRAP-BIT-SDAG-NEXT: s_waitcnt vmcnt(0)
+; TRAP-BIT-SDAG-NEXT: s_endpgm
+;
+; TRAP-BIT-GISEL-LABEL: hsa_debugtrap:
+; TRAP-BIT-GISEL: .amd_kernel_code_t
+; TRAP-BIT-GISEL-NEXT: amd_code_version_major = 1
+; TRAP-BIT-GISEL-NEXT: amd_code_version_minor = 2
+; TRAP-BIT-GISEL-NEXT: amd_machine_kind = 1
+; TRAP-BIT-GISEL-NEXT: amd_machine_version_major = 6
+; TRAP-BIT-GISEL-NEXT: amd_machine_version_minor = 0
+; TRAP-BIT-GISEL-NEXT: amd_machine_version_stepping = 0
+; TRAP-BIT-GISEL-NEXT: kernel_code_entry_byte_offset = 256
+; TRAP-BIT-GISEL-NEXT: kernel_code_prefetch_byte_size = 0
+; TRAP-BIT-GISEL-NEXT: granulated_workitem_vgpr_count = 0
+; TRAP-BIT-GISEL-NEXT: granulated_wavefront_sgpr_count = 1
+; TRAP-BIT-GISEL-NEXT: priority = 0
+; TRAP-BIT-GISEL-NEXT: float_mode = 240
+; TRAP-BIT-GISEL-NEXT: priv = 0
+; TRAP-BIT-GISEL-NEXT: enable_dx10_clamp = 1
+; TRAP-BIT-GISEL-NEXT: debug_mode = 0
+; TRAP-BIT-GISEL-NEXT: enable_ieee_mode = 1
+; TRAP-BIT-GISEL-NEXT: enable_wgp_mode = 0
+; TRAP-BIT-GISEL-NEXT: enable_mem_ordered = 0
+; TRAP-BIT-GISEL-NEXT: enable_fwd_progress = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; TRAP-BIT-GISEL-NEXT: user_sgpr_count = 12
+; TRAP-BIT-GISEL-NEXT: enable_trap_handler = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_x = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_y = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_z = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_info = 0
+; TRAP-BIT-GISEL-NEXT: enable_vgpr_workitem_id = 2
+; TRAP-BIT-GISEL-NEXT: enable_exception_msb = 0
+; TRAP-BIT-GISEL-NEXT: granulated_lds_size = 0
+; TRAP-BIT-GISEL-NEXT: enable_exception = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_buffer = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_dispatch_ptr = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_queue_ptr = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_dispatch_id = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_flat_scratch_init = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_size = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; TRAP-BIT-GISEL-NEXT: enable_wavefront_size32 = 0
+; TRAP-BIT-GISEL-NEXT: enable_ordered_append_gds = 0
+; TRAP-BIT-GISEL-NEXT: private_element_size = 1
+; TRAP-BIT-GISEL-NEXT: is_ptr64 = 1
+; TRAP-BIT-GISEL-NEXT: is_dynamic_callstack = 0
+; TRAP-BIT-GISEL-NEXT: is_debug_enabled = 0
+; TRAP-BIT-GISEL-NEXT: is_xnack_enabled = 0
+; TRAP-BIT-GISEL-NEXT: workitem_private_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT: workgroup_group_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT: gds_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT: kernarg_segment_byte_size = 24
+; TRAP-BIT-GISEL-NEXT: workgroup_fbarrier_count = 0
+; TRAP-BIT-GISEL-NEXT: wavefront_sgpr_count = 10
+; TRAP-BIT-GISEL-NEXT: workitem_vgpr_count = 2
+; TRAP-BIT-GISEL-NEXT: reserved_vgpr_first = 0
+; TRAP-BIT-GISEL-NEXT: reserved_vgpr_count = 0
+; TRAP-BIT-GISEL-NEXT: reserved_sgpr_first = 0
+; TRAP-BIT-GISEL-NEXT: reserved_sgpr_count = 0
+; TRAP-BIT-GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; TRAP-BIT-GISEL-NEXT: debug_private_segment_buffer_sgpr = 0
+; TRAP-BIT-GISEL-NEXT: kernarg_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT: group_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT: private_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT: wavefront_size = 6
+; TRAP-BIT-GISEL-NEXT: call_convention = -1
+; TRAP-BIT-GISEL-NEXT: runtime_loader_kernel_symbol = 0
+; TRAP-BIT-GISEL-NEXT: .end_amd_kernel_code_t
+; TRAP-BIT-GISEL-NEXT: ; %bb.0:
+; TRAP-BIT-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; TRAP-BIT-GISEL-NEXT: v_mov_b32_e32 v0, 1
+; TRAP-BIT-GISEL-NEXT: s_mov_b32 s2, -1
+; TRAP-BIT-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; TRAP-BIT-GISEL-NEXT: v_mov_b32_e32 v1, 2
+; TRAP-BIT-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; TRAP-BIT-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; TRAP-BIT-GISEL-NEXT: s_waitcnt vmcnt(0)
+; TRAP-BIT-GISEL-NEXT: buffer_store_dword v1, off, s[0:3], 0
+; TRAP-BIT-GISEL-NEXT: s_waitcnt vmcnt(0)
+; TRAP-BIT-GISEL-NEXT: s_endpgm
+;
+; NO-TRAP-BIT-SDAG-LABEL: hsa_debugtrap:
+; NO-TRAP-BIT-SDAG: .amd_kernel_code_t
+; NO-TRAP-BIT-SDAG-NEXT: amd_code_version_major = 1
+; NO-TRAP-BIT-SDAG-NEXT: amd_code_version_minor = 2
+; NO-TRAP-BIT-SDAG-NEXT: amd_machine_kind = 1
+; NO-TRAP-BIT-SDAG-NEXT: amd_machine_version_major = 6
+; NO-TRAP-BIT-SDAG-NEXT: amd_machine_version_minor = 0
+; NO-TRAP-BIT-SDAG-NEXT: amd_machine_version_stepping = 0
+; NO-TRAP-BIT-SDAG-NEXT: kernel_code_entry_byte_offset = 256
+; NO-TRAP-BIT-SDAG-NEXT: kernel_code_prefetch_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: granulated_workitem_vgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT: granulated_wavefront_sgpr_count = 1
+; NO-TRAP-BIT-SDAG-NEXT: priority = 0
+; NO-TRAP-BIT-SDAG-NEXT: float_mode = 240
+; NO-TRAP-BIT-SDAG-NEXT: priv = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_dx10_clamp = 1
+; NO-TRAP-BIT-SDAG-NEXT: debug_mode = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_ieee_mode = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_wgp_mode = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_mem_ordered = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_fwd_progress = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; NO-TRAP-BIT-SDAG-NEXT: user_sgpr_count = 12
+; NO-TRAP-BIT-SDAG-NEXT: enable_trap_handler = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_x = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_y = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_z = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_info = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_vgpr_workitem_id = 2
+; NO-TRAP-BIT-SDAG-NEXT: enable_exception_msb = 0
+; NO-TRAP-BIT-SDAG-NEXT: granulated_lds_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_exception = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_buffer = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_dispatch_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_queue_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_dispatch_id = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_flat_scratch_init = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_wavefront_size32 = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_ordered_append_gds = 0
+; NO-TRAP-BIT-SDAG-NEXT: private_element_size = 1
+; NO-TRAP-BIT-SDAG-NEXT: is_ptr64 = 1
+; NO-TRAP-BIT-SDAG-NEXT: is_dynamic_callstack = 0
+; NO-TRAP-BIT-SDAG-NEXT: is_debug_enabled = 0
+; NO-TRAP-BIT-SDAG-NEXT: is_xnack_enabled = 0
+; NO-TRAP-BIT-SDAG-NEXT: workitem_private_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: workgroup_group_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: gds_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: kernarg_segment_byte_size = 24
+; NO-TRAP-BIT-SDAG-NEXT: workgroup_fbarrier_count = 0
+; NO-TRAP-BIT-SDAG-NEXT: wavefront_sgpr_count = 10
+; NO-TRAP-BIT-SDAG-NEXT: workitem_vgpr_count = 2
+; NO-TRAP-BIT-SDAG-NEXT: reserved_vgpr_first = 0
+; NO-TRAP-BIT-SDAG-NEXT: reserved_vgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT: reserved_sgpr_first = 0
+; NO-TRAP-BIT-SDAG-NEXT: reserved_sgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; NO-TRAP-BIT-SDAG-NEXT: debug_private_segment_buffer_sgpr = 0
+; NO-TRAP-BIT-SDAG-NEXT: kernarg_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT: group_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT: private_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT: wavefront_size = 6
+; NO-TRAP-BIT-SDAG-NEXT: call_convention = -1
+; NO-TRAP-BIT-SDAG-NEXT: runtime_loader_kernel_symbol = 0
+; NO-TRAP-BIT-SDAG-NEXT: .end_amd_kernel_code_t
+; NO-TRAP-BIT-SDAG-NEXT: ; %bb.0:
+; NO-TRAP-BIT-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-TRAP-BIT-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; NO-TRAP-BIT-SDAG-NEXT: s_mov_b32 s2, -1
+; NO-TRAP-BIT-SDAG-NEXT: v_mov_b32_e32 v0, 1
+; NO-TRAP-BIT-SDAG-NEXT: v_mov_b32_e32 v1, 2
+; NO-TRAP-BIT-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; NO-TRAP-BIT-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-TRAP-BIT-SDAG-NEXT: s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-SDAG-NEXT: buffer_store_dword v1, off, s[0:3], 0
+; NO-TRAP-BIT-SDAG-NEXT: s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-SDAG-NEXT: s_endpgm
+;
+; NO-TRAP-BIT-GISEL-LABEL: hsa_debugtrap:
+; NO-TRAP-BIT-GISEL: .amd_kernel_code_t
+; NO-TRAP-BIT-GISEL-NEXT: amd_code_version_major = 1
+; NO-TRAP-BIT-GISEL-NEXT: amd_code_version_minor = 2
+; NO-TRAP-BIT-GISEL-NEXT: amd_machine_kind = 1
+; NO-TRAP-BIT-GISEL-NEXT: amd_machine_version_major = 6
+; NO-TRAP-BIT-GISEL-NEXT: amd_machine_version_minor = 0
+; NO-TRAP-BIT-GISEL-NEXT: amd_machine_version_stepping = 0
+; NO-TRAP-BIT-GISEL-NEXT: kernel_code_entry_byte_offset = 256
+; NO-TRAP-BIT-GISEL-NEXT: kernel_code_prefetch_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: granulated_workitem_vgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT: granulated_wavefront_sgpr_count = 1
+; NO-TRAP-BIT-GISEL-NEXT: priority = 0
+; NO-TRAP-BIT-GISEL-NEXT: float_mode = 240
+; NO-TRAP-BIT-GISEL-NEXT: priv = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_dx10_clamp = 1
+; NO-TRAP-BIT-GISEL-NEXT: debug_mode = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_ieee_mode = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_wgp_mode = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_mem_ordered = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_fwd_progress = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; NO-TRAP-BIT-GISEL-NEXT: user_sgpr_count = 12
+; NO-TRAP-BIT-GISEL-NEXT: enable_trap_handler = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_x = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_y = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_z = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_info = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_vgpr_workitem_id = 2
+; NO-TRAP-BIT-GISEL-NEXT: enable_exception_msb = 0
+; NO-TRAP-BIT-GISEL-NEXT: granulated_lds_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_exception = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_buffer = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_dispatch_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_queue_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_dispatch_id = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_flat_scratch_init = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_wavefront_size32 = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_ordered_append_gds = 0
+; NO-TRAP-BIT-GISEL-NEXT: private_element_size = 1
+; NO-TRAP-BIT-GISEL-NEXT: is_ptr64 = 1
+; NO-TRAP-BIT-GISEL-NEXT: is_dynamic_callstack = 0
+; NO-TRAP-BIT-GISEL-NEXT: is_debug_enabled = 0
+; NO-TRAP-BIT-GISEL-NEXT: is_xnack_enabled = 0
+; NO-TRAP-BIT-GISEL-NEXT: workitem_private_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: workgroup_group_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: gds_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: kernarg_segment_byte_size = 24
+; NO-TRAP-BIT-GISEL-NEXT: workgroup_fbarrier_count = 0
+; NO-TRAP-BIT-GISEL-NEXT: wavefront_sgpr_count = 10
+; NO-TRAP-BIT-GISEL-NEXT: workitem_vgpr_count = 2
+; NO-TRAP-BIT-GISEL-NEXT: reserved_vgpr_first = 0
+; NO-TRAP-BIT-GISEL-NEXT: reserved_vgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT: reserved_sgpr_first = 0
+; NO-TRAP-BIT-GISEL-NEXT: reserved_sgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; NO-TRAP-BIT-GISEL-NEXT: debug_private_segment_buffer_sgpr = 0
+; NO-TRAP-BIT-GISEL-NEXT: kernarg_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT: group_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT: private_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT: wavefront_size = 6
+; NO-TRAP-BIT-GISEL-NEXT: call_convention = -1
+; NO-TRAP-BIT-GISEL-NEXT: runtime_loader_kernel_symbol = 0
+; NO-TRAP-BIT-GISEL-NEXT: .end_amd_kernel_code_t
+; NO-TRAP-BIT-GISEL-NEXT: ; %bb.0:
+; NO-TRAP-BIT-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-TRAP-BIT-GISEL-NEXT: v_mov_b32_e32 v0, 1
+; NO-TRAP-BIT-GISEL-NEXT: s_mov_b32 s2, -1
+; NO-TRAP-BIT-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; NO-TRAP-BIT-GISEL-NEXT: v_mov_b32_e32 v1, 2
+; NO-TRAP-BIT-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; NO-TRAP-BIT-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-TRAP-BIT-GISEL-NEXT: s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-GISEL-NEXT: buffer_store_dword v1, off, s[0:3], 0
+; NO-TRAP-BIT-GISEL-NEXT: s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-GISEL-NEXT: s_endpgm
+;
+; GCN-WARNING-SDAG-LABEL: hsa_debugtrap:
+; GCN-WARNING-SDAG: ; %bb.0:
+; GCN-WARNING-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-WARNING-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; GCN-WARNING-SDAG-NEXT: s_mov_b32 s2, -1
+; GCN-WARNING-SDAG-NEXT: v_mov_b32_e32 v0, 1
+; GCN-WARNING-SDAG-NEXT: v_mov_b32_e32 v1, 2
+; GCN-WARNING-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-WARNING-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-WARNING-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GCN-WARNING-SDAG-NEXT: buffer_store_dword v1, off, s[0:3], 0
+; GCN-WARNING-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GCN-WARNING-SDAG-NEXT: s_endpgm
+;
+; GCN-WARNING-GISEL-LABEL: hsa_debugtrap:
+; GCN-WARNING-GISEL: ; %bb.0:
+; GCN-WARNING-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-WARNING-GISEL-NEXT: v_mov_b32_e32 v0, 1
+; GCN-WARNING-GISEL-NEXT: s_mov_b32 s2, -1
+; GCN-WARNING-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; GCN-WARNING-GISEL-NEXT: v_mov_b32_e32 v1, 2
+; GCN-WARNING-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-WARNING-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-WARNING-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GCN-WARNING-GISEL-NEXT: buffer_store_dword v1, off, s[0:3], 0
+; GCN-WARNING-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GCN-WARNING-GISEL-NEXT: s_endpgm
store volatile i32 1, ptr addrspace(1) %arg0
call void @llvm.debugtrap()
store volatile i32 2, ptr addrspace(1) %arg0
@@ -84,12 +925,430 @@ define amdgpu_kernel void @hsa_debugtrap(ptr addrspace(1) nocapture readonly %ar
}
; For non-HSA path
-; GCN-LABEL: {{^}}trap:
-; TRAP-BIT: enable_trap_handler = 1
-; NO-TRAP-BIT: enable_trap_handler = 0
-; NO-HSA-TRAP: s_endpgm
-; NO-MESA-TRAP: s_endpgm
define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) {
+; HSA-TRAP-SDAG-LABEL: trap:
+; HSA-TRAP-SDAG: ; %bb.0:
+; HSA-TRAP-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
+; HSA-TRAP-SDAG-NEXT: s_add_i32 s12, s12, s17
+; HSA-TRAP-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-TRAP-SDAG-NEXT: s_mov_b64 s[0:1], s[6:7]
+; HSA-TRAP-SDAG-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
+; HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v2, 1
+; HSA-TRAP-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v1, s3
+; HSA-TRAP-SDAG-NEXT: flat_store_dword v[0:1], v2
+; HSA-TRAP-SDAG-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-SDAG-NEXT: s_trap 2
+;
+; HSA-TRAP-GISEL-LABEL: trap:
+; HSA-TRAP-GISEL: ; %bb.0:
+; HSA-TRAP-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; HSA-TRAP-GISEL-NEXT: s_add_i32 s12, s12, s17
+; HSA-TRAP-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-TRAP-GISEL-NEXT: s_mov_b64 s[0:1], s[6:7]
+; HSA-TRAP-GISEL-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
+; HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v2, 1
+; HSA-TRAP-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; HSA-TRAP-GISEL-NEXT: flat_store_dword v[0:1], v2
+; HSA-TRAP-GISEL-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GISEL-NEXT: s_trap 2
+;
+; NO-HSA-TRAP-SDAG-LABEL: trap:
+; NO-HSA-TRAP-SDAG: ; %bb.0:
+; NO-HSA-TRAP-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
+; NO-HSA-TRAP-SDAG-NEXT: s_add_i32 s12, s12, s17
+; NO-HSA-TRAP-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; NO-HSA-TRAP-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v2, 1
+; NO-HSA-TRAP-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; NO-HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-SDAG-NEXT: flat_store_dword v[0:1], v2
+; NO-HSA-TRAP-SDAG-NEXT: s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-SDAG-NEXT: s_endpgm
+;
+; NO-HSA-TRAP-GISEL-LABEL: trap:
+; NO-HSA-TRAP-GISEL: ; %bb.0:
+; NO-HSA-TRAP-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; NO-HSA-TRAP-GISEL-NEXT: s_add_i32 s12, s12, s17
+; NO-HSA-TRAP-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; NO-HSA-TRAP-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v2, 1
+; NO-HSA-TRAP-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; NO-HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-GISEL-NEXT: flat_store_dword v[0:1], v2
+; NO-HSA-TRAP-GISEL-NEXT: s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-GISEL-NEXT: s_endpgm
+;
+; HSA-WARNING-SDAG-LABEL: trap:
+; HSA-WARNING-SDAG: ; %bb.0:
+; HSA-WARNING-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
+; HSA-WARNING-SDAG-NEXT: s_add_i32 s12, s12, s17
+; HSA-WARNING-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-WARNING-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-WARNING-SDAG-NEXT: v_mov_b32_e32 v2, 1
+; HSA-WARNING-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-WARNING-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; HSA-WARNING-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; HSA-WARNING-SDAG-NEXT: flat_store_dword v[0:1], v2
+; HSA-WARNING-SDAG-NEXT: s_waitcnt vmcnt(0)
+; HSA-WARNING-SDAG-NEXT: s_endpgm
+;
+; HSA-WARNING-GISEL-LABEL: trap:
+; HSA-WARNING-GISEL: ; %bb.0:
+; HSA-WARNING-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; HSA-WARNING-GISEL-NEXT: s_add_i32 s12, s12, s17
+; HSA-WARNING-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-WARNING-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-WARNING-GISEL-NEXT: v_mov_b32_e32 v2, 1
+; HSA-WARNING-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-WARNING-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; HSA-WARNING-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; HSA-WARNING-GISEL-NEXT: flat_store_dword v[0:1], v2
+; HSA-WARNING-GISEL-NEXT: s_waitcnt vmcnt(0)
+; HSA-WARNING-GISEL-NEXT: s_endpgm
+;
+; TRAP-BIT-SDAG-LABEL: trap:
+; TRAP-BIT-SDAG: .amd_kernel_code_t
+; TRAP-BIT-SDAG-NEXT: amd_code_version_major = 1
+; TRAP-BIT-SDAG-NEXT: amd_code_version_minor = 2
+; TRAP-BIT-SDAG-NEXT: amd_machine_kind = 1
+; TRAP-BIT-SDAG-NEXT: amd_machine_version_major = 6
+; TRAP-BIT-SDAG-NEXT: amd_machine_version_minor = 0
+; TRAP-BIT-SDAG-NEXT: amd_machine_version_stepping = 0
+; TRAP-BIT-SDAG-NEXT: kernel_code_entry_byte_offset = 256
+; TRAP-BIT-SDAG-NEXT: kernel_code_prefetch_byte_size = 0
+; TRAP-BIT-SDAG-NEXT: granulated_workitem_vgpr_count = 0
+; TRAP-BIT-SDAG-NEXT: granulated_wavefront_sgpr_count = 1
+; TRAP-BIT-SDAG-NEXT: priority = 0
+; TRAP-BIT-SDAG-NEXT: float_mode = 240
+; TRAP-BIT-SDAG-NEXT: priv = 0
+; TRAP-BIT-SDAG-NEXT: enable_dx10_clamp = 1
+; TRAP-BIT-SDAG-NEXT: debug_mode = 0
+; TRAP-BIT-SDAG-NEXT: enable_ieee_mode = 1
+; TRAP-BIT-SDAG-NEXT: enable_wgp_mode = 0
+; TRAP-BIT-SDAG-NEXT: enable_mem_ordered = 0
+; TRAP-BIT-SDAG-NEXT: enable_fwd_progress = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; TRAP-BIT-SDAG-NEXT: user_sgpr_count = 12
+; TRAP-BIT-SDAG-NEXT: enable_trap_handler = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_x = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_y = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_z = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_info = 0
+; TRAP-BIT-SDAG-NEXT: enable_vgpr_workitem_id = 2
+; TRAP-BIT-SDAG-NEXT: enable_exception_msb = 0
+; TRAP-BIT-SDAG-NEXT: granulated_lds_size = 0
+; TRAP-BIT-SDAG-NEXT: enable_exception = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_buffer = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_dispatch_ptr = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_queue_ptr = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_dispatch_id = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_flat_scratch_init = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_size = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; TRAP-BIT-SDAG-NEXT: enable_wavefront_size32 = 0
+; TRAP-BIT-SDAG-NEXT: enable_ordered_append_gds = 0
+; TRAP-BIT-SDAG-NEXT: private_element_size = 1
+; TRAP-BIT-SDAG-NEXT: is_ptr64 = 1
+; TRAP-BIT-SDAG-NEXT: is_dynamic_callstack = 0
+; TRAP-BIT-SDAG-NEXT: is_debug_enabled = 0
+; TRAP-BIT-SDAG-NEXT: is_xnack_enabled = 0
+; TRAP-BIT-SDAG-NEXT: workitem_private_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT: workgroup_group_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT: gds_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT: kernarg_segment_byte_size = 24
+; TRAP-BIT-SDAG-NEXT: workgroup_fbarrier_count = 0
+; TRAP-BIT-SDAG-NEXT: wavefront_sgpr_count = 10
+; TRAP-BIT-SDAG-NEXT: workitem_vgpr_count = 1
+; TRAP-BIT-SDAG-NEXT: reserved_vgpr_first = 0
+; TRAP-BIT-SDAG-NEXT: reserved_vgpr_count = 0
+; TRAP-BIT-SDAG-NEXT: reserved_sgpr_first = 0
+; TRAP-BIT-SDAG-NEXT: reserved_sgpr_count = 0
+; TRAP-BIT-SDAG-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; TRAP-BIT-SDAG-NEXT: debug_private_segment_buffer_sgpr = 0
+; TRAP-BIT-SDAG-NEXT: kernarg_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT: group_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT: private_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT: wavefront_size = 6
+; TRAP-BIT-SDAG-NEXT: call_convention = -1
+; TRAP-BIT-SDAG-NEXT: runtime_loader_kernel_symbol = 0
+; TRAP-BIT-SDAG-NEXT: .end_amd_kernel_code_t
+; TRAP-BIT-SDAG-NEXT: ; %bb.0:
+; TRAP-BIT-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; TRAP-BIT-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; TRAP-BIT-SDAG-NEXT: s_mov_b32 s2, -1
+; TRAP-BIT-SDAG-NEXT: v_mov_b32_e32 v0, 1
+; TRAP-BIT-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; TRAP-BIT-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; TRAP-BIT-SDAG-NEXT: s_waitcnt vmcnt(0)
+; TRAP-BIT-SDAG-NEXT: s_endpgm
+;
+; TRAP-BIT-GISEL-LABEL: trap:
+; TRAP-BIT-GISEL: .amd_kernel_code_t
+; TRAP-BIT-GISEL-NEXT: amd_code_version_major = 1
+; TRAP-BIT-GISEL-NEXT: amd_code_version_minor = 2
+; TRAP-BIT-GISEL-NEXT: amd_machine_kind = 1
+; TRAP-BIT-GISEL-NEXT: amd_machine_version_major = 6
+; TRAP-BIT-GISEL-NEXT: amd_machine_version_minor = 0
+; TRAP-BIT-GISEL-NEXT: amd_machine_version_stepping = 0
+; TRAP-BIT-GISEL-NEXT: kernel_code_entry_byte_offset = 256
+; TRAP-BIT-GISEL-NEXT: kernel_code_prefetch_byte_size = 0
+; TRAP-BIT-GISEL-NEXT: granulated_workitem_vgpr_count = 0
+; TRAP-BIT-GISEL-NEXT: granulated_wavefront_sgpr_count = 1
+; TRAP-BIT-GISEL-NEXT: priority = 0
+; TRAP-BIT-GISEL-NEXT: float_mode = 240
+; TRAP-BIT-GISEL-NEXT: priv = 0
+; TRAP-BIT-GISEL-NEXT: enable_dx10_clamp = 1
+; TRAP-BIT-GISEL-NEXT: debug_mode = 0
+; TRAP-BIT-GISEL-NEXT: enable_ieee_mode = 1
+; TRAP-BIT-GISEL-NEXT: enable_wgp_mode = 0
+; TRAP-BIT-GISEL-NEXT: enable_mem_ordered = 0
+; TRAP-BIT-GISEL-NEXT: enable_fwd_progress = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; TRAP-BIT-GISEL-NEXT: user_sgpr_count = 12
+; TRAP-BIT-GISEL-NEXT: enable_trap_handler = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_x = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_y = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_z = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_info = 0
+; TRAP-BIT-GISEL-NEXT: enable_vgpr_workitem_id = 2
+; TRAP-BIT-GISEL-NEXT: enable_exception_msb = 0
+; TRAP-BIT-GISEL-NEXT: granulated_lds_size = 0
+; TRAP-BIT-GISEL-NEXT: enable_exception = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_buffer = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_dispatch_ptr = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_queue_ptr = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_dispatch_id = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_flat_scratch_init = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_size = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; TRAP-BIT-GISEL-NEXT: enable_wavefront_size32 = 0
+; TRAP-BIT-GISEL-NEXT: enable_ordered_append_gds = 0
+; TRAP-BIT-GISEL-NEXT: private_element_size = 1
+; TRAP-BIT-GISEL-NEXT: is_ptr64 = 1
+; TRAP-BIT-GISEL-NEXT: is_dynamic_callstack = 0
+; TRAP-BIT-GISEL-NEXT: is_debug_enabled = 0
+; TRAP-BIT-GISEL-NEXT: is_xnack_enabled = 0
+; TRAP-BIT-GISEL-NEXT: workitem_private_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT: workgroup_group_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT: gds_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT: kernarg_segment_byte_size = 24
+; TRAP-BIT-GISEL-NEXT: workgroup_fbarrier_count = 0
+; TRAP-BIT-GISEL-NEXT: wavefront_sgpr_count = 10
+; TRAP-BIT-GISEL-NEXT: workitem_vgpr_count = 1
+; TRAP-BIT-GISEL-NEXT: reserved_vgpr_first = 0
+; TRAP-BIT-GISEL-NEXT: reserved_vgpr_count = 0
+; TRAP-BIT-GISEL-NEXT: reserved_sgpr_first = 0
+; TRAP-BIT-GISEL-NEXT: reserved_sgpr_count = 0
+; TRAP-BIT-GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; TRAP-BIT-GISEL-NEXT: debug_private_segment_buffer_sgpr = 0
+; TRAP-BIT-GISEL-NEXT: kernarg_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT: group_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT: private_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT: wavefront_size = 6
+; TRAP-BIT-GISEL-NEXT: call_convention = -1
+; TRAP-BIT-GISEL-NEXT: runtime_loader_kernel_symbol = 0
+; TRAP-BIT-GISEL-NEXT: .end_amd_kernel_code_t
+; TRAP-BIT-GISEL-NEXT: ; %bb.0:
+; TRAP-BIT-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; TRAP-BIT-GISEL-NEXT: v_mov_b32_e32 v0, 1
+; TRAP-BIT-GISEL-NEXT: s_mov_b32 s2, -1
+; TRAP-BIT-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; TRAP-BIT-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; TRAP-BIT-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; TRAP-BIT-GISEL-NEXT: s_waitcnt vmcnt(0)
+; TRAP-BIT-GISEL-NEXT: s_endpgm
+;
+; NO-TRAP-BIT-SDAG-LABEL: trap:
+; NO-TRAP-BIT-SDAG: .amd_kernel_code_t
+; NO-TRAP-BIT-SDAG-NEXT: amd_code_version_major = 1
+; NO-TRAP-BIT-SDAG-NEXT: amd_code_version_minor = 2
+; NO-TRAP-BIT-SDAG-NEXT: amd_machine_kind = 1
+; NO-TRAP-BIT-SDAG-NEXT: amd_machine_version_major = 6
+; NO-TRAP-BIT-SDAG-NEXT: amd_machine_version_minor = 0
+; NO-TRAP-BIT-SDAG-NEXT: amd_machine_version_stepping = 0
+; NO-TRAP-BIT-SDAG-NEXT: kernel_code_entry_byte_offset = 256
+; NO-TRAP-BIT-SDAG-NEXT: kernel_code_prefetch_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: granulated_workitem_vgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT: granulated_wavefront_sgpr_count = 1
+; NO-TRAP-BIT-SDAG-NEXT: priority = 0
+; NO-TRAP-BIT-SDAG-NEXT: float_mode = 240
+; NO-TRAP-BIT-SDAG-NEXT: priv = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_dx10_clamp = 1
+; NO-TRAP-BIT-SDAG-NEXT: debug_mode = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_ieee_mode = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_wgp_mode = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_mem_ordered = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_fwd_progress = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; NO-TRAP-BIT-SDAG-NEXT: user_sgpr_count = 12
+; NO-TRAP-BIT-SDAG-NEXT: enable_trap_handler = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_x = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_y = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_z = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_info = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_vgpr_workitem_id = 2
+; NO-TRAP-BIT-SDAG-NEXT: enable_exception_msb = 0
+; NO-TRAP-BIT-SDAG-NEXT: granulated_lds_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_exception = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_buffer = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_dispatch_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_queue_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_dispatch_id = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_flat_scratch_init = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_wavefront_size32 = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_ordered_append_gds = 0
+; NO-TRAP-BIT-SDAG-NEXT: private_element_size = 1
+; NO-TRAP-BIT-SDAG-NEXT: is_ptr64 = 1
+; NO-TRAP-BIT-SDAG-NEXT: is_dynamic_callstack = 0
+; NO-TRAP-BIT-SDAG-NEXT: is_debug_enabled = 0
+; NO-TRAP-BIT-SDAG-NEXT: is_xnack_enabled = 0
+; NO-TRAP-BIT-SDAG-NEXT: workitem_private_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: workgroup_group_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: gds_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: kernarg_segment_byte_size = 24
+; NO-TRAP-BIT-SDAG-NEXT: workgroup_fbarrier_count = 0
+; NO-TRAP-BIT-SDAG-NEXT: wavefront_sgpr_count = 10
+; NO-TRAP-BIT-SDAG-NEXT: workitem_vgpr_count = 1
+; NO-TRAP-BIT-SDAG-NEXT: reserved_vgpr_first = 0
+; NO-TRAP-BIT-SDAG-NEXT: reserved_vgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT: reserved_sgpr_first = 0
+; NO-TRAP-BIT-SDAG-NEXT: reserved_sgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; NO-TRAP-BIT-SDAG-NEXT: debug_private_segment_buffer_sgpr = 0
+; NO-TRAP-BIT-SDAG-NEXT: kernarg_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT: group_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT: private_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT: wavefront_size = 6
+; NO-TRAP-BIT-SDAG-NEXT: call_convention = -1
+; NO-TRAP-BIT-SDAG-NEXT: runtime_loader_kernel_symbol = 0
+; NO-TRAP-BIT-SDAG-NEXT: .end_amd_kernel_code_t
+; NO-TRAP-BIT-SDAG-NEXT: ; %bb.0:
+; NO-TRAP-BIT-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-TRAP-BIT-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; NO-TRAP-BIT-SDAG-NEXT: s_mov_b32 s2, -1
+; NO-TRAP-BIT-SDAG-NEXT: v_mov_b32_e32 v0, 1
+; NO-TRAP-BIT-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; NO-TRAP-BIT-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-TRAP-BIT-SDAG-NEXT: s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-SDAG-NEXT: s_endpgm
+;
+; NO-TRAP-BIT-GISEL-LABEL: trap:
+; NO-TRAP-BIT-GISEL: .amd_kernel_code_t
+; NO-TRAP-BIT-GISEL-NEXT: amd_code_version_major = 1
+; NO-TRAP-BIT-GISEL-NEXT: amd_code_version_minor = 2
+; NO-TRAP-BIT-GISEL-NEXT: amd_machine_kind = 1
+; NO-TRAP-BIT-GISEL-NEXT: amd_machine_version_major = 6
+; NO-TRAP-BIT-GISEL-NEXT: amd_machine_version_minor = 0
+; NO-TRAP-BIT-GISEL-NEXT: amd_machine_version_stepping = 0
+; NO-TRAP-BIT-GISEL-NEXT: kernel_code_entry_byte_offset = 256
+; NO-TRAP-BIT-GISEL-NEXT: kernel_code_prefetch_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: granulated_workitem_vgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT: granulated_wavefront_sgpr_count = 1
+; NO-TRAP-BIT-GISEL-NEXT: priority = 0
+; NO-TRAP-BIT-GISEL-NEXT: float_mode = 240
+; NO-TRAP-BIT-GISEL-NEXT: priv = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_dx10_clamp = 1
+; NO-TRAP-BIT-GISEL-NEXT: debug_mode = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_ieee_mode = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_wgp_mode = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_mem_ordered = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_fwd_progress = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; NO-TRAP-BIT-GISEL-NEXT: user_sgpr_count = 12
+; NO-TRAP-BIT-GISEL-NEXT: enable_trap_handler = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_x = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_y = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_z = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_info = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_vgpr_workitem_id = 2
+; NO-TRAP-BIT-GISEL-NEXT: enable_exception_msb = 0
+; NO-TRAP-BIT-GISEL-NEXT: granulated_lds_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_exception = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_buffer = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_dispatch_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_queue_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_dispatch_id = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_flat_scratch_init = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_wavefront_size32 = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_ordered_append_gds = 0
+; NO-TRAP-BIT-GISEL-NEXT: private_element_size = 1
+; NO-TRAP-BIT-GISEL-NEXT: is_ptr64 = 1
+; NO-TRAP-BIT-GISEL-NEXT: is_dynamic_callstack = 0
+; NO-TRAP-BIT-GISEL-NEXT: is_debug_enabled = 0
+; NO-TRAP-BIT-GISEL-NEXT: is_xnack_enabled = 0
+; NO-TRAP-BIT-GISEL-NEXT: workitem_private_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: workgroup_group_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: gds_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: kernarg_segment_byte_size = 24
+; NO-TRAP-BIT-GISEL-NEXT: workgroup_fbarrier_count = 0
+; NO-TRAP-BIT-GISEL-NEXT: wavefront_sgpr_count = 10
+; NO-TRAP-BIT-GISEL-NEXT: workitem_vgpr_count = 1
+; NO-TRAP-BIT-GISEL-NEXT: reserved_vgpr_first = 0
+; NO-TRAP-BIT-GISEL-NEXT: reserved_vgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT: reserved_sgpr_first = 0
+; NO-TRAP-BIT-GISEL-NEXT: reserved_sgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; NO-TRAP-BIT-GISEL-NEXT: debug_private_segment_buffer_sgpr = 0
+; NO-TRAP-BIT-GISEL-NEXT: kernarg_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT: group_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT: private_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT: wavefront_size = 6
+; NO-TRAP-BIT-GISEL-NEXT: call_convention = -1
+; NO-TRAP-BIT-GISEL-NEXT: runtime_loader_kernel_symbol = 0
+; NO-TRAP-BIT-GISEL-NEXT: .end_amd_kernel_code_t
+; NO-TRAP-BIT-GISEL-NEXT: ; %bb.0:
+; NO-TRAP-BIT-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-TRAP-BIT-GISEL-NEXT: v_mov_b32_e32 v0, 1
+; NO-TRAP-BIT-GISEL-NEXT: s_mov_b32 s2, -1
+; NO-TRAP-BIT-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; NO-TRAP-BIT-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; NO-TRAP-BIT-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-TRAP-BIT-GISEL-NEXT: s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-GISEL-NEXT: s_endpgm
+;
+; GCN-WARNING-SDAG-LABEL: trap:
+; GCN-WARNING-SDAG: ; %bb.0:
+; GCN-WARNING-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-WARNING-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; GCN-WARNING-SDAG-NEXT: s_mov_b32 s2, -1
+; GCN-WARNING-SDAG-NEXT: v_mov_b32_e32 v0, 1
+; GCN-WARNING-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-WARNING-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-WARNING-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GCN-WARNING-SDAG-NEXT: s_endpgm
+;
+; GCN-WARNING-GISEL-LABEL: trap:
+; GCN-WARNING-GISEL: ; %bb.0:
+; GCN-WARNING-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-WARNING-GISEL-NEXT: v_mov_b32_e32 v0, 1
+; GCN-WARNING-GISEL-NEXT: s_mov_b32 s2, -1
+; GCN-WARNING-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; GCN-WARNING-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-WARNING-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-WARNING-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GCN-WARNING-GISEL-NEXT: s_endpgm
store volatile i32 1, ptr addrspace(1) %arg0
call void @llvm.trap()
unreachable
@@ -97,14 +1356,532 @@ define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) {
ret void
}
-; GCN-LABEL: {{^}}non_entry_trap:
-; TRAP-BIT: enable_trap_handler = 1
-; NO-TRAP-BIT: enable_trap_handler = 0
-
-; HSA-TRAP: BB{{[0-9]_[0-9]+}}: ; %trap
-; HSA-TRAP: s_mov_b64 s[0:1], s[6:7]
-; HSA-TRAP-NEXT: s_trap 2
define amdgpu_kernel void @non_entry_trap(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
+; HSA-TRAP-SDAG-LABEL: non_entry_trap:
+; HSA-TRAP-SDAG: ; %bb.0: ; %entry
+; HSA-TRAP-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
+; HSA-TRAP-SDAG-NEXT: s_add_i32 s12, s12, s17
+; HSA-TRAP-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-TRAP-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-TRAP-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; HSA-TRAP-SDAG-NEXT: flat_load_dword v0, v[0:1] glc
+; HSA-TRAP-SDAG-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0
+; HSA-TRAP-SDAG-NEXT: s_cbranch_vccz .LBB3_2
+; HSA-TRAP-SDAG-NEXT: ; %bb.1: ; %ret
+; HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v2, 3
+; HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; HSA-TRAP-SDAG-NEXT: flat_store_dword v[0:1], v2
+; HSA-TRAP-SDAG-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-SDAG-NEXT: s_endpgm
+; HSA-TRAP-SDAG-NEXT: .LBB3_2: ; %trap
+; HSA-TRAP-SDAG-NEXT: s_mov_b64 s[0:1], s[6:7]
+; HSA-TRAP-SDAG-NEXT: s_trap 2
+;
+; HSA-TRAP-GISEL-LABEL: non_entry_trap:
+; HSA-TRAP-GISEL: ; %bb.0: ; %entry
+; HSA-TRAP-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; HSA-TRAP-GISEL-NEXT: s_add_i32 s12, s12, s17
+; HSA-TRAP-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-TRAP-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-TRAP-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; HSA-TRAP-GISEL-NEXT: flat_load_dword v0, v[0:1] glc
+; HSA-TRAP-GISEL-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GISEL-NEXT: v_readfirstlane_b32 s2, v0
+; HSA-TRAP-GISEL-NEXT: s_cmp_eq_u32 s2, -1
+; HSA-TRAP-GISEL-NEXT: s_cbranch_scc0 .LBB3_2
+; HSA-TRAP-GISEL-NEXT: ; %bb.1: ; %ret
+; HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v2, 3
+; HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; HSA-TRAP-GISEL-NEXT: flat_store_dword v[0:1], v2
+; HSA-TRAP-GISEL-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GISEL-NEXT: s_endpgm
+; HSA-TRAP-GISEL-NEXT: .LBB3_2: ; %trap
+; HSA-TRAP-GISEL-NEXT: s_mov_b64 s[0:1], s[6:7]
+; HSA-TRAP-GISEL-NEXT: s_trap 2
+;
+; NO-HSA-TRAP-SDAG-LABEL: non_entry_trap:
+; NO-HSA-TRAP-SDAG: ; %bb.0: ; %entry
+; NO-HSA-TRAP-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
+; NO-HSA-TRAP-SDAG-NEXT: s_add_i32 s12, s12, s17
+; NO-HSA-TRAP-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; NO-HSA-TRAP-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-HSA-TRAP-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; NO-HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-SDAG-NEXT: flat_load_dword v0, v[0:1] glc
+; NO-HSA-TRAP-SDAG-NEXT: s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0
+; NO-HSA-TRAP-SDAG-NEXT: s_cbranch_vccz .LBB3_2
+; NO-HSA-TRAP-SDAG-NEXT: ; %bb.1: ; %ret
+; NO-HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v2, 3
+; NO-HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-SDAG-NEXT: flat_store_dword v[0:1], v2
+; NO-HSA-TRAP-SDAG-NEXT: s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-SDAG-NEXT: s_endpgm
+; NO-HSA-TRAP-SDAG-NEXT: .LBB3_2: ; %trap
+; NO-HSA-TRAP-SDAG-NEXT: s_endpgm
+;
+; NO-HSA-TRAP-GISEL-LABEL: non_entry_trap:
+; NO-HSA-TRAP-GISEL: ; %bb.0: ; %entry
+; NO-HSA-TRAP-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; NO-HSA-TRAP-GISEL-NEXT: s_add_i32 s12, s12, s17
+; NO-HSA-TRAP-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; NO-HSA-TRAP-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-HSA-TRAP-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; NO-HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-GISEL-NEXT: flat_load_dword v0, v[0:1] glc
+; NO-HSA-TRAP-GISEL-NEXT: s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-GISEL-NEXT: v_readfirstlane_b32 s2, v0
+; NO-HSA-TRAP-GISEL-NEXT: s_cmp_eq_u32 s2, -1
+; NO-HSA-TRAP-GISEL-NEXT: s_cbranch_scc0 .LBB3_2
+; NO-HSA-TRAP-GISEL-NEXT: ; %bb.1: ; %ret
+; NO-HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v2, 3
+; NO-HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-GISEL-NEXT: flat_store_dword v[0:1], v2
+; NO-HSA-TRAP-GISEL-NEXT: s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-GISEL-NEXT: s_endpgm
+; NO-HSA-TRAP-GISEL-NEXT: .LBB3_2: ; %trap
+; NO-HSA-TRAP-GISEL-NEXT: s_endpgm
+;
+; HSA-WARNING-SDAG-LABEL: non_entry_trap:
+; HSA-WARNING-SDAG: ; %bb.0: ; %entry
+; HSA-WARNING-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
+; HSA-WARNING-SDAG-NEXT: s_add_i32 s12, s12, s17
+; HSA-WARNING-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-WARNING-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-WARNING-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-WARNING-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; HSA-WARNING-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; HSA-WARNING-SDAG-NEXT: flat_load_dword v0, v[0:1] glc
+; HSA-WARNING-SDAG-NEXT: s_waitcnt vmcnt(0)
+; HSA-WARNING-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0
+; HSA-WARNING-SDAG-NEXT: s_cbranch_vccz .LBB3_2
+; HSA-WARNING-SDAG-NEXT: ; %bb.1: ; %ret
+; HSA-WARNING-SDAG-NEXT: v_mov_b32_e32 v2, 3
+; HSA-WARNING-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; HSA-WARNING-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; HSA-WARNING-SDAG-NEXT: flat_store_dword v[0:1], v2
+; HSA-WARNING-SDAG-NEXT: s_waitcnt vmcnt(0)
+; HSA-WARNING-SDAG-NEXT: s_endpgm
+; HSA-WARNING-SDAG-NEXT: .LBB3_2: ; %trap
+; HSA-WARNING-SDAG-NEXT: s_endpgm
+;
+; HSA-WARNING-GISEL-LABEL: non_entry_trap:
+; HSA-WARNING-GISEL: ; %bb.0: ; %entry
+; HSA-WARNING-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; HSA-WARNING-GISEL-NEXT: s_add_i32 s12, s12, s17
+; HSA-WARNING-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-WARNING-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-WARNING-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-WARNING-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; HSA-WARNING-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; HSA-WARNING-GISEL-NEXT: flat_load_dword v0, v[0:1] glc
+; HSA-WARNING-GISEL-NEXT: s_waitcnt vmcnt(0)
+; HSA-WARNING-GISEL-NEXT: v_readfirstlane_b32 s2, v0
+; HSA-WARNING-GISEL-NEXT: s_cmp_eq_u32 s2, -1
+; HSA-WARNING-GISEL-NEXT: s_cbranch_scc0 .LBB3_2
+; HSA-WARNING-GISEL-NEXT: ; %bb.1: ; %ret
+; HSA-WARNING-GISEL-NEXT: v_mov_b32_e32 v2, 3
+; HSA-WARNING-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; HSA-WARNING-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; HSA-WARNING-GISEL-NEXT: flat_store_dword v[0:1], v2
+; HSA-WARNING-GISEL-NEXT: s_waitcnt vmcnt(0)
+; HSA-WARNING-GISEL-NEXT: s_endpgm
+; HSA-WARNING-GISEL-NEXT: .LBB3_2: ; %trap
+; HSA-WARNING-GISEL-NEXT: s_endpgm
+;
+; TRAP-BIT-SDAG-LABEL: non_entry_trap:
+; TRAP-BIT-SDAG: .amd_kernel_code_t
+; TRAP-BIT-SDAG-NEXT: amd_code_version_major = 1
+; TRAP-BIT-SDAG-NEXT: amd_code_version_minor = 2
+; TRAP-BIT-SDAG-NEXT: amd_machine_kind = 1
+; TRAP-BIT-SDAG-NEXT: amd_machine_version_major = 6
+; TRAP-BIT-SDAG-NEXT: amd_machine_version_minor = 0
+; TRAP-BIT-SDAG-NEXT: amd_machine_version_stepping = 0
+; TRAP-BIT-SDAG-NEXT: kernel_code_entry_byte_offset = 256
+; TRAP-BIT-SDAG-NEXT: kernel_code_prefetch_byte_size = 0
+; TRAP-BIT-SDAG-NEXT: granulated_workitem_vgpr_count = 0
+; TRAP-BIT-SDAG-NEXT: granulated_wavefront_sgpr_count = 1
+; TRAP-BIT-SDAG-NEXT: priority = 0
+; TRAP-BIT-SDAG-NEXT: float_mode = 240
+; TRAP-BIT-SDAG-NEXT: priv = 0
+; TRAP-BIT-SDAG-NEXT: enable_dx10_clamp = 1
+; TRAP-BIT-SDAG-NEXT: debug_mode = 0
+; TRAP-BIT-SDAG-NEXT: enable_ieee_mode = 1
+; TRAP-BIT-SDAG-NEXT: enable_wgp_mode = 0
+; TRAP-BIT-SDAG-NEXT: enable_mem_ordered = 0
+; TRAP-BIT-SDAG-NEXT: enable_fwd_progress = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; TRAP-BIT-SDAG-NEXT: user_sgpr_count = 12
+; TRAP-BIT-SDAG-NEXT: enable_trap_handler = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_x = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_y = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_z = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_info = 0
+; TRAP-BIT-SDAG-NEXT: enable_vgpr_workitem_id = 2
+; TRAP-BIT-SDAG-NEXT: enable_exception_msb = 0
+; TRAP-BIT-SDAG-NEXT: granulated_lds_size = 0
+; TRAP-BIT-SDAG-NEXT: enable_exception = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_buffer = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_dispatch_ptr = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_queue_ptr = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_dispatch_id = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_flat_scratch_init = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_size = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; TRAP-BIT-SDAG-NEXT: enable_wavefront_size32 = 0
+; TRAP-BIT-SDAG-NEXT: enable_ordered_append_gds = 0
+; TRAP-BIT-SDAG-NEXT: private_element_size = 1
+; TRAP-BIT-SDAG-NEXT: is_ptr64 = 1
+; TRAP-BIT-SDAG-NEXT: is_dynamic_callstack = 0
+; TRAP-BIT-SDAG-NEXT: is_debug_enabled = 0
+; TRAP-BIT-SDAG-NEXT: is_xnack_enabled = 0
+; TRAP-BIT-SDAG-NEXT: workitem_private_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT: workgroup_group_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT: gds_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT: kernarg_segment_byte_size = 24
+; TRAP-BIT-SDAG-NEXT: workgroup_fbarrier_count = 0
+; TRAP-BIT-SDAG-NEXT: wavefront_sgpr_count = 12
+; TRAP-BIT-SDAG-NEXT: workitem_vgpr_count = 1
+; TRAP-BIT-SDAG-NEXT: reserved_vgpr_first = 0
+; TRAP-BIT-SDAG-NEXT: reserved_vgpr_count = 0
+; TRAP-BIT-SDAG-NEXT: reserved_sgpr_first = 0
+; TRAP-BIT-SDAG-NEXT: reserved_sgpr_count = 0
+; TRAP-BIT-SDAG-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; TRAP-BIT-SDAG-NEXT: debug_private_segment_buffer_sgpr = 0
+; TRAP-BIT-SDAG-NEXT: kernarg_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT: group_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT: private_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT: wavefront_size = 6
+; TRAP-BIT-SDAG-NEXT: call_convention = -1
+; TRAP-BIT-SDAG-NEXT: runtime_loader_kernel_symbol = 0
+; TRAP-BIT-SDAG-NEXT: .end_amd_kernel_code_t
+; TRAP-BIT-SDAG-NEXT: ; %bb.0: ; %entry
+; TRAP-BIT-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; TRAP-BIT-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; TRAP-BIT-SDAG-NEXT: s_mov_b32 s2, -1
+; TRAP-BIT-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; TRAP-BIT-SDAG-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
+; TRAP-BIT-SDAG-NEXT: s_waitcnt vmcnt(0)
+; TRAP-BIT-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0
+; TRAP-BIT-SDAG-NEXT: s_cbranch_vccz .LBB3_2
+; TRAP-BIT-SDAG-NEXT: ; %bb.1: ; %ret
+; TRAP-BIT-SDAG-NEXT: v_mov_b32_e32 v0, 3
+; TRAP-BIT-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; TRAP-BIT-SDAG-NEXT: s_waitcnt vmcnt(0)
+; TRAP-BIT-SDAG-NEXT: s_endpgm
+; TRAP-BIT-SDAG-NEXT: .LBB3_2: ; %trap
+; TRAP-BIT-SDAG-NEXT: s_endpgm
+;
+; TRAP-BIT-GISEL-LABEL: non_entry_trap:
+; TRAP-BIT-GISEL: .amd_kernel_code_t
+; TRAP-BIT-GISEL-NEXT: amd_code_version_major = 1
+; TRAP-BIT-GISEL-NEXT: amd_code_version_minor = 2
+; TRAP-BIT-GISEL-NEXT: amd_machine_kind = 1
+; TRAP-BIT-GISEL-NEXT: amd_machine_version_major = 6
+; TRAP-BIT-GISEL-NEXT: amd_machine_version_minor = 0
+; TRAP-BIT-GISEL-NEXT: amd_machine_version_stepping = 0
+; TRAP-BIT-GISEL-NEXT: kernel_code_entry_byte_offset = 256
+; TRAP-BIT-GISEL-NEXT: kernel_code_prefetch_byte_size = 0
+; TRAP-BIT-GISEL-NEXT: granulated_workitem_vgpr_count = 0
+; TRAP-BIT-GISEL-NEXT: granulated_wavefront_sgpr_count = 1
+; TRAP-BIT-GISEL-NEXT: priority = 0
+; TRAP-BIT-GISEL-NEXT: float_mode = 240
+; TRAP-BIT-GISEL-NEXT: priv = 0
+; TRAP-BIT-GISEL-NEXT: enable_dx10_clamp = 1
+; TRAP-BIT-GISEL-NEXT: debug_mode = 0
+; TRAP-BIT-GISEL-NEXT: enable_ieee_mode = 1
+; TRAP-BIT-GISEL-NEXT: enable_wgp_mode = 0
+; TRAP-BIT-GISEL-NEXT: enable_mem_ordered = 0
+; TRAP-BIT-GISEL-NEXT: enable_fwd_progress = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; TRAP-BIT-GISEL-NEXT: user_sgpr_count = 12
+; TRAP-BIT-GISEL-NEXT: enable_trap_handler = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_x = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_y = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_z = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_info = 0
+; TRAP-BIT-GISEL-NEXT: enable_vgpr_workitem_id = 2
+; TRAP-BIT-GISEL-NEXT: enable_exception_msb = 0
+; TRAP-BIT-GISEL-NEXT: granulated_lds_size = 0
+; TRAP-BIT-GISEL-NEXT: enable_exception = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_buffer = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_dispatch_ptr = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_queue_ptr = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_dispatch_id = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_flat_scratch_init = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_size = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; TRAP-BIT-GISEL-NEXT: enable_wavefront_size32 = 0
+; TRAP-BIT-GISEL-NEXT: enable_ordered_append_gds = 0
+; TRAP-BIT-GISEL-NEXT: private_element_size = 1
+; TRAP-BIT-GISEL-NEXT: is_ptr64 = 1
+; TRAP-BIT-GISEL-NEXT: is_dynamic_callstack = 0
+; TRAP-BIT-GISEL-NEXT: is_debug_enabled = 0
+; TRAP-BIT-GISEL-NEXT: is_xnack_enabled = 0
+; TRAP-BIT-GISEL-NEXT: workitem_private_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT: workgroup_group_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT: gds_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT: kernarg_segment_byte_size = 24
+; TRAP-BIT-GISEL-NEXT: workgroup_fbarrier_count = 0
+; TRAP-BIT-GISEL-NEXT: wavefront_sgpr_count = 10
+; TRAP-BIT-GISEL-NEXT: workitem_vgpr_count = 1
+; TRAP-BIT-GISEL-NEXT: reserved_vgpr_first = 0
+; TRAP-BIT-GISEL-NEXT: reserved_vgpr_count = 0
+; TRAP-BIT-GISEL-NEXT: reserved_sgpr_first = 0
+; TRAP-BIT-GISEL-NEXT: reserved_sgpr_count = 0
+; TRAP-BIT-GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; TRAP-BIT-GISEL-NEXT: debug_private_segment_buffer_sgpr = 0
+; TRAP-BIT-GISEL-NEXT: kernarg_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT: group_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT: private_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT: wavefront_size = 6
+; TRAP-BIT-GISEL-NEXT: call_convention = -1
+; TRAP-BIT-GISEL-NEXT: runtime_loader_kernel_symbol = 0
+; TRAP-BIT-GISEL-NEXT: .end_amd_kernel_code_t
+; TRAP-BIT-GISEL-NEXT: ; %bb.0: ; %entry
+; TRAP-BIT-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; TRAP-BIT-GISEL-NEXT: s_mov_b32 s2, -1
+; TRAP-BIT-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; TRAP-BIT-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; TRAP-BIT-GISEL-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
+; TRAP-BIT-GISEL-NEXT: s_waitcnt vmcnt(0)
+; TRAP-BIT-GISEL-NEXT: v_readfirstlane_b32 s4, v0
+; TRAP-BIT-GISEL-NEXT: s_cmp_eq_u32 s4, -1
+; TRAP-BIT-GISEL-NEXT: s_cbranch_scc0 .LBB3_2
+; TRAP-BIT-GISEL-NEXT: ; %bb.1: ; %ret
+; TRAP-BIT-GISEL-NEXT: v_mov_b32_e32 v0, 3
+; TRAP-BIT-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; TRAP-BIT-GISEL-NEXT: s_waitcnt vmcnt(0)
+; TRAP-BIT-GISEL-NEXT: s_endpgm
+; TRAP-BIT-GISEL-NEXT: .LBB3_2: ; %trap
+; TRAP-BIT-GISEL-NEXT: s_endpgm
+;
+; NO-TRAP-BIT-SDAG-LABEL: non_entry_trap:
+; NO-TRAP-BIT-SDAG: .amd_kernel_code_t
+; NO-TRAP-BIT-SDAG-NEXT: amd_code_version_major = 1
+; NO-TRAP-BIT-SDAG-NEXT: amd_code_version_minor = 2
+; NO-TRAP-BIT-SDAG-NEXT: amd_machine_kind = 1
+; NO-TRAP-BIT-SDAG-NEXT: amd_machine_version_major = 6
+; NO-TRAP-BIT-SDAG-NEXT: amd_machine_version_minor = 0
+; NO-TRAP-BIT-SDAG-NEXT: amd_machine_version_stepping = 0
+; NO-TRAP-BIT-SDAG-NEXT: kernel_code_entry_byte_offset = 256
+; NO-TRAP-BIT-SDAG-NEXT: kernel_code_prefetch_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: granulated_workitem_vgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT: granulated_wavefront_sgpr_count = 1
+; NO-TRAP-BIT-SDAG-NEXT: priority = 0
+; NO-TRAP-BIT-SDAG-NEXT: float_mode = 240
+; NO-TRAP-BIT-SDAG-NEXT: priv = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_dx10_clamp = 1
+; NO-TRAP-BIT-SDAG-NEXT: debug_mode = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_ieee_mode = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_wgp_mode = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_mem_ordered = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_fwd_progress = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; NO-TRAP-BIT-SDAG-NEXT: user_sgpr_count = 12
+; NO-TRAP-BIT-SDAG-NEXT: enable_trap_handler = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_x = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_y = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_z = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_info = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_vgpr_workitem_id = 2
+; NO-TRAP-BIT-SDAG-NEXT: enable_exception_msb = 0
+; NO-TRAP-BIT-SDAG-NEXT: granulated_lds_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_exception = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_buffer = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_dispatch_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_queue_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_dispatch_id = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_flat_scratch_init = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_wavefront_size32 = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_ordered_append_gds = 0
+; NO-TRAP-BIT-SDAG-NEXT: private_element_size = 1
+; NO-TRAP-BIT-SDAG-NEXT: is_ptr64 = 1
+; NO-TRAP-BIT-SDAG-NEXT: is_dynamic_callstack = 0
+; NO-TRAP-BIT-SDAG-NEXT: is_debug_enabled = 0
+; NO-TRAP-BIT-SDAG-NEXT: is_xnack_enabled = 0
+; NO-TRAP-BIT-SDAG-NEXT: workitem_private_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: workgroup_group_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: gds_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: kernarg_segment_byte_size = 24
+; NO-TRAP-BIT-SDAG-NEXT: workgroup_fbarrier_count = 0
+; NO-TRAP-BIT-SDAG-NEXT: wavefront_sgpr_count = 12
+; NO-TRAP-BIT-SDAG-NEXT: workitem_vgpr_count = 1
+; NO-TRAP-BIT-SDAG-NEXT: reserved_vgpr_first = 0
+; NO-TRAP-BIT-SDAG-NEXT: reserved_vgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT: reserved_sgpr_first = 0
+; NO-TRAP-BIT-SDAG-NEXT: reserved_sgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; NO-TRAP-BIT-SDAG-NEXT: debug_private_segment_buffer_sgpr = 0
+; NO-TRAP-BIT-SDAG-NEXT: kernarg_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT: group_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT: private_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT: wavefront_size = 6
+; NO-TRAP-BIT-SDAG-NEXT: call_convention = -1
+; NO-TRAP-BIT-SDAG-NEXT: runtime_loader_kernel_symbol = 0
+; NO-TRAP-BIT-SDAG-NEXT: .end_amd_kernel_code_t
+; NO-TRAP-BIT-SDAG-NEXT: ; %bb.0: ; %entry
+; NO-TRAP-BIT-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-TRAP-BIT-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; NO-TRAP-BIT-SDAG-NEXT: s_mov_b32 s2, -1
+; NO-TRAP-BIT-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; NO-TRAP-BIT-SDAG-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
+; NO-TRAP-BIT-SDAG-NEXT: s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0
+; NO-TRAP-BIT-SDAG-NEXT: s_cbranch_vccz .LBB3_2
+; NO-TRAP-BIT-SDAG-NEXT: ; %bb.1: ; %ret
+; NO-TRAP-BIT-SDAG-NEXT: v_mov_b32_e32 v0, 3
+; NO-TRAP-BIT-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-TRAP-BIT-SDAG-NEXT: s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-SDAG-NEXT: s_endpgm
+; NO-TRAP-BIT-SDAG-NEXT: .LBB3_2: ; %trap
+; NO-TRAP-BIT-SDAG-NEXT: s_endpgm
+;
+; NO-TRAP-BIT-GISEL-LABEL: non_entry_trap:
+; NO-TRAP-BIT-GISEL: .amd_kernel_code_t
+; NO-TRAP-BIT-GISEL-NEXT: amd_code_version_major = 1
+; NO-TRAP-BIT-GISEL-NEXT: amd_code_version_minor = 2
+; NO-TRAP-BIT-GISEL-NEXT: amd_machine_kind = 1
+; NO-TRAP-BIT-GISEL-NEXT: amd_machine_version_major = 6
+; NO-TRAP-BIT-GISEL-NEXT: amd_machine_version_minor = 0
+; NO-TRAP-BIT-GISEL-NEXT: amd_machine_version_stepping = 0
+; NO-TRAP-BIT-GISEL-NEXT: kernel_code_entry_byte_offset = 256
+; NO-TRAP-BIT-GISEL-NEXT: kernel_code_prefetch_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: granulated_workitem_vgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT: granulated_wavefront_sgpr_count = 1
+; NO-TRAP-BIT-GISEL-NEXT: priority = 0
+; NO-TRAP-BIT-GISEL-NEXT: float_mode = 240
+; NO-TRAP-BIT-GISEL-NEXT: priv = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_dx10_clamp = 1
+; NO-TRAP-BIT-GISEL-NEXT: debug_mode = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_ieee_mode = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_wgp_mode = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_mem_ordered = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_fwd_progress = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; NO-TRAP-BIT-GISEL-NEXT: user_sgpr_count = 12
+; NO-TRAP-BIT-GISEL-NEXT: enable_trap_handler = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_x = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_y = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_z = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_info = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_vgpr_workitem_id = 2
+; NO-TRAP-BIT-GISEL-NEXT: enable_exception_msb = 0
+; NO-TRAP-BIT-GISEL-NEXT: granulated_lds_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_exception = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_buffer = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_dispatch_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_queue_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_dispatch_id = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_flat_scratch_init = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_wavefront_size32 = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_ordered_append_gds = 0
+; NO-TRAP-BIT-GISEL-NEXT: private_element_size = 1
+; NO-TRAP-BIT-GISEL-NEXT: is_ptr64 = 1
+; NO-TRAP-BIT-GISEL-NEXT: is_dynamic_callstack = 0
+; NO-TRAP-BIT-GISEL-NEXT: is_debug_enabled = 0
+; NO-TRAP-BIT-GISEL-NEXT: is_xnack_enabled = 0
+; NO-TRAP-BIT-GISEL-NEXT: workitem_private_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: workgroup_group_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: gds_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: kernarg_segment_byte_size = 24
+; NO-TRAP-BIT-GISEL-NEXT: workgroup_fbarrier_count = 0
+; NO-TRAP-BIT-GISEL-NEXT: wavefront_sgpr_count = 10
+; NO-TRAP-BIT-GISEL-NEXT: workitem_vgpr_count = 1
+; NO-TRAP-BIT-GISEL-NEXT: reserved_vgpr_first = 0
+; NO-TRAP-BIT-GISEL-NEXT: reserved_vgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT: reserved_sgpr_first = 0
+; NO-TRAP-BIT-GISEL-NEXT: reserved_sgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; NO-TRAP-BIT-GISEL-NEXT: debug_private_segment_buffer_sgpr = 0
+; NO-TRAP-BIT-GISEL-NEXT: kernarg_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT: group_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT: private_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT: wavefront_size = 6
+; NO-TRAP-BIT-GISEL-NEXT: call_convention = -1
+; NO-TRAP-BIT-GISEL-NEXT: runtime_loader_kernel_symbol = 0
+; NO-TRAP-BIT-GISEL-NEXT: .end_amd_kernel_code_t
+; NO-TRAP-BIT-GISEL-NEXT: ; %bb.0: ; %entry
+; NO-TRAP-BIT-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-TRAP-BIT-GISEL-NEXT: s_mov_b32 s2, -1
+; NO-TRAP-BIT-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; NO-TRAP-BIT-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; NO-TRAP-BIT-GISEL-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
+; NO-TRAP-BIT-GISEL-NEXT: s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-GISEL-NEXT: v_readfirstlane_b32 s4, v0
+; NO-TRAP-BIT-GISEL-NEXT: s_cmp_eq_u32 s4, -1
+; NO-TRAP-BIT-GISEL-NEXT: s_cbranch_scc0 .LBB3_2
+; NO-TRAP-BIT-GISEL-NEXT: ; %bb.1: ; %ret
+; NO-TRAP-BIT-GISEL-NEXT: v_mov_b32_e32 v0, 3
+; NO-TRAP-BIT-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-TRAP-BIT-GISEL-NEXT: s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-GISEL-NEXT: s_endpgm
+; NO-TRAP-BIT-GISEL-NEXT: .LBB3_2: ; %trap
+; NO-TRAP-BIT-GISEL-NEXT: s_endpgm
+;
+; GCN-WARNING-SDAG-LABEL: non_entry_trap:
+; GCN-WARNING-SDAG: ; %bb.0: ; %entry
+; GCN-WARNING-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-WARNING-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; GCN-WARNING-SDAG-NEXT: s_mov_b32 s2, -1
+; GCN-WARNING-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-WARNING-SDAG-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
+; GCN-WARNING-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GCN-WARNING-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0
+; GCN-WARNING-SDAG-NEXT: s_cbranch_vccz .LBB3_2
+; GCN-WARNING-SDAG-NEXT: ; %bb.1: ; %ret
+; GCN-WARNING-SDAG-NEXT: v_mov_b32_e32 v0, 3
+; GCN-WARNING-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-WARNING-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GCN-WARNING-SDAG-NEXT: s_endpgm
+; GCN-WARNING-SDAG-NEXT: .LBB3_2: ; %trap
+; GCN-WARNING-SDAG-NEXT: s_endpgm
+;
+; GCN-WARNING-GISEL-LABEL: non_entry_trap:
+; GCN-WARNING-GISEL: ; %bb.0: ; %entry
+; GCN-WARNING-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-WARNING-GISEL-NEXT: s_mov_b32 s2, -1
+; GCN-WARNING-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; GCN-WARNING-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-WARNING-GISEL-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
+; GCN-WARNING-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GCN-WARNING-GISEL-NEXT: v_readfirstlane_b32 s4, v0
+; GCN-WARNING-GISEL-NEXT: s_cmp_eq_u32 s4, -1
+; GCN-WARNING-GISEL-NEXT: s_cbranch_scc0 .LBB3_2
+; GCN-WARNING-GISEL-NEXT: ; %bb.1: ; %ret
+; GCN-WARNING-GISEL-NEXT: v_mov_b32_e32 v0, 3
+; GCN-WARNING-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-WARNING-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GCN-WARNING-GISEL-NEXT: s_endpgm
+; GCN-WARNING-GISEL-NEXT: .LBB3_2: ; %trap
+; GCN-WARNING-GISEL-NEXT: s_endpgm
entry:
%tmp29 = load volatile i32, ptr addrspace(1) %arg0
%cmp = icmp eq i32 %tmp29, -1
@@ -119,14 +1896,612 @@ ret:
ret void
}
-; GCN-LABEL: {{^}}non_entry_trap_no_unreachable:
-; TRAP-BIT: enable_trap_handler = 1
-; NO-TRAP-BIT: enable_trap_handler = 0
-
-; HSA-TRAP: BB{{[0-9]_[0-9]+}}: ; %trap
-; HSA-TRAP: s_mov_b64 s[0:1], s[6:7]
-; HSA-TRAP-NEXT: s_trap 2
define amdgpu_kernel void @non_entry_trap_no_unreachable(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
+; HSA-TRAP-SDAG-LABEL: non_entry_trap_no_unreachable:
+; HSA-TRAP-SDAG: ; %bb.0: ; %entry
+; HSA-TRAP-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
+; HSA-TRAP-SDAG-NEXT: s_add_i32 s12, s12, s17
+; HSA-TRAP-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-TRAP-SDAG-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
+; HSA-TRAP-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v1, s3
+; HSA-TRAP-SDAG-NEXT: flat_load_dword v0, v[0:1] glc
+; HSA-TRAP-SDAG-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0
+; HSA-TRAP-SDAG-NEXT: s_cbranch_vccz .LBB4_2
+; HSA-TRAP-SDAG-NEXT: .LBB4_1: ; %ret
+; HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v2, 3
+; HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v1, s3
+; HSA-TRAP-SDAG-NEXT: flat_store_dword v[0:1], v2
+; HSA-TRAP-SDAG-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-SDAG-NEXT: s_endpgm
+; HSA-TRAP-SDAG-NEXT: .LBB4_2: ; %trap
+; HSA-TRAP-SDAG-NEXT: s_mov_b64 s[0:1], s[6:7]
+; HSA-TRAP-SDAG-NEXT: s_trap 2
+; HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v0, 0x4d2
+; HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; HSA-TRAP-SDAG-NEXT: s_mov_b32 m0, -1
+; HSA-TRAP-SDAG-NEXT: ds_write_b32 v1, v0
+; HSA-TRAP-SDAG-NEXT: s_branch .LBB4_1
+;
+; HSA-TRAP-GISEL-LABEL: non_entry_trap_no_unreachable:
+; HSA-TRAP-GISEL: ; %bb.0: ; %entry
+; HSA-TRAP-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; HSA-TRAP-GISEL-NEXT: s_add_i32 s12, s12, s17
+; HSA-TRAP-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-TRAP-GISEL-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
+; HSA-TRAP-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; HSA-TRAP-GISEL-NEXT: flat_load_dword v0, v[0:1] glc
+; HSA-TRAP-GISEL-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GISEL-NEXT: v_readfirstlane_b32 s0, v0
+; HSA-TRAP-GISEL-NEXT: s_cmp_eq_u32 s0, -1
+; HSA-TRAP-GISEL-NEXT: s_cbranch_scc0 .LBB4_2
+; HSA-TRAP-GISEL-NEXT: .LBB4_1: ; %ret
+; HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v2, 3
+; HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; HSA-TRAP-GISEL-NEXT: flat_store_dword v[0:1], v2
+; HSA-TRAP-GISEL-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GISEL-NEXT: s_endpgm
+; HSA-TRAP-GISEL-NEXT: .LBB4_2: ; %trap
+; HSA-TRAP-GISEL-NEXT: s_mov_b64 s[0:1], s[6:7]
+; HSA-TRAP-GISEL-NEXT: s_trap 2
+; HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v0, 0x4d2
+; HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; HSA-TRAP-GISEL-NEXT: s_mov_b32 m0, -1
+; HSA-TRAP-GISEL-NEXT: ds_write_b32 v1, v0
+; HSA-TRAP-GISEL-NEXT: s_branch .LBB4_1
+;
+; NO-HSA-TRAP-SDAG-LABEL: non_entry_trap_no_unreachable:
+; NO-HSA-TRAP-SDAG: ; %bb.0: ; %entry
+; NO-HSA-TRAP-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
+; NO-HSA-TRAP-SDAG-NEXT: s_add_i32 s12, s12, s17
+; NO-HSA-TRAP-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; NO-HSA-TRAP-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-HSA-TRAP-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; NO-HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-SDAG-NEXT: flat_load_dword v0, v[0:1] glc
+; NO-HSA-TRAP-SDAG-NEXT: s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0
+; NO-HSA-TRAP-SDAG-NEXT: s_cbranch_vccz .LBB4_3
+; NO-HSA-TRAP-SDAG-NEXT: .LBB4_1: ; %ret
+; NO-HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v2, 3
+; NO-HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-SDAG-NEXT: flat_store_dword v[0:1], v2
+; NO-HSA-TRAP-SDAG-NEXT: s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-SDAG-NEXT: .LBB4_2:
+; NO-HSA-TRAP-SDAG-NEXT: s_endpgm
+; NO-HSA-TRAP-SDAG-NEXT: .LBB4_3: ; %trap
+; NO-HSA-TRAP-SDAG-NEXT: s_mov_b32 m0, -1
+; NO-HSA-TRAP-SDAG-NEXT: s_cbranch_execnz .LBB4_2
+; NO-HSA-TRAP-SDAG-NEXT: ; %bb.4: ; %trap
+; NO-HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v0, 0x4d2
+; NO-HSA-TRAP-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; NO-HSA-TRAP-SDAG-NEXT: ds_write_b32 v1, v0
+; NO-HSA-TRAP-SDAG-NEXT: s_branch .LBB4_1
+;
+; NO-HSA-TRAP-GISEL-LABEL: non_entry_trap_no_unreachable:
+; NO-HSA-TRAP-GISEL: ; %bb.0: ; %entry
+; NO-HSA-TRAP-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; NO-HSA-TRAP-GISEL-NEXT: s_add_i32 s12, s12, s17
+; NO-HSA-TRAP-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; NO-HSA-TRAP-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-HSA-TRAP-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; NO-HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-GISEL-NEXT: flat_load_dword v0, v[0:1] glc
+; NO-HSA-TRAP-GISEL-NEXT: s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-GISEL-NEXT: v_readfirstlane_b32 s2, v0
+; NO-HSA-TRAP-GISEL-NEXT: s_cmp_eq_u32 s2, -1
+; NO-HSA-TRAP-GISEL-NEXT: s_cbranch_scc0 .LBB4_3
+; NO-HSA-TRAP-GISEL-NEXT: .LBB4_1: ; %ret
+; NO-HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v2, 3
+; NO-HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-GISEL-NEXT: flat_store_dword v[0:1], v2
+; NO-HSA-TRAP-GISEL-NEXT: s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-GISEL-NEXT: .LBB4_2:
+; NO-HSA-TRAP-GISEL-NEXT: s_endpgm
+; NO-HSA-TRAP-GISEL-NEXT: .LBB4_3: ; %trap
+; NO-HSA-TRAP-GISEL-NEXT: s_cbranch_execnz .LBB4_2
+; NO-HSA-TRAP-GISEL-NEXT: ; %bb.4: ; %trap
+; NO-HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v0, 0x4d2
+; NO-HSA-TRAP-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; NO-HSA-TRAP-GISEL-NEXT: s_mov_b32 m0, -1
+; NO-HSA-TRAP-GISEL-NEXT: ds_write_b32 v1, v0
+; NO-HSA-TRAP-GISEL-NEXT: s_branch .LBB4_1
+;
+; HSA-WARNING-SDAG-LABEL: non_entry_trap_no_unreachable:
+; HSA-WARNING-SDAG: ; %bb.0: ; %entry
+; HSA-WARNING-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
+; HSA-WARNING-SDAG-NEXT: s_add_i32 s12, s12, s17
+; HSA-WARNING-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-WARNING-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-WARNING-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-WARNING-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; HSA-WARNING-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; HSA-WARNING-SDAG-NEXT: flat_load_dword v0, v[0:1] glc
+; HSA-WARNING-SDAG-NEXT: s_waitcnt vmcnt(0)
+; HSA-WARNING-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0
+; HSA-WARNING-SDAG-NEXT: s_cbranch_vccz .LBB4_3
+; HSA-WARNING-SDAG-NEXT: .LBB4_1: ; %ret
+; HSA-WARNING-SDAG-NEXT: v_mov_b32_e32 v2, 3
+; HSA-WARNING-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; HSA-WARNING-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; HSA-WARNING-SDAG-NEXT: flat_store_dword v[0:1], v2
+; HSA-WARNING-SDAG-NEXT: s_waitcnt vmcnt(0)
+; HSA-WARNING-SDAG-NEXT: .LBB4_2:
+; HSA-WARNING-SDAG-NEXT: s_endpgm
+; HSA-WARNING-SDAG-NEXT: .LBB4_3: ; %trap
+; HSA-WARNING-SDAG-NEXT: s_mov_b32 m0, -1
+; HSA-WARNING-SDAG-NEXT: s_cbranch_execnz .LBB4_2
+; HSA-WARNING-SDAG-NEXT: ; %bb.4: ; %trap
+; HSA-WARNING-SDAG-NEXT: v_mov_b32_e32 v0, 0x4d2
+; HSA-WARNING-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; HSA-WARNING-SDAG-NEXT: ds_write_b32 v1, v0
+; HSA-WARNING-SDAG-NEXT: s_branch .LBB4_1
+;
+; HSA-WARNING-GISEL-LABEL: non_entry_trap_no_unreachable:
+; HSA-WARNING-GISEL: ; %bb.0: ; %entry
+; HSA-WARNING-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; HSA-WARNING-GISEL-NEXT: s_add_i32 s12, s12, s17
+; HSA-WARNING-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-WARNING-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-WARNING-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-WARNING-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; HSA-WARNING-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; HSA-WARNING-GISEL-NEXT: flat_load_dword v0, v[0:1] glc
+; HSA-WARNING-GISEL-NEXT: s_waitcnt vmcnt(0)
+; HSA-WARNING-GISEL-NEXT: v_readfirstlane_b32 s2, v0
+; HSA-WARNING-GISEL-NEXT: s_cmp_eq_u32 s2, -1
+; HSA-WARNING-GISEL-NEXT: s_cbranch_scc0 .LBB4_3
+; HSA-WARNING-GISEL-NEXT: .LBB4_1: ; %ret
+; HSA-WARNING-GISEL-NEXT: v_mov_b32_e32 v2, 3
+; HSA-WARNING-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; HSA-WARNING-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; HSA-WARNING-GISEL-NEXT: flat_store_dword v[0:1], v2
+; HSA-WARNING-GISEL-NEXT: s_waitcnt vmcnt(0)
+; HSA-WARNING-GISEL-NEXT: .LBB4_2:
+; HSA-WARNING-GISEL-NEXT: s_endpgm
+; HSA-WARNING-GISEL-NEXT: .LBB4_3: ; %trap
+; HSA-WARNING-GISEL-NEXT: s_cbranch_execnz .LBB4_2
+; HSA-WARNING-GISEL-NEXT: ; %bb.4: ; %trap
+; HSA-WARNING-GISEL-NEXT: v_mov_b32_e32 v0, 0x4d2
+; HSA-WARNING-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; HSA-WARNING-GISEL-NEXT: s_mov_b32 m0, -1
+; HSA-WARNING-GISEL-NEXT: ds_write_b32 v1, v0
+; HSA-WARNING-GISEL-NEXT: s_branch .LBB4_1
+;
+; TRAP-BIT-SDAG-LABEL: non_entry_trap_no_unreachable:
+; TRAP-BIT-SDAG: .amd_kernel_code_t
+; TRAP-BIT-SDAG-NEXT: amd_code_version_major = 1
+; TRAP-BIT-SDAG-NEXT: amd_code_version_minor = 2
+; TRAP-BIT-SDAG-NEXT: amd_machine_kind = 1
+; TRAP-BIT-SDAG-NEXT: amd_machine_version_major = 6
+; TRAP-BIT-SDAG-NEXT: amd_machine_version_minor = 0
+; TRAP-BIT-SDAG-NEXT: amd_machine_version_stepping = 0
+; TRAP-BIT-SDAG-NEXT: kernel_code_entry_byte_offset = 256
+; TRAP-BIT-SDAG-NEXT: kernel_code_prefetch_byte_size = 0
+; TRAP-BIT-SDAG-NEXT: granulated_workitem_vgpr_count = 0
+; TRAP-BIT-SDAG-NEXT: granulated_wavefront_sgpr_count = 1
+; TRAP-BIT-SDAG-NEXT: priority = 0
+; TRAP-BIT-SDAG-NEXT: float_mode = 240
+; TRAP-BIT-SDAG-NEXT: priv = 0
+; TRAP-BIT-SDAG-NEXT: enable_dx10_clamp = 1
+; TRAP-BIT-SDAG-NEXT: debug_mode = 0
+; TRAP-BIT-SDAG-NEXT: enable_ieee_mode = 1
+; TRAP-BIT-SDAG-NEXT: enable_wgp_mode = 0
+; TRAP-BIT-SDAG-NEXT: enable_mem_ordered = 0
+; TRAP-BIT-SDAG-NEXT: enable_fwd_progress = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; TRAP-BIT-SDAG-NEXT: user_sgpr_count = 12
+; TRAP-BIT-SDAG-NEXT: enable_trap_handler = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_x = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_y = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_z = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_info = 0
+; TRAP-BIT-SDAG-NEXT: enable_vgpr_workitem_id = 2
+; TRAP-BIT-SDAG-NEXT: enable_exception_msb = 0
+; TRAP-BIT-SDAG-NEXT: granulated_lds_size = 0
+; TRAP-BIT-SDAG-NEXT: enable_exception = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_buffer = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_dispatch_ptr = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_queue_ptr = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_dispatch_id = 1
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_flat_scratch_init = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_size = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; TRAP-BIT-SDAG-NEXT: enable_wavefront_size32 = 0
+; TRAP-BIT-SDAG-NEXT: enable_ordered_append_gds = 0
+; TRAP-BIT-SDAG-NEXT: private_element_size = 1
+; TRAP-BIT-SDAG-NEXT: is_ptr64 = 1
+; TRAP-BIT-SDAG-NEXT: is_dynamic_callstack = 0
+; TRAP-BIT-SDAG-NEXT: is_debug_enabled = 0
+; TRAP-BIT-SDAG-NEXT: is_xnack_enabled = 0
+; TRAP-BIT-SDAG-NEXT: workitem_private_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT: workgroup_group_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT: gds_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT: kernarg_segment_byte_size = 24
+; TRAP-BIT-SDAG-NEXT: workgroup_fbarrier_count = 0
+; TRAP-BIT-SDAG-NEXT: wavefront_sgpr_count = 12
+; TRAP-BIT-SDAG-NEXT: workitem_vgpr_count = 2
+; TRAP-BIT-SDAG-NEXT: reserved_vgpr_first = 0
+; TRAP-BIT-SDAG-NEXT: reserved_vgpr_count = 0
+; TRAP-BIT-SDAG-NEXT: reserved_sgpr_first = 0
+; TRAP-BIT-SDAG-NEXT: reserved_sgpr_count = 0
+; TRAP-BIT-SDAG-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; TRAP-BIT-SDAG-NEXT: debug_private_segment_buffer_sgpr = 0
+; TRAP-BIT-SDAG-NEXT: kernarg_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT: group_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT: private_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT: wavefront_size = 6
+; TRAP-BIT-SDAG-NEXT: call_convention = -1
+; TRAP-BIT-SDAG-NEXT: runtime_loader_kernel_symbol = 0
+; TRAP-BIT-SDAG-NEXT: .end_amd_kernel_code_t
+; TRAP-BIT-SDAG-NEXT: ; %bb.0: ; %entry
+; TRAP-BIT-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; TRAP-BIT-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; TRAP-BIT-SDAG-NEXT: s_mov_b32 s2, -1
+; TRAP-BIT-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; TRAP-BIT-SDAG-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
+; TRAP-BIT-SDAG-NEXT: s_waitcnt vmcnt(0)
+; TRAP-BIT-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0
+; TRAP-BIT-SDAG-NEXT: s_cbranch_vccz .LBB4_3
+; TRAP-BIT-SDAG-NEXT: .LBB4_1: ; %ret
+; TRAP-BIT-SDAG-NEXT: v_mov_b32_e32 v0, 3
+; TRAP-BIT-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; TRAP-BIT-SDAG-NEXT: s_waitcnt vmcnt(0)
+; TRAP-BIT-SDAG-NEXT: .LBB4_2:
+; TRAP-BIT-SDAG-NEXT: s_endpgm
+; TRAP-BIT-SDAG-NEXT: .LBB4_3: ; %trap
+; TRAP-BIT-SDAG-NEXT: s_mov_b32 m0, -1
+; TRAP-BIT-SDAG-NEXT: s_cbranch_execnz .LBB4_2
+; TRAP-BIT-SDAG-NEXT: ; %bb.4: ; %trap
+; TRAP-BIT-SDAG-NEXT: v_mov_b32_e32 v0, 0x4d2
+; TRAP-BIT-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; TRAP-BIT-SDAG-NEXT: ds_write_b32 v1, v0
+; TRAP-BIT-SDAG-NEXT: s_branch .LBB4_1
+;
+; TRAP-BIT-GISEL-LABEL: non_entry_trap_no_unreachable:
+; TRAP-BIT-GISEL: .amd_kernel_code_t
+; TRAP-BIT-GISEL-NEXT: amd_code_version_major = 1
+; TRAP-BIT-GISEL-NEXT: amd_code_version_minor = 2
+; TRAP-BIT-GISEL-NEXT: amd_machine_kind = 1
+; TRAP-BIT-GISEL-NEXT: amd_machine_version_major = 6
+; TRAP-BIT-GISEL-NEXT: amd_machine_version_minor = 0
+; TRAP-BIT-GISEL-NEXT: amd_machine_version_stepping = 0
+; TRAP-BIT-GISEL-NEXT: kernel_code_entry_byte_offset = 256
+; TRAP-BIT-GISEL-NEXT: kernel_code_prefetch_byte_size = 0
+; TRAP-BIT-GISEL-NEXT: granulated_workitem_vgpr_count = 0
+; TRAP-BIT-GISEL-NEXT: granulated_wavefront_sgpr_count = 1
+; TRAP-BIT-GISEL-NEXT: priority = 0
+; TRAP-BIT-GISEL-NEXT: float_mode = 240
+; TRAP-BIT-GISEL-NEXT: priv = 0
+; TRAP-BIT-GISEL-NEXT: enable_dx10_clamp = 1
+; TRAP-BIT-GISEL-NEXT: debug_mode = 0
+; TRAP-BIT-GISEL-NEXT: enable_ieee_mode = 1
+; TRAP-BIT-GISEL-NEXT: enable_wgp_mode = 0
+; TRAP-BIT-GISEL-NEXT: enable_mem_ordered = 0
+; TRAP-BIT-GISEL-NEXT: enable_fwd_progress = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; TRAP-BIT-GISEL-NEXT: user_sgpr_count = 12
+; TRAP-BIT-GISEL-NEXT: enable_trap_handler = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_x = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_y = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_z = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_info = 0
+; TRAP-BIT-GISEL-NEXT: enable_vgpr_workitem_id = 2
+; TRAP-BIT-GISEL-NEXT: enable_exception_msb = 0
+; TRAP-BIT-GISEL-NEXT: granulated_lds_size = 0
+; TRAP-BIT-GISEL-NEXT: enable_exception = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_buffer = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_dispatch_ptr = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_queue_ptr = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_dispatch_id = 1
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_flat_scratch_init = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_size = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; TRAP-BIT-GISEL-NEXT: enable_wavefront_size32 = 0
+; TRAP-BIT-GISEL-NEXT: enable_ordered_append_gds = 0
+; TRAP-BIT-GISEL-NEXT: private_element_size = 1
+; TRAP-BIT-GISEL-NEXT: is_ptr64 = 1
+; TRAP-BIT-GISEL-NEXT: is_dynamic_callstack = 0
+; TRAP-BIT-GISEL-NEXT: is_debug_enabled = 0
+; TRAP-BIT-GISEL-NEXT: is_xnack_enabled = 0
+; TRAP-BIT-GISEL-NEXT: workitem_private_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT: workgroup_group_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT: gds_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT: kernarg_segment_byte_size = 24
+; TRAP-BIT-GISEL-NEXT: workgroup_fbarrier_count = 0
+; TRAP-BIT-GISEL-NEXT: wavefront_sgpr_count = 10
+; TRAP-BIT-GISEL-NEXT: workitem_vgpr_count = 2
+; TRAP-BIT-GISEL-NEXT: reserved_vgpr_first = 0
+; TRAP-BIT-GISEL-NEXT: reserved_vgpr_count = 0
+; TRAP-BIT-GISEL-NEXT: reserved_sgpr_first = 0
+; TRAP-BIT-GISEL-NEXT: reserved_sgpr_count = 0
+; TRAP-BIT-GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; TRAP-BIT-GISEL-NEXT: debug_private_segment_buffer_sgpr = 0
+; TRAP-BIT-GISEL-NEXT: kernarg_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT: group_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT: private_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT: wavefront_size = 6
+; TRAP-BIT-GISEL-NEXT: call_convention = -1
+; TRAP-BIT-GISEL-NEXT: runtime_loader_kernel_symbol = 0
+; TRAP-BIT-GISEL-NEXT: .end_amd_kernel_code_t
+; TRAP-BIT-GISEL-NEXT: ; %bb.0: ; %entry
+; TRAP-BIT-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; TRAP-BIT-GISEL-NEXT: s_mov_b32 s2, -1
+; TRAP-BIT-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; TRAP-BIT-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; TRAP-BIT-GISEL-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
+; TRAP-BIT-GISEL-NEXT: s_waitcnt vmcnt(0)
+; TRAP-BIT-GISEL-NEXT: v_readfirstlane_b32 s4, v0
+; TRAP-BIT-GISEL-NEXT: s_cmp_eq_u32 s4, -1
+; TRAP-BIT-GISEL-NEXT: s_cbranch_scc0 .LBB4_3
+; TRAP-BIT-GISEL-NEXT: .LBB4_1: ; %ret
+; TRAP-BIT-GISEL-NEXT: v_mov_b32_e32 v0, 3
+; TRAP-BIT-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; TRAP-BIT-GISEL-NEXT: s_waitcnt vmcnt(0)
+; TRAP-BIT-GISEL-NEXT: .LBB4_2:
+; TRAP-BIT-GISEL-NEXT: s_endpgm
+; TRAP-BIT-GISEL-NEXT: .LBB4_3: ; %trap
+; TRAP-BIT-GISEL-NEXT: s_cbranch_execnz .LBB4_2
+; TRAP-BIT-GISEL-NEXT: ; %bb.4: ; %trap
+; TRAP-BIT-GISEL-NEXT: v_mov_b32_e32 v0, 0x4d2
+; TRAP-BIT-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; TRAP-BIT-GISEL-NEXT: s_mov_b32 m0, -1
+; TRAP-BIT-GISEL-NEXT: ds_write_b32 v1, v0
+; TRAP-BIT-GISEL-NEXT: s_branch .LBB4_1
+;
+; NO-TRAP-BIT-SDAG-LABEL: non_entry_trap_no_unreachable:
+; NO-TRAP-BIT-SDAG: .amd_kernel_code_t
+; NO-TRAP-BIT-SDAG-NEXT: amd_code_version_major = 1
+; NO-TRAP-BIT-SDAG-NEXT: amd_code_version_minor = 2
+; NO-TRAP-BIT-SDAG-NEXT: amd_machine_kind = 1
+; NO-TRAP-BIT-SDAG-NEXT: amd_machine_version_major = 6
+; NO-TRAP-BIT-SDAG-NEXT: amd_machine_version_minor = 0
+; NO-TRAP-BIT-SDAG-NEXT: amd_machine_version_stepping = 0
+; NO-TRAP-BIT-SDAG-NEXT: kernel_code_entry_byte_offset = 256
+; NO-TRAP-BIT-SDAG-NEXT: kernel_code_prefetch_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: granulated_workitem_vgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT: granulated_wavefront_sgpr_count = 1
+; NO-TRAP-BIT-SDAG-NEXT: priority = 0
+; NO-TRAP-BIT-SDAG-NEXT: float_mode = 240
+; NO-TRAP-BIT-SDAG-NEXT: priv = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_dx10_clamp = 1
+; NO-TRAP-BIT-SDAG-NEXT: debug_mode = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_ieee_mode = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_wgp_mode = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_mem_ordered = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_fwd_progress = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; NO-TRAP-BIT-SDAG-NEXT: user_sgpr_count = 12
+; NO-TRAP-BIT-SDAG-NEXT: enable_trap_handler = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_x = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_y = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_id_z = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_workgroup_info = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_vgpr_workitem_id = 2
+; NO-TRAP-BIT-SDAG-NEXT: enable_exception_msb = 0
+; NO-TRAP-BIT-SDAG-NEXT: granulated_lds_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_exception = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_buffer = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_dispatch_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_queue_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_dispatch_id = 1
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_flat_scratch_init = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_private_segment_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_wavefront_size32 = 0
+; NO-TRAP-BIT-SDAG-NEXT: enable_ordered_append_gds = 0
+; NO-TRAP-BIT-SDAG-NEXT: private_element_size = 1
+; NO-TRAP-BIT-SDAG-NEXT: is_ptr64 = 1
+; NO-TRAP-BIT-SDAG-NEXT: is_dynamic_callstack = 0
+; NO-TRAP-BIT-SDAG-NEXT: is_debug_enabled = 0
+; NO-TRAP-BIT-SDAG-NEXT: is_xnack_enabled = 0
+; NO-TRAP-BIT-SDAG-NEXT: workitem_private_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: workgroup_group_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: gds_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT: kernarg_segment_byte_size = 24
+; NO-TRAP-BIT-SDAG-NEXT: workgroup_fbarrier_count = 0
+; NO-TRAP-BIT-SDAG-NEXT: wavefront_sgpr_count = 12
+; NO-TRAP-BIT-SDAG-NEXT: workitem_vgpr_count = 2
+; NO-TRAP-BIT-SDAG-NEXT: reserved_vgpr_first = 0
+; NO-TRAP-BIT-SDAG-NEXT: reserved_vgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT: reserved_sgpr_first = 0
+; NO-TRAP-BIT-SDAG-NEXT: reserved_sgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; NO-TRAP-BIT-SDAG-NEXT: debug_private_segment_buffer_sgpr = 0
+; NO-TRAP-BIT-SDAG-NEXT: kernarg_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT: group_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT: private_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT: wavefront_size = 6
+; NO-TRAP-BIT-SDAG-NEXT: call_convention = -1
+; NO-TRAP-BIT-SDAG-NEXT: runtime_loader_kernel_symbol = 0
+; NO-TRAP-BIT-SDAG-NEXT: .end_amd_kernel_code_t
+; NO-TRAP-BIT-SDAG-NEXT: ; %bb.0: ; %entry
+; NO-TRAP-BIT-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-TRAP-BIT-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; NO-TRAP-BIT-SDAG-NEXT: s_mov_b32 s2, -1
+; NO-TRAP-BIT-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; NO-TRAP-BIT-SDAG-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
+; NO-TRAP-BIT-SDAG-NEXT: s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0
+; NO-TRAP-BIT-SDAG-NEXT: s_cbranch_vccz .LBB4_3
+; NO-TRAP-BIT-SDAG-NEXT: .LBB4_1: ; %ret
+; NO-TRAP-BIT-SDAG-NEXT: v_mov_b32_e32 v0, 3
+; NO-TRAP-BIT-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-TRAP-BIT-SDAG-NEXT: s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-SDAG-NEXT: .LBB4_2:
+; NO-TRAP-BIT-SDAG-NEXT: s_endpgm
+; NO-TRAP-BIT-SDAG-NEXT: .LBB4_3: ; %trap
+; NO-TRAP-BIT-SDAG-NEXT: s_mov_b32 m0, -1
+; NO-TRAP-BIT-SDAG-NEXT: s_cbranch_execnz .LBB4_2
+; NO-TRAP-BIT-SDAG-NEXT: ; %bb.4: ; %trap
+; NO-TRAP-BIT-SDAG-NEXT: v_mov_b32_e32 v0, 0x4d2
+; NO-TRAP-BIT-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; NO-TRAP-BIT-SDAG-NEXT: ds_write_b32 v1, v0
+; NO-TRAP-BIT-SDAG-NEXT: s_branch .LBB4_1
+;
+; NO-TRAP-BIT-GISEL-LABEL: non_entry_trap_no_unreachable:
+; NO-TRAP-BIT-GISEL: .amd_kernel_code_t
+; NO-TRAP-BIT-GISEL-NEXT: amd_code_version_major = 1
+; NO-TRAP-BIT-GISEL-NEXT: amd_code_version_minor = 2
+; NO-TRAP-BIT-GISEL-NEXT: amd_machine_kind = 1
+; NO-TRAP-BIT-GISEL-NEXT: amd_machine_version_major = 6
+; NO-TRAP-BIT-GISEL-NEXT: amd_machine_version_minor = 0
+; NO-TRAP-BIT-GISEL-NEXT: amd_machine_version_stepping = 0
+; NO-TRAP-BIT-GISEL-NEXT: kernel_code_entry_byte_offset = 256
+; NO-TRAP-BIT-GISEL-NEXT: kernel_code_prefetch_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: granulated_workitem_vgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT: granulated_wavefront_sgpr_count = 1
+; NO-TRAP-BIT-GISEL-NEXT: priority = 0
+; NO-TRAP-BIT-GISEL-NEXT: float_mode = 240
+; NO-TRAP-BIT-GISEL-NEXT: priv = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_dx10_clamp = 1
+; NO-TRAP-BIT-GISEL-NEXT: debug_mode = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_ieee_mode = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_wgp_mode = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_mem_ordered = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_fwd_progress = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
+; NO-TRAP-BIT-GISEL-NEXT: user_sgpr_count = 12
+; NO-TRAP-BIT-GISEL-NEXT: enable_trap_handler = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_x = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_y = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_id_z = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_workgroup_info = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_vgpr_workitem_id = 2
+; NO-TRAP-BIT-GISEL-NEXT: enable_exception_msb = 0
+; NO-TRAP-BIT-GISEL-NEXT: granulated_lds_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_exception = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_buffer = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_dispatch_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_queue_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_dispatch_id = 1
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_flat_scratch_init = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_private_segment_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_wavefront_size32 = 0
+; NO-TRAP-BIT-GISEL-NEXT: enable_ordered_append_gds = 0
+; NO-TRAP-BIT-GISEL-NEXT: private_element_size = 1
+; NO-TRAP-BIT-GISEL-NEXT: is_ptr64 = 1
+; NO-TRAP-BIT-GISEL-NEXT: is_dynamic_callstack = 0
+; NO-TRAP-BIT-GISEL-NEXT: is_debug_enabled = 0
+; NO-TRAP-BIT-GISEL-NEXT: is_xnack_enabled = 0
+; NO-TRAP-BIT-GISEL-NEXT: workitem_private_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: workgroup_group_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: gds_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT: kernarg_segment_byte_size = 24
+; NO-TRAP-BIT-GISEL-NEXT: workgroup_fbarrier_count = 0
+; NO-TRAP-BIT-GISEL-NEXT: wavefront_sgpr_count = 10
+; NO-TRAP-BIT-GISEL-NEXT: workitem_vgpr_count = 2
+; NO-TRAP-BIT-GISEL-NEXT: reserved_vgpr_first = 0
+; NO-TRAP-BIT-GISEL-NEXT: reserved_vgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT: reserved_sgpr_first = 0
+; NO-TRAP-BIT-GISEL-NEXT: reserved_sgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; NO-TRAP-BIT-GISEL-NEXT: debug_private_segment_buffer_sgpr = 0
+; NO-TRAP-BIT-GISEL-NEXT: kernarg_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT: group_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT: private_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT: wavefront_size = 6
+; NO-TRAP-BIT-GISEL-NEXT: call_convention = -1
+; NO-TRAP-BIT-GISEL-NEXT: runtime_loader_kernel_symbol = 0
+; NO-TRAP-BIT-GISEL-NEXT: .end_amd_kernel_code_t
+; NO-TRAP-BIT-GISEL-NEXT: ; %bb.0: ; %entry
+; NO-TRAP-BIT-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-TRAP-BIT-GISEL-NEXT: s_mov_b32 s2, -1
+; NO-TRAP-BIT-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; NO-TRAP-BIT-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; NO-TRAP-BIT-GISEL-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
+; NO-TRAP-BIT-GISEL-NEXT: s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-GISEL-NEXT: v_readfirstlane_b32 s4, v0
+; NO-TRAP-BIT-GISEL-NEXT: s_cmp_eq_u32 s4, -1
+; NO-TRAP-BIT-GISEL-NEXT: s_cbranch_scc0 .LBB4_3
+; NO-TRAP-BIT-GISEL-NEXT: .LBB4_1: ; %ret
+; NO-TRAP-BIT-GISEL-NEXT: v_mov_b32_e32 v0, 3
+; NO-TRAP-BIT-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-TRAP-BIT-GISEL-NEXT: s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-GISEL-NEXT: .LBB4_2:
+; NO-TRAP-BIT-GISEL-NEXT: s_endpgm
+; NO-TRAP-BIT-GISEL-NEXT: .LBB4_3: ; %trap
+; NO-TRAP-BIT-GISEL-NEXT: s_cbranch_execnz .LBB4_2
+; NO-TRAP-BIT-GISEL-NEXT: ; %bb.4: ; %trap
+; NO-TRAP-BIT-GISEL-NEXT: v_mov_b32_e32 v0, 0x4d2
+; NO-TRAP-BIT-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; NO-TRAP-BIT-GISEL-NEXT: s_mov_b32 m0, -1
+; NO-TRAP-BIT-GISEL-NEXT: ds_write_b32 v1, v0
+; NO-TRAP-BIT-GISEL-NEXT: s_branch .LBB4_1
+;
+; GCN-WARNING-SDAG-LABEL: non_entry_trap_no_unreachable:
+; GCN-WARNING-SDAG: ; %bb.0: ; %entry
+; GCN-WARNING-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-WARNING-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; GCN-WARNING-SDAG-NEXT: s_mov_b32 s2, -1
+; GCN-WARNING-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-WARNING-SDAG-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
+; GCN-WARNING-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GCN-WARNING-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0
+; GCN-WARNING-SDAG-NEXT: s_cbranch_vccz .LBB4_3
+; GCN-WARNING-SDAG-NEXT: .LBB4_1: ; %ret
+; GCN-WARNING-SDAG-NEXT: v_mov_b32_e32 v0, 3
+; GCN-WARNING-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-WARNING-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GCN-WARNING-SDAG-NEXT: .LBB4_2:
+; GCN-WARNING-SDAG-NEXT: s_endpgm
+; GCN-WARNING-SDAG-NEXT: .LBB4_3: ; %trap
+; GCN-WARNING-SDAG-NEXT: s_mov_b32 m0, -1
+; GCN-WARNING-SDAG-NEXT: s_cbranch_execnz .LBB4_2
+; GCN-WARNING-SDAG-NEXT: ; %bb.4: ; %trap
+; GCN-WARNING-SDAG-NEXT: v_mov_b32_e32 v0, 0x4d2
+; GCN-WARNING-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GCN-WARNING-SDAG-NEXT: ds_write_b32 v1, v0
+; GCN-WARNING-SDAG-NEXT: s_branch .LBB4_1
+;
+; GCN-WARNING-GISEL-LABEL: non_entry_trap_no_unreachable:
+; GCN-WARNING-GISEL: ; %bb.0: ; %entry
+; GCN-WARNING-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-WARNING-GISEL-NEXT: s_mov_b32 s2, -1
+; GCN-WARNING-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; GCN-WARNING-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-WARNING-GISEL-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
+; GCN-WARNING-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GCN-WARNING-GISEL-NEXT: v_readfirstlane_b32 s4, v0
+; GCN-WARNING-GISEL-NEXT: s_cmp_eq_u32 s4, -1
+; GCN-WARNING-GISEL-NEXT: s_cbranch_scc0 .LBB4_3
+; GCN-WARNING-GISEL-NEXT: .LBB4_1: ; %ret
+; GCN-WARNING-GISEL-NEXT: v_mov_b32_e32 v0, 3
+; GCN-WARNING-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-WARNING-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GCN-WARNING-GISEL-NEXT: .LBB4_2:
+; GCN-WARNING-GISEL-NEXT: s_endpgm
+; GCN-WARNING-GISEL-NEXT: .LBB4_3: ; %trap
+; GCN-WARNING-GISEL-NEXT: s_cbranch_execnz .LBB4_2
+; GCN-WARNING-GISEL-NEXT: ; %bb.4: ; %trap
+; GCN-WARNING-GISEL-NEXT: v_mov_b32_e32 v0, 0x4d2
+; GCN-WARNING-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GCN-WARNING-GISEL-NEXT: s_mov_b32 m0, -1
+; GCN-WARNING-GISEL-NEXT: ds_write_b32 v1, v0
+; GCN-WARNING-GISEL-NEXT: s_branch .LBB4_1
entry:
%tmp29 = load volatile i32, ptr addrspace(1) %arg0
%cmp = icmp eq i32 %tmp29, -1
@@ -147,3 +2522,13 @@ attributes #1 = { nounwind }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN: {{.*}}
+; MESA-TRAP-GISEL: {{.*}}
+; MESA-TRAP-SDAG: {{.*}}
+; MESA-WARNING-GISEL: {{.*}}
+; MESA-WARNING-SDAG: {{.*}}
+; NO-MESA-TRAP-GISEL: {{.*}}
+; NO-MESA-TRAP-SDAG: {{.*}}
+; NOMESA-TRAP-GISEL: {{.*}}
+; NOMESA-TRAP-SDAG: {{.*}}
More information about the llvm-commits
mailing list