[llvm] [AMDGPU][GlobalISel] Add register bank legalization for G_FADD (PR #163407)

Abhinav Garg via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 30 03:13:24 PDT 2025


https://github.com/abhigargrepo updated https://github.com/llvm/llvm-project/pull/163407

>From 2529613139078311d312eeedb6af3cef4379d517 Mon Sep 17 00:00:00 2001
From: Abhinav Garg <abhigarg at amd.com>
Date: Tue, 14 Oct 2025 02:56:44 -0700
Subject: [PATCH 1/2] Add register bank legalization for G_FADD

---
 .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp    |   3 +
 .../AMDGPU/AMDGPURegBankLegalizeRules.cpp     |  11 +-
 .../AMDGPU/AMDGPURegBankLegalizeRules.h       |   4 +
 llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll   | 246 ++++++++++++++++++
 4 files changed, 263 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index 540756653dd22..198ee6b73b0b5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -837,6 +837,7 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
     return LLT::scalar(32);
   case Sgpr64:
   case Vgpr64:
+  case UniInVgprS64:
     return LLT::scalar(64);
   case Sgpr128:
   case Vgpr128:
@@ -960,6 +961,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
   case UniInVcc:
   case UniInVgprS16:
   case UniInVgprS32:
+  case UniInVgprS64:
   case UniInVgprV2S16:
   case UniInVgprV4S32:
   case UniInVgprB32:
@@ -1092,6 +1094,7 @@ void RegBankLegalizeHelper::applyMappingDst(
       break;
     }
     case UniInVgprS32:
+    case UniInVgprS64:
     case UniInVgprV2S16:
     case UniInVgprV4S32: {
       assert(Ty == getTyFromID(MethodIDs[OpIdx]));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index bfe2c80c810ef..9cf0c52717318 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -904,9 +904,18 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
   bool hasSALUFloat = ST->hasSALUFloatInsts();
 
   addRulesForGOpcs({G_FADD}, Standard)
+      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
+      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
+      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
       .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
       .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat)
-      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});
+      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
+      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
+      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
+      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
+      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
+      .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32, VgprV2S32}}})
+      .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32, VgprV2S32}}});
 
   addRulesForGOpcs({G_FPTOUI})
       .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
index 93e0efda77fdd..1cf9ae2e226ca 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
@@ -92,8 +92,10 @@ enum UniformityLLTOpPredicateID {
   V4S32,
 
   UniV2S16,
+  UniV2S32,
 
   DivV2S16,
+  DivV2S32,
 
   // B types
   B32,
@@ -178,7 +180,9 @@ enum RegBankLLTMappingApplyID {
   UniInVcc,
   UniInVgprS16,
   UniInVgprS32,
+  UniInVgprS64,
   UniInVgprV2S16,
+  UniInVgprV2S32,
   UniInVgprV4S32,
   UniInVgprB32,
   UniInVgprB64,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll
new file mode 100644
index 0000000000000..ec221496f450c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll
@@ -0,0 +1,246 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-GISEL,GFX12-GISEL-FAKE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
+
+define amdgpu_ps half @fadd_s16_uniform(half inreg %a, half inreg %b) {
+; GFX11-SDAG-FAKE16-LABEL: fadd_s16_uniform:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    v_add_f16_e64 v0, s0, s1
+; GFX11-SDAG-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX11-SDAG-TRUE16-LABEL: fadd_s16_uniform:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    v_add_f16_e64 v0.l, s0, s1
+; GFX11-SDAG-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX11-GISEL-FAKE16-LABEL: fadd_s16_uniform:
+; GFX11-GISEL-FAKE16:       ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT:    v_add_f16_e64 v0, s0, s1
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX11-GISEL-FAKE16-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-GISEL-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX11-GISEL-TRUE16-LABEL: fadd_s16_uniform:
+; GFX11-GISEL-TRUE16:       ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT:    v_add_f16_e64 v0.l, s0, s1
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-GISEL-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX12-LABEL: fadd_s16_uniform:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_add_f16 s0, s0, s1
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-NEXT:    v_mov_b32_e32 v0, s0
+; GFX12-NEXT:    ; return to shader part epilog
+  %fadd = fadd half %a, %b
+  ret half %fadd
+}
+
+define amdgpu_ps half @fadd_s16_div(half %a, half %b) {
+; GFX11-SDAG-FAKE16-LABEL: fadd_s16_div:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v1
+; GFX11-SDAG-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX11-SDAG-TRUE16-LABEL: fadd_s16_div:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX11-GISEL-FAKE16-LABEL: fadd_s16_div:
+; GFX11-GISEL-FAKE16:       ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX11-GISEL-TRUE16-LABEL: fadd_s16_div:
+; GFX11-GISEL-TRUE16:       ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX12-SDAG-FAKE16-LABEL: fadd_s16_div:
+; GFX12-SDAG-FAKE16:       ; %bb.0:
+; GFX12-SDAG-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v1
+; GFX12-SDAG-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX12-SDAG-TRUE16-LABEL: fadd_s16_div:
+; GFX12-SDAG-TRUE16:       ; %bb.0:
+; GFX12-SDAG-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v1.l
+; GFX12-SDAG-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX12-GISEL-FAKE16-LABEL: fadd_s16_div:
+; GFX12-GISEL-FAKE16:       ; %bb.0:
+; GFX12-GISEL-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v1
+; GFX12-GISEL-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX12-GISEL-TRUE16-LABEL: fadd_s16_div:
+; GFX12-GISEL-TRUE16:       ; %bb.0:
+; GFX12-GISEL-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v1.l
+; GFX12-GISEL-TRUE16-NEXT:    ; return to shader part epilog
+  %fadd = fadd half %a, %b
+  ret half %fadd
+}
+
+define amdgpu_ps float @fadd_s32_uniform(float inreg %a, float inreg %b) {
+; GFX11-LABEL: fadd_s32_uniform:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_add_f32_e64 v0, s0, s1
+; GFX11-NEXT:    ; return to shader part epilog
+;
+; GFX12-LABEL: fadd_s32_uniform:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_add_f32 s0, s0, s1
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-NEXT:    v_mov_b32_e32 v0, s0
+; GFX12-NEXT:    ; return to shader part epilog
+  %fadd = fadd float %a, %b
+  ret float %fadd
+}
+
+define amdgpu_ps float @fadd_s32_div(float %a, float %b) {
+; GCN-LABEL: fadd_s32_div:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_add_f32_e32 v0, v0, v1
+; GCN-NEXT:    ; return to shader part epilog
+  %fadd = fadd float %a, %b
+  ret float %fadd
+}
+
+define amdgpu_ps double @fadd_s64_uniform(double inreg %a, double inreg %b) {
+; GFX11-LABEL: fadd_s64_uniform:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_add_f64 v[0:1], s[0:1], s[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX11-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX11-NEXT:    ; return to shader part epilog
+;
+; GFX12-LABEL: fadd_s64_uniform:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    v_add_f64_e64 v[0:1], s[0:1], s[2:3]
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX12-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX12-NEXT:    s_wait_alu 0xf1ff
+; GFX12-NEXT:    ; return to shader part epilog
+  %fadd = fadd double %a, %b
+  ret double %fadd
+}
+
+define amdgpu_ps double @fadd_s64_div(double %a, double %b) {
+; GFX11-LABEL: fadd_s64_div:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX11-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX11-NEXT:    ; return to shader part epilog
+;
+; GFX12-LABEL: fadd_s64_div:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    v_add_f64_e32 v[0:1], v[0:1], v[2:3]
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX12-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX12-NEXT:    ; return to shader part epilog
+  %fadd = fadd double %a, %b
+  ret double %fadd
+}
+
+define <2 x half> @fadd_v2s16_uniform(<2 x half> inreg %a, <2 x half> inreg %b) {
+; GFX11-LABEL: fadd_v2s16_uniform:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_pk_add_f16 v0, s0, s1
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: fadd_v2s16_uniform:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_pk_add_f16 v0, s0, s1
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %fadd = fadd <2 x half> %a, %b
+  ret <2 x half> %fadd
+}
+
+define <2 x half> @fadd_v2s16_div(<2 x half> %a, <2 x half> %b) {
+; GFX11-LABEL: fadd_v2s16_div:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_pk_add_f16 v0, v0, v1
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: fadd_v2s16_div:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_pk_add_f16 v0, v0, v1
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %fadd = fadd <2 x half> %a, %b
+  ret <2 x half> %fadd
+}
+
+define <2 x float> @fadd_v2s32_uniform(<2 x float> inreg %a, <2 x float> inreg %b) {
+; GFX11-LABEL: fadd_v2s32_uniform:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_add_f32_e64 v0, s0, s2
+; GFX11-NEXT:    v_add_f32_e64 v1, s1, s3
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: fadd_v2s32_uniform:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_add_f32 s0, s0, s2
+; GFX12-NEXT:    s_add_f32 s1, s1, s3
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_2)
+; GFX12-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %fadd = fadd <2 x float> %a, %b
+  ret <2 x float> %fadd
+}
+
+define <2 x float> @fadd_v2s32_div(<2 x float> %a, <2 x float> %b) {
+; GFX11-LABEL: fadd_v2s32_div:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: fadd_v2s32_div:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %fadd = fadd <2 x float> %a, %b
+  ret <2 x float> %fadd
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11-GISEL: {{.*}}
+; GFX11-SDAG: {{.*}}
+; GFX12-GISEL: {{.*}}
+; GFX12-SDAG: {{.*}}

>From d398f3ae0ad166b5467aec7502782f51dbed21c1 Mon Sep 17 00:00:00 2001
From: Abhinav Garg <abhigarg at amd.com>
Date: Thu, 30 Oct 2025 10:02:54 +0000
Subject: [PATCH 2/2] Address review comments: Scalarize v2s16 for uniform
 operation

---
 .../CodeGen/GlobalISel/MachineIRBuilder.cpp   |   73 +-
 .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp    |   20 +
 .../AMDGPU/AMDGPURegBankLegalizeHelper.h      |    2 +
 .../AMDGPU/AMDGPURegBankLegalizeRules.cpp     |    4 +-
 .../AMDGPU/AMDGPURegBankLegalizeRules.h       |    1 +
 llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll   |  203 +-
 llvm/test/CodeGen/AMDGPU/lds-size.ll          |   85 +-
 llvm/test/CodeGen/AMDGPU/trap.ll              | 2543 ++++++++++++++++-
 8 files changed, 2672 insertions(+), 259 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 27df7e369436a..807a320d91e72 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -99,7 +99,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
       "Expected inlined-at fields to agree");
   auto MIB = buildInstrNoInsert(TargetOpcode::DBG_VALUE);
 
-  auto *NumericConstant = [&] () -> const Constant* {
+  auto *NumericConstant = [&]() -> const Constant * {
     if (const auto *CE = dyn_cast<ConstantExpr>(&C))
       if (CE->getOpcode() == Instruction::IntToPtr)
         return CE->getOperand(0);
@@ -203,7 +203,8 @@ MachineIRBuilder::buildPtrAdd(const DstOp &Res, const SrcOp &Op0,
                               const SrcOp &Op1, std::optional<unsigned> Flags) {
   assert(Res.getLLTTy(*getMRI()).isPointerOrPointerVector() &&
          Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch");
-  assert(Op1.getLLTTy(*getMRI()).getScalarType().isScalar() && "invalid offset type");
+  assert(Op1.getLLTTy(*getMRI()).getScalarType().isScalar() &&
+         "invalid offset type");
 
   return buildInstr(TargetOpcode::G_PTR_ADD, {Res}, {Op0, Op1}, Flags);
 }
@@ -314,8 +315,7 @@ MachineInstrBuilder MachineIRBuilder::buildBrIndirect(Register Tgt) {
   return buildInstr(TargetOpcode::G_BRINDIRECT).addUse(Tgt);
 }
 
-MachineInstrBuilder MachineIRBuilder::buildBrJT(Register TablePtr,
-                                                unsigned JTI,
+MachineInstrBuilder MachineIRBuilder::buildBrJT(Register TablePtr, unsigned JTI,
                                                 Register IndexReg) {
   assert(getMRI()->getType(TablePtr).isPointer() &&
          "Table reg must be a pointer");
@@ -343,8 +343,8 @@ MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
 
   if (Ty.isFixedVector()) {
     auto Const = buildInstr(TargetOpcode::G_CONSTANT)
-    .addDef(getMRI()->createGenericVirtualRegister(EltTy))
-    .addCImm(&Val);
+                     .addDef(getMRI()->createGenericVirtualRegister(EltTy))
+                     .addCImm(&Val);
     return buildSplatBuildVector(Res, Const);
   }
 
@@ -369,8 +369,8 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
   LLT Ty = Res.getLLTTy(*getMRI());
   LLT EltTy = Ty.getScalarType();
 
-  assert(APFloat::getSizeInBits(Val.getValueAPF().getSemantics())
-         == EltTy.getSizeInBits() &&
+  assert(APFloat::getSizeInBits(Val.getValueAPF().getSemantics()) ==
+             EltTy.getSizeInBits() &&
          "creating fconstant with the wrong size");
 
   assert(!Ty.isPointer() && "invalid operand type");
@@ -380,8 +380,8 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
 
   if (Ty.isFixedVector()) {
     auto Const = buildInstr(TargetOpcode::G_FCONSTANT)
-    .addDef(getMRI()->createGenericVirtualRegister(EltTy))
-    .addFPImm(&Val);
+                     .addDef(getMRI()->createGenericVirtualRegister(EltTy))
+                     .addFPImm(&Val);
 
     return buildSplatBuildVector(Res, Const);
   }
@@ -403,8 +403,8 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
                                                      double Val) {
   LLT DstTy = Res.getLLTTy(*getMRI());
   auto &Ctx = getMF().getFunction().getContext();
-  auto *CFP =
-      ConstantFP::get(Ctx, getAPFloatFromSize(Val, DstTy.getScalarSizeInBits()));
+  auto *CFP = ConstantFP::get(
+      Ctx, getAPFloatFromSize(Val, DstTy.getScalarSizeInBits()));
   return buildFConstant(Res, *CFP);
 }
 
@@ -466,9 +466,10 @@ MachineInstrBuilder MachineIRBuilder::buildLoadInstr(unsigned Opcode,
   return MIB;
 }
 
-MachineInstrBuilder MachineIRBuilder::buildLoadFromOffset(
-  const DstOp &Dst, const SrcOp &BasePtr,
-  MachineMemOperand &BaseMMO, int64_t Offset) {
+MachineInstrBuilder
+MachineIRBuilder::buildLoadFromOffset(const DstOp &Dst, const SrcOp &BasePtr,
+                                      MachineMemOperand &BaseMMO,
+                                      int64_t Offset) {
   LLT LoadTy = Dst.getLLTTy(*getMRI());
   MachineMemOperand *OffsetMMO =
       getMF().getMachineMemOperand(&BaseMMO, Offset, LoadTy);
@@ -539,9 +540,9 @@ unsigned MachineIRBuilder::getBoolExtOp(bool IsVec, bool IsFP) const {
 }
 
 MachineInstrBuilder MachineIRBuilder::buildBoolExt(const DstOp &Res,
-                                                   const SrcOp &Op,
-                                                   bool IsFP) {
-  unsigned ExtOp = getBoolExtOp(getMRI()->getType(Op.getReg()).isVector(), IsFP);
+                                                   const SrcOp &Op, bool IsFP) {
+  unsigned ExtOp =
+      getBoolExtOp(getMRI()->getType(Op.getReg()).isVector(), IsFP);
   return buildInstr(ExtOp, Res, Op);
 }
 
@@ -709,9 +710,9 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<LLT> Res,
   return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
 }
 
-MachineInstrBuilder MachineIRBuilder::buildUnmerge(LLT Res,
-                                                   const SrcOp &Op) {
-  unsigned NumReg = Op.getLLTTy(*getMRI()).getSizeInBits() / Res.getSizeInBits();
+MachineInstrBuilder MachineIRBuilder::buildUnmerge(LLT Res, const SrcOp &Op) {
+  unsigned NumReg =
+      Op.getLLTTy(*getMRI()).getSizeInBits() / Res.getSizeInBits();
   SmallVector<DstOp, 8> TmpVec(NumReg, Res);
   return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
 }
@@ -1053,10 +1054,11 @@ MachineIRBuilder::buildAtomicCmpXchg(const DstOp &OldValRes, const SrcOp &Addr,
   return MIB;
 }
 
-MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(
-  unsigned Opcode, const DstOp &OldValRes,
-  const SrcOp &Addr, const SrcOp &Val,
-  MachineMemOperand &MMO) {
+MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(unsigned Opcode,
+                                                     const DstOp &OldValRes,
+                                                     const SrcOp &Addr,
+                                                     const SrcOp &Val,
+                                                     MachineMemOperand &MMO) {
 
 #ifndef NDEBUG
   LLT OldValResTy = OldValRes.getLLTTy(*getMRI());
@@ -1145,16 +1147,15 @@ MachineIRBuilder::buildAtomicRMWUmin(Register OldValRes, Register Addr,
 }
 
 MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWFAdd(
-  const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val,
-  MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWFAdd(const DstOp &OldValRes, const SrcOp &Addr,
+                                     const SrcOp &Val, MachineMemOperand &MMO) {
   return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FADD, OldValRes, Addr, Val,
                         MMO);
 }
 
 MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWFSub(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val,
-                                     MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWFSub(const DstOp &OldValRes, const SrcOp &Addr,
+                                     const SrcOp &Val, MachineMemOperand &MMO) {
   return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FSUB, OldValRes, Addr, Val,
                         MMO);
 }
@@ -1189,11 +1190,9 @@ MachineIRBuilder::buildAtomicRMWFMinimum(const DstOp &OldValRes,
                         Val, MMO);
 }
 
-MachineInstrBuilder
-MachineIRBuilder::buildFence(unsigned Ordering, unsigned Scope) {
-  return buildInstr(TargetOpcode::G_FENCE)
-    .addImm(Ordering)
-    .addImm(Scope);
+MachineInstrBuilder MachineIRBuilder::buildFence(unsigned Ordering,
+                                                 unsigned Scope) {
+  return buildInstr(TargetOpcode::G_FENCE).addImm(Ordering).addImm(Scope);
 }
 
 MachineInstrBuilder MachineIRBuilder::buildPrefetch(const SrcOp &Addr,
@@ -1276,6 +1275,7 @@ MachineIRBuilder::buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps,
                     SrcOps[0].getLLTTy(*getMRI()));
     break;
   case TargetOpcode::G_ADD:
+  case TargetOpcode::G_FADD:
   case TargetOpcode::G_AND:
   case TargetOpcode::G_MUL:
   case TargetOpcode::G_OR:
@@ -1333,7 +1333,8 @@ MachineIRBuilder::buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps,
     assert(DstOps.size() == 1 && "Invalid Dst");
     assert(SrcOps.size() == 1 && "Invalid Srcs");
     assert(DstOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
-           SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() && "invalid bitcast");
+               SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
+           "invalid bitcast");
     break;
   }
   case TargetOpcode::COPY:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index 198ee6b73b0b5..a8c041dad6963 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -616,6 +616,24 @@ void RegBankLegalizeHelper::lowerSplitTo32(MachineInstr &MI) {
   MI.eraseFromParent();
 }
 
+void RegBankLegalizeHelper::lowerSplitTo16(MachineInstr &MI) { // Lower a V2S16 instruction with two sources into two S16 instructions of the same opcode (one per half) and merge the results back into the original destination.
+  Register Dst = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(Dst);
+  assert(DstTy == V2S16); // NOTE(review): DstTy is only read by this assert - confirm it does not trigger an unused-variable warning in NDEBUG builds.
+  auto [Val0_Lo_32, Val0_Hi_32] = unpackAExt(MI.getOperand(1).getReg()); // Two 32-bit pieces of the first source; presumably the any-extended low/high halves - unpackAExt is not shown here, confirm.
+  auto [Val1_Lo_32, Val1_Hi_32] = unpackAExt(MI.getOperand(2).getReg()); // Same unpacking for the second source.
+  unsigned Opc = MI.getOpcode(); // The per-half instructions reuse the original opcode...
+  auto Flags = MI.getFlags(); // ...and the original MI flags.
+  auto Val0_Lo = B.buildTrunc(SgprRB_S16, Val0_Lo_32); // Truncate each 32-bit piece to an S16 value created with the SgprRB_S16 attributes.
+  auto Val0_Hi = B.buildTrunc(SgprRB_S16, Val0_Hi_32);
+  auto Val1_Lo = B.buildTrunc(SgprRB_S16, Val1_Lo_32);
+  auto Val1_Hi = B.buildTrunc(SgprRB_S16, Val1_Hi_32);
+  auto Lo = B.buildInstr(Opc, {SgprRB_S16}, {Val0_Lo, Val1_Lo}, Flags); // Operation on the two "Lo" pieces.
+  auto Hi = B.buildInstr(Opc, {SgprRB_S16}, {Val0_Hi, Val1_Hi}, Flags); // Operation on the two "Hi" pieces.
+  B.buildMergeLikeInstr(Dst, {Lo.getReg(0), Hi.getReg(0)}); // Recombine both S16 results into Dst, Lo first.
+  MI.eraseFromParent(); // The original instruction is fully replaced by the sequence above.
+}
+
 void RegBankLegalizeHelper::lowerSplitTo32Select(MachineInstr &MI) {
   Register Dst = MI.getOperand(0).getReg();
   LLT DstTy = MRI.getType(Dst);
@@ -688,6 +706,8 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
     return lowerUnpackBitShift(MI);
   case UnpackMinMax:
     return lowerUnpackMinMax(MI);
+  case ScalarizeToS16:
+    return lowerSplitTo16(MI);
   case Ext32To64: {
     const RegisterBank *RB = MRI.getRegBank(MI.getOperand(0).getReg());
     MachineInstrBuilder Hi;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
index d937815bf4714..df0d7ef4689fe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
@@ -72,6 +72,7 @@ class RegBankLegalizeHelper {
   static constexpr LLT P6 = LLT::pointer(6, 32);
 
   MachineRegisterInfo::VRegAttrs SgprRB_S32 = {SgprRB, S32};
+  MachineRegisterInfo::VRegAttrs SgprRB_S16 = {SgprRB, S16};
   MachineRegisterInfo::VRegAttrs VgprRB_S32 = {VgprRB, S32};
   MachineRegisterInfo::VRegAttrs VccRB_S1 = {VccRB, S1};
 
@@ -121,6 +122,7 @@ class RegBankLegalizeHelper {
   void lowerV_BFE(MachineInstr &MI);
   void lowerS_BFE(MachineInstr &MI);
   void lowerSplitTo32(MachineInstr &MI);
+  void lowerSplitTo16(MachineInstr &MI);
   void lowerSplitTo32Select(MachineInstr &MI);
   void lowerSplitTo32SExtInReg(MachineInstr &MI);
   void lowerUnpackMinMax(MachineInstr &MI);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 9cf0c52717318..78997b30318e0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -912,7 +912,9 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
       .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
       .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
       .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
-      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
+      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}}, !hasSALUFloat)
+      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, ScalarizeToS16},
+           hasSALUFloat)
       .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
       .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32, VgprV2S32}}})
       .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32, VgprV2S32}}});
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
index 1cf9ae2e226ca..007fedc737512 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
@@ -221,6 +221,7 @@ enum LoweringMethodID {
   V_BFE,
   VgprToVccCopy,
   SplitTo32,
+  ScalarizeToS16,
   SplitTo32Select,
   SplitTo32SExtInReg,
   Ext32To64,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll
index ec221496f450c..b315ac549b1c2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fadd.ll
@@ -1,39 +1,25 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-GISEL,GFX12-GISEL-FAKE16 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-FAKE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-TRUE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-FAKE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-TRUE16 %s
 
 define amdgpu_ps half @fadd_s16_uniform(half inreg %a, half inreg %b) {
-; GFX11-SDAG-FAKE16-LABEL: fadd_s16_uniform:
-; GFX11-SDAG-FAKE16:       ; %bb.0:
-; GFX11-SDAG-FAKE16-NEXT:    v_add_f16_e64 v0, s0, s1
-; GFX11-SDAG-FAKE16-NEXT:    ; return to shader part epilog
-;
-; GFX11-SDAG-TRUE16-LABEL: fadd_s16_uniform:
-; GFX11-SDAG-TRUE16:       ; %bb.0:
-; GFX11-SDAG-TRUE16-NEXT:    v_add_f16_e64 v0.l, s0, s1
-; GFX11-SDAG-TRUE16-NEXT:    ; return to shader part epilog
-;
-; GFX11-GISEL-FAKE16-LABEL: fadd_s16_uniform:
-; GFX11-GISEL-FAKE16:       ; %bb.0:
-; GFX11-GISEL-FAKE16-NEXT:    v_add_f16_e64 v0, s0, s1
-; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-FAKE16-NEXT:    v_readfirstlane_b32 s0, v0
-; GFX11-GISEL-FAKE16-NEXT:    v_mov_b32_e32 v0, s0
-; GFX11-GISEL-FAKE16-NEXT:    ; return to shader part epilog
-;
-; GFX11-GISEL-TRUE16-LABEL: fadd_s16_uniform:
-; GFX11-GISEL-TRUE16:       ; %bb.0:
-; GFX11-GISEL-TRUE16-NEXT:    v_add_f16_e64 v0.l, s0, s1
-; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-TRUE16-NEXT:    v_readfirstlane_b32 s0, v0
-; GFX11-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v0, s0
-; GFX11-GISEL-TRUE16-NEXT:    ; return to shader part epilog
+; GFX11-FAKE16-LABEL: fadd_s16_uniform:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    v_add_f16_e64 v0, s0, s1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX11-TRUE16-LABEL: fadd_s16_uniform:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    v_add_f16_e64 v0.l, s0, s1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-TRUE16-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: fadd_s16_uniform:
 ; GFX12:       ; %bb.0:
@@ -41,50 +27,41 @@ define amdgpu_ps half @fadd_s16_uniform(half inreg %a, half inreg %b) {
 ; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
 ; GFX12-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX12-NEXT:    ; return to shader part epilog
+; -LABEL: fadd_s16_uniform:
+; :       ; %bb.0:
+; -NEXT:    v_add_f16_e64 v0.l, s0, s1
+; -NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; -NEXT:    v_readfirstlane_b32 s0, v0
+; -NEXT:    v_mov_b32_e32 v0, s0
+; -NEXT:    ; return to shader part epilog
   %fadd = fadd half %a, %b
   ret half %fadd
 }
 
 define amdgpu_ps half @fadd_s16_div(half %a, half %b) {
-; GFX11-SDAG-FAKE16-LABEL: fadd_s16_div:
-; GFX11-SDAG-FAKE16:       ; %bb.0:
-; GFX11-SDAG-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v1
-; GFX11-SDAG-FAKE16-NEXT:    ; return to shader part epilog
-;
-; GFX11-SDAG-TRUE16-LABEL: fadd_s16_div:
-; GFX11-SDAG-TRUE16:       ; %bb.0:
-; GFX11-SDAG-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v1.l
-; GFX11-SDAG-TRUE16-NEXT:    ; return to shader part epilog
-;
-; GFX11-GISEL-FAKE16-LABEL: fadd_s16_div:
-; GFX11-GISEL-FAKE16:       ; %bb.0:
-; GFX11-GISEL-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v1
-; GFX11-GISEL-FAKE16-NEXT:    ; return to shader part epilog
-;
-; GFX11-GISEL-TRUE16-LABEL: fadd_s16_div:
-; GFX11-GISEL-TRUE16:       ; %bb.0:
-; GFX11-GISEL-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v1.l
-; GFX11-GISEL-TRUE16-NEXT:    ; return to shader part epilog
-;
-; GFX12-SDAG-FAKE16-LABEL: fadd_s16_div:
-; GFX12-SDAG-FAKE16:       ; %bb.0:
-; GFX12-SDAG-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v1
-; GFX12-SDAG-FAKE16-NEXT:    ; return to shader part epilog
-;
-; GFX12-SDAG-TRUE16-LABEL: fadd_s16_div:
-; GFX12-SDAG-TRUE16:       ; %bb.0:
-; GFX12-SDAG-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v1.l
-; GFX12-SDAG-TRUE16-NEXT:    ; return to shader part epilog
-;
-; GFX12-GISEL-FAKE16-LABEL: fadd_s16_div:
-; GFX12-GISEL-FAKE16:       ; %bb.0:
-; GFX12-GISEL-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v1
-; GFX12-GISEL-FAKE16-NEXT:    ; return to shader part epilog
-;
-; GFX12-GISEL-TRUE16-LABEL: fadd_s16_div:
-; GFX12-GISEL-TRUE16:       ; %bb.0:
-; GFX12-GISEL-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v1.l
-; GFX12-GISEL-TRUE16-NEXT:    ; return to shader part epilog
+; GFX11-FAKE16-LABEL: fadd_s16_div:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX11-TRUE16-LABEL: fadd_s16_div:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: fadd_s16_div:
+; GFX12-FAKE16:       ; %bb.0:
+; GFX12-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v1
+; GFX12-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: fadd_s16_div:
+; GFX12-TRUE16:       ; %bb.0:
+; GFX12-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v1.l
+; GFX12-TRUE16-NEXT:    ; return to shader part epilog
+; -LABEL: fadd_s16_div:
+; :       ; %bb.0:
+; -NEXT:    v_add_f16_e32 v0.l, v0.l, v1.l
+; -NEXT:    ; return to shader part epilog
   %fadd = fadd half %a, %b
   ret half %fadd
 }
@@ -155,92 +132,58 @@ define amdgpu_ps double @fadd_s64_div(double %a, double %b) {
   ret double %fadd
 }
 
-define <2 x half> @fadd_v2s16_uniform(<2 x half> inreg %a, <2 x half> inreg %b) {
+define amdgpu_ps <2 x half> @fadd_v2s16_uniform(<2 x half> inreg %a, <2 x half> inreg %b) {
 ; GFX11-LABEL: fadd_v2s16_uniform:
 ; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_pk_add_f16 v0, s0, s1
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: fadd_v2s16_uniform:
 ; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_pk_add_f16 v0, s0, s1
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
+; GFX12-NEXT:    s_lshr_b32 s2, s0, 16
+; GFX12-NEXT:    s_lshr_b32 s3, s1, 16
+; GFX12-NEXT:    s_add_f16 s0, s0, s1
+; GFX12-NEXT:    s_add_f16 s1, s2, s3
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX12-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
+; GFX12-NEXT:    v_mov_b32_e32 v0, s0
+; GFX12-NEXT:    ; return to shader part epilog
   %fadd = fadd <2 x half> %a, %b
   ret <2 x half> %fadd
 }
 
-define <2 x half> @fadd_v2s16_div(<2 x half> %a, <2 x half> %b) {
-; GFX11-LABEL: fadd_v2s16_div:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_pk_add_f16 v0, v0, v1
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: fadd_v2s16_div:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_pk_add_f16 v0, v0, v1
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
+define amdgpu_ps <2 x half> @fadd_v2s16_div(<2 x half> %a, <2 x half> %b) {
+; GCN-LABEL: fadd_v2s16_div:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_add_f16 v0, v0, v1
+; GCN-NEXT:    ; return to shader part epilog
   %fadd = fadd <2 x half> %a, %b
   ret <2 x half> %fadd
 }
 
-define <2 x float> @fadd_v2s32_uniform(<2 x float> inreg %a, <2 x float> inreg %b) {
+define amdgpu_ps <2 x float> @fadd_v2s32_uniform(<2 x float> inreg %a, <2 x float> inreg %b) {
 ; GFX11-LABEL: fadd_v2s32_uniform:
 ; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_add_f32_e64 v0, s0, s2
 ; GFX11-NEXT:    v_add_f32_e64 v1, s1, s3
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-LABEL: fadd_v2s32_uniform:
 ; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-NEXT:    s_add_f32 s0, s0, s2
 ; GFX12-NEXT:    s_add_f32 s1, s1, s3
-; GFX12-NEXT:    s_wait_alu 0xfffe
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_2)
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
 ; GFX12-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
+; GFX12-NEXT:    ; return to shader part epilog
   %fadd = fadd <2 x float> %a, %b
   ret <2 x float> %fadd
 }
 
-define <2 x float> @fadd_v2s32_div(<2 x float> %a, <2 x float> %b) {
-; GFX11-LABEL: fadd_v2s32_div:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: fadd_v2s32_div:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
+define amdgpu_ps <2 x float> @fadd_v2s32_div(<2 x float> %a, <2 x float> %b) {
+; GCN-LABEL: fadd_v2s32_div:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3
+; GCN-NEXT:    ; return to shader part epilog
   %fadd = fadd <2 x float> %a, %b
   ret <2 x float> %fadd
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX11-GISEL: {{.*}}
-; GFX11-SDAG: {{.*}}
-; GFX12-GISEL: {{.*}}
-; GFX12-SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/lds-size.ll b/llvm/test/CodeGen/AMDGPU/lds-size.ll
index 75732a58eafc4..300d36477e2df 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-size.ll
@@ -1,24 +1,80 @@
-; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=ALL -check-prefix=HSA %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=ALL -check-prefix=HSA %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=ALL -check-prefix=HSA -check-prefix=HSA-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=ALL -check-prefix=HSA -check-prefix=HSA-GISEL %s
 ; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck -check-prefix=ALL -check-prefix=EG %s
 
 ; This test makes sure we do not double count global values when they are
 ; used in different basic blocks.
 
-; GCN: .long 47180
-; GCN-NEXT: .long 32900
-
-; EG: .long 166120
-; EG-NEXT: .long 1
-; ALL: {{^}}test:
-
-; HSA-NOT: COMPUTE_PGM_RSRC2.LDS_SIZE
-; HSA: .amdhsa_group_segment_fixed_size 4
-
-; GCN: ; LDSByteSize: 4 bytes/workgroup (compile time only)
 @lds = internal unnamed_addr addrspace(3) global i32 poison, align 4
 
 define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %cond) {
+; HSA-SDAG-LABEL: test:
+; HSA-SDAG:       ; %bb.0: ; %entry
+; HSA-SDAG-NEXT:    s_load_dword s0, s[8:9], 0x2
+; HSA-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-SDAG-NEXT:    s_cmp_lg_u32 s0, 0
+; HSA-SDAG-NEXT:    s_mov_b32 m0, -1
+; HSA-SDAG-NEXT:    s_cbranch_scc0 .LBB0_4
+; HSA-SDAG-NEXT:  ; %bb.1: ; %else
+; HSA-SDAG-NEXT:    v_mov_b32_e32 v0, 2
+; HSA-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; HSA-SDAG-NEXT:    ds_write_b32 v1, v0
+; HSA-SDAG-NEXT:    s_mov_b64 vcc, exec
+; HSA-SDAG-NEXT:    s_cbranch_execnz .LBB0_3
+; HSA-SDAG-NEXT:  .LBB0_2: ; %if
+; HSA-SDAG-NEXT:    v_mov_b32_e32 v0, 1
+; HSA-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; HSA-SDAG-NEXT:    ds_write_b32 v1, v0
+; HSA-SDAG-NEXT:  .LBB0_3: ; %endif
+; HSA-SDAG-NEXT:    s_endpgm
+; HSA-SDAG-NEXT:  .LBB0_4:
+; HSA-SDAG-NEXT:    s_mov_b64 vcc, 0
+; HSA-SDAG-NEXT:    s_branch .LBB0_2
+;
+; HSA-GISEL-LABEL: test:
+; HSA-GISEL:       ; %bb.0: ; %entry
+; HSA-GISEL-NEXT:    s_load_dword s0, s[8:9], 0x2
+; HSA-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-GISEL-NEXT:    s_cmp_lg_u32 s0, 0
+; HSA-GISEL-NEXT:    s_mov_b32 s0, 1
+; HSA-GISEL-NEXT:    s_cbranch_scc0 .LBB0_2
+; HSA-GISEL-NEXT:  ; %bb.1: ; %else
+; HSA-GISEL-NEXT:    s_mov_b32 s0, 0
+; HSA-GISEL-NEXT:    v_mov_b32_e32 v0, 2
+; HSA-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; HSA-GISEL-NEXT:    s_mov_b32 m0, -1
+; HSA-GISEL-NEXT:    ds_write_b32 v1, v0
+; HSA-GISEL-NEXT:  .LBB0_2: ; %Flow
+; HSA-GISEL-NEXT:    s_xor_b32 s0, s0, 1
+; HSA-GISEL-NEXT:    s_cmp_lg_u32 s0, 0
+; HSA-GISEL-NEXT:    s_cbranch_scc1 .LBB0_4
+; HSA-GISEL-NEXT:  ; %bb.3: ; %if
+; HSA-GISEL-NEXT:    v_mov_b32_e32 v0, 1
+; HSA-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; HSA-GISEL-NEXT:    s_mov_b32 m0, -1
+; HSA-GISEL-NEXT:    ds_write_b32 v1, v0
+; HSA-GISEL-NEXT:  .LBB0_4: ; %endif
+; HSA-GISEL-NEXT:    s_endpgm
+;
+; EG-LABEL: test:
+; EG:       ; %bb.0: ; %entry
+; EG-NEXT:    ALU 13, @0, KC0[CB0:0-32], KC1[]
+; EG-NEXT:     MOV T0.Z, literal.x,
+; EG-NEXT:     MOV T0.W, literal.y,
+; EG-NEXT:     SETNE_INT * T1.W, KC0[2].Z, 0.0,
+; EG-NEXT:    0(0.000000e+00), 1(1.401298e-45)
+; EG-NEXT:     PRED_SETNE_INT * Pred,PredicateBit (MASKED), PS, 0.0,
+; EG-NEXT:     MOV * T0.W, literal.x, Pred_sel_one
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     LDS_WRITE * T0.Z, T0.W, Pred_sel_one
+; EG-NEXT:     MOV * T0.W, T0.Z, Pred_sel_one
+; EG-NEXT:     SETE_INT * T0.W, T0.W, 0.0,
+; EG-NEXT:     PRED_SETNE_INT * Pred,PredicateBit (MASKED), PV.W, 0.0,
+; EG-NEXT:     MOV * T0.W, literal.x, Pred_sel_zero
+; EG-NEXT:    1(1.401298e-45), 0(0.000000e+00)
+; EG-NEXT:     LDS_WRITE * T0.Z, T0.W, Pred_sel_zero
+; EG-NEXT:    RETURN
 entry:
   %0 = icmp eq i32 %cond, 0
   br i1 %0, label %if, label %else
@@ -37,3 +93,6 @@ endif:
 
 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; ALL: {{.*}}
+; HSA: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/trap.ll b/llvm/test/CodeGen/AMDGPU/trap.ll
index a7affb93c1c6a..dd6ac4311fda0 100644
--- a/llvm/test/CodeGen/AMDGPU/trap.ll
+++ b/llvm/test/CodeGen/AMDGPU/trap.ll
@@ -1,55 +1,456 @@
-; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn--amdhsa < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn--amdhsa < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP-GISEL %s
 
-; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa -mattr=+trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn--amdhsa -mattr=+trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa -mattr=-trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn--amdhsa -mattr=-trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa -mattr=-trap-handler < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn--amdhsa -mattr=-trap-handler < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa -mattr=+trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn--amdhsa -mattr=+trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa -mattr=-trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn--amdhsa -mattr=-trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa -mattr=-trap-handler < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=HSA-WARNING-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn--amdhsa -mattr=-trap-handler < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=HSA-WARNING-GISEL %s
 
 ; enable trap handler feature
-; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=TRAP-BIT -check-prefix=MESA-TRAP %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=TRAP-BIT -check-prefix=MESA-TRAP %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=TRAP-BIT %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=TRAP-BIT %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP-SDAG -check-prefix=TRAP-BIT-SDAG -check-prefix=MESA-TRAP-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP-GISEL -check-prefix=TRAP-BIT-GISEL -check-prefix=MESA-TRAP-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=MESA-WARNING-SDAG -check-prefix=TRAP-BIT-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=MESA-WARNING-GISEL -check-prefix=TRAP-BIT-GISEL %s
 
 ; disable trap handler feature
-; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=NO-TRAP-BIT -check-prefix=NOMESA-TRAP %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=NO-TRAP-BIT -check-prefix=NOMESA-TRAP %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=NO-TRAP-BIT %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=NO-TRAP-BIT %s
-
-; RUN: llc -global-isel=0 -mtriple=amdgcn < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s
-
-; GCN-WARNING: warning: <unknown>:0:0: in function hsa_debugtrap void (ptr addrspace(1)): debugtrap handler not supported
+; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP-SDAG -check-prefix=NO-TRAP-BIT-SDAG -check-prefix=NOMESA-TRAP-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP-GISEL -check-prefix=NO-TRAP-BIT-GISEL -check-prefix=NOMESA-TRAP-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=MESA-WARNING-SDAG -check-prefix=NO-TRAP-BIT-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=MESA-WARNING-GISEL -check-prefix=NO-TRAP-BIT-GISEL %s
 
+; RUN: llc -global-isel=0 -mtriple=amdgcn < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING-GISEL %s
 
 declare void @llvm.trap() #0
 declare void @llvm.debugtrap() #1
 
-; MESA-TRAP: .section .AMDGPU.config
-; MESA-TRAP:  .long   47180
-; MESA-TRAP-NEXT: .long   5080
-
-; NOMESA-TRAP: .section .AMDGPU.config
-; NOMESA-TRAP:  .long   47180
-; NOMESA-TRAP-NEXT: .long   5016
-
-; GCN-LABEL: {{^}}hsa_trap:
-; HSA-TRAP: s_mov_b64 s[0:1], s[6:7]
-; HSA-TRAP: s_trap 2
-; HSA-TRAP: COMPUTE_PGM_RSRC2:TRAP_HANDLER: 0
-
-; for llvm.trap in hsa path without ABI, direct generate s_endpgm instruction without any warning information
-; NO-HSA-TRAP: s_endpgm
-; NO-HSA-TRAP: COMPUTE_PGM_RSRC2:TRAP_HANDLER: 0
-
-; TRAP-BIT: enable_trap_handler = 1
-; NO-TRAP-BIT: enable_trap_handler = 0
-; NO-MESA-TRAP: s_endpgm
 define amdgpu_kernel void @hsa_trap(ptr addrspace(1) nocapture readonly %arg0) {
+; HSA-TRAP-SDAG-LABEL: hsa_trap:
+; HSA-TRAP-SDAG:       ; %bb.0:
+; HSA-TRAP-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; HSA-TRAP-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; HSA-TRAP-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-TRAP-SDAG-NEXT:    s_mov_b64 s[0:1], s[6:7]
+; HSA-TRAP-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[8:9], 0x0
+; HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v2, 1
+; HSA-TRAP-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v1, s3
+; HSA-TRAP-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; HSA-TRAP-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; HSA-TRAP-SDAG-NEXT:    s_trap 2
+;
+; HSA-TRAP-GISEL-LABEL: hsa_trap:
+; HSA-TRAP-GISEL:       ; %bb.0:
+; HSA-TRAP-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; HSA-TRAP-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; HSA-TRAP-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-TRAP-GISEL-NEXT:    s_mov_b64 s[0:1], s[6:7]
+; HSA-TRAP-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[8:9], 0x0
+; HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v2, 1
+; HSA-TRAP-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; HSA-TRAP-GISEL-NEXT:    flat_store_dword v[0:1], v2
+; HSA-TRAP-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; HSA-TRAP-GISEL-NEXT:    s_trap 2
+;
+; NO-HSA-TRAP-SDAG-LABEL: hsa_trap:
+; NO-HSA-TRAP-SDAG:       ; %bb.0:
+; NO-HSA-TRAP-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; NO-HSA-TRAP-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; NO-HSA-TRAP-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; NO-HSA-TRAP-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v2, 1
+; NO-HSA-TRAP-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; NO-HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; NO-HSA-TRAP-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-SDAG-NEXT:    s_endpgm
+;
+; NO-HSA-TRAP-GISEL-LABEL: hsa_trap:
+; NO-HSA-TRAP-GISEL:       ; %bb.0:
+; NO-HSA-TRAP-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; NO-HSA-TRAP-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; NO-HSA-TRAP-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; NO-HSA-TRAP-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v2, 1
+; NO-HSA-TRAP-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; NO-HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-GISEL-NEXT:    flat_store_dword v[0:1], v2
+; NO-HSA-TRAP-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-GISEL-NEXT:    s_endpgm
+;
+; HSA-WARNING-SDAG-LABEL: hsa_trap:
+; HSA-WARNING-SDAG:       ; %bb.0:
+; HSA-WARNING-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; HSA-WARNING-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; HSA-WARNING-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-WARNING-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-WARNING-SDAG-NEXT:    v_mov_b32_e32 v2, 1
+; HSA-WARNING-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-WARNING-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; HSA-WARNING-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; HSA-WARNING-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; HSA-WARNING-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; HSA-WARNING-SDAG-NEXT:    s_endpgm
+;
+; HSA-WARNING-GISEL-LABEL: hsa_trap:
+; HSA-WARNING-GISEL:       ; %bb.0:
+; HSA-WARNING-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; HSA-WARNING-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; HSA-WARNING-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-WARNING-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-WARNING-GISEL-NEXT:    v_mov_b32_e32 v2, 1
+; HSA-WARNING-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-WARNING-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; HSA-WARNING-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; HSA-WARNING-GISEL-NEXT:    flat_store_dword v[0:1], v2
+; HSA-WARNING-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; HSA-WARNING-GISEL-NEXT:    s_endpgm
+;
+; TRAP-BIT-SDAG-LABEL: hsa_trap:
+; TRAP-BIT-SDAG:         .amd_kernel_code_t
+; TRAP-BIT-SDAG-NEXT:     amd_code_version_major = 1
+; TRAP-BIT-SDAG-NEXT:     amd_code_version_minor = 2
+; TRAP-BIT-SDAG-NEXT:     amd_machine_kind = 1
+; TRAP-BIT-SDAG-NEXT:     amd_machine_version_major = 6
+; TRAP-BIT-SDAG-NEXT:     amd_machine_version_minor = 0
+; TRAP-BIT-SDAG-NEXT:     amd_machine_version_stepping = 0
+; TRAP-BIT-SDAG-NEXT:     kernel_code_entry_byte_offset = 256
+; TRAP-BIT-SDAG-NEXT:     kernel_code_prefetch_byte_size = 0
+; TRAP-BIT-SDAG-NEXT:     granulated_workitem_vgpr_count = 0
+; TRAP-BIT-SDAG-NEXT:     granulated_wavefront_sgpr_count = 1
+; TRAP-BIT-SDAG-NEXT:     priority = 0
+; TRAP-BIT-SDAG-NEXT:     float_mode = 240
+; TRAP-BIT-SDAG-NEXT:     priv = 0
+; TRAP-BIT-SDAG-NEXT:     enable_dx10_clamp = 1
+; TRAP-BIT-SDAG-NEXT:     debug_mode = 0
+; TRAP-BIT-SDAG-NEXT:     enable_ieee_mode = 1
+; TRAP-BIT-SDAG-NEXT:     enable_wgp_mode = 0
+; TRAP-BIT-SDAG-NEXT:     enable_mem_ordered = 0
+; TRAP-BIT-SDAG-NEXT:     enable_fwd_progress = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; TRAP-BIT-SDAG-NEXT:     user_sgpr_count = 12
+; TRAP-BIT-SDAG-NEXT:     enable_trap_handler = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_x = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_y = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_z = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_info = 0
+; TRAP-BIT-SDAG-NEXT:     enable_vgpr_workitem_id = 2
+; TRAP-BIT-SDAG-NEXT:     enable_exception_msb = 0
+; TRAP-BIT-SDAG-NEXT:     granulated_lds_size = 0
+; TRAP-BIT-SDAG-NEXT:     enable_exception = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_buffer = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_dispatch_ptr = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_queue_ptr = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_dispatch_id = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_flat_scratch_init = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_size = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; TRAP-BIT-SDAG-NEXT:     enable_wavefront_size32 = 0
+; TRAP-BIT-SDAG-NEXT:     enable_ordered_append_gds = 0
+; TRAP-BIT-SDAG-NEXT:     private_element_size = 1
+; TRAP-BIT-SDAG-NEXT:     is_ptr64 = 1
+; TRAP-BIT-SDAG-NEXT:     is_dynamic_callstack = 0
+; TRAP-BIT-SDAG-NEXT:     is_debug_enabled = 0
+; TRAP-BIT-SDAG-NEXT:     is_xnack_enabled = 0
+; TRAP-BIT-SDAG-NEXT:     workitem_private_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT:     workgroup_group_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT:     gds_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT:     kernarg_segment_byte_size = 24
+; TRAP-BIT-SDAG-NEXT:     workgroup_fbarrier_count = 0
+; TRAP-BIT-SDAG-NEXT:     wavefront_sgpr_count = 10
+; TRAP-BIT-SDAG-NEXT:     workitem_vgpr_count = 1
+; TRAP-BIT-SDAG-NEXT:     reserved_vgpr_first = 0
+; TRAP-BIT-SDAG-NEXT:     reserved_vgpr_count = 0
+; TRAP-BIT-SDAG-NEXT:     reserved_sgpr_first = 0
+; TRAP-BIT-SDAG-NEXT:     reserved_sgpr_count = 0
+; TRAP-BIT-SDAG-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; TRAP-BIT-SDAG-NEXT:     debug_private_segment_buffer_sgpr = 0
+; TRAP-BIT-SDAG-NEXT:     kernarg_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT:     group_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT:     private_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT:     wavefront_size = 6
+; TRAP-BIT-SDAG-NEXT:     call_convention = -1
+; TRAP-BIT-SDAG-NEXT:     runtime_loader_kernel_symbol = 0
+; TRAP-BIT-SDAG-NEXT:    .end_amd_kernel_code_t
+; TRAP-BIT-SDAG-NEXT:  ; %bb.0:
+; TRAP-BIT-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; TRAP-BIT-SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; TRAP-BIT-SDAG-NEXT:    s_mov_b32 s2, -1
+; TRAP-BIT-SDAG-NEXT:    v_mov_b32_e32 v0, 1
+; TRAP-BIT-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; TRAP-BIT-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; TRAP-BIT-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; TRAP-BIT-SDAG-NEXT:    s_endpgm
+;
+; TRAP-BIT-GISEL-LABEL: hsa_trap:
+; TRAP-BIT-GISEL:         .amd_kernel_code_t
+; TRAP-BIT-GISEL-NEXT:     amd_code_version_major = 1
+; TRAP-BIT-GISEL-NEXT:     amd_code_version_minor = 2
+; TRAP-BIT-GISEL-NEXT:     amd_machine_kind = 1
+; TRAP-BIT-GISEL-NEXT:     amd_machine_version_major = 6
+; TRAP-BIT-GISEL-NEXT:     amd_machine_version_minor = 0
+; TRAP-BIT-GISEL-NEXT:     amd_machine_version_stepping = 0
+; TRAP-BIT-GISEL-NEXT:     kernel_code_entry_byte_offset = 256
+; TRAP-BIT-GISEL-NEXT:     kernel_code_prefetch_byte_size = 0
+; TRAP-BIT-GISEL-NEXT:     granulated_workitem_vgpr_count = 0
+; TRAP-BIT-GISEL-NEXT:     granulated_wavefront_sgpr_count = 1
+; TRAP-BIT-GISEL-NEXT:     priority = 0
+; TRAP-BIT-GISEL-NEXT:     float_mode = 240
+; TRAP-BIT-GISEL-NEXT:     priv = 0
+; TRAP-BIT-GISEL-NEXT:     enable_dx10_clamp = 1
+; TRAP-BIT-GISEL-NEXT:     debug_mode = 0
+; TRAP-BIT-GISEL-NEXT:     enable_ieee_mode = 1
+; TRAP-BIT-GISEL-NEXT:     enable_wgp_mode = 0
+; TRAP-BIT-GISEL-NEXT:     enable_mem_ordered = 0
+; TRAP-BIT-GISEL-NEXT:     enable_fwd_progress = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; TRAP-BIT-GISEL-NEXT:     user_sgpr_count = 12
+; TRAP-BIT-GISEL-NEXT:     enable_trap_handler = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_x = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_y = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_z = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_info = 0
+; TRAP-BIT-GISEL-NEXT:     enable_vgpr_workitem_id = 2
+; TRAP-BIT-GISEL-NEXT:     enable_exception_msb = 0
+; TRAP-BIT-GISEL-NEXT:     granulated_lds_size = 0
+; TRAP-BIT-GISEL-NEXT:     enable_exception = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_buffer = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_dispatch_ptr = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_queue_ptr = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_dispatch_id = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_flat_scratch_init = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_size = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; TRAP-BIT-GISEL-NEXT:     enable_wavefront_size32 = 0
+; TRAP-BIT-GISEL-NEXT:     enable_ordered_append_gds = 0
+; TRAP-BIT-GISEL-NEXT:     private_element_size = 1
+; TRAP-BIT-GISEL-NEXT:     is_ptr64 = 1
+; TRAP-BIT-GISEL-NEXT:     is_dynamic_callstack = 0
+; TRAP-BIT-GISEL-NEXT:     is_debug_enabled = 0
+; TRAP-BIT-GISEL-NEXT:     is_xnack_enabled = 0
+; TRAP-BIT-GISEL-NEXT:     workitem_private_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT:     workgroup_group_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT:     gds_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT:     kernarg_segment_byte_size = 24
+; TRAP-BIT-GISEL-NEXT:     workgroup_fbarrier_count = 0
+; TRAP-BIT-GISEL-NEXT:     wavefront_sgpr_count = 10
+; TRAP-BIT-GISEL-NEXT:     workitem_vgpr_count = 1
+; TRAP-BIT-GISEL-NEXT:     reserved_vgpr_first = 0
+; TRAP-BIT-GISEL-NEXT:     reserved_vgpr_count = 0
+; TRAP-BIT-GISEL-NEXT:     reserved_sgpr_first = 0
+; TRAP-BIT-GISEL-NEXT:     reserved_sgpr_count = 0
+; TRAP-BIT-GISEL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; TRAP-BIT-GISEL-NEXT:     debug_private_segment_buffer_sgpr = 0
+; TRAP-BIT-GISEL-NEXT:     kernarg_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT:     group_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT:     private_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT:     wavefront_size = 6
+; TRAP-BIT-GISEL-NEXT:     call_convention = -1
+; TRAP-BIT-GISEL-NEXT:     runtime_loader_kernel_symbol = 0
+; TRAP-BIT-GISEL-NEXT:    .end_amd_kernel_code_t
+; TRAP-BIT-GISEL-NEXT:  ; %bb.0:
+; TRAP-BIT-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; TRAP-BIT-GISEL-NEXT:    v_mov_b32_e32 v0, 1
+; TRAP-BIT-GISEL-NEXT:    s_mov_b32 s2, -1
+; TRAP-BIT-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; TRAP-BIT-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; TRAP-BIT-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; TRAP-BIT-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; TRAP-BIT-GISEL-NEXT:    s_endpgm
+;
+; NO-TRAP-BIT-SDAG-LABEL: hsa_trap:
+; NO-TRAP-BIT-SDAG:         .amd_kernel_code_t
+; NO-TRAP-BIT-SDAG-NEXT:     amd_code_version_major = 1
+; NO-TRAP-BIT-SDAG-NEXT:     amd_code_version_minor = 2
+; NO-TRAP-BIT-SDAG-NEXT:     amd_machine_kind = 1
+; NO-TRAP-BIT-SDAG-NEXT:     amd_machine_version_major = 6
+; NO-TRAP-BIT-SDAG-NEXT:     amd_machine_version_minor = 0
+; NO-TRAP-BIT-SDAG-NEXT:     amd_machine_version_stepping = 0
+; NO-TRAP-BIT-SDAG-NEXT:     kernel_code_entry_byte_offset = 256
+; NO-TRAP-BIT-SDAG-NEXT:     kernel_code_prefetch_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     granulated_workitem_vgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT:     granulated_wavefront_sgpr_count = 1
+; NO-TRAP-BIT-SDAG-NEXT:     priority = 0
+; NO-TRAP-BIT-SDAG-NEXT:     float_mode = 240
+; NO-TRAP-BIT-SDAG-NEXT:     priv = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_dx10_clamp = 1
+; NO-TRAP-BIT-SDAG-NEXT:     debug_mode = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_ieee_mode = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_wgp_mode = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_mem_ordered = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_fwd_progress = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; NO-TRAP-BIT-SDAG-NEXT:     user_sgpr_count = 12
+; NO-TRAP-BIT-SDAG-NEXT:     enable_trap_handler = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_x = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_y = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_z = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_info = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_vgpr_workitem_id = 2
+; NO-TRAP-BIT-SDAG-NEXT:     enable_exception_msb = 0
+; NO-TRAP-BIT-SDAG-NEXT:     granulated_lds_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_exception = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_buffer = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_dispatch_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_queue_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_dispatch_id = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_flat_scratch_init = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_wavefront_size32 = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_ordered_append_gds = 0
+; NO-TRAP-BIT-SDAG-NEXT:     private_element_size = 1
+; NO-TRAP-BIT-SDAG-NEXT:     is_ptr64 = 1
+; NO-TRAP-BIT-SDAG-NEXT:     is_dynamic_callstack = 0
+; NO-TRAP-BIT-SDAG-NEXT:     is_debug_enabled = 0
+; NO-TRAP-BIT-SDAG-NEXT:     is_xnack_enabled = 0
+; NO-TRAP-BIT-SDAG-NEXT:     workitem_private_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     workgroup_group_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     gds_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     kernarg_segment_byte_size = 24
+; NO-TRAP-BIT-SDAG-NEXT:     workgroup_fbarrier_count = 0
+; NO-TRAP-BIT-SDAG-NEXT:     wavefront_sgpr_count = 10
+; NO-TRAP-BIT-SDAG-NEXT:     workitem_vgpr_count = 1
+; NO-TRAP-BIT-SDAG-NEXT:     reserved_vgpr_first = 0
+; NO-TRAP-BIT-SDAG-NEXT:     reserved_vgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT:     reserved_sgpr_first = 0
+; NO-TRAP-BIT-SDAG-NEXT:     reserved_sgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; NO-TRAP-BIT-SDAG-NEXT:     debug_private_segment_buffer_sgpr = 0
+; NO-TRAP-BIT-SDAG-NEXT:     kernarg_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT:     group_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT:     private_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT:     wavefront_size = 6
+; NO-TRAP-BIT-SDAG-NEXT:     call_convention = -1
+; NO-TRAP-BIT-SDAG-NEXT:     runtime_loader_kernel_symbol = 0
+; NO-TRAP-BIT-SDAG-NEXT:    .end_amd_kernel_code_t
+; NO-TRAP-BIT-SDAG-NEXT:  ; %bb.0:
+; NO-TRAP-BIT-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-TRAP-BIT-SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; NO-TRAP-BIT-SDAG-NEXT:    s_mov_b32 s2, -1
+; NO-TRAP-BIT-SDAG-NEXT:    v_mov_b32_e32 v0, 1
+; NO-TRAP-BIT-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; NO-TRAP-BIT-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; NO-TRAP-BIT-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-SDAG-NEXT:    s_endpgm
+;
+; NO-TRAP-BIT-GISEL-LABEL: hsa_trap:
+; NO-TRAP-BIT-GISEL:         .amd_kernel_code_t
+; NO-TRAP-BIT-GISEL-NEXT:     amd_code_version_major = 1
+; NO-TRAP-BIT-GISEL-NEXT:     amd_code_version_minor = 2
+; NO-TRAP-BIT-GISEL-NEXT:     amd_machine_kind = 1
+; NO-TRAP-BIT-GISEL-NEXT:     amd_machine_version_major = 6
+; NO-TRAP-BIT-GISEL-NEXT:     amd_machine_version_minor = 0
+; NO-TRAP-BIT-GISEL-NEXT:     amd_machine_version_stepping = 0
+; NO-TRAP-BIT-GISEL-NEXT:     kernel_code_entry_byte_offset = 256
+; NO-TRAP-BIT-GISEL-NEXT:     kernel_code_prefetch_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     granulated_workitem_vgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT:     granulated_wavefront_sgpr_count = 1
+; NO-TRAP-BIT-GISEL-NEXT:     priority = 0
+; NO-TRAP-BIT-GISEL-NEXT:     float_mode = 240
+; NO-TRAP-BIT-GISEL-NEXT:     priv = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_dx10_clamp = 1
+; NO-TRAP-BIT-GISEL-NEXT:     debug_mode = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_ieee_mode = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_wgp_mode = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_mem_ordered = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_fwd_progress = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; NO-TRAP-BIT-GISEL-NEXT:     user_sgpr_count = 12
+; NO-TRAP-BIT-GISEL-NEXT:     enable_trap_handler = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_x = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_y = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_z = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_info = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_vgpr_workitem_id = 2
+; NO-TRAP-BIT-GISEL-NEXT:     enable_exception_msb = 0
+; NO-TRAP-BIT-GISEL-NEXT:     granulated_lds_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_exception = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_buffer = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_dispatch_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_queue_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_dispatch_id = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_flat_scratch_init = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_wavefront_size32 = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_ordered_append_gds = 0
+; NO-TRAP-BIT-GISEL-NEXT:     private_element_size = 1
+; NO-TRAP-BIT-GISEL-NEXT:     is_ptr64 = 1
+; NO-TRAP-BIT-GISEL-NEXT:     is_dynamic_callstack = 0
+; NO-TRAP-BIT-GISEL-NEXT:     is_debug_enabled = 0
+; NO-TRAP-BIT-GISEL-NEXT:     is_xnack_enabled = 0
+; NO-TRAP-BIT-GISEL-NEXT:     workitem_private_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     workgroup_group_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     gds_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     kernarg_segment_byte_size = 24
+; NO-TRAP-BIT-GISEL-NEXT:     workgroup_fbarrier_count = 0
+; NO-TRAP-BIT-GISEL-NEXT:     wavefront_sgpr_count = 10
+; NO-TRAP-BIT-GISEL-NEXT:     workitem_vgpr_count = 1
+; NO-TRAP-BIT-GISEL-NEXT:     reserved_vgpr_first = 0
+; NO-TRAP-BIT-GISEL-NEXT:     reserved_vgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT:     reserved_sgpr_first = 0
+; NO-TRAP-BIT-GISEL-NEXT:     reserved_sgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; NO-TRAP-BIT-GISEL-NEXT:     debug_private_segment_buffer_sgpr = 0
+; NO-TRAP-BIT-GISEL-NEXT:     kernarg_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT:     group_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT:     private_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT:     wavefront_size = 6
+; NO-TRAP-BIT-GISEL-NEXT:     call_convention = -1
+; NO-TRAP-BIT-GISEL-NEXT:     runtime_loader_kernel_symbol = 0
+; NO-TRAP-BIT-GISEL-NEXT:    .end_amd_kernel_code_t
+; NO-TRAP-BIT-GISEL-NEXT:  ; %bb.0:
+; NO-TRAP-BIT-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-TRAP-BIT-GISEL-NEXT:    v_mov_b32_e32 v0, 1
+; NO-TRAP-BIT-GISEL-NEXT:    s_mov_b32 s2, -1
+; NO-TRAP-BIT-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; NO-TRAP-BIT-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; NO-TRAP-BIT-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; NO-TRAP-BIT-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-GISEL-NEXT:    s_endpgm
+;
+; GCN-WARNING-SDAG-LABEL: hsa_trap:
+; GCN-WARNING-SDAG:       ; %bb.0:
+; GCN-WARNING-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-WARNING-SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-WARNING-SDAG-NEXT:    s_mov_b32 s2, -1
+; GCN-WARNING-SDAG-NEXT:    v_mov_b32_e32 v0, 1
+; GCN-WARNING-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-WARNING-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GCN-WARNING-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GCN-WARNING-SDAG-NEXT:    s_endpgm
+;
+; GCN-WARNING-GISEL-LABEL: hsa_trap:
+; GCN-WARNING-GISEL:       ; %bb.0:
+; GCN-WARNING-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-WARNING-GISEL-NEXT:    v_mov_b32_e32 v0, 1
+; GCN-WARNING-GISEL-NEXT:    s_mov_b32 s2, -1
+; GCN-WARNING-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-WARNING-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-WARNING-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GCN-WARNING-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GCN-WARNING-GISEL-NEXT:    s_endpgm
   store volatile i32 1, ptr addrspace(1) %arg0
   call void @llvm.trap()
   unreachable
@@ -57,26 +458,466 @@ define amdgpu_kernel void @hsa_trap(ptr addrspace(1) nocapture readonly %arg0) {
   ret void
 }
 
-; MESA-TRAP: .section .AMDGPU.config
-; MESA-TRAP:  .long   47180
-; MESA-TRAP-NEXT: .long   5080
-
-; NOMESA-TRAP: .section .AMDGPU.config
-; NOMESA-TRAP:  .long   47180
-; NOMESA-TRAP-NEXT: .long   5016
-
-; GCN-LABEL: {{^}}hsa_debugtrap:
-; HSA-TRAP: s_trap 3
-; HSA-TRAP: flat_store_dword v[0:1], v3
-; HSA-TRAP: COMPUTE_PGM_RSRC2:TRAP_HANDLER: 0
-
-; for llvm.debugtrap in non-hsa path without ABI, generate a warning and a s_endpgm instruction
-; NO-HSA-TRAP: s_endpgm
-
-; TRAP-BIT: enable_trap_handler = 1
-; NO-TRAP-BIT: enable_trap_handler = 0
-; NO-MESA-TRAP: s_endpgm
 define amdgpu_kernel void @hsa_debugtrap(ptr addrspace(1) nocapture readonly %arg0) {
+; HSA-TRAP-SDAG-LABEL: hsa_debugtrap:
+; HSA-TRAP-SDAG:       ; %bb.0:
+; HSA-TRAP-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; HSA-TRAP-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; HSA-TRAP-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-TRAP-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v2, 1
+; HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v3, 2
+; HSA-TRAP-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; HSA-TRAP-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; HSA-TRAP-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; HSA-TRAP-SDAG-NEXT:    s_trap 3
+; HSA-TRAP-SDAG-NEXT:    flat_store_dword v[0:1], v3
+; HSA-TRAP-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; HSA-TRAP-SDAG-NEXT:    s_endpgm
+;
+; HSA-TRAP-GISEL-LABEL: hsa_debugtrap:
+; HSA-TRAP-GISEL:       ; %bb.0:
+; HSA-TRAP-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; HSA-TRAP-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; HSA-TRAP-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-TRAP-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v2, 1
+; HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v3, 2
+; HSA-TRAP-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; HSA-TRAP-GISEL-NEXT:    flat_store_dword v[0:1], v2
+; HSA-TRAP-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; HSA-TRAP-GISEL-NEXT:    s_trap 3
+; HSA-TRAP-GISEL-NEXT:    flat_store_dword v[0:1], v3
+; HSA-TRAP-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; HSA-TRAP-GISEL-NEXT:    s_endpgm
+;
+; NO-HSA-TRAP-SDAG-LABEL: hsa_debugtrap:
+; NO-HSA-TRAP-SDAG:       ; %bb.0:
+; NO-HSA-TRAP-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; NO-HSA-TRAP-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; NO-HSA-TRAP-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; NO-HSA-TRAP-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v2, 1
+; NO-HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v3, 2
+; NO-HSA-TRAP-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; NO-HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; NO-HSA-TRAP-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-SDAG-NEXT:    flat_store_dword v[0:1], v3
+; NO-HSA-TRAP-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-SDAG-NEXT:    s_endpgm
+;
+; NO-HSA-TRAP-GISEL-LABEL: hsa_debugtrap:
+; NO-HSA-TRAP-GISEL:       ; %bb.0:
+; NO-HSA-TRAP-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; NO-HSA-TRAP-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; NO-HSA-TRAP-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; NO-HSA-TRAP-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v2, 1
+; NO-HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v3, 2
+; NO-HSA-TRAP-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; NO-HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-GISEL-NEXT:    flat_store_dword v[0:1], v2
+; NO-HSA-TRAP-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-GISEL-NEXT:    flat_store_dword v[0:1], v3
+; NO-HSA-TRAP-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-GISEL-NEXT:    s_endpgm
+;
+; HSA-WARNING-SDAG-LABEL: hsa_debugtrap:
+; HSA-WARNING-SDAG:       ; %bb.0:
+; HSA-WARNING-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; HSA-WARNING-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; HSA-WARNING-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-WARNING-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-WARNING-SDAG-NEXT:    v_mov_b32_e32 v2, 1
+; HSA-WARNING-SDAG-NEXT:    v_mov_b32_e32 v3, 2
+; HSA-WARNING-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-WARNING-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; HSA-WARNING-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; HSA-WARNING-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; HSA-WARNING-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; HSA-WARNING-SDAG-NEXT:    flat_store_dword v[0:1], v3
+; HSA-WARNING-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; HSA-WARNING-SDAG-NEXT:    s_endpgm
+;
+; HSA-WARNING-GISEL-LABEL: hsa_debugtrap:
+; HSA-WARNING-GISEL:       ; %bb.0:
+; HSA-WARNING-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; HSA-WARNING-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; HSA-WARNING-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-WARNING-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-WARNING-GISEL-NEXT:    v_mov_b32_e32 v2, 1
+; HSA-WARNING-GISEL-NEXT:    v_mov_b32_e32 v3, 2
+; HSA-WARNING-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-WARNING-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; HSA-WARNING-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; HSA-WARNING-GISEL-NEXT:    flat_store_dword v[0:1], v2
+; HSA-WARNING-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; HSA-WARNING-GISEL-NEXT:    flat_store_dword v[0:1], v3
+; HSA-WARNING-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; HSA-WARNING-GISEL-NEXT:    s_endpgm
+;
+; TRAP-BIT-SDAG-LABEL: hsa_debugtrap:
+; TRAP-BIT-SDAG:         .amd_kernel_code_t
+; TRAP-BIT-SDAG-NEXT:     amd_code_version_major = 1
+; TRAP-BIT-SDAG-NEXT:     amd_code_version_minor = 2
+; TRAP-BIT-SDAG-NEXT:     amd_machine_kind = 1
+; TRAP-BIT-SDAG-NEXT:     amd_machine_version_major = 6
+; TRAP-BIT-SDAG-NEXT:     amd_machine_version_minor = 0
+; TRAP-BIT-SDAG-NEXT:     amd_machine_version_stepping = 0
+; TRAP-BIT-SDAG-NEXT:     kernel_code_entry_byte_offset = 256
+; TRAP-BIT-SDAG-NEXT:     kernel_code_prefetch_byte_size = 0
+; TRAP-BIT-SDAG-NEXT:     granulated_workitem_vgpr_count = 0
+; TRAP-BIT-SDAG-NEXT:     granulated_wavefront_sgpr_count = 1
+; TRAP-BIT-SDAG-NEXT:     priority = 0
+; TRAP-BIT-SDAG-NEXT:     float_mode = 240
+; TRAP-BIT-SDAG-NEXT:     priv = 0
+; TRAP-BIT-SDAG-NEXT:     enable_dx10_clamp = 1
+; TRAP-BIT-SDAG-NEXT:     debug_mode = 0
+; TRAP-BIT-SDAG-NEXT:     enable_ieee_mode = 1
+; TRAP-BIT-SDAG-NEXT:     enable_wgp_mode = 0
+; TRAP-BIT-SDAG-NEXT:     enable_mem_ordered = 0
+; TRAP-BIT-SDAG-NEXT:     enable_fwd_progress = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; TRAP-BIT-SDAG-NEXT:     user_sgpr_count = 12
+; TRAP-BIT-SDAG-NEXT:     enable_trap_handler = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_x = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_y = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_z = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_info = 0
+; TRAP-BIT-SDAG-NEXT:     enable_vgpr_workitem_id = 2
+; TRAP-BIT-SDAG-NEXT:     enable_exception_msb = 0
+; TRAP-BIT-SDAG-NEXT:     granulated_lds_size = 0
+; TRAP-BIT-SDAG-NEXT:     enable_exception = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_buffer = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_dispatch_ptr = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_queue_ptr = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_dispatch_id = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_flat_scratch_init = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_size = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; TRAP-BIT-SDAG-NEXT:     enable_wavefront_size32 = 0
+; TRAP-BIT-SDAG-NEXT:     enable_ordered_append_gds = 0
+; TRAP-BIT-SDAG-NEXT:     private_element_size = 1
+; TRAP-BIT-SDAG-NEXT:     is_ptr64 = 1
+; TRAP-BIT-SDAG-NEXT:     is_dynamic_callstack = 0
+; TRAP-BIT-SDAG-NEXT:     is_debug_enabled = 0
+; TRAP-BIT-SDAG-NEXT:     is_xnack_enabled = 0
+; TRAP-BIT-SDAG-NEXT:     workitem_private_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT:     workgroup_group_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT:     gds_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT:     kernarg_segment_byte_size = 24
+; TRAP-BIT-SDAG-NEXT:     workgroup_fbarrier_count = 0
+; TRAP-BIT-SDAG-NEXT:     wavefront_sgpr_count = 10
+; TRAP-BIT-SDAG-NEXT:     workitem_vgpr_count = 2
+; TRAP-BIT-SDAG-NEXT:     reserved_vgpr_first = 0
+; TRAP-BIT-SDAG-NEXT:     reserved_vgpr_count = 0
+; TRAP-BIT-SDAG-NEXT:     reserved_sgpr_first = 0
+; TRAP-BIT-SDAG-NEXT:     reserved_sgpr_count = 0
+; TRAP-BIT-SDAG-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; TRAP-BIT-SDAG-NEXT:     debug_private_segment_buffer_sgpr = 0
+; TRAP-BIT-SDAG-NEXT:     kernarg_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT:     group_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT:     private_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT:     wavefront_size = 6
+; TRAP-BIT-SDAG-NEXT:     call_convention = -1
+; TRAP-BIT-SDAG-NEXT:     runtime_loader_kernel_symbol = 0
+; TRAP-BIT-SDAG-NEXT:    .end_amd_kernel_code_t
+; TRAP-BIT-SDAG-NEXT:  ; %bb.0:
+; TRAP-BIT-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; TRAP-BIT-SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; TRAP-BIT-SDAG-NEXT:    s_mov_b32 s2, -1
+; TRAP-BIT-SDAG-NEXT:    v_mov_b32_e32 v0, 1
+; TRAP-BIT-SDAG-NEXT:    v_mov_b32_e32 v1, 2
+; TRAP-BIT-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; TRAP-BIT-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; TRAP-BIT-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; TRAP-BIT-SDAG-NEXT:    buffer_store_dword v1, off, s[0:3], 0
+; TRAP-BIT-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; TRAP-BIT-SDAG-NEXT:    s_endpgm
+;
+; TRAP-BIT-GISEL-LABEL: hsa_debugtrap:
+; TRAP-BIT-GISEL:         .amd_kernel_code_t
+; TRAP-BIT-GISEL-NEXT:     amd_code_version_major = 1
+; TRAP-BIT-GISEL-NEXT:     amd_code_version_minor = 2
+; TRAP-BIT-GISEL-NEXT:     amd_machine_kind = 1
+; TRAP-BIT-GISEL-NEXT:     amd_machine_version_major = 6
+; TRAP-BIT-GISEL-NEXT:     amd_machine_version_minor = 0
+; TRAP-BIT-GISEL-NEXT:     amd_machine_version_stepping = 0
+; TRAP-BIT-GISEL-NEXT:     kernel_code_entry_byte_offset = 256
+; TRAP-BIT-GISEL-NEXT:     kernel_code_prefetch_byte_size = 0
+; TRAP-BIT-GISEL-NEXT:     granulated_workitem_vgpr_count = 0
+; TRAP-BIT-GISEL-NEXT:     granulated_wavefront_sgpr_count = 1
+; TRAP-BIT-GISEL-NEXT:     priority = 0
+; TRAP-BIT-GISEL-NEXT:     float_mode = 240
+; TRAP-BIT-GISEL-NEXT:     priv = 0
+; TRAP-BIT-GISEL-NEXT:     enable_dx10_clamp = 1
+; TRAP-BIT-GISEL-NEXT:     debug_mode = 0
+; TRAP-BIT-GISEL-NEXT:     enable_ieee_mode = 1
+; TRAP-BIT-GISEL-NEXT:     enable_wgp_mode = 0
+; TRAP-BIT-GISEL-NEXT:     enable_mem_ordered = 0
+; TRAP-BIT-GISEL-NEXT:     enable_fwd_progress = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; TRAP-BIT-GISEL-NEXT:     user_sgpr_count = 12
+; TRAP-BIT-GISEL-NEXT:     enable_trap_handler = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_x = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_y = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_z = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_info = 0
+; TRAP-BIT-GISEL-NEXT:     enable_vgpr_workitem_id = 2
+; TRAP-BIT-GISEL-NEXT:     enable_exception_msb = 0
+; TRAP-BIT-GISEL-NEXT:     granulated_lds_size = 0
+; TRAP-BIT-GISEL-NEXT:     enable_exception = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_buffer = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_dispatch_ptr = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_queue_ptr = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_dispatch_id = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_flat_scratch_init = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_size = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; TRAP-BIT-GISEL-NEXT:     enable_wavefront_size32 = 0
+; TRAP-BIT-GISEL-NEXT:     enable_ordered_append_gds = 0
+; TRAP-BIT-GISEL-NEXT:     private_element_size = 1
+; TRAP-BIT-GISEL-NEXT:     is_ptr64 = 1
+; TRAP-BIT-GISEL-NEXT:     is_dynamic_callstack = 0
+; TRAP-BIT-GISEL-NEXT:     is_debug_enabled = 0
+; TRAP-BIT-GISEL-NEXT:     is_xnack_enabled = 0
+; TRAP-BIT-GISEL-NEXT:     workitem_private_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT:     workgroup_group_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT:     gds_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT:     kernarg_segment_byte_size = 24
+; TRAP-BIT-GISEL-NEXT:     workgroup_fbarrier_count = 0
+; TRAP-BIT-GISEL-NEXT:     wavefront_sgpr_count = 10
+; TRAP-BIT-GISEL-NEXT:     workitem_vgpr_count = 2
+; TRAP-BIT-GISEL-NEXT:     reserved_vgpr_first = 0
+; TRAP-BIT-GISEL-NEXT:     reserved_vgpr_count = 0
+; TRAP-BIT-GISEL-NEXT:     reserved_sgpr_first = 0
+; TRAP-BIT-GISEL-NEXT:     reserved_sgpr_count = 0
+; TRAP-BIT-GISEL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; TRAP-BIT-GISEL-NEXT:     debug_private_segment_buffer_sgpr = 0
+; TRAP-BIT-GISEL-NEXT:     kernarg_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT:     group_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT:     private_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT:     wavefront_size = 6
+; TRAP-BIT-GISEL-NEXT:     call_convention = -1
+; TRAP-BIT-GISEL-NEXT:     runtime_loader_kernel_symbol = 0
+; TRAP-BIT-GISEL-NEXT:    .end_amd_kernel_code_t
+; TRAP-BIT-GISEL-NEXT:  ; %bb.0:
+; TRAP-BIT-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; TRAP-BIT-GISEL-NEXT:    v_mov_b32_e32 v0, 1
+; TRAP-BIT-GISEL-NEXT:    s_mov_b32 s2, -1
+; TRAP-BIT-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; TRAP-BIT-GISEL-NEXT:    v_mov_b32_e32 v1, 2
+; TRAP-BIT-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; TRAP-BIT-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; TRAP-BIT-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; TRAP-BIT-GISEL-NEXT:    buffer_store_dword v1, off, s[0:3], 0
+; TRAP-BIT-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; TRAP-BIT-GISEL-NEXT:    s_endpgm
+;
+; NO-TRAP-BIT-SDAG-LABEL: hsa_debugtrap:
+; NO-TRAP-BIT-SDAG:         .amd_kernel_code_t
+; NO-TRAP-BIT-SDAG-NEXT:     amd_code_version_major = 1
+; NO-TRAP-BIT-SDAG-NEXT:     amd_code_version_minor = 2
+; NO-TRAP-BIT-SDAG-NEXT:     amd_machine_kind = 1
+; NO-TRAP-BIT-SDAG-NEXT:     amd_machine_version_major = 6
+; NO-TRAP-BIT-SDAG-NEXT:     amd_machine_version_minor = 0
+; NO-TRAP-BIT-SDAG-NEXT:     amd_machine_version_stepping = 0
+; NO-TRAP-BIT-SDAG-NEXT:     kernel_code_entry_byte_offset = 256
+; NO-TRAP-BIT-SDAG-NEXT:     kernel_code_prefetch_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     granulated_workitem_vgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT:     granulated_wavefront_sgpr_count = 1
+; NO-TRAP-BIT-SDAG-NEXT:     priority = 0
+; NO-TRAP-BIT-SDAG-NEXT:     float_mode = 240
+; NO-TRAP-BIT-SDAG-NEXT:     priv = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_dx10_clamp = 1
+; NO-TRAP-BIT-SDAG-NEXT:     debug_mode = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_ieee_mode = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_wgp_mode = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_mem_ordered = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_fwd_progress = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; NO-TRAP-BIT-SDAG-NEXT:     user_sgpr_count = 12
+; NO-TRAP-BIT-SDAG-NEXT:     enable_trap_handler = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_x = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_y = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_z = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_info = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_vgpr_workitem_id = 2
+; NO-TRAP-BIT-SDAG-NEXT:     enable_exception_msb = 0
+; NO-TRAP-BIT-SDAG-NEXT:     granulated_lds_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_exception = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_buffer = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_dispatch_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_queue_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_dispatch_id = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_flat_scratch_init = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_wavefront_size32 = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_ordered_append_gds = 0
+; NO-TRAP-BIT-SDAG-NEXT:     private_element_size = 1
+; NO-TRAP-BIT-SDAG-NEXT:     is_ptr64 = 1
+; NO-TRAP-BIT-SDAG-NEXT:     is_dynamic_callstack = 0
+; NO-TRAP-BIT-SDAG-NEXT:     is_debug_enabled = 0
+; NO-TRAP-BIT-SDAG-NEXT:     is_xnack_enabled = 0
+; NO-TRAP-BIT-SDAG-NEXT:     workitem_private_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     workgroup_group_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     gds_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     kernarg_segment_byte_size = 24
+; NO-TRAP-BIT-SDAG-NEXT:     workgroup_fbarrier_count = 0
+; NO-TRAP-BIT-SDAG-NEXT:     wavefront_sgpr_count = 10
+; NO-TRAP-BIT-SDAG-NEXT:     workitem_vgpr_count = 2
+; NO-TRAP-BIT-SDAG-NEXT:     reserved_vgpr_first = 0
+; NO-TRAP-BIT-SDAG-NEXT:     reserved_vgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT:     reserved_sgpr_first = 0
+; NO-TRAP-BIT-SDAG-NEXT:     reserved_sgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; NO-TRAP-BIT-SDAG-NEXT:     debug_private_segment_buffer_sgpr = 0
+; NO-TRAP-BIT-SDAG-NEXT:     kernarg_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT:     group_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT:     private_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT:     wavefront_size = 6
+; NO-TRAP-BIT-SDAG-NEXT:     call_convention = -1
+; NO-TRAP-BIT-SDAG-NEXT:     runtime_loader_kernel_symbol = 0
+; NO-TRAP-BIT-SDAG-NEXT:    .end_amd_kernel_code_t
+; NO-TRAP-BIT-SDAG-NEXT:  ; %bb.0:
+; NO-TRAP-BIT-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-TRAP-BIT-SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; NO-TRAP-BIT-SDAG-NEXT:    s_mov_b32 s2, -1
+; NO-TRAP-BIT-SDAG-NEXT:    v_mov_b32_e32 v0, 1
+; NO-TRAP-BIT-SDAG-NEXT:    v_mov_b32_e32 v1, 2
+; NO-TRAP-BIT-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; NO-TRAP-BIT-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; NO-TRAP-BIT-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-SDAG-NEXT:    buffer_store_dword v1, off, s[0:3], 0
+; NO-TRAP-BIT-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-SDAG-NEXT:    s_endpgm
+;
+; NO-TRAP-BIT-GISEL-LABEL: hsa_debugtrap:
+; NO-TRAP-BIT-GISEL:         .amd_kernel_code_t
+; NO-TRAP-BIT-GISEL-NEXT:     amd_code_version_major = 1
+; NO-TRAP-BIT-GISEL-NEXT:     amd_code_version_minor = 2
+; NO-TRAP-BIT-GISEL-NEXT:     amd_machine_kind = 1
+; NO-TRAP-BIT-GISEL-NEXT:     amd_machine_version_major = 6
+; NO-TRAP-BIT-GISEL-NEXT:     amd_machine_version_minor = 0
+; NO-TRAP-BIT-GISEL-NEXT:     amd_machine_version_stepping = 0
+; NO-TRAP-BIT-GISEL-NEXT:     kernel_code_entry_byte_offset = 256
+; NO-TRAP-BIT-GISEL-NEXT:     kernel_code_prefetch_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     granulated_workitem_vgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT:     granulated_wavefront_sgpr_count = 1
+; NO-TRAP-BIT-GISEL-NEXT:     priority = 0
+; NO-TRAP-BIT-GISEL-NEXT:     float_mode = 240
+; NO-TRAP-BIT-GISEL-NEXT:     priv = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_dx10_clamp = 1
+; NO-TRAP-BIT-GISEL-NEXT:     debug_mode = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_ieee_mode = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_wgp_mode = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_mem_ordered = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_fwd_progress = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; NO-TRAP-BIT-GISEL-NEXT:     user_sgpr_count = 12
+; NO-TRAP-BIT-GISEL-NEXT:     enable_trap_handler = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_x = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_y = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_z = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_info = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_vgpr_workitem_id = 2
+; NO-TRAP-BIT-GISEL-NEXT:     enable_exception_msb = 0
+; NO-TRAP-BIT-GISEL-NEXT:     granulated_lds_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_exception = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_buffer = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_dispatch_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_queue_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_dispatch_id = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_flat_scratch_init = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_wavefront_size32 = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_ordered_append_gds = 0
+; NO-TRAP-BIT-GISEL-NEXT:     private_element_size = 1
+; NO-TRAP-BIT-GISEL-NEXT:     is_ptr64 = 1
+; NO-TRAP-BIT-GISEL-NEXT:     is_dynamic_callstack = 0
+; NO-TRAP-BIT-GISEL-NEXT:     is_debug_enabled = 0
+; NO-TRAP-BIT-GISEL-NEXT:     is_xnack_enabled = 0
+; NO-TRAP-BIT-GISEL-NEXT:     workitem_private_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     workgroup_group_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     gds_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     kernarg_segment_byte_size = 24
+; NO-TRAP-BIT-GISEL-NEXT:     workgroup_fbarrier_count = 0
+; NO-TRAP-BIT-GISEL-NEXT:     wavefront_sgpr_count = 10
+; NO-TRAP-BIT-GISEL-NEXT:     workitem_vgpr_count = 2
+; NO-TRAP-BIT-GISEL-NEXT:     reserved_vgpr_first = 0
+; NO-TRAP-BIT-GISEL-NEXT:     reserved_vgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT:     reserved_sgpr_first = 0
+; NO-TRAP-BIT-GISEL-NEXT:     reserved_sgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; NO-TRAP-BIT-GISEL-NEXT:     debug_private_segment_buffer_sgpr = 0
+; NO-TRAP-BIT-GISEL-NEXT:     kernarg_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT:     group_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT:     private_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT:     wavefront_size = 6
+; NO-TRAP-BIT-GISEL-NEXT:     call_convention = -1
+; NO-TRAP-BIT-GISEL-NEXT:     runtime_loader_kernel_symbol = 0
+; NO-TRAP-BIT-GISEL-NEXT:    .end_amd_kernel_code_t
+; NO-TRAP-BIT-GISEL-NEXT:  ; %bb.0:
+; NO-TRAP-BIT-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-TRAP-BIT-GISEL-NEXT:    v_mov_b32_e32 v0, 1
+; NO-TRAP-BIT-GISEL-NEXT:    s_mov_b32 s2, -1
+; NO-TRAP-BIT-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; NO-TRAP-BIT-GISEL-NEXT:    v_mov_b32_e32 v1, 2
+; NO-TRAP-BIT-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; NO-TRAP-BIT-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; NO-TRAP-BIT-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-GISEL-NEXT:    buffer_store_dword v1, off, s[0:3], 0
+; NO-TRAP-BIT-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-GISEL-NEXT:    s_endpgm
+;
+; GCN-WARNING-SDAG-LABEL: hsa_debugtrap:
+; GCN-WARNING-SDAG:       ; %bb.0:
+; GCN-WARNING-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-WARNING-SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-WARNING-SDAG-NEXT:    s_mov_b32 s2, -1
+; GCN-WARNING-SDAG-NEXT:    v_mov_b32_e32 v0, 1
+; GCN-WARNING-SDAG-NEXT:    v_mov_b32_e32 v1, 2
+; GCN-WARNING-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-WARNING-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GCN-WARNING-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GCN-WARNING-SDAG-NEXT:    buffer_store_dword v1, off, s[0:3], 0
+; GCN-WARNING-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GCN-WARNING-SDAG-NEXT:    s_endpgm
+;
+; GCN-WARNING-GISEL-LABEL: hsa_debugtrap:
+; GCN-WARNING-GISEL:       ; %bb.0:
+; GCN-WARNING-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-WARNING-GISEL-NEXT:    v_mov_b32_e32 v0, 1
+; GCN-WARNING-GISEL-NEXT:    s_mov_b32 s2, -1
+; GCN-WARNING-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-WARNING-GISEL-NEXT:    v_mov_b32_e32 v1, 2
+; GCN-WARNING-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-WARNING-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GCN-WARNING-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GCN-WARNING-GISEL-NEXT:    buffer_store_dword v1, off, s[0:3], 0
+; GCN-WARNING-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GCN-WARNING-GISEL-NEXT:    s_endpgm
   store volatile i32 1, ptr addrspace(1) %arg0
   call void @llvm.debugtrap()
   store volatile i32 2, ptr addrspace(1) %arg0
@@ -84,12 +925,430 @@ define amdgpu_kernel void @hsa_debugtrap(ptr addrspace(1) nocapture readonly %ar
 }
 
 ; For non-HSA path
-; GCN-LABEL: {{^}}trap:
-; TRAP-BIT: enable_trap_handler = 1
-; NO-TRAP-BIT: enable_trap_handler = 0
-; NO-HSA-TRAP: s_endpgm
-; NO-MESA-TRAP: s_endpgm
 define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) {
+; HSA-TRAP-SDAG-LABEL: trap:
+; HSA-TRAP-SDAG:       ; %bb.0:
+; HSA-TRAP-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; HSA-TRAP-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; HSA-TRAP-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-TRAP-SDAG-NEXT:    s_mov_b64 s[0:1], s[6:7]
+; HSA-TRAP-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[8:9], 0x0
+; HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v2, 1
+; HSA-TRAP-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v1, s3
+; HSA-TRAP-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; HSA-TRAP-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; HSA-TRAP-SDAG-NEXT:    s_trap 2
+;
+; HSA-TRAP-GISEL-LABEL: trap:
+; HSA-TRAP-GISEL:       ; %bb.0:
+; HSA-TRAP-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; HSA-TRAP-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; HSA-TRAP-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-TRAP-GISEL-NEXT:    s_mov_b64 s[0:1], s[6:7]
+; HSA-TRAP-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[8:9], 0x0
+; HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v2, 1
+; HSA-TRAP-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; HSA-TRAP-GISEL-NEXT:    flat_store_dword v[0:1], v2
+; HSA-TRAP-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; HSA-TRAP-GISEL-NEXT:    s_trap 2
+;
+; NO-HSA-TRAP-SDAG-LABEL: trap:
+; NO-HSA-TRAP-SDAG:       ; %bb.0:
+; NO-HSA-TRAP-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; NO-HSA-TRAP-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; NO-HSA-TRAP-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; NO-HSA-TRAP-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v2, 1
+; NO-HSA-TRAP-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; NO-HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; NO-HSA-TRAP-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-SDAG-NEXT:    s_endpgm
+;
+; NO-HSA-TRAP-GISEL-LABEL: trap:
+; NO-HSA-TRAP-GISEL:       ; %bb.0:
+; NO-HSA-TRAP-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; NO-HSA-TRAP-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; NO-HSA-TRAP-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; NO-HSA-TRAP-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v2, 1
+; NO-HSA-TRAP-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; NO-HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-GISEL-NEXT:    flat_store_dword v[0:1], v2
+; NO-HSA-TRAP-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-GISEL-NEXT:    s_endpgm
+;
+; HSA-WARNING-SDAG-LABEL: trap:
+; HSA-WARNING-SDAG:       ; %bb.0:
+; HSA-WARNING-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; HSA-WARNING-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; HSA-WARNING-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-WARNING-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-WARNING-SDAG-NEXT:    v_mov_b32_e32 v2, 1
+; HSA-WARNING-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-WARNING-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; HSA-WARNING-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; HSA-WARNING-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; HSA-WARNING-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; HSA-WARNING-SDAG-NEXT:    s_endpgm
+;
+; HSA-WARNING-GISEL-LABEL: trap:
+; HSA-WARNING-GISEL:       ; %bb.0:
+; HSA-WARNING-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; HSA-WARNING-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; HSA-WARNING-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-WARNING-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-WARNING-GISEL-NEXT:    v_mov_b32_e32 v2, 1
+; HSA-WARNING-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-WARNING-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; HSA-WARNING-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; HSA-WARNING-GISEL-NEXT:    flat_store_dword v[0:1], v2
+; HSA-WARNING-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; HSA-WARNING-GISEL-NEXT:    s_endpgm
+;
+; TRAP-BIT-SDAG-LABEL: trap:
+; TRAP-BIT-SDAG:         .amd_kernel_code_t
+; TRAP-BIT-SDAG-NEXT:     amd_code_version_major = 1
+; TRAP-BIT-SDAG-NEXT:     amd_code_version_minor = 2
+; TRAP-BIT-SDAG-NEXT:     amd_machine_kind = 1
+; TRAP-BIT-SDAG-NEXT:     amd_machine_version_major = 6
+; TRAP-BIT-SDAG-NEXT:     amd_machine_version_minor = 0
+; TRAP-BIT-SDAG-NEXT:     amd_machine_version_stepping = 0
+; TRAP-BIT-SDAG-NEXT:     kernel_code_entry_byte_offset = 256
+; TRAP-BIT-SDAG-NEXT:     kernel_code_prefetch_byte_size = 0
+; TRAP-BIT-SDAG-NEXT:     granulated_workitem_vgpr_count = 0
+; TRAP-BIT-SDAG-NEXT:     granulated_wavefront_sgpr_count = 1
+; TRAP-BIT-SDAG-NEXT:     priority = 0
+; TRAP-BIT-SDAG-NEXT:     float_mode = 240
+; TRAP-BIT-SDAG-NEXT:     priv = 0
+; TRAP-BIT-SDAG-NEXT:     enable_dx10_clamp = 1
+; TRAP-BIT-SDAG-NEXT:     debug_mode = 0
+; TRAP-BIT-SDAG-NEXT:     enable_ieee_mode = 1
+; TRAP-BIT-SDAG-NEXT:     enable_wgp_mode = 0
+; TRAP-BIT-SDAG-NEXT:     enable_mem_ordered = 0
+; TRAP-BIT-SDAG-NEXT:     enable_fwd_progress = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; TRAP-BIT-SDAG-NEXT:     user_sgpr_count = 12
+; TRAP-BIT-SDAG-NEXT:     enable_trap_handler = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_x = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_y = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_z = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_info = 0
+; TRAP-BIT-SDAG-NEXT:     enable_vgpr_workitem_id = 2
+; TRAP-BIT-SDAG-NEXT:     enable_exception_msb = 0
+; TRAP-BIT-SDAG-NEXT:     granulated_lds_size = 0
+; TRAP-BIT-SDAG-NEXT:     enable_exception = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_buffer = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_dispatch_ptr = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_queue_ptr = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_dispatch_id = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_flat_scratch_init = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_size = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; TRAP-BIT-SDAG-NEXT:     enable_wavefront_size32 = 0
+; TRAP-BIT-SDAG-NEXT:     enable_ordered_append_gds = 0
+; TRAP-BIT-SDAG-NEXT:     private_element_size = 1
+; TRAP-BIT-SDAG-NEXT:     is_ptr64 = 1
+; TRAP-BIT-SDAG-NEXT:     is_dynamic_callstack = 0
+; TRAP-BIT-SDAG-NEXT:     is_debug_enabled = 0
+; TRAP-BIT-SDAG-NEXT:     is_xnack_enabled = 0
+; TRAP-BIT-SDAG-NEXT:     workitem_private_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT:     workgroup_group_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT:     gds_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT:     kernarg_segment_byte_size = 24
+; TRAP-BIT-SDAG-NEXT:     workgroup_fbarrier_count = 0
+; TRAP-BIT-SDAG-NEXT:     wavefront_sgpr_count = 10
+; TRAP-BIT-SDAG-NEXT:     workitem_vgpr_count = 1
+; TRAP-BIT-SDAG-NEXT:     reserved_vgpr_first = 0
+; TRAP-BIT-SDAG-NEXT:     reserved_vgpr_count = 0
+; TRAP-BIT-SDAG-NEXT:     reserved_sgpr_first = 0
+; TRAP-BIT-SDAG-NEXT:     reserved_sgpr_count = 0
+; TRAP-BIT-SDAG-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; TRAP-BIT-SDAG-NEXT:     debug_private_segment_buffer_sgpr = 0
+; TRAP-BIT-SDAG-NEXT:     kernarg_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT:     group_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT:     private_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT:     wavefront_size = 6
+; TRAP-BIT-SDAG-NEXT:     call_convention = -1
+; TRAP-BIT-SDAG-NEXT:     runtime_loader_kernel_symbol = 0
+; TRAP-BIT-SDAG-NEXT:    .end_amd_kernel_code_t
+; TRAP-BIT-SDAG-NEXT:  ; %bb.0:
+; TRAP-BIT-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; TRAP-BIT-SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; TRAP-BIT-SDAG-NEXT:    s_mov_b32 s2, -1
+; TRAP-BIT-SDAG-NEXT:    v_mov_b32_e32 v0, 1
+; TRAP-BIT-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; TRAP-BIT-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; TRAP-BIT-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; TRAP-BIT-SDAG-NEXT:    s_endpgm
+;
+; TRAP-BIT-GISEL-LABEL: trap:
+; TRAP-BIT-GISEL:         .amd_kernel_code_t
+; TRAP-BIT-GISEL-NEXT:     amd_code_version_major = 1
+; TRAP-BIT-GISEL-NEXT:     amd_code_version_minor = 2
+; TRAP-BIT-GISEL-NEXT:     amd_machine_kind = 1
+; TRAP-BIT-GISEL-NEXT:     amd_machine_version_major = 6
+; TRAP-BIT-GISEL-NEXT:     amd_machine_version_minor = 0
+; TRAP-BIT-GISEL-NEXT:     amd_machine_version_stepping = 0
+; TRAP-BIT-GISEL-NEXT:     kernel_code_entry_byte_offset = 256
+; TRAP-BIT-GISEL-NEXT:     kernel_code_prefetch_byte_size = 0
+; TRAP-BIT-GISEL-NEXT:     granulated_workitem_vgpr_count = 0
+; TRAP-BIT-GISEL-NEXT:     granulated_wavefront_sgpr_count = 1
+; TRAP-BIT-GISEL-NEXT:     priority = 0
+; TRAP-BIT-GISEL-NEXT:     float_mode = 240
+; TRAP-BIT-GISEL-NEXT:     priv = 0
+; TRAP-BIT-GISEL-NEXT:     enable_dx10_clamp = 1
+; TRAP-BIT-GISEL-NEXT:     debug_mode = 0
+; TRAP-BIT-GISEL-NEXT:     enable_ieee_mode = 1
+; TRAP-BIT-GISEL-NEXT:     enable_wgp_mode = 0
+; TRAP-BIT-GISEL-NEXT:     enable_mem_ordered = 0
+; TRAP-BIT-GISEL-NEXT:     enable_fwd_progress = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; TRAP-BIT-GISEL-NEXT:     user_sgpr_count = 12
+; TRAP-BIT-GISEL-NEXT:     enable_trap_handler = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_x = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_y = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_z = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_info = 0
+; TRAP-BIT-GISEL-NEXT:     enable_vgpr_workitem_id = 2
+; TRAP-BIT-GISEL-NEXT:     enable_exception_msb = 0
+; TRAP-BIT-GISEL-NEXT:     granulated_lds_size = 0
+; TRAP-BIT-GISEL-NEXT:     enable_exception = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_buffer = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_dispatch_ptr = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_queue_ptr = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_dispatch_id = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_flat_scratch_init = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_size = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; TRAP-BIT-GISEL-NEXT:     enable_wavefront_size32 = 0
+; TRAP-BIT-GISEL-NEXT:     enable_ordered_append_gds = 0
+; TRAP-BIT-GISEL-NEXT:     private_element_size = 1
+; TRAP-BIT-GISEL-NEXT:     is_ptr64 = 1
+; TRAP-BIT-GISEL-NEXT:     is_dynamic_callstack = 0
+; TRAP-BIT-GISEL-NEXT:     is_debug_enabled = 0
+; TRAP-BIT-GISEL-NEXT:     is_xnack_enabled = 0
+; TRAP-BIT-GISEL-NEXT:     workitem_private_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT:     workgroup_group_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT:     gds_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT:     kernarg_segment_byte_size = 24
+; TRAP-BIT-GISEL-NEXT:     workgroup_fbarrier_count = 0
+; TRAP-BIT-GISEL-NEXT:     wavefront_sgpr_count = 10
+; TRAP-BIT-GISEL-NEXT:     workitem_vgpr_count = 1
+; TRAP-BIT-GISEL-NEXT:     reserved_vgpr_first = 0
+; TRAP-BIT-GISEL-NEXT:     reserved_vgpr_count = 0
+; TRAP-BIT-GISEL-NEXT:     reserved_sgpr_first = 0
+; TRAP-BIT-GISEL-NEXT:     reserved_sgpr_count = 0
+; TRAP-BIT-GISEL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; TRAP-BIT-GISEL-NEXT:     debug_private_segment_buffer_sgpr = 0
+; TRAP-BIT-GISEL-NEXT:     kernarg_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT:     group_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT:     private_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT:     wavefront_size = 6
+; TRAP-BIT-GISEL-NEXT:     call_convention = -1
+; TRAP-BIT-GISEL-NEXT:     runtime_loader_kernel_symbol = 0
+; TRAP-BIT-GISEL-NEXT:    .end_amd_kernel_code_t
+; TRAP-BIT-GISEL-NEXT:  ; %bb.0:
+; TRAP-BIT-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; TRAP-BIT-GISEL-NEXT:    v_mov_b32_e32 v0, 1
+; TRAP-BIT-GISEL-NEXT:    s_mov_b32 s2, -1
+; TRAP-BIT-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; TRAP-BIT-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; TRAP-BIT-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; TRAP-BIT-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; TRAP-BIT-GISEL-NEXT:    s_endpgm
+;
+; NO-TRAP-BIT-SDAG-LABEL: trap:
+; NO-TRAP-BIT-SDAG:         .amd_kernel_code_t
+; NO-TRAP-BIT-SDAG-NEXT:     amd_code_version_major = 1
+; NO-TRAP-BIT-SDAG-NEXT:     amd_code_version_minor = 2
+; NO-TRAP-BIT-SDAG-NEXT:     amd_machine_kind = 1
+; NO-TRAP-BIT-SDAG-NEXT:     amd_machine_version_major = 6
+; NO-TRAP-BIT-SDAG-NEXT:     amd_machine_version_minor = 0
+; NO-TRAP-BIT-SDAG-NEXT:     amd_machine_version_stepping = 0
+; NO-TRAP-BIT-SDAG-NEXT:     kernel_code_entry_byte_offset = 256
+; NO-TRAP-BIT-SDAG-NEXT:     kernel_code_prefetch_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     granulated_workitem_vgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT:     granulated_wavefront_sgpr_count = 1
+; NO-TRAP-BIT-SDAG-NEXT:     priority = 0
+; NO-TRAP-BIT-SDAG-NEXT:     float_mode = 240
+; NO-TRAP-BIT-SDAG-NEXT:     priv = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_dx10_clamp = 1
+; NO-TRAP-BIT-SDAG-NEXT:     debug_mode = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_ieee_mode = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_wgp_mode = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_mem_ordered = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_fwd_progress = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; NO-TRAP-BIT-SDAG-NEXT:     user_sgpr_count = 12
+; NO-TRAP-BIT-SDAG-NEXT:     enable_trap_handler = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_x = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_y = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_z = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_info = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_vgpr_workitem_id = 2
+; NO-TRAP-BIT-SDAG-NEXT:     enable_exception_msb = 0
+; NO-TRAP-BIT-SDAG-NEXT:     granulated_lds_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_exception = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_buffer = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_dispatch_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_queue_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_dispatch_id = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_flat_scratch_init = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_wavefront_size32 = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_ordered_append_gds = 0
+; NO-TRAP-BIT-SDAG-NEXT:     private_element_size = 1
+; NO-TRAP-BIT-SDAG-NEXT:     is_ptr64 = 1
+; NO-TRAP-BIT-SDAG-NEXT:     is_dynamic_callstack = 0
+; NO-TRAP-BIT-SDAG-NEXT:     is_debug_enabled = 0
+; NO-TRAP-BIT-SDAG-NEXT:     is_xnack_enabled = 0
+; NO-TRAP-BIT-SDAG-NEXT:     workitem_private_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     workgroup_group_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     gds_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     kernarg_segment_byte_size = 24
+; NO-TRAP-BIT-SDAG-NEXT:     workgroup_fbarrier_count = 0
+; NO-TRAP-BIT-SDAG-NEXT:     wavefront_sgpr_count = 10
+; NO-TRAP-BIT-SDAG-NEXT:     workitem_vgpr_count = 1
+; NO-TRAP-BIT-SDAG-NEXT:     reserved_vgpr_first = 0
+; NO-TRAP-BIT-SDAG-NEXT:     reserved_vgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT:     reserved_sgpr_first = 0
+; NO-TRAP-BIT-SDAG-NEXT:     reserved_sgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; NO-TRAP-BIT-SDAG-NEXT:     debug_private_segment_buffer_sgpr = 0
+; NO-TRAP-BIT-SDAG-NEXT:     kernarg_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT:     group_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT:     private_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT:     wavefront_size = 6
+; NO-TRAP-BIT-SDAG-NEXT:     call_convention = -1
+; NO-TRAP-BIT-SDAG-NEXT:     runtime_loader_kernel_symbol = 0
+; NO-TRAP-BIT-SDAG-NEXT:    .end_amd_kernel_code_t
+; NO-TRAP-BIT-SDAG-NEXT:  ; %bb.0:
+; NO-TRAP-BIT-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-TRAP-BIT-SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; NO-TRAP-BIT-SDAG-NEXT:    s_mov_b32 s2, -1
+; NO-TRAP-BIT-SDAG-NEXT:    v_mov_b32_e32 v0, 1
+; NO-TRAP-BIT-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; NO-TRAP-BIT-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; NO-TRAP-BIT-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-SDAG-NEXT:    s_endpgm
+;
+; NO-TRAP-BIT-GISEL-LABEL: trap:
+; NO-TRAP-BIT-GISEL:         .amd_kernel_code_t
+; NO-TRAP-BIT-GISEL-NEXT:     amd_code_version_major = 1
+; NO-TRAP-BIT-GISEL-NEXT:     amd_code_version_minor = 2
+; NO-TRAP-BIT-GISEL-NEXT:     amd_machine_kind = 1
+; NO-TRAP-BIT-GISEL-NEXT:     amd_machine_version_major = 6
+; NO-TRAP-BIT-GISEL-NEXT:     amd_machine_version_minor = 0
+; NO-TRAP-BIT-GISEL-NEXT:     amd_machine_version_stepping = 0
+; NO-TRAP-BIT-GISEL-NEXT:     kernel_code_entry_byte_offset = 256
+; NO-TRAP-BIT-GISEL-NEXT:     kernel_code_prefetch_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     granulated_workitem_vgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT:     granulated_wavefront_sgpr_count = 1
+; NO-TRAP-BIT-GISEL-NEXT:     priority = 0
+; NO-TRAP-BIT-GISEL-NEXT:     float_mode = 240
+; NO-TRAP-BIT-GISEL-NEXT:     priv = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_dx10_clamp = 1
+; NO-TRAP-BIT-GISEL-NEXT:     debug_mode = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_ieee_mode = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_wgp_mode = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_mem_ordered = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_fwd_progress = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; NO-TRAP-BIT-GISEL-NEXT:     user_sgpr_count = 12
+; NO-TRAP-BIT-GISEL-NEXT:     enable_trap_handler = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_x = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_y = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_z = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_info = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_vgpr_workitem_id = 2
+; NO-TRAP-BIT-GISEL-NEXT:     enable_exception_msb = 0
+; NO-TRAP-BIT-GISEL-NEXT:     granulated_lds_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_exception = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_buffer = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_dispatch_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_queue_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_dispatch_id = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_flat_scratch_init = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_wavefront_size32 = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_ordered_append_gds = 0
+; NO-TRAP-BIT-GISEL-NEXT:     private_element_size = 1
+; NO-TRAP-BIT-GISEL-NEXT:     is_ptr64 = 1
+; NO-TRAP-BIT-GISEL-NEXT:     is_dynamic_callstack = 0
+; NO-TRAP-BIT-GISEL-NEXT:     is_debug_enabled = 0
+; NO-TRAP-BIT-GISEL-NEXT:     is_xnack_enabled = 0
+; NO-TRAP-BIT-GISEL-NEXT:     workitem_private_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     workgroup_group_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     gds_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     kernarg_segment_byte_size = 24
+; NO-TRAP-BIT-GISEL-NEXT:     workgroup_fbarrier_count = 0
+; NO-TRAP-BIT-GISEL-NEXT:     wavefront_sgpr_count = 10
+; NO-TRAP-BIT-GISEL-NEXT:     workitem_vgpr_count = 1
+; NO-TRAP-BIT-GISEL-NEXT:     reserved_vgpr_first = 0
+; NO-TRAP-BIT-GISEL-NEXT:     reserved_vgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT:     reserved_sgpr_first = 0
+; NO-TRAP-BIT-GISEL-NEXT:     reserved_sgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; NO-TRAP-BIT-GISEL-NEXT:     debug_private_segment_buffer_sgpr = 0
+; NO-TRAP-BIT-GISEL-NEXT:     kernarg_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT:     group_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT:     private_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT:     wavefront_size = 6
+; NO-TRAP-BIT-GISEL-NEXT:     call_convention = -1
+; NO-TRAP-BIT-GISEL-NEXT:     runtime_loader_kernel_symbol = 0
+; NO-TRAP-BIT-GISEL-NEXT:    .end_amd_kernel_code_t
+; NO-TRAP-BIT-GISEL-NEXT:  ; %bb.0:
+; NO-TRAP-BIT-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-TRAP-BIT-GISEL-NEXT:    v_mov_b32_e32 v0, 1
+; NO-TRAP-BIT-GISEL-NEXT:    s_mov_b32 s2, -1
+; NO-TRAP-BIT-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; NO-TRAP-BIT-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; NO-TRAP-BIT-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; NO-TRAP-BIT-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-GISEL-NEXT:    s_endpgm
+;
+; GCN-WARNING-SDAG-LABEL: trap:
+; GCN-WARNING-SDAG:       ; %bb.0:
+; GCN-WARNING-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-WARNING-SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-WARNING-SDAG-NEXT:    s_mov_b32 s2, -1
+; GCN-WARNING-SDAG-NEXT:    v_mov_b32_e32 v0, 1
+; GCN-WARNING-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-WARNING-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GCN-WARNING-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GCN-WARNING-SDAG-NEXT:    s_endpgm
+;
+; GCN-WARNING-GISEL-LABEL: trap:
+; GCN-WARNING-GISEL:       ; %bb.0:
+; GCN-WARNING-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-WARNING-GISEL-NEXT:    v_mov_b32_e32 v0, 1
+; GCN-WARNING-GISEL-NEXT:    s_mov_b32 s2, -1
+; GCN-WARNING-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-WARNING-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-WARNING-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GCN-WARNING-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GCN-WARNING-GISEL-NEXT:    s_endpgm
   store volatile i32 1, ptr addrspace(1) %arg0
   call void @llvm.trap()
   unreachable
@@ -97,14 +1356,532 @@ define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) {
   ret void
 }
 
-; GCN-LABEL: {{^}}non_entry_trap:
-; TRAP-BIT: enable_trap_handler = 1
-; NO-TRAP-BIT: enable_trap_handler = 0
-
-; HSA-TRAP: BB{{[0-9]_[0-9]+}}: ; %trap
-; HSA-TRAP: s_mov_b64 s[0:1], s[6:7]
-; HSA-TRAP-NEXT: s_trap 2
 define amdgpu_kernel void @non_entry_trap(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
+; HSA-TRAP-SDAG-LABEL: non_entry_trap:
+; HSA-TRAP-SDAG:       ; %bb.0: ; %entry
+; HSA-TRAP-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; HSA-TRAP-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; HSA-TRAP-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-TRAP-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-TRAP-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; HSA-TRAP-SDAG-NEXT:    flat_load_dword v0, v[0:1] glc
+; HSA-TRAP-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; HSA-TRAP-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, -1, v0
+; HSA-TRAP-SDAG-NEXT:    s_cbranch_vccz .LBB3_2
+; HSA-TRAP-SDAG-NEXT:  ; %bb.1: ; %ret
+; HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v2, 3
+; HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; HSA-TRAP-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; HSA-TRAP-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; HSA-TRAP-SDAG-NEXT:    s_endpgm
+; HSA-TRAP-SDAG-NEXT:  .LBB3_2: ; %trap
+; HSA-TRAP-SDAG-NEXT:    s_mov_b64 s[0:1], s[6:7]
+; HSA-TRAP-SDAG-NEXT:    s_trap 2
+;
+; HSA-TRAP-GISEL-LABEL: non_entry_trap:
+; HSA-TRAP-GISEL:       ; %bb.0: ; %entry
+; HSA-TRAP-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; HSA-TRAP-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; HSA-TRAP-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-TRAP-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-TRAP-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; HSA-TRAP-GISEL-NEXT:    flat_load_dword v0, v[0:1] glc
+; HSA-TRAP-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; HSA-TRAP-GISEL-NEXT:    v_readfirstlane_b32 s2, v0
+; HSA-TRAP-GISEL-NEXT:    s_cmp_eq_u32 s2, -1
+; HSA-TRAP-GISEL-NEXT:    s_cbranch_scc0 .LBB3_2
+; HSA-TRAP-GISEL-NEXT:  ; %bb.1: ; %ret
+; HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v2, 3
+; HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; HSA-TRAP-GISEL-NEXT:    flat_store_dword v[0:1], v2
+; HSA-TRAP-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; HSA-TRAP-GISEL-NEXT:    s_endpgm
+; HSA-TRAP-GISEL-NEXT:  .LBB3_2: ; %trap
+; HSA-TRAP-GISEL-NEXT:    s_mov_b64 s[0:1], s[6:7]
+; HSA-TRAP-GISEL-NEXT:    s_trap 2
+;
+; NO-HSA-TRAP-SDAG-LABEL: non_entry_trap:
+; NO-HSA-TRAP-SDAG:       ; %bb.0: ; %entry
+; NO-HSA-TRAP-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; NO-HSA-TRAP-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; NO-HSA-TRAP-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; NO-HSA-TRAP-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-HSA-TRAP-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; NO-HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-SDAG-NEXT:    flat_load_dword v0, v[0:1] glc
+; NO-HSA-TRAP-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, -1, v0
+; NO-HSA-TRAP-SDAG-NEXT:    s_cbranch_vccz .LBB3_2
+; NO-HSA-TRAP-SDAG-NEXT:  ; %bb.1: ; %ret
+; NO-HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v2, 3
+; NO-HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; NO-HSA-TRAP-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-SDAG-NEXT:    s_endpgm
+; NO-HSA-TRAP-SDAG-NEXT:  .LBB3_2: ; %trap
+; NO-HSA-TRAP-SDAG-NEXT:    s_endpgm
+;
+; NO-HSA-TRAP-GISEL-LABEL: non_entry_trap:
+; NO-HSA-TRAP-GISEL:       ; %bb.0: ; %entry
+; NO-HSA-TRAP-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; NO-HSA-TRAP-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; NO-HSA-TRAP-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; NO-HSA-TRAP-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-HSA-TRAP-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; NO-HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-GISEL-NEXT:    flat_load_dword v0, v[0:1] glc
+; NO-HSA-TRAP-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-GISEL-NEXT:    v_readfirstlane_b32 s2, v0
+; NO-HSA-TRAP-GISEL-NEXT:    s_cmp_eq_u32 s2, -1
+; NO-HSA-TRAP-GISEL-NEXT:    s_cbranch_scc0 .LBB3_2
+; NO-HSA-TRAP-GISEL-NEXT:  ; %bb.1: ; %ret
+; NO-HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v2, 3
+; NO-HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-GISEL-NEXT:    flat_store_dword v[0:1], v2
+; NO-HSA-TRAP-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-GISEL-NEXT:    s_endpgm
+; NO-HSA-TRAP-GISEL-NEXT:  .LBB3_2: ; %trap
+; NO-HSA-TRAP-GISEL-NEXT:    s_endpgm
+;
+; HSA-WARNING-SDAG-LABEL: non_entry_trap:
+; HSA-WARNING-SDAG:       ; %bb.0: ; %entry
+; HSA-WARNING-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; HSA-WARNING-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; HSA-WARNING-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-WARNING-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-WARNING-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-WARNING-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; HSA-WARNING-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; HSA-WARNING-SDAG-NEXT:    flat_load_dword v0, v[0:1] glc
+; HSA-WARNING-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; HSA-WARNING-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, -1, v0
+; HSA-WARNING-SDAG-NEXT:    s_cbranch_vccz .LBB3_2
+; HSA-WARNING-SDAG-NEXT:  ; %bb.1: ; %ret
+; HSA-WARNING-SDAG-NEXT:    v_mov_b32_e32 v2, 3
+; HSA-WARNING-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; HSA-WARNING-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; HSA-WARNING-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; HSA-WARNING-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; HSA-WARNING-SDAG-NEXT:    s_endpgm
+; HSA-WARNING-SDAG-NEXT:  .LBB3_2: ; %trap
+; HSA-WARNING-SDAG-NEXT:    s_endpgm
+;
+; HSA-WARNING-GISEL-LABEL: non_entry_trap:
+; HSA-WARNING-GISEL:       ; %bb.0: ; %entry
+; HSA-WARNING-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; HSA-WARNING-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; HSA-WARNING-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-WARNING-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-WARNING-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-WARNING-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; HSA-WARNING-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; HSA-WARNING-GISEL-NEXT:    flat_load_dword v0, v[0:1] glc
+; HSA-WARNING-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; HSA-WARNING-GISEL-NEXT:    v_readfirstlane_b32 s2, v0
+; HSA-WARNING-GISEL-NEXT:    s_cmp_eq_u32 s2, -1
+; HSA-WARNING-GISEL-NEXT:    s_cbranch_scc0 .LBB3_2
+; HSA-WARNING-GISEL-NEXT:  ; %bb.1: ; %ret
+; HSA-WARNING-GISEL-NEXT:    v_mov_b32_e32 v2, 3
+; HSA-WARNING-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; HSA-WARNING-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; HSA-WARNING-GISEL-NEXT:    flat_store_dword v[0:1], v2
+; HSA-WARNING-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; HSA-WARNING-GISEL-NEXT:    s_endpgm
+; HSA-WARNING-GISEL-NEXT:  .LBB3_2: ; %trap
+; HSA-WARNING-GISEL-NEXT:    s_endpgm
+;
+; TRAP-BIT-SDAG-LABEL: non_entry_trap:
+; TRAP-BIT-SDAG:         .amd_kernel_code_t
+; TRAP-BIT-SDAG-NEXT:     amd_code_version_major = 1
+; TRAP-BIT-SDAG-NEXT:     amd_code_version_minor = 2
+; TRAP-BIT-SDAG-NEXT:     amd_machine_kind = 1
+; TRAP-BIT-SDAG-NEXT:     amd_machine_version_major = 6
+; TRAP-BIT-SDAG-NEXT:     amd_machine_version_minor = 0
+; TRAP-BIT-SDAG-NEXT:     amd_machine_version_stepping = 0
+; TRAP-BIT-SDAG-NEXT:     kernel_code_entry_byte_offset = 256
+; TRAP-BIT-SDAG-NEXT:     kernel_code_prefetch_byte_size = 0
+; TRAP-BIT-SDAG-NEXT:     granulated_workitem_vgpr_count = 0
+; TRAP-BIT-SDAG-NEXT:     granulated_wavefront_sgpr_count = 1
+; TRAP-BIT-SDAG-NEXT:     priority = 0
+; TRAP-BIT-SDAG-NEXT:     float_mode = 240
+; TRAP-BIT-SDAG-NEXT:     priv = 0
+; TRAP-BIT-SDAG-NEXT:     enable_dx10_clamp = 1
+; TRAP-BIT-SDAG-NEXT:     debug_mode = 0
+; TRAP-BIT-SDAG-NEXT:     enable_ieee_mode = 1
+; TRAP-BIT-SDAG-NEXT:     enable_wgp_mode = 0
+; TRAP-BIT-SDAG-NEXT:     enable_mem_ordered = 0
+; TRAP-BIT-SDAG-NEXT:     enable_fwd_progress = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; TRAP-BIT-SDAG-NEXT:     user_sgpr_count = 12
+; TRAP-BIT-SDAG-NEXT:     enable_trap_handler = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_x = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_y = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_z = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_info = 0
+; TRAP-BIT-SDAG-NEXT:     enable_vgpr_workitem_id = 2
+; TRAP-BIT-SDAG-NEXT:     enable_exception_msb = 0
+; TRAP-BIT-SDAG-NEXT:     granulated_lds_size = 0
+; TRAP-BIT-SDAG-NEXT:     enable_exception = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_buffer = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_dispatch_ptr = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_queue_ptr = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_dispatch_id = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_flat_scratch_init = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_size = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; TRAP-BIT-SDAG-NEXT:     enable_wavefront_size32 = 0
+; TRAP-BIT-SDAG-NEXT:     enable_ordered_append_gds = 0
+; TRAP-BIT-SDAG-NEXT:     private_element_size = 1
+; TRAP-BIT-SDAG-NEXT:     is_ptr64 = 1
+; TRAP-BIT-SDAG-NEXT:     is_dynamic_callstack = 0
+; TRAP-BIT-SDAG-NEXT:     is_debug_enabled = 0
+; TRAP-BIT-SDAG-NEXT:     is_xnack_enabled = 0
+; TRAP-BIT-SDAG-NEXT:     workitem_private_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT:     workgroup_group_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT:     gds_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT:     kernarg_segment_byte_size = 24
+; TRAP-BIT-SDAG-NEXT:     workgroup_fbarrier_count = 0
+; TRAP-BIT-SDAG-NEXT:     wavefront_sgpr_count = 12
+; TRAP-BIT-SDAG-NEXT:     workitem_vgpr_count = 1
+; TRAP-BIT-SDAG-NEXT:     reserved_vgpr_first = 0
+; TRAP-BIT-SDAG-NEXT:     reserved_vgpr_count = 0
+; TRAP-BIT-SDAG-NEXT:     reserved_sgpr_first = 0
+; TRAP-BIT-SDAG-NEXT:     reserved_sgpr_count = 0
+; TRAP-BIT-SDAG-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; TRAP-BIT-SDAG-NEXT:     debug_private_segment_buffer_sgpr = 0
+; TRAP-BIT-SDAG-NEXT:     kernarg_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT:     group_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT:     private_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT:     wavefront_size = 6
+; TRAP-BIT-SDAG-NEXT:     call_convention = -1
+; TRAP-BIT-SDAG-NEXT:     runtime_loader_kernel_symbol = 0
+; TRAP-BIT-SDAG-NEXT:    .end_amd_kernel_code_t
+; TRAP-BIT-SDAG-NEXT:  ; %bb.0: ; %entry
+; TRAP-BIT-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; TRAP-BIT-SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; TRAP-BIT-SDAG-NEXT:    s_mov_b32 s2, -1
+; TRAP-BIT-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; TRAP-BIT-SDAG-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
+; TRAP-BIT-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; TRAP-BIT-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, -1, v0
+; TRAP-BIT-SDAG-NEXT:    s_cbranch_vccz .LBB3_2
+; TRAP-BIT-SDAG-NEXT:  ; %bb.1: ; %ret
+; TRAP-BIT-SDAG-NEXT:    v_mov_b32_e32 v0, 3
+; TRAP-BIT-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; TRAP-BIT-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; TRAP-BIT-SDAG-NEXT:    s_endpgm
+; TRAP-BIT-SDAG-NEXT:  .LBB3_2: ; %trap
+; TRAP-BIT-SDAG-NEXT:    s_endpgm
+;
+; TRAP-BIT-GISEL-LABEL: non_entry_trap:
+; TRAP-BIT-GISEL:         .amd_kernel_code_t
+; TRAP-BIT-GISEL-NEXT:     amd_code_version_major = 1
+; TRAP-BIT-GISEL-NEXT:     amd_code_version_minor = 2
+; TRAP-BIT-GISEL-NEXT:     amd_machine_kind = 1
+; TRAP-BIT-GISEL-NEXT:     amd_machine_version_major = 6
+; TRAP-BIT-GISEL-NEXT:     amd_machine_version_minor = 0
+; TRAP-BIT-GISEL-NEXT:     amd_machine_version_stepping = 0
+; TRAP-BIT-GISEL-NEXT:     kernel_code_entry_byte_offset = 256
+; TRAP-BIT-GISEL-NEXT:     kernel_code_prefetch_byte_size = 0
+; TRAP-BIT-GISEL-NEXT:     granulated_workitem_vgpr_count = 0
+; TRAP-BIT-GISEL-NEXT:     granulated_wavefront_sgpr_count = 1
+; TRAP-BIT-GISEL-NEXT:     priority = 0
+; TRAP-BIT-GISEL-NEXT:     float_mode = 240
+; TRAP-BIT-GISEL-NEXT:     priv = 0
+; TRAP-BIT-GISEL-NEXT:     enable_dx10_clamp = 1
+; TRAP-BIT-GISEL-NEXT:     debug_mode = 0
+; TRAP-BIT-GISEL-NEXT:     enable_ieee_mode = 1
+; TRAP-BIT-GISEL-NEXT:     enable_wgp_mode = 0
+; TRAP-BIT-GISEL-NEXT:     enable_mem_ordered = 0
+; TRAP-BIT-GISEL-NEXT:     enable_fwd_progress = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; TRAP-BIT-GISEL-NEXT:     user_sgpr_count = 12
+; TRAP-BIT-GISEL-NEXT:     enable_trap_handler = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_x = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_y = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_z = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_info = 0
+; TRAP-BIT-GISEL-NEXT:     enable_vgpr_workitem_id = 2
+; TRAP-BIT-GISEL-NEXT:     enable_exception_msb = 0
+; TRAP-BIT-GISEL-NEXT:     granulated_lds_size = 0
+; TRAP-BIT-GISEL-NEXT:     enable_exception = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_buffer = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_dispatch_ptr = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_queue_ptr = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_dispatch_id = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_flat_scratch_init = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_size = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; TRAP-BIT-GISEL-NEXT:     enable_wavefront_size32 = 0
+; TRAP-BIT-GISEL-NEXT:     enable_ordered_append_gds = 0
+; TRAP-BIT-GISEL-NEXT:     private_element_size = 1
+; TRAP-BIT-GISEL-NEXT:     is_ptr64 = 1
+; TRAP-BIT-GISEL-NEXT:     is_dynamic_callstack = 0
+; TRAP-BIT-GISEL-NEXT:     is_debug_enabled = 0
+; TRAP-BIT-GISEL-NEXT:     is_xnack_enabled = 0
+; TRAP-BIT-GISEL-NEXT:     workitem_private_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT:     workgroup_group_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT:     gds_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT:     kernarg_segment_byte_size = 24
+; TRAP-BIT-GISEL-NEXT:     workgroup_fbarrier_count = 0
+; TRAP-BIT-GISEL-NEXT:     wavefront_sgpr_count = 10
+; TRAP-BIT-GISEL-NEXT:     workitem_vgpr_count = 1
+; TRAP-BIT-GISEL-NEXT:     reserved_vgpr_first = 0
+; TRAP-BIT-GISEL-NEXT:     reserved_vgpr_count = 0
+; TRAP-BIT-GISEL-NEXT:     reserved_sgpr_first = 0
+; TRAP-BIT-GISEL-NEXT:     reserved_sgpr_count = 0
+; TRAP-BIT-GISEL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; TRAP-BIT-GISEL-NEXT:     debug_private_segment_buffer_sgpr = 0
+; TRAP-BIT-GISEL-NEXT:     kernarg_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT:     group_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT:     private_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT:     wavefront_size = 6
+; TRAP-BIT-GISEL-NEXT:     call_convention = -1
+; TRAP-BIT-GISEL-NEXT:     runtime_loader_kernel_symbol = 0
+; TRAP-BIT-GISEL-NEXT:    .end_amd_kernel_code_t
+; TRAP-BIT-GISEL-NEXT:  ; %bb.0: ; %entry
+; TRAP-BIT-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; TRAP-BIT-GISEL-NEXT:    s_mov_b32 s2, -1
+; TRAP-BIT-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; TRAP-BIT-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; TRAP-BIT-GISEL-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
+; TRAP-BIT-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; TRAP-BIT-GISEL-NEXT:    v_readfirstlane_b32 s4, v0
+; TRAP-BIT-GISEL-NEXT:    s_cmp_eq_u32 s4, -1
+; TRAP-BIT-GISEL-NEXT:    s_cbranch_scc0 .LBB3_2
+; TRAP-BIT-GISEL-NEXT:  ; %bb.1: ; %ret
+; TRAP-BIT-GISEL-NEXT:    v_mov_b32_e32 v0, 3
+; TRAP-BIT-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; TRAP-BIT-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; TRAP-BIT-GISEL-NEXT:    s_endpgm
+; TRAP-BIT-GISEL-NEXT:  .LBB3_2: ; %trap
+; TRAP-BIT-GISEL-NEXT:    s_endpgm
+;
+; NO-TRAP-BIT-SDAG-LABEL: non_entry_trap:
+; NO-TRAP-BIT-SDAG:         .amd_kernel_code_t
+; NO-TRAP-BIT-SDAG-NEXT:     amd_code_version_major = 1
+; NO-TRAP-BIT-SDAG-NEXT:     amd_code_version_minor = 2
+; NO-TRAP-BIT-SDAG-NEXT:     amd_machine_kind = 1
+; NO-TRAP-BIT-SDAG-NEXT:     amd_machine_version_major = 6
+; NO-TRAP-BIT-SDAG-NEXT:     amd_machine_version_minor = 0
+; NO-TRAP-BIT-SDAG-NEXT:     amd_machine_version_stepping = 0
+; NO-TRAP-BIT-SDAG-NEXT:     kernel_code_entry_byte_offset = 256
+; NO-TRAP-BIT-SDAG-NEXT:     kernel_code_prefetch_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     granulated_workitem_vgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT:     granulated_wavefront_sgpr_count = 1
+; NO-TRAP-BIT-SDAG-NEXT:     priority = 0
+; NO-TRAP-BIT-SDAG-NEXT:     float_mode = 240
+; NO-TRAP-BIT-SDAG-NEXT:     priv = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_dx10_clamp = 1
+; NO-TRAP-BIT-SDAG-NEXT:     debug_mode = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_ieee_mode = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_wgp_mode = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_mem_ordered = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_fwd_progress = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; NO-TRAP-BIT-SDAG-NEXT:     user_sgpr_count = 12
+; NO-TRAP-BIT-SDAG-NEXT:     enable_trap_handler = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_x = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_y = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_z = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_info = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_vgpr_workitem_id = 2
+; NO-TRAP-BIT-SDAG-NEXT:     enable_exception_msb = 0
+; NO-TRAP-BIT-SDAG-NEXT:     granulated_lds_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_exception = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_buffer = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_dispatch_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_queue_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_dispatch_id = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_flat_scratch_init = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_wavefront_size32 = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_ordered_append_gds = 0
+; NO-TRAP-BIT-SDAG-NEXT:     private_element_size = 1
+; NO-TRAP-BIT-SDAG-NEXT:     is_ptr64 = 1
+; NO-TRAP-BIT-SDAG-NEXT:     is_dynamic_callstack = 0
+; NO-TRAP-BIT-SDAG-NEXT:     is_debug_enabled = 0
+; NO-TRAP-BIT-SDAG-NEXT:     is_xnack_enabled = 0
+; NO-TRAP-BIT-SDAG-NEXT:     workitem_private_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     workgroup_group_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     gds_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     kernarg_segment_byte_size = 24
+; NO-TRAP-BIT-SDAG-NEXT:     workgroup_fbarrier_count = 0
+; NO-TRAP-BIT-SDAG-NEXT:     wavefront_sgpr_count = 12
+; NO-TRAP-BIT-SDAG-NEXT:     workitem_vgpr_count = 1
+; NO-TRAP-BIT-SDAG-NEXT:     reserved_vgpr_first = 0
+; NO-TRAP-BIT-SDAG-NEXT:     reserved_vgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT:     reserved_sgpr_first = 0
+; NO-TRAP-BIT-SDAG-NEXT:     reserved_sgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; NO-TRAP-BIT-SDAG-NEXT:     debug_private_segment_buffer_sgpr = 0
+; NO-TRAP-BIT-SDAG-NEXT:     kernarg_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT:     group_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT:     private_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT:     wavefront_size = 6
+; NO-TRAP-BIT-SDAG-NEXT:     call_convention = -1
+; NO-TRAP-BIT-SDAG-NEXT:     runtime_loader_kernel_symbol = 0
+; NO-TRAP-BIT-SDAG-NEXT:    .end_amd_kernel_code_t
+; NO-TRAP-BIT-SDAG-NEXT:  ; %bb.0: ; %entry
+; NO-TRAP-BIT-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-TRAP-BIT-SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; NO-TRAP-BIT-SDAG-NEXT:    s_mov_b32 s2, -1
+; NO-TRAP-BIT-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; NO-TRAP-BIT-SDAG-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
+; NO-TRAP-BIT-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, -1, v0
+; NO-TRAP-BIT-SDAG-NEXT:    s_cbranch_vccz .LBB3_2
+; NO-TRAP-BIT-SDAG-NEXT:  ; %bb.1: ; %ret
+; NO-TRAP-BIT-SDAG-NEXT:    v_mov_b32_e32 v0, 3
+; NO-TRAP-BIT-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; NO-TRAP-BIT-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-SDAG-NEXT:    s_endpgm
+; NO-TRAP-BIT-SDAG-NEXT:  .LBB3_2: ; %trap
+; NO-TRAP-BIT-SDAG-NEXT:    s_endpgm
+;
+; NO-TRAP-BIT-GISEL-LABEL: non_entry_trap:
+; NO-TRAP-BIT-GISEL:         .amd_kernel_code_t
+; NO-TRAP-BIT-GISEL-NEXT:     amd_code_version_major = 1
+; NO-TRAP-BIT-GISEL-NEXT:     amd_code_version_minor = 2
+; NO-TRAP-BIT-GISEL-NEXT:     amd_machine_kind = 1
+; NO-TRAP-BIT-GISEL-NEXT:     amd_machine_version_major = 6
+; NO-TRAP-BIT-GISEL-NEXT:     amd_machine_version_minor = 0
+; NO-TRAP-BIT-GISEL-NEXT:     amd_machine_version_stepping = 0
+; NO-TRAP-BIT-GISEL-NEXT:     kernel_code_entry_byte_offset = 256
+; NO-TRAP-BIT-GISEL-NEXT:     kernel_code_prefetch_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     granulated_workitem_vgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT:     granulated_wavefront_sgpr_count = 1
+; NO-TRAP-BIT-GISEL-NEXT:     priority = 0
+; NO-TRAP-BIT-GISEL-NEXT:     float_mode = 240
+; NO-TRAP-BIT-GISEL-NEXT:     priv = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_dx10_clamp = 1
+; NO-TRAP-BIT-GISEL-NEXT:     debug_mode = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_ieee_mode = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_wgp_mode = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_mem_ordered = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_fwd_progress = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; NO-TRAP-BIT-GISEL-NEXT:     user_sgpr_count = 12
+; NO-TRAP-BIT-GISEL-NEXT:     enable_trap_handler = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_x = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_y = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_z = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_info = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_vgpr_workitem_id = 2
+; NO-TRAP-BIT-GISEL-NEXT:     enable_exception_msb = 0
+; NO-TRAP-BIT-GISEL-NEXT:     granulated_lds_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_exception = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_buffer = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_dispatch_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_queue_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_dispatch_id = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_flat_scratch_init = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_wavefront_size32 = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_ordered_append_gds = 0
+; NO-TRAP-BIT-GISEL-NEXT:     private_element_size = 1
+; NO-TRAP-BIT-GISEL-NEXT:     is_ptr64 = 1
+; NO-TRAP-BIT-GISEL-NEXT:     is_dynamic_callstack = 0
+; NO-TRAP-BIT-GISEL-NEXT:     is_debug_enabled = 0
+; NO-TRAP-BIT-GISEL-NEXT:     is_xnack_enabled = 0
+; NO-TRAP-BIT-GISEL-NEXT:     workitem_private_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     workgroup_group_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     gds_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     kernarg_segment_byte_size = 24
+; NO-TRAP-BIT-GISEL-NEXT:     workgroup_fbarrier_count = 0
+; NO-TRAP-BIT-GISEL-NEXT:     wavefront_sgpr_count = 10
+; NO-TRAP-BIT-GISEL-NEXT:     workitem_vgpr_count = 1
+; NO-TRAP-BIT-GISEL-NEXT:     reserved_vgpr_first = 0
+; NO-TRAP-BIT-GISEL-NEXT:     reserved_vgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT:     reserved_sgpr_first = 0
+; NO-TRAP-BIT-GISEL-NEXT:     reserved_sgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; NO-TRAP-BIT-GISEL-NEXT:     debug_private_segment_buffer_sgpr = 0
+; NO-TRAP-BIT-GISEL-NEXT:     kernarg_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT:     group_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT:     private_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT:     wavefront_size = 6
+; NO-TRAP-BIT-GISEL-NEXT:     call_convention = -1
+; NO-TRAP-BIT-GISEL-NEXT:     runtime_loader_kernel_symbol = 0
+; NO-TRAP-BIT-GISEL-NEXT:    .end_amd_kernel_code_t
+; NO-TRAP-BIT-GISEL-NEXT:  ; %bb.0: ; %entry
+; NO-TRAP-BIT-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-TRAP-BIT-GISEL-NEXT:    s_mov_b32 s2, -1
+; NO-TRAP-BIT-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; NO-TRAP-BIT-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; NO-TRAP-BIT-GISEL-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
+; NO-TRAP-BIT-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-GISEL-NEXT:    v_readfirstlane_b32 s4, v0
+; NO-TRAP-BIT-GISEL-NEXT:    s_cmp_eq_u32 s4, -1
+; NO-TRAP-BIT-GISEL-NEXT:    s_cbranch_scc0 .LBB3_2
+; NO-TRAP-BIT-GISEL-NEXT:  ; %bb.1: ; %ret
+; NO-TRAP-BIT-GISEL-NEXT:    v_mov_b32_e32 v0, 3
+; NO-TRAP-BIT-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; NO-TRAP-BIT-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-GISEL-NEXT:    s_endpgm
+; NO-TRAP-BIT-GISEL-NEXT:  .LBB3_2: ; %trap
+; NO-TRAP-BIT-GISEL-NEXT:    s_endpgm
+;
+; GCN-WARNING-SDAG-LABEL: non_entry_trap:
+; GCN-WARNING-SDAG:       ; %bb.0: ; %entry
+; GCN-WARNING-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-WARNING-SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-WARNING-SDAG-NEXT:    s_mov_b32 s2, -1
+; GCN-WARNING-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-WARNING-SDAG-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
+; GCN-WARNING-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GCN-WARNING-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, -1, v0
+; GCN-WARNING-SDAG-NEXT:    s_cbranch_vccz .LBB3_2
+; GCN-WARNING-SDAG-NEXT:  ; %bb.1: ; %ret
+; GCN-WARNING-SDAG-NEXT:    v_mov_b32_e32 v0, 3
+; GCN-WARNING-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GCN-WARNING-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GCN-WARNING-SDAG-NEXT:    s_endpgm
+; GCN-WARNING-SDAG-NEXT:  .LBB3_2: ; %trap
+; GCN-WARNING-SDAG-NEXT:    s_endpgm
+;
+; GCN-WARNING-GISEL-LABEL: non_entry_trap:
+; GCN-WARNING-GISEL:       ; %bb.0: ; %entry
+; GCN-WARNING-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-WARNING-GISEL-NEXT:    s_mov_b32 s2, -1
+; GCN-WARNING-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-WARNING-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-WARNING-GISEL-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
+; GCN-WARNING-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GCN-WARNING-GISEL-NEXT:    v_readfirstlane_b32 s4, v0
+; GCN-WARNING-GISEL-NEXT:    s_cmp_eq_u32 s4, -1
+; GCN-WARNING-GISEL-NEXT:    s_cbranch_scc0 .LBB3_2
+; GCN-WARNING-GISEL-NEXT:  ; %bb.1: ; %ret
+; GCN-WARNING-GISEL-NEXT:    v_mov_b32_e32 v0, 3
+; GCN-WARNING-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GCN-WARNING-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GCN-WARNING-GISEL-NEXT:    s_endpgm
+; GCN-WARNING-GISEL-NEXT:  .LBB3_2: ; %trap
+; GCN-WARNING-GISEL-NEXT:    s_endpgm
 entry:
   %tmp29 = load volatile i32, ptr addrspace(1) %arg0
   %cmp = icmp eq i32 %tmp29, -1
@@ -119,14 +1896,612 @@ ret:
   ret void
 }
 
-; GCN-LABEL: {{^}}non_entry_trap_no_unreachable:
-; TRAP-BIT: enable_trap_handler = 1
-; NO-TRAP-BIT: enable_trap_handler = 0
-
-; HSA-TRAP: BB{{[0-9]_[0-9]+}}: ; %trap
-; HSA-TRAP: s_mov_b64 s[0:1], s[6:7]
-; HSA-TRAP-NEXT: s_trap 2
 define amdgpu_kernel void @non_entry_trap_no_unreachable(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
+; HSA-TRAP-SDAG-LABEL: non_entry_trap_no_unreachable:
+; HSA-TRAP-SDAG:       ; %bb.0: ; %entry
+; HSA-TRAP-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; HSA-TRAP-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; HSA-TRAP-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-TRAP-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[8:9], 0x0
+; HSA-TRAP-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v1, s3
+; HSA-TRAP-SDAG-NEXT:    flat_load_dword v0, v[0:1] glc
+; HSA-TRAP-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; HSA-TRAP-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, -1, v0
+; HSA-TRAP-SDAG-NEXT:    s_cbranch_vccz .LBB4_2
+; HSA-TRAP-SDAG-NEXT:  .LBB4_1: ; %ret
+; HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v2, 3
+; HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v0, s2
+; HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v1, s3
+; HSA-TRAP-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; HSA-TRAP-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; HSA-TRAP-SDAG-NEXT:    s_endpgm
+; HSA-TRAP-SDAG-NEXT:  .LBB4_2: ; %trap
+; HSA-TRAP-SDAG-NEXT:    s_mov_b64 s[0:1], s[6:7]
+; HSA-TRAP-SDAG-NEXT:    s_trap 2
+; HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v0, 0x4d2
+; HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; HSA-TRAP-SDAG-NEXT:    s_mov_b32 m0, -1
+; HSA-TRAP-SDAG-NEXT:    ds_write_b32 v1, v0
+; HSA-TRAP-SDAG-NEXT:    s_branch .LBB4_1
+;
+; HSA-TRAP-GISEL-LABEL: non_entry_trap_no_unreachable:
+; HSA-TRAP-GISEL:       ; %bb.0: ; %entry
+; HSA-TRAP-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; HSA-TRAP-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; HSA-TRAP-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-TRAP-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[8:9], 0x0
+; HSA-TRAP-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; HSA-TRAP-GISEL-NEXT:    flat_load_dword v0, v[0:1] glc
+; HSA-TRAP-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; HSA-TRAP-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
+; HSA-TRAP-GISEL-NEXT:    s_cmp_eq_u32 s0, -1
+; HSA-TRAP-GISEL-NEXT:    s_cbranch_scc0 .LBB4_2
+; HSA-TRAP-GISEL-NEXT:  .LBB4_1: ; %ret
+; HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v2, 3
+; HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; HSA-TRAP-GISEL-NEXT:    flat_store_dword v[0:1], v2
+; HSA-TRAP-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; HSA-TRAP-GISEL-NEXT:    s_endpgm
+; HSA-TRAP-GISEL-NEXT:  .LBB4_2: ; %trap
+; HSA-TRAP-GISEL-NEXT:    s_mov_b64 s[0:1], s[6:7]
+; HSA-TRAP-GISEL-NEXT:    s_trap 2
+; HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v0, 0x4d2
+; HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; HSA-TRAP-GISEL-NEXT:    s_mov_b32 m0, -1
+; HSA-TRAP-GISEL-NEXT:    ds_write_b32 v1, v0
+; HSA-TRAP-GISEL-NEXT:    s_branch .LBB4_1
+;
+; NO-HSA-TRAP-SDAG-LABEL: non_entry_trap_no_unreachable:
+; NO-HSA-TRAP-SDAG:       ; %bb.0: ; %entry
+; NO-HSA-TRAP-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; NO-HSA-TRAP-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; NO-HSA-TRAP-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; NO-HSA-TRAP-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-HSA-TRAP-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; NO-HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-SDAG-NEXT:    flat_load_dword v0, v[0:1] glc
+; NO-HSA-TRAP-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, -1, v0
+; NO-HSA-TRAP-SDAG-NEXT:    s_cbranch_vccz .LBB4_3
+; NO-HSA-TRAP-SDAG-NEXT:  .LBB4_1: ; %ret
+; NO-HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v2, 3
+; NO-HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; NO-HSA-TRAP-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-SDAG-NEXT:  .LBB4_2:
+; NO-HSA-TRAP-SDAG-NEXT:    s_endpgm
+; NO-HSA-TRAP-SDAG-NEXT:  .LBB4_3: ; %trap
+; NO-HSA-TRAP-SDAG-NEXT:    s_mov_b32 m0, -1
+; NO-HSA-TRAP-SDAG-NEXT:    s_cbranch_execnz .LBB4_2
+; NO-HSA-TRAP-SDAG-NEXT:  ; %bb.4: ; %trap
+; NO-HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v0, 0x4d2
+; NO-HSA-TRAP-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; NO-HSA-TRAP-SDAG-NEXT:    ds_write_b32 v1, v0
+; NO-HSA-TRAP-SDAG-NEXT:    s_branch .LBB4_1
+;
+; NO-HSA-TRAP-GISEL-LABEL: non_entry_trap_no_unreachable:
+; NO-HSA-TRAP-GISEL:       ; %bb.0: ; %entry
+; NO-HSA-TRAP-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; NO-HSA-TRAP-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; NO-HSA-TRAP-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; NO-HSA-TRAP-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-HSA-TRAP-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; NO-HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-GISEL-NEXT:    flat_load_dword v0, v[0:1] glc
+; NO-HSA-TRAP-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-GISEL-NEXT:    v_readfirstlane_b32 s2, v0
+; NO-HSA-TRAP-GISEL-NEXT:    s_cmp_eq_u32 s2, -1
+; NO-HSA-TRAP-GISEL-NEXT:    s_cbranch_scc0 .LBB4_3
+; NO-HSA-TRAP-GISEL-NEXT:  .LBB4_1: ; %ret
+; NO-HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v2, 3
+; NO-HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; NO-HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; NO-HSA-TRAP-GISEL-NEXT:    flat_store_dword v[0:1], v2
+; NO-HSA-TRAP-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; NO-HSA-TRAP-GISEL-NEXT:  .LBB4_2:
+; NO-HSA-TRAP-GISEL-NEXT:    s_endpgm
+; NO-HSA-TRAP-GISEL-NEXT:  .LBB4_3: ; %trap
+; NO-HSA-TRAP-GISEL-NEXT:    s_cbranch_execnz .LBB4_2
+; NO-HSA-TRAP-GISEL-NEXT:  ; %bb.4: ; %trap
+; NO-HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v0, 0x4d2
+; NO-HSA-TRAP-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; NO-HSA-TRAP-GISEL-NEXT:    s_mov_b32 m0, -1
+; NO-HSA-TRAP-GISEL-NEXT:    ds_write_b32 v1, v0
+; NO-HSA-TRAP-GISEL-NEXT:    s_branch .LBB4_1
+;
+; HSA-WARNING-SDAG-LABEL: non_entry_trap_no_unreachable:
+; HSA-WARNING-SDAG:       ; %bb.0: ; %entry
+; HSA-WARNING-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; HSA-WARNING-SDAG-NEXT:    s_add_i32 s12, s12, s17
+; HSA-WARNING-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-WARNING-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-WARNING-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-WARNING-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; HSA-WARNING-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; HSA-WARNING-SDAG-NEXT:    flat_load_dword v0, v[0:1] glc
+; HSA-WARNING-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; HSA-WARNING-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, -1, v0
+; HSA-WARNING-SDAG-NEXT:    s_cbranch_vccz .LBB4_3
+; HSA-WARNING-SDAG-NEXT:  .LBB4_1: ; %ret
+; HSA-WARNING-SDAG-NEXT:    v_mov_b32_e32 v2, 3
+; HSA-WARNING-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; HSA-WARNING-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; HSA-WARNING-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; HSA-WARNING-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; HSA-WARNING-SDAG-NEXT:  .LBB4_2:
+; HSA-WARNING-SDAG-NEXT:    s_endpgm
+; HSA-WARNING-SDAG-NEXT:  .LBB4_3: ; %trap
+; HSA-WARNING-SDAG-NEXT:    s_mov_b32 m0, -1
+; HSA-WARNING-SDAG-NEXT:    s_cbranch_execnz .LBB4_2
+; HSA-WARNING-SDAG-NEXT:  ; %bb.4: ; %trap
+; HSA-WARNING-SDAG-NEXT:    v_mov_b32_e32 v0, 0x4d2
+; HSA-WARNING-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; HSA-WARNING-SDAG-NEXT:    ds_write_b32 v1, v0
+; HSA-WARNING-SDAG-NEXT:    s_branch .LBB4_1
+;
+; HSA-WARNING-GISEL-LABEL: non_entry_trap_no_unreachable:
+; HSA-WARNING-GISEL:       ; %bb.0: ; %entry
+; HSA-WARNING-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; HSA-WARNING-GISEL-NEXT:    s_add_i32 s12, s12, s17
+; HSA-WARNING-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; HSA-WARNING-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; HSA-WARNING-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-WARNING-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; HSA-WARNING-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; HSA-WARNING-GISEL-NEXT:    flat_load_dword v0, v[0:1] glc
+; HSA-WARNING-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; HSA-WARNING-GISEL-NEXT:    v_readfirstlane_b32 s2, v0
+; HSA-WARNING-GISEL-NEXT:    s_cmp_eq_u32 s2, -1
+; HSA-WARNING-GISEL-NEXT:    s_cbranch_scc0 .LBB4_3
+; HSA-WARNING-GISEL-NEXT:  .LBB4_1: ; %ret
+; HSA-WARNING-GISEL-NEXT:    v_mov_b32_e32 v2, 3
+; HSA-WARNING-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; HSA-WARNING-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; HSA-WARNING-GISEL-NEXT:    flat_store_dword v[0:1], v2
+; HSA-WARNING-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; HSA-WARNING-GISEL-NEXT:  .LBB4_2:
+; HSA-WARNING-GISEL-NEXT:    s_endpgm
+; HSA-WARNING-GISEL-NEXT:  .LBB4_3: ; %trap
+; HSA-WARNING-GISEL-NEXT:    s_cbranch_execnz .LBB4_2
+; HSA-WARNING-GISEL-NEXT:  ; %bb.4: ; %trap
+; HSA-WARNING-GISEL-NEXT:    v_mov_b32_e32 v0, 0x4d2
+; HSA-WARNING-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; HSA-WARNING-GISEL-NEXT:    s_mov_b32 m0, -1
+; HSA-WARNING-GISEL-NEXT:    ds_write_b32 v1, v0
+; HSA-WARNING-GISEL-NEXT:    s_branch .LBB4_1
+;
+; TRAP-BIT-SDAG-LABEL: non_entry_trap_no_unreachable:
+; TRAP-BIT-SDAG:         .amd_kernel_code_t
+; TRAP-BIT-SDAG-NEXT:     amd_code_version_major = 1
+; TRAP-BIT-SDAG-NEXT:     amd_code_version_minor = 2
+; TRAP-BIT-SDAG-NEXT:     amd_machine_kind = 1
+; TRAP-BIT-SDAG-NEXT:     amd_machine_version_major = 6
+; TRAP-BIT-SDAG-NEXT:     amd_machine_version_minor = 0
+; TRAP-BIT-SDAG-NEXT:     amd_machine_version_stepping = 0
+; TRAP-BIT-SDAG-NEXT:     kernel_code_entry_byte_offset = 256
+; TRAP-BIT-SDAG-NEXT:     kernel_code_prefetch_byte_size = 0
+; TRAP-BIT-SDAG-NEXT:     granulated_workitem_vgpr_count = 0
+; TRAP-BIT-SDAG-NEXT:     granulated_wavefront_sgpr_count = 1
+; TRAP-BIT-SDAG-NEXT:     priority = 0
+; TRAP-BIT-SDAG-NEXT:     float_mode = 240
+; TRAP-BIT-SDAG-NEXT:     priv = 0
+; TRAP-BIT-SDAG-NEXT:     enable_dx10_clamp = 1
+; TRAP-BIT-SDAG-NEXT:     debug_mode = 0
+; TRAP-BIT-SDAG-NEXT:     enable_ieee_mode = 1
+; TRAP-BIT-SDAG-NEXT:     enable_wgp_mode = 0
+; TRAP-BIT-SDAG-NEXT:     enable_mem_ordered = 0
+; TRAP-BIT-SDAG-NEXT:     enable_fwd_progress = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; TRAP-BIT-SDAG-NEXT:     user_sgpr_count = 12
+; TRAP-BIT-SDAG-NEXT:     enable_trap_handler = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_x = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_y = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_z = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_info = 0
+; TRAP-BIT-SDAG-NEXT:     enable_vgpr_workitem_id = 2
+; TRAP-BIT-SDAG-NEXT:     enable_exception_msb = 0
+; TRAP-BIT-SDAG-NEXT:     granulated_lds_size = 0
+; TRAP-BIT-SDAG-NEXT:     enable_exception = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_buffer = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_dispatch_ptr = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_queue_ptr = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_dispatch_id = 1
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_flat_scratch_init = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_size = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; TRAP-BIT-SDAG-NEXT:     enable_wavefront_size32 = 0
+; TRAP-BIT-SDAG-NEXT:     enable_ordered_append_gds = 0
+; TRAP-BIT-SDAG-NEXT:     private_element_size = 1
+; TRAP-BIT-SDAG-NEXT:     is_ptr64 = 1
+; TRAP-BIT-SDAG-NEXT:     is_dynamic_callstack = 0
+; TRAP-BIT-SDAG-NEXT:     is_debug_enabled = 0
+; TRAP-BIT-SDAG-NEXT:     is_xnack_enabled = 0
+; TRAP-BIT-SDAG-NEXT:     workitem_private_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT:     workgroup_group_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT:     gds_segment_byte_size = 0
+; TRAP-BIT-SDAG-NEXT:     kernarg_segment_byte_size = 24
+; TRAP-BIT-SDAG-NEXT:     workgroup_fbarrier_count = 0
+; TRAP-BIT-SDAG-NEXT:     wavefront_sgpr_count = 12
+; TRAP-BIT-SDAG-NEXT:     workitem_vgpr_count = 2
+; TRAP-BIT-SDAG-NEXT:     reserved_vgpr_first = 0
+; TRAP-BIT-SDAG-NEXT:     reserved_vgpr_count = 0
+; TRAP-BIT-SDAG-NEXT:     reserved_sgpr_first = 0
+; TRAP-BIT-SDAG-NEXT:     reserved_sgpr_count = 0
+; TRAP-BIT-SDAG-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; TRAP-BIT-SDAG-NEXT:     debug_private_segment_buffer_sgpr = 0
+; TRAP-BIT-SDAG-NEXT:     kernarg_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT:     group_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT:     private_segment_alignment = 4
+; TRAP-BIT-SDAG-NEXT:     wavefront_size = 6
+; TRAP-BIT-SDAG-NEXT:     call_convention = -1
+; TRAP-BIT-SDAG-NEXT:     runtime_loader_kernel_symbol = 0
+; TRAP-BIT-SDAG-NEXT:    .end_amd_kernel_code_t
+; TRAP-BIT-SDAG-NEXT:  ; %bb.0: ; %entry
+; TRAP-BIT-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; TRAP-BIT-SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; TRAP-BIT-SDAG-NEXT:    s_mov_b32 s2, -1
+; TRAP-BIT-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; TRAP-BIT-SDAG-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
+; TRAP-BIT-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; TRAP-BIT-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, -1, v0
+; TRAP-BIT-SDAG-NEXT:    s_cbranch_vccz .LBB4_3
+; TRAP-BIT-SDAG-NEXT:  .LBB4_1: ; %ret
+; TRAP-BIT-SDAG-NEXT:    v_mov_b32_e32 v0, 3
+; TRAP-BIT-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; TRAP-BIT-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; TRAP-BIT-SDAG-NEXT:  .LBB4_2:
+; TRAP-BIT-SDAG-NEXT:    s_endpgm
+; TRAP-BIT-SDAG-NEXT:  .LBB4_3: ; %trap
+; TRAP-BIT-SDAG-NEXT:    s_mov_b32 m0, -1
+; TRAP-BIT-SDAG-NEXT:    s_cbranch_execnz .LBB4_2
+; TRAP-BIT-SDAG-NEXT:  ; %bb.4: ; %trap
+; TRAP-BIT-SDAG-NEXT:    v_mov_b32_e32 v0, 0x4d2
+; TRAP-BIT-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; TRAP-BIT-SDAG-NEXT:    ds_write_b32 v1, v0
+; TRAP-BIT-SDAG-NEXT:    s_branch .LBB4_1
+;
+; TRAP-BIT-GISEL-LABEL: non_entry_trap_no_unreachable:
+; TRAP-BIT-GISEL:         .amd_kernel_code_t
+; TRAP-BIT-GISEL-NEXT:     amd_code_version_major = 1
+; TRAP-BIT-GISEL-NEXT:     amd_code_version_minor = 2
+; TRAP-BIT-GISEL-NEXT:     amd_machine_kind = 1
+; TRAP-BIT-GISEL-NEXT:     amd_machine_version_major = 6
+; TRAP-BIT-GISEL-NEXT:     amd_machine_version_minor = 0
+; TRAP-BIT-GISEL-NEXT:     amd_machine_version_stepping = 0
+; TRAP-BIT-GISEL-NEXT:     kernel_code_entry_byte_offset = 256
+; TRAP-BIT-GISEL-NEXT:     kernel_code_prefetch_byte_size = 0
+; TRAP-BIT-GISEL-NEXT:     granulated_workitem_vgpr_count = 0
+; TRAP-BIT-GISEL-NEXT:     granulated_wavefront_sgpr_count = 1
+; TRAP-BIT-GISEL-NEXT:     priority = 0
+; TRAP-BIT-GISEL-NEXT:     float_mode = 240
+; TRAP-BIT-GISEL-NEXT:     priv = 0
+; TRAP-BIT-GISEL-NEXT:     enable_dx10_clamp = 1
+; TRAP-BIT-GISEL-NEXT:     debug_mode = 0
+; TRAP-BIT-GISEL-NEXT:     enable_ieee_mode = 1
+; TRAP-BIT-GISEL-NEXT:     enable_wgp_mode = 0
+; TRAP-BIT-GISEL-NEXT:     enable_mem_ordered = 0
+; TRAP-BIT-GISEL-NEXT:     enable_fwd_progress = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; TRAP-BIT-GISEL-NEXT:     user_sgpr_count = 12
+; TRAP-BIT-GISEL-NEXT:     enable_trap_handler = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_x = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_y = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_z = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_info = 0
+; TRAP-BIT-GISEL-NEXT:     enable_vgpr_workitem_id = 2
+; TRAP-BIT-GISEL-NEXT:     enable_exception_msb = 0
+; TRAP-BIT-GISEL-NEXT:     granulated_lds_size = 0
+; TRAP-BIT-GISEL-NEXT:     enable_exception = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_buffer = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_dispatch_ptr = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_queue_ptr = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_dispatch_id = 1
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_flat_scratch_init = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_size = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; TRAP-BIT-GISEL-NEXT:     enable_wavefront_size32 = 0
+; TRAP-BIT-GISEL-NEXT:     enable_ordered_append_gds = 0
+; TRAP-BIT-GISEL-NEXT:     private_element_size = 1
+; TRAP-BIT-GISEL-NEXT:     is_ptr64 = 1
+; TRAP-BIT-GISEL-NEXT:     is_dynamic_callstack = 0
+; TRAP-BIT-GISEL-NEXT:     is_debug_enabled = 0
+; TRAP-BIT-GISEL-NEXT:     is_xnack_enabled = 0
+; TRAP-BIT-GISEL-NEXT:     workitem_private_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT:     workgroup_group_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT:     gds_segment_byte_size = 0
+; TRAP-BIT-GISEL-NEXT:     kernarg_segment_byte_size = 24
+; TRAP-BIT-GISEL-NEXT:     workgroup_fbarrier_count = 0
+; TRAP-BIT-GISEL-NEXT:     wavefront_sgpr_count = 10
+; TRAP-BIT-GISEL-NEXT:     workitem_vgpr_count = 2
+; TRAP-BIT-GISEL-NEXT:     reserved_vgpr_first = 0
+; TRAP-BIT-GISEL-NEXT:     reserved_vgpr_count = 0
+; TRAP-BIT-GISEL-NEXT:     reserved_sgpr_first = 0
+; TRAP-BIT-GISEL-NEXT:     reserved_sgpr_count = 0
+; TRAP-BIT-GISEL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; TRAP-BIT-GISEL-NEXT:     debug_private_segment_buffer_sgpr = 0
+; TRAP-BIT-GISEL-NEXT:     kernarg_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT:     group_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT:     private_segment_alignment = 4
+; TRAP-BIT-GISEL-NEXT:     wavefront_size = 6
+; TRAP-BIT-GISEL-NEXT:     call_convention = -1
+; TRAP-BIT-GISEL-NEXT:     runtime_loader_kernel_symbol = 0
+; TRAP-BIT-GISEL-NEXT:    .end_amd_kernel_code_t
+; TRAP-BIT-GISEL-NEXT:  ; %bb.0: ; %entry
+; TRAP-BIT-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; TRAP-BIT-GISEL-NEXT:    s_mov_b32 s2, -1
+; TRAP-BIT-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; TRAP-BIT-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; TRAP-BIT-GISEL-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
+; TRAP-BIT-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; TRAP-BIT-GISEL-NEXT:    v_readfirstlane_b32 s4, v0
+; TRAP-BIT-GISEL-NEXT:    s_cmp_eq_u32 s4, -1
+; TRAP-BIT-GISEL-NEXT:    s_cbranch_scc0 .LBB4_3
+; TRAP-BIT-GISEL-NEXT:  .LBB4_1: ; %ret
+; TRAP-BIT-GISEL-NEXT:    v_mov_b32_e32 v0, 3
+; TRAP-BIT-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; TRAP-BIT-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; TRAP-BIT-GISEL-NEXT:  .LBB4_2:
+; TRAP-BIT-GISEL-NEXT:    s_endpgm
+; TRAP-BIT-GISEL-NEXT:  .LBB4_3: ; %trap
+; TRAP-BIT-GISEL-NEXT:    s_cbranch_execnz .LBB4_2
+; TRAP-BIT-GISEL-NEXT:  ; %bb.4: ; %trap
+; TRAP-BIT-GISEL-NEXT:    v_mov_b32_e32 v0, 0x4d2
+; TRAP-BIT-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; TRAP-BIT-GISEL-NEXT:    s_mov_b32 m0, -1
+; TRAP-BIT-GISEL-NEXT:    ds_write_b32 v1, v0
+; TRAP-BIT-GISEL-NEXT:    s_branch .LBB4_1
+;
+; NO-TRAP-BIT-SDAG-LABEL: non_entry_trap_no_unreachable:
+; NO-TRAP-BIT-SDAG:         .amd_kernel_code_t
+; NO-TRAP-BIT-SDAG-NEXT:     amd_code_version_major = 1
+; NO-TRAP-BIT-SDAG-NEXT:     amd_code_version_minor = 2
+; NO-TRAP-BIT-SDAG-NEXT:     amd_machine_kind = 1
+; NO-TRAP-BIT-SDAG-NEXT:     amd_machine_version_major = 6
+; NO-TRAP-BIT-SDAG-NEXT:     amd_machine_version_minor = 0
+; NO-TRAP-BIT-SDAG-NEXT:     amd_machine_version_stepping = 0
+; NO-TRAP-BIT-SDAG-NEXT:     kernel_code_entry_byte_offset = 256
+; NO-TRAP-BIT-SDAG-NEXT:     kernel_code_prefetch_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     granulated_workitem_vgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT:     granulated_wavefront_sgpr_count = 1
+; NO-TRAP-BIT-SDAG-NEXT:     priority = 0
+; NO-TRAP-BIT-SDAG-NEXT:     float_mode = 240
+; NO-TRAP-BIT-SDAG-NEXT:     priv = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_dx10_clamp = 1
+; NO-TRAP-BIT-SDAG-NEXT:     debug_mode = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_ieee_mode = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_wgp_mode = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_mem_ordered = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_fwd_progress = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; NO-TRAP-BIT-SDAG-NEXT:     user_sgpr_count = 12
+; NO-TRAP-BIT-SDAG-NEXT:     enable_trap_handler = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_x = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_y = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_id_z = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_workgroup_info = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_vgpr_workitem_id = 2
+; NO-TRAP-BIT-SDAG-NEXT:     enable_exception_msb = 0
+; NO-TRAP-BIT-SDAG-NEXT:     granulated_lds_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_exception = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_buffer = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_dispatch_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_queue_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_dispatch_id = 1
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_flat_scratch_init = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_private_segment_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_wavefront_size32 = 0
+; NO-TRAP-BIT-SDAG-NEXT:     enable_ordered_append_gds = 0
+; NO-TRAP-BIT-SDAG-NEXT:     private_element_size = 1
+; NO-TRAP-BIT-SDAG-NEXT:     is_ptr64 = 1
+; NO-TRAP-BIT-SDAG-NEXT:     is_dynamic_callstack = 0
+; NO-TRAP-BIT-SDAG-NEXT:     is_debug_enabled = 0
+; NO-TRAP-BIT-SDAG-NEXT:     is_xnack_enabled = 0
+; NO-TRAP-BIT-SDAG-NEXT:     workitem_private_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     workgroup_group_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     gds_segment_byte_size = 0
+; NO-TRAP-BIT-SDAG-NEXT:     kernarg_segment_byte_size = 24
+; NO-TRAP-BIT-SDAG-NEXT:     workgroup_fbarrier_count = 0
+; NO-TRAP-BIT-SDAG-NEXT:     wavefront_sgpr_count = 12
+; NO-TRAP-BIT-SDAG-NEXT:     workitem_vgpr_count = 2
+; NO-TRAP-BIT-SDAG-NEXT:     reserved_vgpr_first = 0
+; NO-TRAP-BIT-SDAG-NEXT:     reserved_vgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT:     reserved_sgpr_first = 0
+; NO-TRAP-BIT-SDAG-NEXT:     reserved_sgpr_count = 0
+; NO-TRAP-BIT-SDAG-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; NO-TRAP-BIT-SDAG-NEXT:     debug_private_segment_buffer_sgpr = 0
+; NO-TRAP-BIT-SDAG-NEXT:     kernarg_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT:     group_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT:     private_segment_alignment = 4
+; NO-TRAP-BIT-SDAG-NEXT:     wavefront_size = 6
+; NO-TRAP-BIT-SDAG-NEXT:     call_convention = -1
+; NO-TRAP-BIT-SDAG-NEXT:     runtime_loader_kernel_symbol = 0
+; NO-TRAP-BIT-SDAG-NEXT:    .end_amd_kernel_code_t
+; NO-TRAP-BIT-SDAG-NEXT:  ; %bb.0: ; %entry
+; NO-TRAP-BIT-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-TRAP-BIT-SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; NO-TRAP-BIT-SDAG-NEXT:    s_mov_b32 s2, -1
+; NO-TRAP-BIT-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; NO-TRAP-BIT-SDAG-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
+; NO-TRAP-BIT-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, -1, v0
+; NO-TRAP-BIT-SDAG-NEXT:    s_cbranch_vccz .LBB4_3
+; NO-TRAP-BIT-SDAG-NEXT:  .LBB4_1: ; %ret
+; NO-TRAP-BIT-SDAG-NEXT:    v_mov_b32_e32 v0, 3
+; NO-TRAP-BIT-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; NO-TRAP-BIT-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-SDAG-NEXT:  .LBB4_2:
+; NO-TRAP-BIT-SDAG-NEXT:    s_endpgm
+; NO-TRAP-BIT-SDAG-NEXT:  .LBB4_3: ; %trap
+; NO-TRAP-BIT-SDAG-NEXT:    s_mov_b32 m0, -1
+; NO-TRAP-BIT-SDAG-NEXT:    s_cbranch_execnz .LBB4_2
+; NO-TRAP-BIT-SDAG-NEXT:  ; %bb.4: ; %trap
+; NO-TRAP-BIT-SDAG-NEXT:    v_mov_b32_e32 v0, 0x4d2
+; NO-TRAP-BIT-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; NO-TRAP-BIT-SDAG-NEXT:    ds_write_b32 v1, v0
+; NO-TRAP-BIT-SDAG-NEXT:    s_branch .LBB4_1
+;
+; NO-TRAP-BIT-GISEL-LABEL: non_entry_trap_no_unreachable:
+; NO-TRAP-BIT-GISEL:         .amd_kernel_code_t
+; NO-TRAP-BIT-GISEL-NEXT:     amd_code_version_major = 1
+; NO-TRAP-BIT-GISEL-NEXT:     amd_code_version_minor = 2
+; NO-TRAP-BIT-GISEL-NEXT:     amd_machine_kind = 1
+; NO-TRAP-BIT-GISEL-NEXT:     amd_machine_version_major = 6
+; NO-TRAP-BIT-GISEL-NEXT:     amd_machine_version_minor = 0
+; NO-TRAP-BIT-GISEL-NEXT:     amd_machine_version_stepping = 0
+; NO-TRAP-BIT-GISEL-NEXT:     kernel_code_entry_byte_offset = 256
+; NO-TRAP-BIT-GISEL-NEXT:     kernel_code_prefetch_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     granulated_workitem_vgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT:     granulated_wavefront_sgpr_count = 1
+; NO-TRAP-BIT-GISEL-NEXT:     priority = 0
+; NO-TRAP-BIT-GISEL-NEXT:     float_mode = 240
+; NO-TRAP-BIT-GISEL-NEXT:     priv = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_dx10_clamp = 1
+; NO-TRAP-BIT-GISEL-NEXT:     debug_mode = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_ieee_mode = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_wgp_mode = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_mem_ordered = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_fwd_progress = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; NO-TRAP-BIT-GISEL-NEXT:     user_sgpr_count = 12
+; NO-TRAP-BIT-GISEL-NEXT:     enable_trap_handler = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_x = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_y = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_id_z = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_workgroup_info = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_vgpr_workitem_id = 2
+; NO-TRAP-BIT-GISEL-NEXT:     enable_exception_msb = 0
+; NO-TRAP-BIT-GISEL-NEXT:     granulated_lds_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_exception = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_buffer = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_dispatch_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_queue_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_dispatch_id = 1
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_flat_scratch_init = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_private_segment_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_wavefront_size32 = 0
+; NO-TRAP-BIT-GISEL-NEXT:     enable_ordered_append_gds = 0
+; NO-TRAP-BIT-GISEL-NEXT:     private_element_size = 1
+; NO-TRAP-BIT-GISEL-NEXT:     is_ptr64 = 1
+; NO-TRAP-BIT-GISEL-NEXT:     is_dynamic_callstack = 0
+; NO-TRAP-BIT-GISEL-NEXT:     is_debug_enabled = 0
+; NO-TRAP-BIT-GISEL-NEXT:     is_xnack_enabled = 0
+; NO-TRAP-BIT-GISEL-NEXT:     workitem_private_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     workgroup_group_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     gds_segment_byte_size = 0
+; NO-TRAP-BIT-GISEL-NEXT:     kernarg_segment_byte_size = 24
+; NO-TRAP-BIT-GISEL-NEXT:     workgroup_fbarrier_count = 0
+; NO-TRAP-BIT-GISEL-NEXT:     wavefront_sgpr_count = 10
+; NO-TRAP-BIT-GISEL-NEXT:     workitem_vgpr_count = 2
+; NO-TRAP-BIT-GISEL-NEXT:     reserved_vgpr_first = 0
+; NO-TRAP-BIT-GISEL-NEXT:     reserved_vgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT:     reserved_sgpr_first = 0
+; NO-TRAP-BIT-GISEL-NEXT:     reserved_sgpr_count = 0
+; NO-TRAP-BIT-GISEL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; NO-TRAP-BIT-GISEL-NEXT:     debug_private_segment_buffer_sgpr = 0
+; NO-TRAP-BIT-GISEL-NEXT:     kernarg_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT:     group_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT:     private_segment_alignment = 4
+; NO-TRAP-BIT-GISEL-NEXT:     wavefront_size = 6
+; NO-TRAP-BIT-GISEL-NEXT:     call_convention = -1
+; NO-TRAP-BIT-GISEL-NEXT:     runtime_loader_kernel_symbol = 0
+; NO-TRAP-BIT-GISEL-NEXT:    .end_amd_kernel_code_t
+; NO-TRAP-BIT-GISEL-NEXT:  ; %bb.0: ; %entry
+; NO-TRAP-BIT-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; NO-TRAP-BIT-GISEL-NEXT:    s_mov_b32 s2, -1
+; NO-TRAP-BIT-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; NO-TRAP-BIT-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; NO-TRAP-BIT-GISEL-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
+; NO-TRAP-BIT-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-GISEL-NEXT:    v_readfirstlane_b32 s4, v0
+; NO-TRAP-BIT-GISEL-NEXT:    s_cmp_eq_u32 s4, -1
+; NO-TRAP-BIT-GISEL-NEXT:    s_cbranch_scc0 .LBB4_3
+; NO-TRAP-BIT-GISEL-NEXT:  .LBB4_1: ; %ret
+; NO-TRAP-BIT-GISEL-NEXT:    v_mov_b32_e32 v0, 3
+; NO-TRAP-BIT-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; NO-TRAP-BIT-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; NO-TRAP-BIT-GISEL-NEXT:  .LBB4_2:
+; NO-TRAP-BIT-GISEL-NEXT:    s_endpgm
+; NO-TRAP-BIT-GISEL-NEXT:  .LBB4_3: ; %trap
+; NO-TRAP-BIT-GISEL-NEXT:    s_cbranch_execnz .LBB4_2
+; NO-TRAP-BIT-GISEL-NEXT:  ; %bb.4: ; %trap
+; NO-TRAP-BIT-GISEL-NEXT:    v_mov_b32_e32 v0, 0x4d2
+; NO-TRAP-BIT-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; NO-TRAP-BIT-GISEL-NEXT:    s_mov_b32 m0, -1
+; NO-TRAP-BIT-GISEL-NEXT:    ds_write_b32 v1, v0
+; NO-TRAP-BIT-GISEL-NEXT:    s_branch .LBB4_1
+;
+; GCN-WARNING-SDAG-LABEL: non_entry_trap_no_unreachable:
+; GCN-WARNING-SDAG:       ; %bb.0: ; %entry
+; GCN-WARNING-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-WARNING-SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-WARNING-SDAG-NEXT:    s_mov_b32 s2, -1
+; GCN-WARNING-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-WARNING-SDAG-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
+; GCN-WARNING-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GCN-WARNING-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, -1, v0
+; GCN-WARNING-SDAG-NEXT:    s_cbranch_vccz .LBB4_3
+; GCN-WARNING-SDAG-NEXT:  .LBB4_1: ; %ret
+; GCN-WARNING-SDAG-NEXT:    v_mov_b32_e32 v0, 3
+; GCN-WARNING-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GCN-WARNING-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GCN-WARNING-SDAG-NEXT:  .LBB4_2:
+; GCN-WARNING-SDAG-NEXT:    s_endpgm
+; GCN-WARNING-SDAG-NEXT:  .LBB4_3: ; %trap
+; GCN-WARNING-SDAG-NEXT:    s_mov_b32 m0, -1
+; GCN-WARNING-SDAG-NEXT:    s_cbranch_execnz .LBB4_2
+; GCN-WARNING-SDAG-NEXT:  ; %bb.4: ; %trap
+; GCN-WARNING-SDAG-NEXT:    v_mov_b32_e32 v0, 0x4d2
+; GCN-WARNING-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GCN-WARNING-SDAG-NEXT:    ds_write_b32 v1, v0
+; GCN-WARNING-SDAG-NEXT:    s_branch .LBB4_1
+;
+; GCN-WARNING-GISEL-LABEL: non_entry_trap_no_unreachable:
+; GCN-WARNING-GISEL:       ; %bb.0: ; %entry
+; GCN-WARNING-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-WARNING-GISEL-NEXT:    s_mov_b32 s2, -1
+; GCN-WARNING-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-WARNING-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-WARNING-GISEL-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
+; GCN-WARNING-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GCN-WARNING-GISEL-NEXT:    v_readfirstlane_b32 s4, v0
+; GCN-WARNING-GISEL-NEXT:    s_cmp_eq_u32 s4, -1
+; GCN-WARNING-GISEL-NEXT:    s_cbranch_scc0 .LBB4_3
+; GCN-WARNING-GISEL-NEXT:  .LBB4_1: ; %ret
+; GCN-WARNING-GISEL-NEXT:    v_mov_b32_e32 v0, 3
+; GCN-WARNING-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GCN-WARNING-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GCN-WARNING-GISEL-NEXT:  .LBB4_2:
+; GCN-WARNING-GISEL-NEXT:    s_endpgm
+; GCN-WARNING-GISEL-NEXT:  .LBB4_3: ; %trap
+; GCN-WARNING-GISEL-NEXT:    s_cbranch_execnz .LBB4_2
+; GCN-WARNING-GISEL-NEXT:  ; %bb.4: ; %trap
+; GCN-WARNING-GISEL-NEXT:    v_mov_b32_e32 v0, 0x4d2
+; GCN-WARNING-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GCN-WARNING-GISEL-NEXT:    s_mov_b32 m0, -1
+; GCN-WARNING-GISEL-NEXT:    ds_write_b32 v1, v0
+; GCN-WARNING-GISEL-NEXT:    s_branch .LBB4_1
 entry:
   %tmp29 = load volatile i32, ptr addrspace(1) %arg0
   %cmp = icmp eq i32 %tmp29, -1
@@ -147,3 +2522,13 @@ attributes #1 = { nounwind }
 
 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN: {{.*}}
+; MESA-TRAP-GISEL: {{.*}}
+; MESA-TRAP-SDAG: {{.*}}
+; MESA-WARNING-GISEL: {{.*}}
+; MESA-WARNING-SDAG: {{.*}}
+; NO-MESA-TRAP-GISEL: {{.*}}
+; NO-MESA-TRAP-SDAG: {{.*}}
+; NOMESA-TRAP-GISEL: {{.*}}
+; NOMESA-TRAP-SDAG: {{.*}}



More information about the llvm-commits mailing list