[llvm] 4e4c351 - AMDGPU: Avoid endpgm in middle of block for fallback trap lowering.

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 9 18:04:56 PDT 2023


Author: Matt Arsenault
Date: 2023-06-09T21:04:38-04:00
New Revision: 4e4c351ae5b81774423a6ed6eb90abf4b82eff90

URL: https://github.com/llvm/llvm-project/commit/4e4c351ae5b81774423a6ed6eb90abf4b82eff90
DIFF: https://github.com/llvm/llvm-project/commit/4e4c351ae5b81774423a6ed6eb90abf4b82eff90.diff

LOG: AMDGPU: Avoid endpgm in middle of block for fallback trap lowering.

The fallback trap lowering was inserting an s_endpgm in the middle of a
block, but s_endpgm has to be a terminator. If the trap is not in a
terminator position, split the block and insert a branch to a new block
that contains the s_endpgm.

Fixes a verifier error when a non-kernel function uses LDS and there is
no trap support (the same fix applies to other trap sources).

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trap.mir

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
    llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
    llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
    llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/lib/Target/AMDGPU/SIInstructions.td
    llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
    llvm/test/CodeGen/AMDGPU/trap.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 09d9430aea49a..a00a24f55662f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4654,6 +4654,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(RET_GLUE)
   NODE_NAME_CASE(RETURN_TO_EPILOG)
   NODE_NAME_CASE(ENDPGM)
+  NODE_NAME_CASE(ENDPGM_TRAP)
   NODE_NAME_CASE(DWORDADDR)
   NODE_NAME_CASE(FRACT)
   NODE_NAME_CASE(SETCC)

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index c2d6a225a2618..aed99b2f7d08e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -386,6 +386,9 @@ enum NodeType : unsigned {
   // A uniform kernel return that terminates the wavefront.
   ENDPGM,
 
+  // s_endpgm, but we may want to insert it in the middle of the block.
+  ENDPGM_TRAP,
+
   // Return to a shader part's epilog code.
   RETURN_TO_EPILOG,
 

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 8dcccdde3b290..9c01cb64b5397 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -39,6 +39,7 @@ def AMDGPUFmasOp : SDTypeProfile<1, 4,
   [SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<4>]
 >;
 
+def ImmOp : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 def AMDGPUKillSDT : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 
 def AMDGPUIfOp : SDTypeProfile<1, 2,
@@ -352,6 +353,8 @@ def IL_brcond      : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChai
 //===----------------------------------------------------------------------===//
 def AMDGPUendpgm : SDNode<"AMDGPUISD::ENDPGM", SDTNone,
     [SDNPHasChain, SDNPOptInGlue]>;
+def AMDGPUendpgm_trap : SDNode<"AMDGPUISD::ENDPGM_TRAP", SDTNone,
+    [SDNPHasChain]>;
 
 def AMDGPUreturn_to_epilog : SDNode<"AMDGPUISD::RETURN_TO_EPILOG", SDTNone,
     [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index de5778ff50172..321febadab7b2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -5661,7 +5661,29 @@ bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI,
 
 bool AMDGPULegalizerInfo::legalizeTrapEndpgm(
     MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
-  B.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
+  const DebugLoc &DL = MI.getDebugLoc();
+  MachineBasicBlock &BB = B.getMBB();
+  MachineFunction *MF = BB.getParent();
+
+  if (BB.succ_empty() && std::next(MI.getIterator()) == BB.end()) {
+    BuildMI(BB, BB.end(), DL, B.getTII().get(AMDGPU::S_ENDPGM))
+      .addImm(0);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  // We need a block split to make the real endpgm a terminator. We also don't
+  // want to break phis in successor blocks, so we can't just delete to the
+  // end of the block.
+  BB.splitAt(MI, false /*UpdateLiveIns*/);
+  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
+  MF->push_back(TrapBB);
+  BuildMI(*TrapBB, TrapBB->end(), DL, B.getTII().get(AMDGPU::S_ENDPGM))
+    .addImm(0);
+  BuildMI(BB, &MI, DL, B.getTII().get(AMDGPU::S_CBRANCH_EXECNZ))
+    .addMBB(TrapBB);
+
+  BB.addSuccessor(TrapBB);
   MI.eraseFromParent();
   return true;
 }

diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 8c24789d53c8b..0b05f96e98084 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4549,6 +4549,30 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
     MI.eraseFromParent();
     return BB;
   }
+  case AMDGPU::ENDPGM_TRAP: {
+    const DebugLoc &DL = MI.getDebugLoc();
+    if (BB->succ_empty() && std::next(MI.getIterator()) == BB->end()) {
+      MI.setDesc(TII->get(AMDGPU::S_ENDPGM));
+      MI.addOperand(MachineOperand::CreateImm(0));
+      return BB;
+    }
+
+    // We need a block split to make the real endpgm a terminator. We also don't
+    // want to break phis in successor blocks, so we can't just delete to the
+    // end of the block.
+
+    MachineBasicBlock *SplitBB = BB->splitAt(MI, false /*UpdateLiveIns*/);
+    MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
+    MF->push_back(TrapBB);
+    BuildMI(*TrapBB, TrapBB->end(), DL, TII->get(AMDGPU::S_ENDPGM))
+      .addImm(0);
+    BuildMI(*BB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+      .addMBB(TrapBB);
+
+    BB->addSuccessor(TrapBB);
+    MI.eraseFromParent();
+    return SplitBB;
+  }
   default:
     return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
   }
@@ -5572,7 +5596,7 @@ SDValue SITargetLowering::lowerTrapEndpgm(
     SDValue Op, SelectionDAG &DAG) const {
   SDLoc SL(Op);
   SDValue Chain = Op.getOperand(0);
-  return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain);
+  return DAG.getNode(AMDGPUISD::ENDPGM_TRAP, SL, MVT::Other, Chain);
 }
 
 SDValue SITargetLowering::loadImplicitKernelArgument(SelectionDAG &DAG, MVT VT,

diff  --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index f9eba91d22b79..5ffa474857781 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -95,6 +95,16 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
 //===----------------------------------------------------------------------===//
 // Pseudo Instructions
 //===----------------------------------------------------------------------===//
+
+// Insert a branch to an endpgm block to use as a fallback trap.
+def ENDPGM_TRAP : SPseudoInstSI<
+  (outs), (ins),
+  [(AMDGPUendpgm_trap)],
+  "ENDPGM_TRAP"> {
+  let hasSideEffects = 1;
+  let usesCustomInserter = 1;
+}
+
 def ATOMIC_FENCE : SPseudoInstSI<
   (outs), (ins i32imm:$ordering, i32imm:$scope),
   [(atomic_fence (i32 timm:$ordering), (i32 timm:$scope))],

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trap.mir
new file mode 100644
index 0000000000000..4dc8514a81b87
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trap.mir
@@ -0,0 +1,64 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -o - %s | FileCheck -check-prefix=GCN %s
+
+# Check edge cases for trap legalization
+
+---
+name: test_fallthrough_after_trap
+body: |
+  ; GCN-LABEL: name: test_fallthrough_after_trap
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x80000000), %bb.2(0x00000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
+  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: {{$}}
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    %0:_(s8) = G_CONSTANT i8 0
+    %1:_(p1) = G_CONSTANT i64 0
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.trap)
+
+  bb.1:
+    G_STORE %0, %1 :: (store 1, addrspace 1)
+
+...
+
+---
+name: test_def_fallthrough_after_trap
+body: |
+  ; GCN-LABEL: name: test_def_fallthrough_after_trap
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.3, implicit $exec
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   successors: %bb.1(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: {{$}}
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.3:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    %0:_(s8) = G_CONSTANT i8 0
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.trap)
+    %1:_(p1) = G_CONSTANT i64 0
+
+  bb.1:
+    G_STORE %0, %1 :: (store 1, addrspace 1)
+
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
index d7f82ce8a9911..7ba4b314ebdb8 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
@@ -11,6 +11,17 @@
 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - -amdgpu-enable-lower-module-lds=false %s 2> %t | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
 ; RUN: FileCheck -check-prefix=ERR %s < %t
 
+; Test there's no verifier error if a function directly uses LDS and
+; we emit a trap. The s_endpgm needs to be emitted in a terminator
+; position.
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s 2> %t | FileCheck -check-prefixes=CHECK,SDAG %s
+; RUN: FileCheck -check-prefix=ERR %s < %t
+
+; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s 2> %t | FileCheck -check-prefixes=CHECK,GISEL %s
+; RUN: FileCheck -check-prefix=ERR %s < %t
+
+
 @lds = internal addrspace(3) global float poison, align 4
 
 ; FIXME: The DAG should probably move the trap before the access.
@@ -56,6 +67,30 @@ define void @func_use_lds_global() {
 ; GFX9-GISEL-NEXT:    ds_write_b32 v0, v0
 ; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG-LABEL: func_use_lds_global:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-NEXT:    ds_write_b32 v0, v0
+; SDAG-NEXT:    s_cbranch_execnz .LBB0_2
+; SDAG-NEXT:  ; %bb.1:
+; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-NEXT:  .LBB0_2:
+; SDAG-NEXT:    s_endpgm
+;
+; GISEL-LABEL: func_use_lds_global:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_cbranch_execnz .LBB0_2
+; GISEL-NEXT:  ; %bb.1:
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GISEL-NEXT:    ds_write_b32 v0, v0
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-NEXT:  .LBB0_2:
+; GISEL-NEXT:    s_endpgm
   store volatile float 0.0, ptr addrspace(3) @lds, align 4
   ret void
 }
@@ -91,9 +126,467 @@ define void @func_use_lds_global_constexpr_cast() {
 ; GFX9-GISEL-NEXT:    global_store_dword v[0:1], v0, off
 ; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG-LABEL: func_use_lds_global_constexpr_cast:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    s_cbranch_execnz .LBB1_2
+; SDAG-NEXT:  ; %bb.1:
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-NEXT:  .LBB1_2:
+; SDAG-NEXT:    s_endpgm
+;
+; GISEL-LABEL: func_use_lds_global_constexpr_cast:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_cbranch_execnz .LBB1_2
+; GISEL-NEXT:  ; %bb.1:
+; GISEL-NEXT:    global_store_dword v[0:1], v0, off
+; GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-NEXT:  .LBB1_2:
+; GISEL-NEXT:    s_endpgm
   store volatile i32 ptrtoint (ptr addrspace(3) @lds to i32), ptr addrspace(1) poison, align 4
   ret void
 }
+
+; ERR: warning: <unknown>:0:0: in function func_uses_lds_multi void (i1): local memory global used by non-kernel function
+define void @func_uses_lds_multi(i1 %cond) {
+; GFX8-SDAG-LABEL: func_uses_lds_multi:
+; GFX8-SDAG:       ; %bb.0: ; %entry
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX8-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; GFX8-SDAG-NEXT:    s_xor_b64 s[4:5], vcc, -1
+; GFX8-SDAG-NEXT:    s_mov_b32 m0, -1
+; GFX8-SDAG-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
+; GFX8-SDAG-NEXT:    s_xor_b64 s[4:5], exec, s[8:9]
+; GFX8-SDAG-NEXT:    s_cbranch_execz .LBB2_2
+; GFX8-SDAG-NEXT:  ; %bb.1: ; %bb1
+; GFX8-SDAG-NEXT:    v_mov_b32_e32 v0, 1
+; GFX8-SDAG-NEXT:    ds_write_b32 v0, v0
+; GFX8-SDAG-NEXT:    s_mov_b64 s[0:1], s[6:7]
+; GFX8-SDAG-NEXT:    s_trap 2
+; GFX8-SDAG-NEXT:  .LBB2_2: ; %Flow
+; GFX8-SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
+; GFX8-SDAG-NEXT:    s_cbranch_execz .LBB2_4
+; GFX8-SDAG-NEXT:  ; %bb.3: ; %bb0
+; GFX8-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX8-SDAG-NEXT:    ds_write_b32 v0, v0
+; GFX8-SDAG-NEXT:    s_mov_b64 s[0:1], s[6:7]
+; GFX8-SDAG-NEXT:    s_trap 2
+; GFX8-SDAG-NEXT:  .LBB2_4: ; %ret
+; GFX8-SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX8-SDAG-NEXT:    v_mov_b32_e32 v0, 2
+; GFX8-SDAG-NEXT:    ds_write_b32 v0, v0
+; GFX8-SDAG-NEXT:    s_mov_b64 s[0:1], s[6:7]
+; GFX8-SDAG-NEXT:    s_trap 2
+; GFX8-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: func_uses_lds_multi:
+; GFX8-GISEL:       ; %bb.0: ; %entry
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX8-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX8-GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
+; GFX8-GISEL-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
+; GFX8-GISEL-NEXT:    s_xor_b64 s[4:5], exec, s[8:9]
+; GFX8-GISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX8-GISEL-NEXT:  ; %bb.1: ; %bb1
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, 1
+; GFX8-GISEL-NEXT:    s_mov_b32 m0, -1
+; GFX8-GISEL-NEXT:    s_mov_b64 s[0:1], s[6:7]
+; GFX8-GISEL-NEXT:    s_trap 2
+; GFX8-GISEL-NEXT:    ds_write_b32 v0, v0
+; GFX8-GISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX8-GISEL-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
+; GFX8-GISEL-NEXT:    s_cbranch_execz .LBB2_4
+; GFX8-GISEL-NEXT:  ; %bb.3: ; %bb0
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX8-GISEL-NEXT:    s_mov_b32 m0, -1
+; GFX8-GISEL-NEXT:    s_mov_b64 s[0:1], s[6:7]
+; GFX8-GISEL-NEXT:    s_trap 2
+; GFX8-GISEL-NEXT:    ds_write_b32 v0, v0
+; GFX8-GISEL-NEXT:  .LBB2_4: ; %ret
+; GFX8-GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, 2
+; GFX8-GISEL-NEXT:    s_mov_b32 m0, -1
+; GFX8-GISEL-NEXT:    s_mov_b64 s[0:1], s[6:7]
+; GFX8-GISEL-NEXT:    s_trap 2
+; GFX8-GISEL-NEXT:    ds_write_b32 v0, v0
+; GFX8-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: func_uses_lds_multi:
+; GFX9-SDAG:       ; %bb.0: ; %entry
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX9-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; GFX9-SDAG-NEXT:    s_xor_b64 s[4:5], vcc, -1
+; GFX9-SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; GFX9-SDAG-NEXT:    s_xor_b64 s[4:5], exec, s[6:7]
+; GFX9-SDAG-NEXT:    s_cbranch_execz .LBB2_2
+; GFX9-SDAG-NEXT:  ; %bb.1: ; %bb1
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 1
+; GFX9-SDAG-NEXT:    ds_write_b32 v0, v0
+; GFX9-SDAG-NEXT:    s_trap 2
+; GFX9-SDAG-NEXT:  .LBB2_2: ; %Flow
+; GFX9-SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
+; GFX9-SDAG-NEXT:    s_cbranch_execz .LBB2_4
+; GFX9-SDAG-NEXT:  ; %bb.3: ; %bb0
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-SDAG-NEXT:    ds_write_b32 v0, v0
+; GFX9-SDAG-NEXT:    s_trap 2
+; GFX9-SDAG-NEXT:  .LBB2_4: ; %ret
+; GFX9-SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 2
+; GFX9-SDAG-NEXT:    ds_write_b32 v0, v0
+; GFX9-SDAG-NEXT:    s_trap 2
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: func_uses_lds_multi:
+; GFX9-GISEL:       ; %bb.0: ; %entry
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX9-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX9-GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
+; GFX9-GISEL-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; GFX9-GISEL-NEXT:    s_xor_b64 s[4:5], exec, s[6:7]
+; GFX9-GISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX9-GISEL-NEXT:  ; %bb.1: ; %bb1
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 1
+; GFX9-GISEL-NEXT:    s_trap 2
+; GFX9-GISEL-NEXT:    ds_write_b32 v0, v0
+; GFX9-GISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX9-GISEL-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
+; GFX9-GISEL-NEXT:    s_cbranch_execz .LBB2_4
+; GFX9-GISEL-NEXT:  ; %bb.3: ; %bb0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GISEL-NEXT:    s_trap 2
+; GFX9-GISEL-NEXT:    ds_write_b32 v0, v0
+; GFX9-GISEL-NEXT:  .LBB2_4: ; %ret
+; GFX9-GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 2
+; GFX9-GISEL-NEXT:    s_trap 2
+; GFX9-GISEL-NEXT:    ds_write_b32 v0, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG-LABEL: func_uses_lds_multi:
+; SDAG:       ; %bb.0: ; %entry
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_and_b32_e32 v0, 1, v0
+; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; SDAG-NEXT:    s_xor_b64 s[4:5], vcc, -1
+; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; SDAG-NEXT:    s_xor_b64 s[4:5], exec, s[6:7]
+; SDAG-NEXT:    s_cbranch_execz .LBB2_2
+; SDAG-NEXT:  ; %bb.1: ; %bb1
+; SDAG-NEXT:    v_mov_b32_e32 v0, 1
+; SDAG-NEXT:    ds_write_b32 v0, v0
+; SDAG-NEXT:    s_cbranch_execnz .LBB2_6
+; SDAG-NEXT:  .LBB2_2: ; %Flow
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
+; SDAG-NEXT:    s_cbranch_execz .LBB2_4
+; SDAG-NEXT:  ; %bb.3: ; %bb0
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-NEXT:    ds_write_b32 v0, v0
+; SDAG-NEXT:    s_cbranch_execnz .LBB2_6
+; SDAG-NEXT:  .LBB2_4: ; %ret
+; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 2
+; SDAG-NEXT:    ds_write_b32 v0, v0
+; SDAG-NEXT:    s_cbranch_execnz .LBB2_6
+; SDAG-NEXT:  ; %bb.5: ; %ret
+; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-NEXT:  .LBB2_6:
+; SDAG-NEXT:    s_endpgm
+;
+; GISEL-LABEL: func_uses_lds_multi:
+; GISEL:       ; %bb.0: ; %entry
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
+; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; GISEL-NEXT:    s_xor_b64 s[4:5], exec, s[6:7]
+; GISEL-NEXT:    s_cbranch_execz .LBB2_3
+; GISEL-NEXT:  ; %bb.1: ; %bb1
+; GISEL-NEXT:    s_cbranch_execnz .LBB2_8
+; GISEL-NEXT:  ; %bb.2: ; %bb1
+; GISEL-NEXT:    v_mov_b32_e32 v0, 1
+; GISEL-NEXT:    ds_write_b32 v0, v0
+; GISEL-NEXT:  .LBB2_3: ; %Flow
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
+; GISEL-NEXT:    s_cbranch_execz .LBB2_6
+; GISEL-NEXT:  ; %bb.4: ; %bb0
+; GISEL-NEXT:    s_cbranch_execnz .LBB2_8
+; GISEL-NEXT:  ; %bb.5: ; %bb0
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GISEL-NEXT:    ds_write_b32 v0, v0
+; GISEL-NEXT:  .LBB2_6: ; %ret
+; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GISEL-NEXT:    s_cbranch_execnz .LBB2_8
+; GISEL-NEXT:  ; %bb.7: ; %ret
+; GISEL-NEXT:    v_mov_b32_e32 v0, 2
+; GISEL-NEXT:    ds_write_b32 v0, v0
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-NEXT:  .LBB2_8:
+; GISEL-NEXT:    s_endpgm
+entry:
+  br i1 %cond, label %bb0, label %bb1
+
+bb0:
+  store volatile i32 0, ptr addrspace(3) @lds, align 4
+  br label %ret
+
+bb1:
+  store volatile i32 1, ptr addrspace(3) @lds, align 4
+  br label %ret
+
+ret:
+  store volatile i32 2, ptr addrspace(3) @lds, align 4
+  ret void
+}
+
+; ERR: warning: <unknown>:0:0: in function func_uses_lds_code_after void (ptr addrspace(1)): local memory global used by non-kernel function
+define void @func_uses_lds_code_after(ptr addrspace(1) %ptr) {
+; GFX8-SDAG-LABEL: func_uses_lds_code_after:
+; GFX8-SDAG:       ; %bb.0:
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; GFX8-SDAG-NEXT:    s_mov_b32 m0, -1
+; GFX8-SDAG-NEXT:    ds_write_b32 v0, v2
+; GFX8-SDAG-NEXT:    v_mov_b32_e32 v2, 1
+; GFX8-SDAG-NEXT:    s_mov_b64 s[0:1], s[6:7]
+; GFX8-SDAG-NEXT:    s_trap 2
+; GFX8-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: func_uses_lds_code_after:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX8-GISEL-NEXT:    s_mov_b32 m0, -1
+; GFX8-GISEL-NEXT:    s_mov_b64 s[0:1], s[6:7]
+; GFX8-GISEL-NEXT:    s_trap 2
+; GFX8-GISEL-NEXT:    ds_write_b32 v0, v2
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v2, 1
+; GFX8-GISEL-NEXT:    flat_store_dword v[0:1], v2
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: func_uses_lds_code_after:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-SDAG-NEXT:    ds_write_b32 v0, v2
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, 1
+; GFX9-SDAG-NEXT:    s_trap 2
+; GFX9-SDAG-NEXT:    global_store_dword v[0:1], v2, off
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: func_uses_lds_code_after:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-GISEL-NEXT:    s_trap 2
+; GFX9-GISEL-NEXT:    ds_write_b32 v0, v2
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 1
+; GFX9-GISEL-NEXT:    global_store_dword v[0:1], v2, off
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG-LABEL: func_uses_lds_code_after:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; SDAG-NEXT:    ds_write_b32 v0, v2
+; SDAG-NEXT:    s_cbranch_execnz .LBB3_2
+; SDAG-NEXT:  ; %bb.1:
+; SDAG-NEXT:    v_mov_b32_e32 v2, 1
+; SDAG-NEXT:    global_store_dword v[0:1], v2, off
+; SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-NEXT:  .LBB3_2:
+; SDAG-NEXT:    s_endpgm
+;
+; GISEL-LABEL: func_uses_lds_code_after:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_cbranch_execnz .LBB3_2
+; GISEL-NEXT:  ; %bb.1:
+; GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GISEL-NEXT:    ds_write_b32 v0, v2
+; GISEL-NEXT:    v_mov_b32_e32 v2, 1
+; GISEL-NEXT:    global_store_dword v[0:1], v2, off
+; GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-NEXT:  .LBB3_2:
+; GISEL-NEXT:    s_endpgm
+  store volatile i32 0, ptr addrspace(3) @lds, align 4
+  store volatile i32 1, ptr addrspace(1) %ptr, align 4
+  ret void
+}
+
+; ERR: warning: <unknown>:0:0: in function func_uses_lds_phi_after i32 (i1, ptr addrspace(1)): local memory global used by non-kernel function
+define i32 @func_uses_lds_phi_after(i1 %cond, ptr addrspace(1) %ptr) {
+; GFX8-SDAG-LABEL: func_uses_lds_phi_after:
+; GFX8-SDAG:       ; %bb.0: ; %entry
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    v_mov_b32_e32 v3, v0
+; GFX8-SDAG-NEXT:    flat_load_dword v0, v[1:2] glc
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX8-SDAG-NEXT:    v_and_b32_e32 v3, 1, v3
+; GFX8-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v3
+; GFX8-SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GFX8-SDAG-NEXT:    s_cbranch_execz .LBB4_2
+; GFX8-SDAG-NEXT:  ; %bb.1: ; %use.bb
+; GFX8-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX8-SDAG-NEXT:    s_mov_b32 m0, -1
+; GFX8-SDAG-NEXT:    ds_write_b32 v0, v0
+; GFX8-SDAG-NEXT:    s_mov_b64 s[0:1], s[6:7]
+; GFX8-SDAG-NEXT:    s_trap 2
+; GFX8-SDAG-NEXT:    flat_load_dword v0, v[1:2] glc
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX8-SDAG-NEXT:  .LBB4_2: ; %ret
+; GFX8-SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: func_uses_lds_phi_after:
+; GFX8-GISEL:       ; %bb.0: ; %entry
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v3, v0
+; GFX8-GISEL-NEXT:    flat_load_dword v0, v[1:2] glc
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v3, 1, v3
+; GFX8-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
+; GFX8-GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GFX8-GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX8-GISEL-NEXT:  ; %bb.1: ; %use.bb
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX8-GISEL-NEXT:    s_mov_b32 m0, -1
+; GFX8-GISEL-NEXT:    s_mov_b64 s[0:1], s[6:7]
+; GFX8-GISEL-NEXT:    s_trap 2
+; GFX8-GISEL-NEXT:    ds_write_b32 v0, v0
+; GFX8-GISEL-NEXT:    flat_load_dword v0, v[1:2] glc
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT:  .LBB4_2: ; %ret
+; GFX8-GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: func_uses_lds_phi_after:
+; GFX9-SDAG:       ; %bb.0: ; %entry
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v3, v0
+; GFX9-SDAG-NEXT:    global_load_dword v0, v[1:2], off glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_and_b32_e32 v3, 1, v3
+; GFX9-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v3
+; GFX9-SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GFX9-SDAG-NEXT:    s_cbranch_execz .LBB4_2
+; GFX9-SDAG-NEXT:  ; %bb.1: ; %use.bb
+; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-SDAG-NEXT:    ds_write_b32 v0, v0
+; GFX9-SDAG-NEXT:    s_trap 2
+; GFX9-SDAG-NEXT:    global_load_dword v0, v[1:2], off glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:  .LBB4_2: ; %ret
+; GFX9-SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: func_uses_lds_phi_after:
+; GFX9-GISEL:       ; %bb.0: ; %entry
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, v0
+; GFX9-GISEL-NEXT:    global_load_dword v0, v[1:2], off glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_and_b32_e32 v3, 1, v3
+; GFX9-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
+; GFX9-GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GFX9-GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX9-GISEL-NEXT:  ; %bb.1: ; %use.bb
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-GISEL-NEXT:    s_trap 2
+; GFX9-GISEL-NEXT:    ds_write_b32 v0, v0
+; GFX9-GISEL-NEXT:    global_load_dword v0, v[1:2], off glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:  .LBB4_2: ; %ret
+; GFX9-GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG-LABEL: func_uses_lds_phi_after:
+; SDAG:       ; %bb.0: ; %entry
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_mov_b32_e32 v3, v0
+; SDAG-NEXT:    global_load_dword v0, v[1:2], off glc
+; SDAG-NEXT:    s_waitcnt vmcnt(0)
+; SDAG-NEXT:    v_and_b32_e32 v3, 1, v3
+; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v3
+; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; SDAG-NEXT:    s_cbranch_execz .LBB4_3
+; SDAG-NEXT:  ; %bb.1: ; %use.bb
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-NEXT:    ds_write_b32 v0, v0
+; SDAG-NEXT:    s_cbranch_execnz .LBB4_4
+; SDAG-NEXT:  ; %bb.2: ; %use.bb
+; SDAG-NEXT:    global_load_dword v0, v[1:2], off glc
+; SDAG-NEXT:    s_waitcnt vmcnt(0)
+; SDAG-NEXT:  .LBB4_3: ; %ret
+; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
+; SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-NEXT:  .LBB4_4:
+; SDAG-NEXT:    s_endpgm
+;
+; GISEL-LABEL: func_uses_lds_phi_after:
+; GISEL:       ; %bb.0: ; %entry
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_mov_b32_e32 v3, v0
+; GISEL-NEXT:    global_load_dword v0, v[1:2], off glc
+; GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-NEXT:    v_and_b32_e32 v3, 1, v3
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
+; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT:    s_cbranch_execz .LBB4_3
+; GISEL-NEXT:  ; %bb.1: ; %use.bb
+; GISEL-NEXT:    s_cbranch_execnz .LBB4_4
+; GISEL-NEXT:  ; %bb.2: ; %use.bb
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GISEL-NEXT:    ds_write_b32 v0, v0
+; GISEL-NEXT:    global_load_dword v0, v[1:2], off glc
+; GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-NEXT:  .LBB4_3: ; %ret
+; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-NEXT:  .LBB4_4:
+; GISEL-NEXT:    s_endpgm
+entry:
+  %entry.load = load volatile i32, ptr addrspace(1) %ptr
+  br i1 %cond, label %use.bb, label %ret
+
+use.bb:
+  store volatile i32 0, ptr addrspace(3) @lds, align 4
+  %use.bb.load = load volatile i32, ptr addrspace(1) %ptr
+  br label %ret
+
+ret:
+  %phi = phi i32 [ %entry.load, %entry ], [ %use.bb.load, %use.bb ]
+  ret i32 %phi
+}
+
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
 ; GFX8: {{.*}}
 ; GFX9: {{.*}}

diff  --git a/llvm/test/CodeGen/AMDGPU/trap.ll b/llvm/test/CodeGen/AMDGPU/trap.ll
index e4cd18a0f424b..7b7a223579fb7 100644
--- a/llvm/test/CodeGen/AMDGPU/trap.ll
+++ b/llvm/test/CodeGen/AMDGPU/trap.ll
@@ -121,6 +121,29 @@ ret:
   ret void
 }
 
+; GCN-LABEL: {{^}}non_entry_trap_no_unreachable:
+; TRAP-BIT: enable_trap_handler = 1
+; NO-TRAP-BIT: enable_trap_handler = 0
+
+; HSA-TRAP: BB{{[0-9]_[0-9]+}}: ; %trap
+; HSA-TRAP: s_mov_b64 s[0:1], s[4:5]
+; HSA-TRAP-NEXT: s_trap 2
+define amdgpu_kernel void @non_entry_trap_no_unreachable(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
+entry:
+  %tmp29 = load volatile i32, ptr addrspace(1) %arg0
+  %cmp = icmp eq i32 %tmp29, -1
+  br i1 %cmp, label %ret, label %trap
+
+trap:
+  call void @llvm.trap()
+  store volatile i32 1234, ptr addrspace(3) null
+  br label %ret
+
+ret:
+  store volatile i32 3, ptr addrspace(1) %arg0
+  ret void
+}
+
 attributes #0 = { nounwind noreturn }
 attributes #1 = { nounwind }
 


        


More information about the llvm-commits mailing list