[llvm] 4e4c351 - AMDGPU: Avoid endpgm in middle of block for fallback trap lowering.
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 9 18:04:56 PDT 2023
Author: Matt Arsenault
Date: 2023-06-09T21:04:38-04:00
New Revision: 4e4c351ae5b81774423a6ed6eb90abf4b82eff90
URL: https://github.com/llvm/llvm-project/commit/4e4c351ae5b81774423a6ed6eb90abf4b82eff90
DIFF: https://github.com/llvm/llvm-project/commit/4e4c351ae5b81774423a6ed6eb90abf4b82eff90.diff
LOG: AMDGPU: Avoid endpgm in middle of block for fallback trap lowering.
The fallback trap lowering was inserting an s_endpgm in the middle of a
block, but s_endpgm has to be a terminator. If the trap is not already in
a terminator position, split the block and insert a branch to a new block
that contains the s_endpgm.
Fixes a machine verifier error when a non-kernel function uses LDS on a
target with no trap support (and for other trap sources).
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trap.mir
Modified:
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
llvm/test/CodeGen/AMDGPU/trap.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 09d9430aea49a..a00a24f55662f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4654,6 +4654,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(RET_GLUE)
NODE_NAME_CASE(RETURN_TO_EPILOG)
NODE_NAME_CASE(ENDPGM)
+ NODE_NAME_CASE(ENDPGM_TRAP)
NODE_NAME_CASE(DWORDADDR)
NODE_NAME_CASE(FRACT)
NODE_NAME_CASE(SETCC)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index c2d6a225a2618..aed99b2f7d08e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -386,6 +386,9 @@ enum NodeType : unsigned {
// A uniform kernel return that terminates the wavefront.
ENDPGM,
+ // s_endpgm, but we may want to insert it in the middle of the block.
+ ENDPGM_TRAP,
+
// Return to a shader part's epilog code.
RETURN_TO_EPILOG,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 8dcccdde3b290..9c01cb64b5397 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -39,6 +39,7 @@ def AMDGPUFmasOp : SDTypeProfile<1, 4,
[SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<4>]
>;
+def ImmOp : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def AMDGPUKillSDT : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def AMDGPUIfOp : SDTypeProfile<1, 2,
@@ -352,6 +353,8 @@ def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChai
//===----------------------------------------------------------------------===//
def AMDGPUendpgm : SDNode<"AMDGPUISD::ENDPGM", SDTNone,
[SDNPHasChain, SDNPOptInGlue]>;
+def AMDGPUendpgm_trap : SDNode<"AMDGPUISD::ENDPGM_TRAP", SDTNone,
+ [SDNPHasChain]>;
def AMDGPUreturn_to_epilog : SDNode<"AMDGPUISD::RETURN_TO_EPILOG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index de5778ff50172..321febadab7b2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -5661,7 +5661,29 @@ bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI,
bool AMDGPULegalizerInfo::legalizeTrapEndpgm(
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
- B.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineBasicBlock &BB = B.getMBB();
+ MachineFunction *MF = BB.getParent();
+
+ if (BB.succ_empty() && std::next(MI.getIterator()) == BB.end()) {
+ BuildMI(BB, BB.end(), DL, B.getTII().get(AMDGPU::S_ENDPGM))
+ .addImm(0);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // We need a block split to make the real endpgm a terminator. We also don't
+ // want to break phis in successor blocks, so we can't just delete to the
+ // end of the block.
+ BB.splitAt(MI, false /*UpdateLiveIns*/);
+ MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
+ MF->push_back(TrapBB);
+ BuildMI(*TrapBB, TrapBB->end(), DL, B.getTII().get(AMDGPU::S_ENDPGM))
+ .addImm(0);
+ BuildMI(BB, &MI, DL, B.getTII().get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addMBB(TrapBB);
+
+ BB.addSuccessor(TrapBB);
MI.eraseFromParent();
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 8c24789d53c8b..0b05f96e98084 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4549,6 +4549,30 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
MI.eraseFromParent();
return BB;
}
+ case AMDGPU::ENDPGM_TRAP: {
+ const DebugLoc &DL = MI.getDebugLoc();
+ if (BB->succ_empty() && std::next(MI.getIterator()) == BB->end()) {
+ MI.setDesc(TII->get(AMDGPU::S_ENDPGM));
+ MI.addOperand(MachineOperand::CreateImm(0));
+ return BB;
+ }
+
+ // We need a block split to make the real endpgm a terminator. We also don't
+ // want to break phis in successor blocks, so we can't just delete to the
+ // end of the block.
+
+ MachineBasicBlock *SplitBB = BB->splitAt(MI, false /*UpdateLiveIns*/);
+ MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
+ MF->push_back(TrapBB);
+ BuildMI(*TrapBB, TrapBB->end(), DL, TII->get(AMDGPU::S_ENDPGM))
+ .addImm(0);
+ BuildMI(*BB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addMBB(TrapBB);
+
+ BB->addSuccessor(TrapBB);
+ MI.eraseFromParent();
+ return SplitBB;
+ }
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
}
@@ -5572,7 +5596,7 @@ SDValue SITargetLowering::lowerTrapEndpgm(
SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Chain = Op.getOperand(0);
- return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain);
+ return DAG.getNode(AMDGPUISD::ENDPGM_TRAP, SL, MVT::Other, Chain);
}
SDValue SITargetLowering::loadImplicitKernelArgument(SelectionDAG &DAG, MVT VT,
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index f9eba91d22b79..5ffa474857781 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -95,6 +95,16 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
//===----------------------------------------------------------------------===//
// Pseudo Instructions
//===----------------------------------------------------------------------===//
+
+// Insert a branch to an endpgm block to use as a fallback trap.
+def ENDPGM_TRAP : SPseudoInstSI<
+ (outs), (ins),
+ [(AMDGPUendpgm_trap)],
+ "ENDPGM_TRAP"> {
+ let hasSideEffects = 1;
+ let usesCustomInserter = 1;
+}
+
def ATOMIC_FENCE : SPseudoInstSI<
(outs), (ins i32imm:$ordering, i32imm:$scope),
[(atomic_fence (i32 timm:$ordering), (i32 timm:$scope))],
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trap.mir
new file mode 100644
index 0000000000000..4dc8514a81b87
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trap.mir
@@ -0,0 +1,64 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -o - %s | FileCheck -check-prefix=GCN %s
+
+# Check edge cases for trap legalization
+
+---
+name: test_fallthrough_after_trap
+body: |
+ ; GCN-LABEL: name: test_fallthrough_after_trap
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x80000000), %bb.2(0x00000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
+ ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: {{$}}
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ %0:_(s8) = G_CONSTANT i8 0
+ %1:_(p1) = G_CONSTANT i64 0
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.trap)
+
+ bb.1:
+ G_STORE %0, %1 :: (store 1, addrspace 1)
+
+...
+
+---
+name: test_def_fallthrough_after_trap
+body: |
+ ; GCN-LABEL: name: test_def_fallthrough_after_trap
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.3, implicit $exec
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: successors: %bb.1(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: {{$}}
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: G_STORE [[C]](s32), [[C1]](p1) :: (store (s8), addrspace 1)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.3:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ %0:_(s8) = G_CONSTANT i8 0
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.trap)
+ %1:_(p1) = G_CONSTANT i64 0
+
+ bb.1:
+ G_STORE %0, %1 :: (store 1, addrspace 1)
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
index d7f82ce8a9911..7ba4b314ebdb8 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
@@ -11,6 +11,17 @@
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - -amdgpu-enable-lower-module-lds=false %s 2> %t | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
; RUN: FileCheck -check-prefix=ERR %s < %t
+; Test there's no verifier error if a function directly uses LDS and
+; we emit a trap. The s_endpgm needs to be emitted in a terminator
+; position.
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s 2> %t | FileCheck -check-prefixes=CHECK,SDAG %s
+; RUN: FileCheck -check-prefix=ERR %s < %t
+
+; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s 2> %t | FileCheck -check-prefixes=CHECK,GISEL %s
+; RUN: FileCheck -check-prefix=ERR %s < %t
+
+
@lds = internal addrspace(3) global float poison, align 4
; FIXME: The DAG should probably move the trap before the access.
@@ -56,6 +67,30 @@ define void @func_use_lds_global() {
; GFX9-GISEL-NEXT: ds_write_b32 v0, v0
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-LABEL: func_use_lds_global:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: ds_write_b32 v0, v0
+; SDAG-NEXT: s_cbranch_execnz .LBB0_2
+; SDAG-NEXT: ; %bb.1:
+; SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+; SDAG-NEXT: .LBB0_2:
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: func_use_lds_global:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_cbranch_execnz .LBB0_2
+; GISEL-NEXT: ; %bb.1:
+; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: ds_write_b32 v0, v0
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+; GISEL-NEXT: .LBB0_2:
+; GISEL-NEXT: s_endpgm
store volatile float 0.0, ptr addrspace(3) @lds, align 4
ret void
}
@@ -91,9 +126,467 @@ define void @func_use_lds_global_constexpr_cast() {
; GFX9-GISEL-NEXT: global_store_dword v[0:1], v0, off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-LABEL: func_use_lds_global_constexpr_cast:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: s_cbranch_execnz .LBB1_2
+; SDAG-NEXT: ; %bb.1:
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+; SDAG-NEXT: .LBB1_2:
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: func_use_lds_global_constexpr_cast:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_cbranch_execnz .LBB1_2
+; GISEL-NEXT: ; %bb.1:
+; GISEL-NEXT: global_store_dword v[0:1], v0, off
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+; GISEL-NEXT: .LBB1_2:
+; GISEL-NEXT: s_endpgm
store volatile i32 ptrtoint (ptr addrspace(3) @lds to i32), ptr addrspace(1) poison, align 4
ret void
}
+
+; ERR: warning: <unknown>:0:0: in function func_uses_lds_multi void (i1): local memory global used by non-kernel function
+define void @func_uses_lds_multi(i1 %cond) {
+; GFX8-SDAG-LABEL: func_uses_lds_multi:
+; GFX8-SDAG: ; %bb.0: ; %entry
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX8-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
+; GFX8-SDAG-NEXT: s_xor_b64 s[4:5], vcc, -1
+; GFX8-SDAG-NEXT: s_mov_b32 m0, -1
+; GFX8-SDAG-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
+; GFX8-SDAG-NEXT: s_xor_b64 s[4:5], exec, s[8:9]
+; GFX8-SDAG-NEXT: s_cbranch_execz .LBB2_2
+; GFX8-SDAG-NEXT: ; %bb.1: ; %bb1
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 1
+; GFX8-SDAG-NEXT: ds_write_b32 v0, v0
+; GFX8-SDAG-NEXT: s_mov_b64 s[0:1], s[6:7]
+; GFX8-SDAG-NEXT: s_trap 2
+; GFX8-SDAG-NEXT: .LBB2_2: ; %Flow
+; GFX8-SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
+; GFX8-SDAG-NEXT: s_cbranch_execz .LBB2_4
+; GFX8-SDAG-NEXT: ; %bb.3: ; %bb0
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX8-SDAG-NEXT: ds_write_b32 v0, v0
+; GFX8-SDAG-NEXT: s_mov_b64 s[0:1], s[6:7]
+; GFX8-SDAG-NEXT: s_trap 2
+; GFX8-SDAG-NEXT: .LBB2_4: ; %ret
+; GFX8-SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 2
+; GFX8-SDAG-NEXT: ds_write_b32 v0, v0
+; GFX8-SDAG-NEXT: s_mov_b64 s[0:1], s[6:7]
+; GFX8-SDAG-NEXT: s_trap 2
+; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: func_uses_lds_multi:
+; GFX8-GISEL: ; %bb.0: ; %entry
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX8-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX8-GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
+; GFX8-GISEL-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
+; GFX8-GISEL-NEXT: s_xor_b64 s[4:5], exec, s[8:9]
+; GFX8-GISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX8-GISEL-NEXT: ; %bb.1: ; %bb1
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, 1
+; GFX8-GISEL-NEXT: s_mov_b32 m0, -1
+; GFX8-GISEL-NEXT: s_mov_b64 s[0:1], s[6:7]
+; GFX8-GISEL-NEXT: s_trap 2
+; GFX8-GISEL-NEXT: ds_write_b32 v0, v0
+; GFX8-GISEL-NEXT: .LBB2_2: ; %Flow
+; GFX8-GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
+; GFX8-GISEL-NEXT: s_cbranch_execz .LBB2_4
+; GFX8-GISEL-NEXT: ; %bb.3: ; %bb0
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX8-GISEL-NEXT: s_mov_b32 m0, -1
+; GFX8-GISEL-NEXT: s_mov_b64 s[0:1], s[6:7]
+; GFX8-GISEL-NEXT: s_trap 2
+; GFX8-GISEL-NEXT: ds_write_b32 v0, v0
+; GFX8-GISEL-NEXT: .LBB2_4: ; %ret
+; GFX8-GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, 2
+; GFX8-GISEL-NEXT: s_mov_b32 m0, -1
+; GFX8-GISEL-NEXT: s_mov_b64 s[0:1], s[6:7]
+; GFX8-GISEL-NEXT: s_trap 2
+; GFX8-GISEL-NEXT: ds_write_b32 v0, v0
+; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: func_uses_lds_multi:
+; GFX9-SDAG: ; %bb.0: ; %entry
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
+; GFX9-SDAG-NEXT: s_xor_b64 s[4:5], vcc, -1
+; GFX9-SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
+; GFX9-SDAG-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
+; GFX9-SDAG-NEXT: s_cbranch_execz .LBB2_2
+; GFX9-SDAG-NEXT: ; %bb.1: ; %bb1
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 1
+; GFX9-SDAG-NEXT: ds_write_b32 v0, v0
+; GFX9-SDAG-NEXT: s_trap 2
+; GFX9-SDAG-NEXT: .LBB2_2: ; %Flow
+; GFX9-SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
+; GFX9-SDAG-NEXT: s_cbranch_execz .LBB2_4
+; GFX9-SDAG-NEXT: ; %bb.3: ; %bb0
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-SDAG-NEXT: ds_write_b32 v0, v0
+; GFX9-SDAG-NEXT: s_trap 2
+; GFX9-SDAG-NEXT: .LBB2_4: ; %ret
+; GFX9-SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 2
+; GFX9-SDAG-NEXT: ds_write_b32 v0, v0
+; GFX9-SDAG-NEXT: s_trap 2
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: func_uses_lds_multi:
+; GFX9-GISEL: ; %bb.0: ; %entry
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX9-GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
+; GFX9-GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
+; GFX9-GISEL-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
+; GFX9-GISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX9-GISEL-NEXT: ; %bb.1: ; %bb1
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 1
+; GFX9-GISEL-NEXT: s_trap 2
+; GFX9-GISEL-NEXT: ds_write_b32 v0, v0
+; GFX9-GISEL-NEXT: .LBB2_2: ; %Flow
+; GFX9-GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
+; GFX9-GISEL-NEXT: s_cbranch_execz .LBB2_4
+; GFX9-GISEL-NEXT: ; %bb.3: ; %bb0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GISEL-NEXT: s_trap 2
+; GFX9-GISEL-NEXT: ds_write_b32 v0, v0
+; GFX9-GISEL-NEXT: .LBB2_4: ; %ret
+; GFX9-GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 2
+; GFX9-GISEL-NEXT: s_trap 2
+; GFX9-GISEL-NEXT: ds_write_b32 v0, v0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-LABEL: func_uses_lds_multi:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: v_and_b32_e32 v0, 1, v0
+; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
+; SDAG-NEXT: s_xor_b64 s[4:5], vcc, -1
+; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
+; SDAG-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
+; SDAG-NEXT: s_cbranch_execz .LBB2_2
+; SDAG-NEXT: ; %bb.1: ; %bb1
+; SDAG-NEXT: v_mov_b32_e32 v0, 1
+; SDAG-NEXT: ds_write_b32 v0, v0
+; SDAG-NEXT: s_cbranch_execnz .LBB2_6
+; SDAG-NEXT: .LBB2_2: ; %Flow
+; SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
+; SDAG-NEXT: s_cbranch_execz .LBB2_4
+; SDAG-NEXT: ; %bb.3: ; %bb0
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: ds_write_b32 v0, v0
+; SDAG-NEXT: s_cbranch_execnz .LBB2_6
+; SDAG-NEXT: .LBB2_4: ; %ret
+; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
+; SDAG-NEXT: v_mov_b32_e32 v0, 2
+; SDAG-NEXT: ds_write_b32 v0, v0
+; SDAG-NEXT: s_cbranch_execnz .LBB2_6
+; SDAG-NEXT: ; %bb.5: ; %ret
+; SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+; SDAG-NEXT: .LBB2_6:
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: func_uses_lds_multi:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
+; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
+; GISEL-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
+; GISEL-NEXT: s_cbranch_execz .LBB2_3
+; GISEL-NEXT: ; %bb.1: ; %bb1
+; GISEL-NEXT: s_cbranch_execnz .LBB2_8
+; GISEL-NEXT: ; %bb.2: ; %bb1
+; GISEL-NEXT: v_mov_b32_e32 v0, 1
+; GISEL-NEXT: ds_write_b32 v0, v0
+; GISEL-NEXT: .LBB2_3: ; %Flow
+; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
+; GISEL-NEXT: s_cbranch_execz .LBB2_6
+; GISEL-NEXT: ; %bb.4: ; %bb0
+; GISEL-NEXT: s_cbranch_execnz .LBB2_8
+; GISEL-NEXT: ; %bb.5: ; %bb0
+; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: ds_write_b32 v0, v0
+; GISEL-NEXT: .LBB2_6: ; %ret
+; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
+; GISEL-NEXT: s_cbranch_execnz .LBB2_8
+; GISEL-NEXT: ; %bb.7: ; %ret
+; GISEL-NEXT: v_mov_b32_e32 v0, 2
+; GISEL-NEXT: ds_write_b32 v0, v0
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+; GISEL-NEXT: .LBB2_8:
+; GISEL-NEXT: s_endpgm
+entry:
+ br i1 %cond, label %bb0, label %bb1
+
+bb0:
+ store volatile i32 0, ptr addrspace(3) @lds, align 4
+ br label %ret
+
+bb1:
+ store volatile i32 1, ptr addrspace(3) @lds, align 4
+ br label %ret
+
+ret:
+ store volatile i32 2, ptr addrspace(3) @lds, align 4
+ ret void
+}
+
+; ERR: warning: <unknown>:0:0: in function func_uses_lds_code_after void (ptr addrspace(1)): local memory global used by non-kernel function
+define void @func_uses_lds_code_after(ptr addrspace(1) %ptr) {
+; GFX8-SDAG-LABEL: func_uses_lds_code_after:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX8-SDAG-NEXT: s_mov_b32 m0, -1
+; GFX8-SDAG-NEXT: ds_write_b32 v0, v2
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, 1
+; GFX8-SDAG-NEXT: s_mov_b64 s[0:1], s[6:7]
+; GFX8-SDAG-NEXT: s_trap 2
+; GFX8-SDAG-NEXT: flat_store_dword v[0:1], v2
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: func_uses_lds_code_after:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX8-GISEL-NEXT: s_mov_b32 m0, -1
+; GFX8-GISEL-NEXT: s_mov_b64 s[0:1], s[6:7]
+; GFX8-GISEL-NEXT: s_trap 2
+; GFX8-GISEL-NEXT: ds_write_b32 v0, v2
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 1
+; GFX8-GISEL-NEXT: flat_store_dword v[0:1], v2
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: func_uses_lds_code_after:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-SDAG-NEXT: ds_write_b32 v0, v2
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 1
+; GFX9-SDAG-NEXT: s_trap 2
+; GFX9-SDAG-NEXT: global_store_dword v[0:1], v2, off
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: func_uses_lds_code_after:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-GISEL-NEXT: s_trap 2
+; GFX9-GISEL-NEXT: ds_write_b32 v0, v2
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 1
+; GFX9-GISEL-NEXT: global_store_dword v[0:1], v2, off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-LABEL: func_uses_lds_code_after:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: v_mov_b32_e32 v2, 0
+; SDAG-NEXT: ds_write_b32 v0, v2
+; SDAG-NEXT: s_cbranch_execnz .LBB3_2
+; SDAG-NEXT: ; %bb.1:
+; SDAG-NEXT: v_mov_b32_e32 v2, 1
+; SDAG-NEXT: global_store_dword v[0:1], v2, off
+; SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+; SDAG-NEXT: .LBB3_2:
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: func_uses_lds_code_after:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_cbranch_execnz .LBB3_2
+; GISEL-NEXT: ; %bb.1:
+; GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GISEL-NEXT: ds_write_b32 v0, v2
+; GISEL-NEXT: v_mov_b32_e32 v2, 1
+; GISEL-NEXT: global_store_dword v[0:1], v2, off
+; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+; GISEL-NEXT: .LBB3_2:
+; GISEL-NEXT: s_endpgm
+ store volatile i32 0, ptr addrspace(3) @lds, align 4
+ store volatile i32 1, ptr addrspace(1) %ptr, align 4
+ ret void
+}
+
+; ERR: warning: <unknown>:0:0: in function func_uses_lds_phi_after i32 (i1, ptr addrspace(1)): local memory global used by non-kernel function
+define i32 @func_uses_lds_phi_after(i1 %cond, ptr addrspace(1) %ptr) {
+; GFX8-SDAG-LABEL: func_uses_lds_phi_after:
+; GFX8-SDAG: ; %bb.0: ; %entry
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v3, v0
+; GFX8-SDAG-NEXT: flat_load_dword v0, v[1:2] glc
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX8-SDAG-NEXT: v_and_b32_e32 v3, 1, v3
+; GFX8-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3
+; GFX8-SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX8-SDAG-NEXT: s_cbranch_execz .LBB4_2
+; GFX8-SDAG-NEXT: ; %bb.1: ; %use.bb
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX8-SDAG-NEXT: s_mov_b32 m0, -1
+; GFX8-SDAG-NEXT: ds_write_b32 v0, v0
+; GFX8-SDAG-NEXT: s_mov_b64 s[0:1], s[6:7]
+; GFX8-SDAG-NEXT: s_trap 2
+; GFX8-SDAG-NEXT: flat_load_dword v0, v[1:2] glc
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX8-SDAG-NEXT: .LBB4_2: ; %ret
+; GFX8-SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: func_uses_lds_phi_after:
+; GFX8-GISEL: ; %bb.0: ; %entry
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v3, v0
+; GFX8-GISEL-NEXT: flat_load_dword v0, v[1:2] glc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: v_and_b32_e32 v3, 1, v3
+; GFX8-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
+; GFX8-GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX8-GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX8-GISEL-NEXT: ; %bb.1: ; %use.bb
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX8-GISEL-NEXT: s_mov_b32 m0, -1
+; GFX8-GISEL-NEXT: s_mov_b64 s[0:1], s[6:7]
+; GFX8-GISEL-NEXT: s_trap 2
+; GFX8-GISEL-NEXT: ds_write_b32 v0, v0
+; GFX8-GISEL-NEXT: flat_load_dword v0, v[1:2] glc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: .LBB4_2: ; %ret
+; GFX8-GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: func_uses_lds_phi_after:
+; GFX9-SDAG: ; %bb.0: ; %entry
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, v0
+; GFX9-SDAG-NEXT: global_load_dword v0, v[1:2], off glc
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT: v_and_b32_e32 v3, 1, v3
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3
+; GFX9-SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX9-SDAG-NEXT: s_cbranch_execz .LBB4_2
+; GFX9-SDAG-NEXT: ; %bb.1: ; %use.bb
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-SDAG-NEXT: ds_write_b32 v0, v0
+; GFX9-SDAG-NEXT: s_trap 2
+; GFX9-SDAG-NEXT: global_load_dword v0, v[1:2], off glc
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT: .LBB4_2: ; %ret
+; GFX9-SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: func_uses_lds_phi_after:
+; GFX9-GISEL: ; %bb.0: ; %entry
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, v0
+; GFX9-GISEL-NEXT: global_load_dword v0, v[1:2], off glc
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: v_and_b32_e32 v3, 1, v3
+; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
+; GFX9-GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX9-GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX9-GISEL-NEXT: ; %bb.1: ; %use.bb
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-GISEL-NEXT: s_trap 2
+; GFX9-GISEL-NEXT: ds_write_b32 v0, v0
+; GFX9-GISEL-NEXT: global_load_dword v0, v[1:2], off glc
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: .LBB4_2: ; %ret
+; GFX9-GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-LABEL: func_uses_lds_phi_after:
+; SDAG: ; %bb.0: ; %entry
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: v_mov_b32_e32 v3, v0
+; SDAG-NEXT: global_load_dword v0, v[1:2], off glc
+; SDAG-NEXT: s_waitcnt vmcnt(0)
+; SDAG-NEXT: v_and_b32_e32 v3, 1, v3
+; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3
+; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; SDAG-NEXT: s_cbranch_execz .LBB4_3
+; SDAG-NEXT: ; %bb.1: ; %use.bb
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: ds_write_b32 v0, v0
+; SDAG-NEXT: s_cbranch_execnz .LBB4_4
+; SDAG-NEXT: ; %bb.2: ; %use.bb
+; SDAG-NEXT: global_load_dword v0, v[1:2], off glc
+; SDAG-NEXT: s_waitcnt vmcnt(0)
+; SDAG-NEXT: .LBB4_3: ; %ret
+; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
+; SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+; SDAG-NEXT: .LBB4_4:
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: func_uses_lds_phi_after:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: v_mov_b32_e32 v3, v0
+; GISEL-NEXT: global_load_dword v0, v[1:2], off glc
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: v_and_b32_e32 v3, 1, v3
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
+; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT: s_cbranch_execz .LBB4_3
+; GISEL-NEXT: ; %bb.1: ; %use.bb
+; GISEL-NEXT: s_cbranch_execnz .LBB4_4
+; GISEL-NEXT: ; %bb.2: ; %use.bb
+; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: ds_write_b32 v0, v0
+; GISEL-NEXT: global_load_dword v0, v[1:2], off glc
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: .LBB4_3: ; %ret
+; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
+; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+; GISEL-NEXT: .LBB4_4:
+; GISEL-NEXT: s_endpgm
+entry:
+ %entry.load = load volatile i32, ptr addrspace(1) %ptr
+ br i1 %cond, label %use.bb, label %ret
+
+use.bb:
+ store volatile i32 0, ptr addrspace(3) @lds, align 4
+ %use.bb.load = load volatile i32, ptr addrspace(1) %ptr
+ br label %ret
+
+ret:
+ %phi = phi i32 [ %entry.load, %entry ], [ %use.bb.load, %use.bb ]
+ ret i32 %phi
+}
+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
; GFX8: {{.*}}
; GFX9: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/trap.ll b/llvm/test/CodeGen/AMDGPU/trap.ll
index e4cd18a0f424b..7b7a223579fb7 100644
--- a/llvm/test/CodeGen/AMDGPU/trap.ll
+++ b/llvm/test/CodeGen/AMDGPU/trap.ll
@@ -121,6 +121,29 @@ ret:
ret void
}
+; GCN-LABEL: {{^}}non_entry_trap_no_unreachable:
+; TRAP-BIT: enable_trap_handler = 1
+; NO-TRAP-BIT: enable_trap_handler = 0
+
+; HSA-TRAP: BB{{[0-9]_[0-9]+}}: ; %trap
+; HSA-TRAP: s_mov_b64 s[0:1], s[4:5]
+; HSA-TRAP-NEXT: s_trap 2
+define amdgpu_kernel void @non_entry_trap_no_unreachable(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
+entry:
+ %tmp29 = load volatile i32, ptr addrspace(1) %arg0
+ %cmp = icmp eq i32 %tmp29, -1
+ br i1 %cmp, label %ret, label %trap
+
+trap:
+ call void @llvm.trap()
+ store volatile i32 1234, ptr addrspace(3) null
+ br label %ret
+
+ret:
+ store volatile i32 3, ptr addrspace(1) %arg0
+ ret void
+}
+
attributes #0 = { nounwind noreturn }
attributes #1 = { nounwind }
More information about the llvm-commits
mailing list