[llvm] r275203 - AMDGPU: Fix verifier error with kill intrinsic
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 12 12:01:23 PDT 2016
Author: arsenm
Date: Tue Jul 12 14:01:23 2016
New Revision: 275203
URL: http://llvm.org/viewvc/llvm-project?rev=275203&view=rev
Log:
AMDGPU: Fix verifier error with kill intrinsic
Don't create a terminator in the middle of the block.
We should probably get rid of this intrinsic.
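For context: the machine verifier requires terminators to appear only at the end
of a basic block, but the old SkipIfDead built S_CBRANCH_EXECNZ (a terminator)
mid-block, with the EXP and S_ENDPGM it was meant to skip following it in the
same block. A condensed before/after sketch of the fix, simplified from the
patch below (not verbatim):

    // Before: a terminator built in the middle of the block, with the
    // instructions it is supposed to skip after it -> verifier error.
    BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
      .addImm(3); // skip the next two instructions

    // After: split the block so the branch really terminates its block
    // and targets an actual successor block.
    MachineBasicBlock *SkipBB, *RemainderBB;
    std::tie(SkipBB, RemainderBB) = splitBlock(MBB, MI.getIterator());
    BuildMI(&MBB, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
      .addMBB(RemainderBB);
    MBB.addSuccessor(RemainderBB);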
Added:
llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp
llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-undef.mir
Modified: llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp?rev=275203&r1=275202&r2=275203&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp Tue Jul 12 14:01:23 2016
@@ -76,7 +76,7 @@ private:
bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
void Skip(MachineInstr &From, MachineOperand &To);
- void SkipIfDead(MachineInstr &MI);
+ bool skipIfDead(MachineInstr &MI);
void If(MachineInstr &MI);
void Else(MachineInstr &MI, bool ExecModified);
@@ -89,12 +89,16 @@ private:
void Kill(MachineInstr &MI);
void Branch(MachineInstr &MI);
- void splitBlockLiveIns(const MachineBasicBlock &MBB,
- const MachineInstr &MI,
- MachineBasicBlock &LoopBB,
- MachineBasicBlock &RemainderBB,
- unsigned SaveReg,
- const MachineOperand &IdxReg);
+ std::pair<MachineBasicBlock *, MachineBasicBlock *>
+ splitBlock(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
+
+ void splitLoadM0BlockLiveIns(LivePhysRegs &RemainderLiveRegs,
+ const MachineRegisterInfo &MRI,
+ const MachineInstr &MI,
+ MachineBasicBlock &LoopBB,
+ MachineBasicBlock &RemainderBB,
+ unsigned SaveReg,
+ const MachineOperand &IdxReg);
void emitLoadM0FromVGPRLoop(MachineBasicBlock &LoopBB, DebugLoc DL,
MachineInstr *MovRel,
@@ -171,7 +175,19 @@ bool SILowerControlFlow::shouldSkip(Mach
I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
return true;
- if (++NumInstr >= SkipThreshold)
+ if (I->isInlineAsm()) {
+ const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
+ const char *AsmStr = I->getOperand(0).getSymbolName();
+
+ // The inline asm length estimate is a byte count that assumes every
+ // instruction uses the longest possible encoding.
+ uint64_t MaxAsmSize = TII->getInlineAsmLength(AsmStr, *MAI);
+ NumInstr += MaxAsmSize / MAI->getMaxInstLength();
+ } else {
+ ++NumInstr;
+ }
+
+ if (NumInstr >= SkipThreshold)
return true;
}
}
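The shouldSkip hunk above also makes the SkipThreshold accounting treat an
inline asm blob as its worst-case instruction count instead of as a single
instruction. A self-contained sketch of that accounting, using the same
TargetInstrInfo/MCAsmInfo calls as the patch (estimateNumInstrs is a
hypothetical helper name, not part of the patch):

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/MC/MCAsmInfo.h"
    #include "llvm/Target/TargetInstrInfo.h"
    using namespace llvm;

    // Worst-case number of instructions MI may expand to, for the
    // skip-distance heuristic. getInlineAsmLength() already assumes the
    // longest encoding for each asm statement, so dividing by the
    // target's maximum instruction length gives a conservative count.
    static unsigned estimateNumInstrs(const MachineInstr &MI,
                                      const TargetInstrInfo &TII,
                                      const MCAsmInfo &MAI) {
      if (!MI.isInlineAsm())
        return 1;
      const char *AsmStr = MI.getOperand(0).getSymbolName();
      return TII.getInlineAsmLength(AsmStr, MAI) / MAI.getMaxInstLength();
    }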
@@ -189,36 +205,55 @@ void SILowerControlFlow::Skip(MachineIns
.addOperand(To);
}
-void SILowerControlFlow::SkipIfDead(MachineInstr &MI) {
-
+bool SILowerControlFlow::skipIfDead(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = MI.getDebugLoc();
if (MBB.getParent()->getFunction()->getCallingConv() != CallingConv::AMDGPU_PS ||
!shouldSkip(&MBB, &MBB.getParent()->back()))
- return;
+ return false;
- MachineBasicBlock::iterator Insert = &MI;
- ++Insert;
+ LivePhysRegs RemainderLiveRegs(TRI);
+ RemainderLiveRegs.addLiveOuts(MBB);
+
+ MachineBasicBlock *SkipBB;
+ MachineBasicBlock *RemainderBB;
+ std::tie(SkipBB, RemainderBB) = splitBlock(MBB, MI.getIterator());
+
+ const DebugLoc &DL = MI.getDebugLoc();
// If the exec mask is non-zero, skip the next two instructions
- BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addImm(3);
+ BuildMI(&MBB, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addMBB(RemainderBB);
+
+ MBB.addSuccessor(RemainderBB);
+
+ MachineBasicBlock::iterator Insert = SkipBB->begin();
// Exec mask is zero: Export to NULL target...
- BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP))
- .addImm(0)
- .addImm(0x09) // V_008DFC_SQ_EXP_NULL
- .addImm(0)
- .addImm(1)
- .addImm(1)
- .addReg(AMDGPU::VGPR0)
- .addReg(AMDGPU::VGPR0)
- .addReg(AMDGPU::VGPR0)
- .addReg(AMDGPU::VGPR0);
+ BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::EXP))
+ .addImm(0)
+ .addImm(0x09) // V_008DFC_SQ_EXP_NULL
+ .addImm(0)
+ .addImm(1)
+ .addImm(1)
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addReg(AMDGPU::VGPR0, RegState::Undef);
+
+ // ... and terminate wavefront.
+ BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
+
+ for (const MachineInstr &Inst : reverse(*RemainderBB))
+ RemainderLiveRegs.stepBackward(Inst);
- // ... and terminate wavefront
- BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ for (unsigned Reg : RemainderLiveRegs) {
+ if (MRI.isAllocatable(Reg))
+ RemainderBB->addLiveIn(Reg);
+ }
+
+ return true;
}
void SILowerControlFlow::If(MachineInstr &MI) {
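On the live-in bookkeeping in the skipIfDead hunk above: the remainder block is
created by splicing instructions out of MBB, so its live-in list has to be
rebuilt by walking backward from the original block's live-outs. A minimal
sketch of that pattern, assuming the LivePhysRegs API as used in the patch
(recomputeSplitLiveIns is a hypothetical name):

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/CodeGen/LivePhysRegs.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"
    using namespace llvm;

    // LiveRegs must be seeded with addLiveOuts() *before* the block is
    // split (see the splitBlock note below). Stepping backward over the
    // spliced instructions leaves exactly the registers live on entry
    // to RemainderBB; only allocatable ones belong in the live-in list.
    static void recomputeSplitLiveIns(MachineBasicBlock &RemainderBB,
                                      const MachineRegisterInfo &MRI,
                                      LivePhysRegs &LiveRegs) {
      for (const MachineInstr &Inst : reverse(RemainderBB))
        LiveRegs.stepBackward(Inst);
      for (unsigned Reg : LiveRegs)
        if (MRI.isAllocatable(Reg))
          RemainderBB.addLiveIn(Reg);
    }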
@@ -386,20 +421,13 @@ void SILowerControlFlow::Kill(MachineIns
}
// All currently live registers must remain so in the remainder block.
-void SILowerControlFlow::splitBlockLiveIns(const MachineBasicBlock &MBB,
- const MachineInstr &MI,
- MachineBasicBlock &LoopBB,
- MachineBasicBlock &RemainderBB,
- unsigned SaveReg,
- const MachineOperand &IdxReg) {
- LivePhysRegs RemainderLiveRegs(TRI);
-
- RemainderLiveRegs.addLiveOuts(MBB);
- for (MachineBasicBlock::const_reverse_iterator I = MBB.rbegin(), E(&MI);
- I != E; ++I) {
- RemainderLiveRegs.stepBackward(*I);
- }
-
+void SILowerControlFlow::splitLoadM0BlockLiveIns(LivePhysRegs &RemainderLiveRegs,
+ const MachineRegisterInfo &MRI,
+ const MachineInstr &MI,
+ MachineBasicBlock &LoopBB,
+ MachineBasicBlock &RemainderBB,
+ unsigned SaveReg,
+ const MachineOperand &IdxReg) {
// Add reg defined in loop body.
RemainderLiveRegs.addReg(SaveReg);
@@ -410,13 +438,11 @@ void SILowerControlFlow::splitBlockLiveI
}
}
- const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
for (unsigned Reg : RemainderLiveRegs) {
if (MRI.isAllocatable(Reg))
RemainderBB.addLiveIn(Reg);
}
-
const MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src);
if (!Src->isUndef())
LoopBB.addLiveIn(Src->getReg());
@@ -469,6 +495,30 @@ void SILowerControlFlow::emitLoadM0FromV
.addMBB(&LoopBB);
}
+std::pair<MachineBasicBlock *, MachineBasicBlock *>
+SILowerControlFlow::splitBlock(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ MachineFunction *MF = MBB.getParent();
+
+ // To insert the loop we need to split the block. Move everything after this
+ // point to a new block, and insert a new empty block between the two.
+ MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock();
+ MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
+ MachineFunction::iterator MBBI(MBB);
+ ++MBBI;
+
+ MF->insert(MBBI, LoopBB);
+ MF->insert(MBBI, RemainderBB);
+
+ // Move the rest of the block into a new block.
+ RemainderBB->transferSuccessors(&MBB);
+ RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
+
+ MBB.addSuccessor(LoopBB);
+
+ return std::make_pair(LoopBB, RemainderBB);
+}
+
// Returns true if a new block was inserted.
bool SILowerControlFlow::loadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset) {
MachineBasicBlock &MBB = *MI.getParent();
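One ordering subtlety with the new splitBlock helper, which both call sites in
the patch respect: live-outs must be captured before the split, because
transferSuccessors() rewires MBB's successor list and addLiveOuts() would see
the wrong CFG afterwards. A hedged usage sketch:

    // Capture live-outs while MBB still owns its original successors.
    LivePhysRegs RemainderLiveRegs(TRI);
    RemainderLiveRegs.addLiveOuts(MBB);

    MachineBasicBlock *LoopBB, *RemainderBB;
    std::tie(LoopBB, RemainderBB) = splitBlock(MBB, I);
    // Now MBB's only successor is LoopBB, and RemainderBB owns the
    // instructions from I to the old end of MBB, plus MBB's old
    // successor edges.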
@@ -492,7 +542,6 @@ bool SILowerControlFlow::loadM0(MachineI
return false;
}
- MachineFunction &MF = *MBB.getParent();
MachineOperand *SaveOp = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
SaveOp->setIsDead(false);
unsigned Save = SaveOp->getReg();
@@ -505,25 +554,24 @@ bool SILowerControlFlow::loadM0(MachineI
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64), Save)
.addReg(AMDGPU::EXEC);
- // To insert the loop we need to split the block. Move everything after this
- // point to a new block, and insert a new empty block between the two.
- MachineBasicBlock *LoopBB = MF.CreateMachineBasicBlock();
- MachineBasicBlock *RemainderBB = MF.CreateMachineBasicBlock();
- MachineFunction::iterator MBBI(MBB);
- ++MBBI;
+ LivePhysRegs RemainderLiveRegs(TRI);
- MF.insert(MBBI, LoopBB);
- MF.insert(MBBI, RemainderBB);
+ RemainderLiveRegs.addLiveOuts(MBB);
- LoopBB->addSuccessor(LoopBB);
- LoopBB->addSuccessor(RemainderBB);
+ MachineBasicBlock *LoopBB;
+ MachineBasicBlock *RemainderBB;
- splitBlockLiveIns(MBB, MI, *LoopBB, *RemainderBB, Save, *Idx);
+ std::tie(LoopBB, RemainderBB) = splitBlock(MBB, I);
- // Move the rest of the block into a new block.
- RemainderBB->transferSuccessors(&MBB);
- RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
- MBB.addSuccessor(LoopBB);
+ for (const MachineInstr &Inst : reverse(*RemainderBB))
+ RemainderLiveRegs.stepBackward(Inst);
+
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ LoopBB->addSuccessor(RemainderBB);
+ LoopBB->addSuccessor(LoopBB);
+
+ splitLoadM0BlockLiveIns(RemainderLiveRegs, MRI, MI, *LoopBB,
+ *RemainderBB, Save, *Idx);
emitLoadM0FromVGPRLoop(*LoopBB, DL, MovRel, *Idx, Offset);
@@ -695,16 +743,25 @@ bool SILowerControlFlow::runOnMachineFun
case AMDGPU::SI_END_CF:
if (--Depth == 0 && HaveKill) {
- SkipIfDead(MI);
HaveKill = false;
+
+ if (skipIfDead(MI)) {
+ NextBB = std::next(BI);
+ BE = MF.end();
+ Next = MBB.end();
+ }
}
EndCf(MI);
break;
case AMDGPU::SI_KILL:
- if (Depth == 0)
- SkipIfDead(MI);
- else
+ if (Depth == 0) {
+ if (skipIfDead(MI)) {
+ NextBB = std::next(BI);
+ BE = MF.end();
+ Next = MBB.end();
+ }
+ } else
HaveKill = true;
Kill(MI);
break;
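The runOnMachineFunction changes above deal with iterator invalidation:
skipIfDead may split the current block, so the cached function-end, next-block,
and next-instruction iterators all go stale and must be refreshed. A generic
sketch of that pattern, not the pass's exact loop (handleInstr is a
hypothetical stand-in for the opcode switch):

    MachineFunction::iterator NextBB;
    for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
         BI != BE; BI = NextBB) {
      NextBB = std::next(BI);
      MachineBasicBlock &MBB = *BI;
      for (MachineBasicBlock::iterator I = MBB.begin(), Next;
           I != MBB.end(); I = Next) {
        Next = std::next(I);
        if (handleInstr(*I)) {
          // The handler split MBB: the rest of this block moved into a
          // newly inserted block, so stop scanning here and pick the
          // new blocks up on the next outer iteration.
          NextBB = std::next(BI);
          BE = MF.end();
          Next = MBB.end();
        }
      }
    }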
Modified: llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-undef.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-undef.mir?rev=275203&r1=275202&r2=275203&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-undef.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-undef.mir Tue Jul 12 14:01:23 2016
@@ -3,7 +3,7 @@
# CHECK-LABEL: name: extract_undef_offset_vgpr{{$}}
# CHECK: bb.1:
-# CHECK: successors: %bb.1(0x40000000 / 0x80000000 = 50.00%), %bb.2(0x40000000 / 0x80000000 = 50.00%)
+# CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
# CHECK: liveins: %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
# CHECK: V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
@@ -103,7 +103,7 @@ body: |
# CHECK-LABEL: name: extract_undef_neg_offset_vgpr{{$}}
# CHECK: bb.1:
-# CHECK: successors: %bb.1(0x40000000 / 0x80000000 = 50.00%), %bb.2(0x40000000 / 0x80000000 = 50.00%)
+# CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
# CHECK: liveins: %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
# CHECK: %vcc_lo = V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
@@ -159,7 +159,7 @@ body: |
# CHECK-LABEL: name: insert_undef_offset_vgpr{{$}}
# CHECK: bb.1:
-# CHECK: successors: %bb.1(0x40000000 / 0x80000000 = 50.00%), %bb.2(0x40000000 / 0x80000000 = 50.00%)
+# CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
# CHECK: liveins: %vgpr4, %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
# CHECK: %vcc_lo = V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
@@ -215,7 +215,7 @@ body: |
# CHECK-LABEL: name: insert_undef_neg_offset_vgpr{{$}}
# CHECK: bb.1:
-# CHECK: successors: %bb.1(0x40000000 / 0x80000000 = 50.00%), %bb.2(0x40000000 / 0x80000000 = 50.00%)
+# CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
# CHECK: liveins: %vgpr4, %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
# CHECK: %vcc_lo = V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
@@ -272,7 +272,7 @@ body: |
# CHECK-LABEL: insert_undef_value_offset_vgpr{{$}}
# CHECK: bb.1:
-# CHECK: successors: %bb.1(0x40000000 / 0x80000000 = 50.00%), %bb.2(0x40000000 / 0x80000000 = 50.00%)
+# CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
# CHECK: liveins: %vgpr4, %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
# CHECK: %vcc_lo = V_READFIRSTLANE_B32 %vgpr4, implicit %exec
Added: llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll?rev=275203&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll Tue Jul 12 14:01:23 2016
@@ -0,0 +1,145 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}test_kill_depth_0_imm_pos:
+; CHECK-NEXT: ; BB#0:
+; CHECK-NEXT: s_endpgm
+define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 {
+ call void @llvm.AMDGPU.kill(float 0.0)
+ ret void
+}
+
+; CHECK-LABEL: {{^}}test_kill_depth_0_imm_neg:
+; CHECK-NEXT: ; BB#0:
+; CHECK-NEXT: s_mov_b64 exec, 0
+; CHECK-NEXT: s_endpgm
+define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 {
+ call void @llvm.AMDGPU.kill(float -0.0)
+ ret void
+}
+
+; CHECK-LABEL: {{^}}test_kill_depth_var:
+; CHECK-NEXT: ; BB#0:
+; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
+; CHECK-NEXT: s_endpgm
+define amdgpu_ps void @test_kill_depth_var(float %x) #0 {
+ call void @llvm.AMDGPU.kill(float %x)
+ ret void
+}
+
+; FIXME: why does the skip depend on the asm length in the same block?
+
+; CHECK-LABEL: {{^}}test_kill_control_flow:
+; CHECK: s_cmp_lg_i32 s{{[0-9]+}}, 0
+; CHECK: s_cbranch_scc1 [[RETURN_BB:BB[0-9]+_[0-9]+]]
+
+; CHECK: ; BB#1:
+; CHECK: v_nop_e64
+; CHECK: v_nop_e64
+; CHECK: v_nop_e64
+; CHECK: v_nop_e64
+; CHECK: v_nop_e64
+; CHECK: v_nop_e64
+; CHECK: v_nop_e64
+; CHECK: v_nop_e64
+; CHECK: v_nop_e64
+; CHECK: v_nop_e64
+
+; CHECK: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]]
+; CHECK-NEXT: ; BB#3:
+; CHECK-NEXT: exp 0, 9, 0, 1, 1, v0, v0, v0, v0
+; CHECK-NEXT: s_endpgm
+
+; CHECK-NEXT: {{^}}[[SPLIT_BB]]:
+; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v7
+; CHECK-NEXT: {{^}}BB{{[0-9]+_[0-9]+}}:
+; CHECK-NEXT: s_endpgm
+define amdgpu_ps void @test_kill_control_flow(i32 inreg %arg) #0 {
+entry:
+ %cmp = icmp eq i32 %arg, 0
+ br i1 %cmp, label %bb, label %exit
+
+bb:
+ %var = call float asm sideeffect "
+ v_mov_b32_e64 v7, -1
+ v_nop_e64
+ v_nop_e64
+ v_nop_e64
+ v_nop_e64
+ v_nop_e64
+ v_nop_e64
+ v_nop_e64
+ v_nop_e64
+ v_nop_e64
+ v_nop_e64", "={VGPR7}"()
+ call void @llvm.AMDGPU.kill(float %var)
+ br label %exit
+
+exit:
+ ret void
+}
+
+; CHECK-LABEL: {{^}}test_kill_control_flow_remainder:
+; CHECK: s_cmp_lg_i32 s{{[0-9]+}}, 0
+; CHECK-NEXT: s_cbranch_scc1 [[RETURN_BB:BB[0-9]+_[0-9]+]]
+
+; CHECK-NEXT: ; BB#1: ; %bb
+; CHECK: v_mov_b32_e64 v7, -1
+; CHECK: v_nop_e64
+; CHECK: v_nop_e64
+; CHECK: v_nop_e64
+; CHECK: v_nop_e64
+; CHECK: v_nop_e64
+; CHECK: v_nop_e64
+; CHECK: v_nop_e64
+; CHECK: v_nop_e64
+; CHECK: ;;#ASMEND
+; CHECK: v_mov_b32_e64 v8, -1
+; CHECK: ;;#ASMEND
+; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]]
+
+; CHECK-NEXT: ; BB#3:
+; CHECK-NEXT: exp 0, 9, 0, 1, 1, v0, v0, v0, v0
+; CHECK-NEXT: s_endpgm
+
+; CHECK-NEXT: {{^}}[[SPLIT_BB]]:
+; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v7
+; CHECK: buffer_store_dword v8
+; CHECK: v_mov_b32_e64 v9, -2
+
+; CHECK: {{^}}BB{{[0-9]+_[0-9]+}}:
+; CHECK: buffer_store_dword v9
+; CHECK-NEXT: s_endpgm
+define amdgpu_ps void @test_kill_control_flow_remainder(i32 inreg %arg) #0 {
+entry:
+ %cmp = icmp eq i32 %arg, 0
+ br i1 %cmp, label %bb, label %exit
+
+bb:
+ %var = call float asm sideeffect "
+ v_mov_b32_e64 v7, -1
+ v_nop_e64
+ v_nop_e64
+ v_nop_e64
+ v_nop_e64
+ v_nop_e64
+ v_nop_e64
+ v_nop_e64
+ v_nop_e64
+ v_nop_e64
+ v_nop_e64
+ v_nop_e64", "={VGPR7}"()
+ %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={VGPR8}"()
+ call void @llvm.AMDGPU.kill(float %var)
+ store volatile float %live.across, float addrspace(1)* undef
+ %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={VGPR9}"()
+ br label %exit
+
+exit:
+ %phi = phi float [ 0.0, %entry ], [ %live.out, %bb ]
+ store float %phi, float addrspace(1)* undef
+ ret void
+}
+
+declare void @llvm.AMDGPU.kill(float) #0
+
+attributes #0 = { nounwind }