[llvm] r276257 - AMDGPU: Fix phis from blocks split due to register indexing
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 28 07:54:49 PDT 2016
On Thu, Jul 21, 2016 at 09:40:57AM -0000, Matt Arsenault via llvm-commits wrote:
> Author: arsenm
> Date: Thu Jul 21 04:40:57 2016
> New Revision: 276257
>
> URL: http://llvm.org/viewvc/llvm-project?rev=276257&view=rev
> Log:
> AMDGPU: Fix phis from blocks split due to register indexing
>
> Modified:
> llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
> llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll
>
Hi Hans,
Is this OK to merge to the 3.9 branch? I am the code owner, and I approve.
-Tom
> Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=276257&r1=276256&r2=276257&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Thu Jul 21 04:40:57 2016
> @@ -1072,6 +1072,24 @@ unsigned SITargetLowering::getRegisterBy
> + StringRef(RegName) + "\"."));
> }
>
> +static void replaceSuccessorPhisWith(MachineBasicBlock &BB,
> + MachineBasicBlock &SplitBB) {
> + for (MachineBasicBlock *Succ : BB.successors()) {
> + for (MachineInstr &MI : *Succ) {
> + if (!MI.isPHI())
> + break;
> +
> + for (unsigned I = 2, E = MI.getNumOperands(); I != E; I += 2) {
> + MachineOperand &FromBB = MI.getOperand(I);
> + if (&BB == FromBB.getMBB()) {
> + FromBB.setMBB(&SplitBB);
> + break;
> + }
> + }
> + }
> + }
> +}
> +
> // If kill is not the last instruction, split the block so kill is always a
> // proper terminator.
> MachineBasicBlock *SITargetLowering::splitKillBlock(MachineInstr &MI,
> @@ -1093,20 +1111,7 @@ MachineBasicBlock *SITargetLowering::spl
>
> // Fix the block phi references to point to the new block for the defs in the
> // second piece of the block.
> - for (MachineBasicBlock *Succ : BB->successors()) {
> - for (MachineInstr &MI : *Succ) {
> - if (!MI.isPHI())
> - break;
> -
> - for (unsigned I = 2, E = MI.getNumOperands(); I != E; I += 2) {
> - MachineOperand &FromBB = MI.getOperand(I);
> - if (BB == FromBB.getMBB()) {
> - FromBB.setMBB(SplitBB);
> - break;
> - }
> - }
> - }
> - }
> + replaceSuccessorPhisWith(*BB, *SplitBB);
>
> MF->insert(++MachineFunction::iterator(BB), SplitBB);
> SplitBB->splice(SplitBB->begin(), BB, SplitPoint, BB->end());
> @@ -1161,7 +1166,7 @@ static void emitLoadM0FromVGPRLoop(const
> // Compare the just read M0 value to all possible Idx values.
> BuildMI(LoopBB, I, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e64), CondReg)
> .addReg(CurrentIdxReg)
> - .addOperand(IdxReg);
> + .addReg(IdxReg.getReg(), 0, IdxReg.getSubReg());
>
> // Move index from VCC into M0
> if (Offset == 0) {
> @@ -1232,6 +1237,8 @@ static MachineBasicBlock *loadM0FromVGPR
> MF->insert(MBBI, LoopBB);
> MF->insert(MBBI, RemainderBB);
>
> + replaceSuccessorPhisWith(MBB, *RemainderBB);
> +
> LoopBB->addSuccessor(LoopBB);
> LoopBB->addSuccessor(RemainderBB);
>
>
> Modified: llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll?rev=276257&r1=276256&r2=276257&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll (original)
> +++ llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll Thu Jul 21 04:40:57 2016
> @@ -503,12 +503,12 @@ entry:
> ; Test that the or is folded into the base address register instead of
> ; added to m0
>
> -; GCN-LABEL: {{^}}extractelement_v4i32_or_index:
> -; GCN: s_load_dword [[IDX_IN:s[0-9]+]]
> -; GCN: s_lshl_b32 [[IDX_SHL:s[0-9]+]], [[IDX_IN]]
> -; GCN-NOT: [[IDX_SHL]]
> -; GCN: s_mov_b32 m0, [[IDX_SHL]]
> -; GCN: v_movreld_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
> +; CHECK-LABEL: {{^}}extractelement_v4i32_or_index:
> +; CHECK: s_load_dword [[IDX_IN:s[0-9]+]]
> +; CHECK: s_lshl_b32 [[IDX_SHL:s[0-9]+]], [[IDX_IN]]
> +; CHECK-NOT: [[IDX_SHL]]
> +; CHECK: s_mov_b32 m0, [[IDX_SHL]]
> +; CHECK: v_movrels_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
> define void @extractelement_v4i32_or_index(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %idx.in) {
> entry:
> %ld = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
> @@ -519,12 +519,12 @@ entry:
> ret void
> }
>
> -; GCN-LABEL: {{^}}insertelement_v4f32_or_index:
> -; GCN: s_load_dword [[IDX_IN:s[0-9]+]]
> -; GCN: s_lshl_b32 [[IDX_SHL:s[0-9]+]], [[IDX_IN]]
> -; GCN-NOT: [[IDX_SHL]]
> -; GCN: s_mov_b32 m0, [[IDX_SHL]]
> -; GCN: v_movreld_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
> +; CHECK-LABEL: {{^}}insertelement_v4f32_or_index:
> +; CHECK: s_load_dword [[IDX_IN:s[0-9]+]]
> +; CHECK: s_lshl_b32 [[IDX_SHL:s[0-9]+]], [[IDX_IN]]
> +; CHECK-NOT: [[IDX_SHL]]
> +; CHECK: s_mov_b32 m0, [[IDX_SHL]]
> +; CHECK: v_movreld_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
> define void @insertelement_v4f32_or_index(<4 x float> addrspace(1)* %out, <4 x float> %a, i32 %idx.in) nounwind {
> %idx.shl = shl i32 %idx.in, 2
> %idx = or i32 %idx.shl, 1
> @@ -533,6 +533,41 @@ define void @insertelement_v4f32_or_inde
> ret void
> }
>
> +; CHECK-LABEL: {{^}}broken_phi_bb:
> +; CHECK: v_mov_b32_e32 [[PHIREG:v[0-9]+]], 8
> +
> +; CHECK: s_branch [[BB2:BB[0-9]+_[0-9]+]]
> +
> +; CHECK: {{^BB[0-9]+_[0-9]+}}:
> +; CHECK: s_mov_b64 exec,
> +
> +; CHECK: [[BB2]]:
> +; CHECK: v_cmp_le_i32_e32 vcc, s{{[0-9]+}}, [[PHIREG]]
> +; CHECK: buffer_load_dword
> +
> +; CHECK: [[REGLOOP:BB[0-9]+_[0-9]+]]:
> +; CHECK: v_movreld_b32_e32
> +; CHECK: s_cbranch_execnz [[REGLOOP]]
> +define void @broken_phi_bb(i32 %arg, i32 %arg1) #0 {
> +bb:
> + br label %bb2
> +
> +bb2: ; preds = %bb4, %bb
> + %tmp = phi i32 [ 8, %bb ], [ %tmp7, %bb4 ]
> + %tmp3 = icmp slt i32 %tmp, %arg
> + br i1 %tmp3, label %bb4, label %bb8
> +
> +bb4: ; preds = %bb2
> + %vgpr = load volatile i32, i32 addrspace(1)* undef
> + %tmp5 = insertelement <8 x i32> undef, i32 undef, i32 %vgpr
> + %tmp6 = insertelement <8 x i32> %tmp5, i32 %arg1, i32 %vgpr
> + %tmp7 = extractelement <8 x i32> %tmp6, i32 0
> + br label %bb2
> +
> +bb8: ; preds = %bb2
> + ret void
> +}
> +
> declare i32 @llvm.amdgcn.workitem.id.x() #1
>
> attributes #0 = { nounwind }
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits mailing list