[llvm] r278273 - AMDGPU: Change insertion point of si_mask_branch

Wed Aug 10 12:11:43 PDT 2016

Author: arsenm
Date: Wed Aug 10 14:11:42 2016
New Revision: 278273

URL: http://llvm.org/viewvc/llvm-project?rev=278273&view=rev
Log:
AMDGPU: Change insertion point of si_mask_branch

Insert SI_MASK_BRANCH before the skip branch if one is created.
This is a somewhat more natural placement relative
to the skip branches, and makes it possible to implement
analyzeBranch for skip blocks.

The test changes are mostly due to a quirk where
the block label is not emitted if there is a terminator
that is not also a branch.

Modified:
    llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
    llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp
    llvm/trunk/test/CodeGen/AMDGPU/convergent-inlineasm.ll
    llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll
    llvm/trunk/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
    llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll
    llvm/trunk/test/CodeGen/AMDGPU/wqm.ll

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=278273&r1=278272&r2=278273&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Wed Aug 10 14:11:42 2016
@@ -1777,9 +1777,9 @@ let hasSideEffects = 1 in {
 // replaced with exec mask operations.
 def SI_MASK_BRANCH : PseudoInstSI <
   (outs), (ins brtarget:$target, SReg_64:$dst)> {
-  let isBranch = 1;
+  let isBranch = 0;
   let isTerminator = 1;
-  let isBarrier = 1;
+  let isBarrier = 0;
   let SALU = 1;
 }
 

Modified: llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp?rev=278273&r1=278272&r2=278273&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp Wed Aug 10 14:11:42 2016
@@ -80,7 +80,7 @@ private:
 
   bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
 
-  void Skip(MachineInstr &From, MachineOperand &To);
+  MachineInstr *Skip(MachineInstr &From, MachineOperand &To);
   bool skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB);
 
   void If(MachineInstr &MI);
@@ -182,14 +182,15 @@ bool SILowerControlFlow::shouldSkip(Mach
   return false;
 }
 
-void SILowerControlFlow::Skip(MachineInstr &From, MachineOperand &To) {
-
+MachineInstr *SILowerControlFlow::Skip(MachineInstr &From, MachineOperand &To) {
   if (!shouldSkip(*From.getParent()->succ_begin(), To.getMBB()))
-    return;
+    return nullptr;
 
-  DebugLoc DL = From.getDebugLoc();
-  BuildMI(*From.getParent(), &From, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
+  const DebugLoc &DL = From.getDebugLoc();
+  MachineInstr *Skip =
+    BuildMI(*From.getParent(), &From, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
     .addOperand(To);
+  return Skip;
 }
 
 bool SILowerControlFlow::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) {
@@ -242,10 +243,13 @@ void SILowerControlFlow::If(MachineInstr
           .addReg(AMDGPU::EXEC)
           .addReg(Reg);
 
-  Skip(MI, MI.getOperand(2));
+  MachineInstr *SkipInst = Skip(MI, MI.getOperand(2));
+
+  // Insert before the new branch instruction.
+  MachineInstr *InsPt = SkipInst ? SkipInst : &MI;
 
   // Insert a pseudo terminator to help keep the verifier happy.
-  BuildMI(MBB, &MI, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
+  BuildMI(MBB, InsPt, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
     .addOperand(MI.getOperand(2))
     .addReg(Reg);
 
@@ -275,10 +279,13 @@ void SILowerControlFlow::Else(MachineIns
           .addReg(AMDGPU::EXEC)
           .addReg(Dst);
 
-  Skip(MI, MI.getOperand(2));
+  MachineInstr *SkipInst = Skip(MI, MI.getOperand(2));
+
+  // Insert before the new branch instruction.
+  MachineInstr *InsPt = SkipInst ? SkipInst : &MI;
 
   // Insert a pseudo terminator to help keep the verifier happy.
-  BuildMI(MBB, &MI, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
+  BuildMI(MBB, InsPt, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
     .addOperand(MI.getOperand(2))
     .addReg(Dst);
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/convergent-inlineasm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/convergent-inlineasm.ll?rev=278273&r1=278272&r2=278273&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/convergent-inlineasm.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/convergent-inlineasm.ll Wed Aug 10 14:11:42 2016
@@ -4,7 +4,8 @@ declare i32 @llvm.amdgcn.workitem.id.x()
 ; GCN-LABEL: {{^}}convergent_inlineasm:
 ; GCN: BB#0:
 ; GCN: v_cmp_ne_i32_e64
-; GCN: BB#1:
+; GCN: ; mask branch
+; GCN: BB{{[0-9]+_[0-9]+}}:
 define void @convergent_inlineasm(i64 addrspace(1)* nocapture %arg) {
 bb:
   %tmp = call i32 @llvm.amdgcn.workitem.id.x()
@@ -22,9 +23,12 @@ bb5:
 }
 
 ; GCN-LABEL: {{^}}nonconvergent_inlineasm:
-; GCN: BB#1:
+; GCN: ; mask branch
+
+; GCN: BB{{[0-9]+_[0-9]+}}:
 ; GCN: v_cmp_ne_i32_e64
-; GCN: BB1_2:
+
+; GCN: BB{{[0-9]+_[0-9]+}}:
 define void @nonconvergent_inlineasm(i64 addrspace(1)* nocapture %arg) {
 bb:
   %tmp = call i32 @llvm.amdgcn.workitem.id.x()

Modified: llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll?rev=278273&r1=278272&r2=278273&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll Wed Aug 10 14:11:42 2016
@@ -202,8 +202,11 @@ exit:
 ; CHECK: v_cmp_eq_i32_e32 vcc, 0, v0
 ; CHECK-NEXT: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], vcc
 ; CHECK-NEXT: s_xor_b64 [[SAVEEXEC]], exec, [[SAVEEXEC]]
-; CHECK-NEXT: s_cbranch_execz [[EXIT:BB[0-9]+_[0-9]+]]
-; CHECK-NEXT: ; mask branch [[EXIT]]
+; CHECK-NEXT: ; mask branch [[EXIT:BB[0-9]+_[0-9]+]]
+; CHECK-NEXT: s_cbranch_execz [[EXIT]]
+
+; CHECK: {{BB[0-9]+_[0-9]+}}: ; %bb.preheader
+; CHECK: s_mov_b32
 
 ; CHECK: [[LOOP_BB:BB[0-9]+_[0-9]+]]:
 
@@ -353,7 +356,7 @@ bb7:
 ; CHECK: mask branch [[END:BB[0-9]+_[0-9]+]]
 ; CHECK-NOT: branch
 
-; CHECK: ; BB#3: ; %bb8
+; CHECK: BB{{[0-9]+_[0-9]+}}: ; %bb8
 ; CHECK: buffer_store_dword
 
 ; CHECK: [[END]]:
@@ -387,4 +390,4 @@ declare <4 x float> @llvm.SI.image.sampl
 declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) nounwind
 
 attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
\ No newline at end of file
+attributes #1 = { nounwind readnone }

Modified: llvm/trunk/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll?rev=278273&r1=278272&r2=278273&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll Wed Aug 10 14:11:42 2016
@@ -5,6 +5,11 @@
 ; CHECK-LABEL: {{^}}test1:
 ; CHECK: v_cmp_ne_i32_e32 vcc, 0
 ; CHECK: s_and_saveexec_b64
+; CHECK-NEXT: s_xor_b64
+; CHECK-NEXT: ; mask branch
+; CHECK-NEXT: s_cbranch_execz
+
+; CHECK-NEXT: BB{{[0-9]+_[0-9]+}}: ; %loop_body.preheader
 
 ; CHECK: [[LOOP_BODY_LABEL:BB[0-9]+_[0-9]+]]:
 ; CHECK: s_and_b64 vcc, exec, vcc
@@ -30,10 +35,11 @@ out:
   ret void
 }
 
-;CHECK-LABEL: {{^}}test2:
-;CHECK: s_and_saveexec_b64
-;CHECK: s_xor_b64
-;CHECK-NEXT: s_cbranch_execz
+; CHECK-LABEL: {{^}}test2:
+; CHECK: s_and_saveexec_b64
+; CHECK-NEXT: s_xor_b64
+; CHECK-NEXT: ; mask branch
+; CHECK-NEXT: s_cbranch_execz
 define void @test2(i32 addrspace(1)* %out, i32 %a, i32 %b) {
 main_body:
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1

Modified: llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll?rev=278273&r1=278272&r2=278273&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll Wed Aug 10 14:11:42 2016
@@ -47,7 +47,7 @@ end:
 ; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
 ; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
 
-; SI: ; BB#1
+; SI: BB{{[0-9]+_[0-9]+}}:
 ; SI: buffer_store_dword
 ; SI: s_endpgm
 
@@ -68,7 +68,7 @@ exit:
   ret void
 }
 
-; SI-LABEL: @simple_test_v_loop
+; SI-LABEL: {{^}}simple_test_v_loop:
 ; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}}
 ; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
 ; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
@@ -106,7 +106,7 @@ exit:
   ret void
 }
 
-; SI-LABEL: @multi_vcond_loop
+; SI-LABEL: {{^}}multi_vcond_loop:
 
 ; Load loop limit from buffer
 ; Branch to exit if uniformly not taken
@@ -118,7 +118,7 @@ exit:
 ; SI: s_cbranch_execz [[LABEL_EXIT:BB[0-9]+_[0-9]+]]
 
 ; Initialize inner condition to false
-; SI: ; BB#1:
+; SI: BB{{[0-9]+_[0-9]+}}: ; %bb10.preheader
 ; SI: s_mov_b64 [[ZERO:s\[[0-9]+:[0-9]+\]]], 0{{$}}
 ; SI: s_mov_b64 [[COND_STATE:s\[[0-9]+:[0-9]+\]]], [[ZERO]]
 
@@ -133,7 +133,7 @@ exit:
 ; SI: s_xor_b64 [[ORNEG2]], exec, [[ORNEG2]]
 ; SI: s_cbranch_execz [[LABEL_FLOW:BB[0-9]+_[0-9]+]]
 
-; SI: BB#3:
+; SI: BB{{[0-9]+_[0-9]+}}: ; %bb20
 ; SI: buffer_store_dword
 ; SI: v_cmp_ge_i64_e32 [[CMP:s\[[0-9]+:[0-9]+\]|vcc]]
 ; SI: s_or_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], [[CMP]], [[COND_STATE]]

Modified: llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/wqm.ll?rev=278273&r1=278272&r2=278273&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/wqm.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/wqm.ll Wed Aug 10 14:11:42 2016
@@ -123,7 +123,7 @@ END:
 ;CHECK-NEXT: s_and_b64 [[SAVED]], exec, [[SAVED]]
 ;CHECK-NEXT: s_xor_b64 exec, exec, [[SAVED]]
 ;CHECK-NEXT: mask branch [[END_BB:BB[0-9]+_[0-9]+]]
-;CHECK-NEXT: ; BB#3: ; %ELSE
+;CHECK-NEXT: BB{{[0-9]+_[0-9]+}}: ; %ELSE
 ;CHECK: store_dword
 ;CHECK: [[END_BB]]: ; %END
 ;CHECK: s_or_b64 exec, exec,