[PATCH] D18282: AMDGPU/SI: Fix threshold calculation for branching when exec is zero

Tom Stellard via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 18 14:31:53 PDT 2016


tstellarAMD created this revision.
tstellarAMD added reviewers: nhaehnle, arsenm.
tstellarAMD added a subscriber: llvm-commits.
Herald added a subscriber: arsenm.

When control flow is implemented using the exec mask, the compiler will
insert branch instructions to skip over the masked section when exec is
zero if the section contains more than a certain number of instructions.

The previous code would only count instructions along the chain of first
successors of each block; this patch modifies the code to count the
instructions in all blocks between the start and end of the branch.

http://reviews.llvm.org/D18282

Files:
  lib/Target/AMDGPU/SILowerControlFlow.cpp
  test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll

Index: test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
===================================================================
--- test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
+++ test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
@@ -24,5 +24,39 @@
   ret void
 }
 
+;CHECK-LABEL: {{^}}test2:
+;CHECK: s_and_saveexec_b64
+;CHECK: s_xor_b64
+;CHECK-NEXT: s_cbranch_execz
+define void @test2(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+main_body:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+  %cc = icmp eq i32 %tid, 0
+  br i1 %cc, label %done1, label %if
+
+if:
+  %cmp = icmp eq i32 %a, 0
+  br i1 %cmp, label %done0, label %loop_body
+
+loop_body:
+  %counter = phi i32 [ 0, %if ], [0, %done0], [ %incr, %loop_body ]
+
+  ; Prevent the loop from being optimized out
+  call void asm sideeffect "", "" ()
+
+  %incr = add i32 %counter, 1
+  %lc = icmp sge i32 %incr, 1000
+  br i1 %lc, label %done1, label %loop_body
+
+done0:
+  %cmp0 = icmp eq i32 %b, 0
+  br i1 %cmp0, label %done1, label %loop_body
+
+done1:
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
 attributes #0 = { "ShaderType"="0" }
 attributes #1 = { nounwind readonly }
Index: lib/Target/AMDGPU/SILowerControlFlow.cpp
===================================================================
--- lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -130,10 +130,12 @@
 
   unsigned NumInstr = 0;
 
-  for (MachineBasicBlock *MBB = From; MBB != To && !MBB->succ_empty();
-       MBB = *MBB->succ_begin()) {
+  for (MachineFunction::iterator MBBI = MachineFunction::iterator(From),
+                                 ToI = MachineFunction::iterator(To); MBBI != ToI; ++MBBI) {
 
-    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+    MachineBasicBlock &MBB = *MBBI;
+
+    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
          NumInstr < SkipThreshold && I != E; ++I) {
 
       if (I->isBundle() || !I->isBundled())


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D18282.51069.patch
Type: text/x-patch
Size: 1994 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160318/3fd50091/attachment.bin>


More information about the llvm-commits mailing list