[llvm] r228302 - R600/SI: Fix bug from insertion of llvm.SI.end.cf into loop headers
Hans Wennborg
hans at chromium.org
Thu Feb 5 08:38:29 PST 2015
Yes, go ahead.
Thanks,
Hans
On Thu, Feb 5, 2015 at 8:14 AM, Tom Stellard <tom at stellard.net> wrote:
> Hi Hans,
>
> Is this patch OK to merge to the 3.6 branch?
>
> I am the code owner and I approve this patch.
>
> -Tom
>
> On Thu, Feb 05, 2015 at 03:32:15PM -0000, Tom Stellard wrote:
>> Author: tstellar
>> Date: Thu Feb 5 09:32:15 2015
>> New Revision: 228302
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=228302&view=rev
>> Log:
>> R600/SI: Fix bug from insertion of llvm.SI.end.cf into loop headers
>>
>> The llvm.SI.end.cf intrinsic is used to mark the end of if-then blocks,
>> if-then-else blocks, and loops. It is responsible for updating the
>> exec mask to re-enable threads that had been masked during the preceding
>> control flow block. For example:
>>
>> s_mov_b64 exec, 0x3 ; Initial exec mask
>> s_mov_b64 s[0:1], exec ; Saved exec mask
>> v_cmpx_gt_u32 exec, s[2:3], v0, 0 ; llvm.SI.if
>> do_stuff()
>> s_or_b64 exec, exec, s[0:1] ; llvm.SI.end.cf
>>
>> The bug fixed by this patch was one where the llvm.SI.end.cf intrinsic
>> was being inserted into the header of loops. This would happen when
>> an if block terminated in a loop header and we would end up with
>> code like this:
>>
>> s_mov_b64 exec, 0x3 ; Initial exec mask
>> s_mov_b64 s[0:1], exec ; Saved exec mask
>> v_cmpx_gt_u32 exec, s[2:3], v0, 0 ; llvm.SI.if
>> do_stuff()
>>
>> LOOP: ; Start of loop header
>> s_or_b64 exec, exec, s[0:1] ; llvm.SI.end.cf <-BUG: The exec mask has the
>> same value at the beginning of each loop
>> iteration.
>> do_stuff();
>> s_cbranch_execnz LOOP
>>
>> The fix is to create a new basic block before the loop and insert the
>> llvm.SI.end.cf there. This way the exec mask is restored before the
>> start of the loop instead of at the beginning of each iteration.
>>
>> Added:
>> llvm/trunk/test/CodeGen/R600/endcf-loop-header.ll
>> Modified:
>> llvm/trunk/lib/Target/R600/SIAnnotateControlFlow.cpp
>>
>> Modified: llvm/trunk/lib/Target/R600/SIAnnotateControlFlow.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIAnnotateControlFlow.cpp?rev=228302&r1=228301&r2=228302&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/R600/SIAnnotateControlFlow.cpp (original)
>> +++ llvm/trunk/lib/Target/R600/SIAnnotateControlFlow.cpp Thu Feb 5 09:32:15 2015
>> @@ -14,6 +14,7 @@
>>
>> #include "AMDGPU.h"
>> #include "llvm/ADT/DepthFirstIterator.h"
>> +#include "llvm/Analysis/LoopInfo.h"
>> #include "llvm/IR/Constants.h"
>> #include "llvm/IR/Dominators.h"
>> #include "llvm/IR/Instructions.h"
>> @@ -66,6 +67,8 @@ class SIAnnotateControlFlow : public Fun
>> DominatorTree *DT;
>> StackVector Stack;
>>
>> + LoopInfo *LI;
>> +
>> bool isTopOfStack(BasicBlock *BB);
>>
>> Value *popSaved();
>> @@ -99,6 +102,7 @@ public:
>> }
>>
>> void getAnalysisUsage(AnalysisUsage &AU) const override {
>> + AU.addRequired<LoopInfoWrapperPass>();
>> AU.addRequired<DominatorTreeWrapperPass>();
>> AU.addPreserved<DominatorTreeWrapperPass>();
>> FunctionPass::getAnalysisUsage(AU);
>> @@ -277,10 +281,26 @@ void SIAnnotateControlFlow::handleLoop(B
>>
>> Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
>> push(Term->getSuccessor(0), Arg);
>> -}
>> -
>> -/// \brief Close the last opened control flow
>> +}/// \brief Close the last opened control flow
>> void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
>> + llvm::Loop *L = LI->getLoopFor(BB);
>> +
>> + if (L && L->getHeader() == BB) {
>> + // We can't insert an EndCF call into a loop header, because it will
>> + // get executed on every iteration of the loop, when it should be
>> + // executed only once before the loop.
>> + SmallVector <BasicBlock*, 8> Latches;
>> + L->getLoopLatches(Latches);
>> +
>> + std::vector<BasicBlock*> Preds;
>> + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
>> + if (std::find(Latches.begin(), Latches.end(), *PI) == Latches.end())
>> + Preds.push_back(*PI);
>> + }
>> + BB = llvm::SplitBlockPredecessors(BB, Preds, "endcf.split", nullptr, DT,
>> + LI, false);
>> + }
>> +
>> CallInst::Create(EndCf, popSaved(), "", BB->getFirstInsertionPt());
>> }
>>
>> @@ -288,6 +308,7 @@ void SIAnnotateControlFlow::closeControl
>> /// recognize if/then/else and loops.
>> bool SIAnnotateControlFlow::runOnFunction(Function &F) {
>> DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
>> + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
>>
>> for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
>> E = df_end(&F.getEntryBlock()); I != E; ++I) {
>>
>> Added: llvm/trunk/test/CodeGen/R600/endcf-loop-header.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/endcf-loop-header.ll?rev=228302&view=auto
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/R600/endcf-loop-header.ll (added)
>> +++ llvm/trunk/test/CodeGen/R600/endcf-loop-header.ll Thu Feb 5 09:32:15 2015
>> @@ -0,0 +1,34 @@
>> +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
>> +
>> +; This tests that the llvm.SI.end.cf intrinsic is not inserted into the
>> +; loop block. This intrinsic will be lowered to s_or_b64 by the code
>> +; generator.
>> +
>> +; CHECK-LABEL: {{^}}test:
>> +
>> +; This is was lowered from the llvm.SI.end.cf intrinsic:
>> +; CHECK: s_or_b64 exec, exec
>> +
>> +; CHECK: [[LOOP_LABEL:[0-9A-Za-z_]+]]: ; %loop{{$}}
>> +; CHECK-NOT: s_or_b64 exec, exec
>> +; CHECK: s_cbranch_execnz [[LOOP_LABEL]]
>> +define void @test(i32 addrspace(1)* %out, i32 %cond) {
>> +entry:
>> + %tmp0 = icmp eq i32 %cond, 0
>> + br i1 %tmp0, label %if, label %loop
>> +
>> +if:
>> + store i32 0, i32 addrspace(1)* %out
>> + br label %loop
>> +
>> +loop:
>> + %tmp1 = phi i32 [0, %entry], [0, %if], [%inc, %loop]
>> + %inc = add i32 %tmp1, %cond
>> + %tmp2 = icmp ugt i32 %inc, 10
>> + br i1 %tmp2, label %done, label %loop
>> +
>> +done:
>> + %tmp3 = getelementptr i32 addrspace(1)* %out, i64 1
>> + store i32 %inc, i32 addrspace(1)* %tmp3
>> + ret void
>> +}
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list