[PATCH] D59312: AMDGPU: Fix a SIAnnotateControlFlow issue when there are multiple backedges.
Phabricator via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 15 14:03:00 PDT 2019
This revision was automatically updated to reflect the committed changes.
Closed by commit rL356298: AMDGPU: Fix a SIAnnotateControlFlow issue when there are multiple backedges. (authored by chfang).
Herald added a project: LLVM.
Changed prior to commit:
https://reviews.llvm.org/D59312?vs=190891&id=190894#toc
Repository:
rL LLVM
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D59312/new/
https://reviews.llvm.org/D59312
Files:
llvm/trunk/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
llvm/trunk/test/CodeGen/AMDGPU/si-annotatecfg-multiple-backedges.ll
Index: llvm/trunk/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -264,8 +264,17 @@
Term->setCondition(BoolTrue);
Value *Arg = handleLoopCondition(Cond, Broken, L, Term);
- for (BasicBlock *Pred : predecessors(Target))
- Broken->addIncoming(Pred == BB ? Arg : Int64Zero, Pred);
+ for (BasicBlock *Pred : predecessors(Target)) {
+ Value *PHIValue = Int64Zero;
+ if (Pred == BB) // Remember the value of the previous iteration.
+ PHIValue = Arg;
+ // If the backedge from Pred to Target could be executed before the exit
+ // of the loop at BB, it should not reset or change "Broken", which keeps
+ // track of the number of threads that have exited the loop at BB.
+ else if (L->contains(Pred) && DT->dominates(Pred, BB))
+ PHIValue = Broken;
+ Broken->addIncoming(PHIValue, Pred);
+ }
Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
Index: llvm/trunk/test/CodeGen/AMDGPU/si-annotatecfg-multiple-backedges.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-annotatecfg-multiple-backedges.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/si-annotatecfg-multiple-backedges.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
+
+; This test is designed to check that the backedge from a prior block won't
+; reset the variable introduced to record and accumulate the number of threads
+; which have already exited the loop.
+
+define amdgpu_kernel void @multiple_backedges(i32 %arg, i32* %arg1) {
+; OPT-LABEL: @multiple_backedges(
+; OPT-NEXT: entry:
+; OPT-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
+; OPT-NEXT: [[TMP2:%.*]] = shl nsw i32 [[ARG:%.*]], 1
+; OPT-NEXT: br label [[LOOP:%.*]]
+; OPT: loop:
+; OPT-NEXT: [[PHI_BROKEN1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOOP_END:%.*]] ], [ [[PHI_BROKEN1]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
+; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ 0, [[LOOP_END]] ], [ [[TMP0:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
+; OPT-NEXT: [[TMP4:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP5:%.*]], [[LOOP]] ], [ 0, [[LOOP_END]] ]
+; OPT-NEXT: [[TMP5]] = add nsw i32 [[TMP4]], [[TMP]]
+; OPT-NEXT: [[TMP6:%.*]] = icmp slt i32 [[ARG]], [[TMP5]]
+; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break(i1 [[TMP6]], i64 [[PHI_BROKEN]])
+; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop(i64 [[TMP0]])
+; OPT-NEXT: br i1 [[TMP1]], label [[LOOP_END]], label [[LOOP]]
+; OPT: loop_end:
+; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 [[TMP0]])
+; OPT-NEXT: [[EXIT:%.*]] = icmp sgt i32 [[TMP5]], [[TMP2]]
+; OPT-NEXT: [[TMP7]] = call i64 @llvm.amdgcn.if.break(i1 [[EXIT]], i64 [[PHI_BROKEN1]])
+; OPT-NEXT: [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop(i64 [[TMP7]])
+; OPT-NEXT: br i1 [[TMP3]], label [[LOOP_EXIT:%.*]], label [[LOOP]]
+; OPT: loop_exit:
+; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 [[TMP7]])
+; OPT-NEXT: [[TMP12:%.*]] = zext i32 [[TMP]] to i64
+; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[ARG1:%.*]], i64 [[TMP12]]
+; OPT-NEXT: [[TMP14:%.*]] = addrspacecast i32* [[TMP13]] to i32 addrspace(1)*
+; OPT-NEXT: store i32 [[TMP5]], i32 addrspace(1)* [[TMP14]], align 4
+; OPT-NEXT: ret void
+;
+entry:
+ %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %tmp2 = shl nsw i32 %arg, 1
+ br label %loop
+
+loop:
+ %tmp4 = phi i32 [ 0, %entry ], [ %tmp5, %loop ], [ 0, %loop_end ]
+ %tmp5 = add nsw i32 %tmp4, %tmp
+ %tmp6 = icmp slt i32 %arg, %tmp5
+ br i1 %tmp6, label %loop_end, label %loop
+
+loop_end:
+ %exit = icmp sgt i32 %tmp5, %tmp2
+ br i1 %exit, label %loop_exit, label %loop
+
+loop_exit:
+ %tmp12 = zext i32 %tmp to i64
+ %tmp13 = getelementptr inbounds i32, i32* %arg1, i64 %tmp12
+ %tmp14 = addrspacecast i32* %tmp13 to i32 addrspace(1)*
+ store i32 %tmp5, i32 addrspace(1)* %tmp14, align 4
+ ret void
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workitem.id.x()
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D59312.190894.patch
Type: text/x-patch
Size: 4306 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190315/91b60e2f/attachment.bin>
More information about the llvm-commits mailing list