[llvm-dev] Structurizing multi-exit regions
Matt Arsenault via llvm-dev
llvm-dev at lists.llvm.org
Wed Mar 1 17:39:29 PST 2017
Hi,
I'm trying to solve a problem caused by StructurizeCFG not actually handling
regions with multiple exits. Sample IR attached.
StructurizeCFG doesn't touch this function, exiting early on the
isTopLevelRegion check. SIAnnotateControlFlow then gets confused and
ends up inserting an if into one of the blocks, and the matching end.cf
into one of the return/unreachable blocks. The input to the end.cf is
then not dominated by the condition, which fails the verifier.
I'm not sure exactly how to go about fixing this. I see a few options:
- Try to make the annotator aware of multi-exit regions and insert the
necessary phis for the input mask values for the end.cf calls. This
seems undesirable, and I'm not sure it works in all cases.
- Make StructurizeCFG duplicate blocks to get simple regions. Is there
already code to do this somewhere? CodeExtractor seems to do something
similar, but not quite the same. Can this be done in the region pass, or
does StructurizeCFG need to be converted to a function pass? RegionInfo
mentions support for "extended" regions with multiple exits, but I don't
think this helps any here.
-Matt
-------------- next part --------------
; RUN: opt -S -structurizecfg -si-annotate-control-flow %s
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn-amd-amdhsa-opencl"
; Function Attrs: nounwind
; Kernel whose control flow fans out from `entry` into two compare blocks
; (LeafBlock, LeafBlock1); each of those can branch to either of two blocks
; that end in `unreachable`. Both terminating blocks are therefore reachable
; from both compare blocks, and the function contains no `ret`.
define amdgpu_kernel void @multi_divergent_region_exit(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
entry:
; Build a 64-bit index from the work-item id intrinsic. The two adds of 0
; leave the value unchanged; the shl/ashr-by-32 pair sign-extends the low
; 32 bits of the zero-extended id.
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%tmp1 = add i32 0, %tmp
%tmp2 = zext i32 %tmp1 to i64
%tmp3 = add i64 0, %tmp2
%tmp4 = shl i64 %tmp3, 32
%tmp5 = ashr exact i64 %tmp4, 32
; Dependent load chain: the value loaded from %arg0 at that index is used as
; the index into %arg1, and the value loaded from %arg1 indexes %arg2.
%tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
%tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
%tmp8 = sext i32 %tmp7 to i64
%tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
%tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
%tmp13 = zext i32 %tmp10 to i64
%tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
%tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
; First split on the loaded value: signed less-than 2 goes to LeafBlock,
; everything else goes to LeafBlock1.
%Pivot = icmp slt i32 %tmp16, 2
br i1 %Pivot, label %LeafBlock, label %LeafBlock1
LeafBlock: ; preds = %entry
; Reached when %tmp16 < 2: value 1 goes to unreachable0, any other value to
; unreachable1.
%SwitchLeaf = icmp eq i32 %tmp16, 1
br i1 %SwitchLeaf, label %unreachable0, label %unreachable1
LeafBlock1: ; preds = %entry
; Reached when %tmp16 >= 2: value 2 goes to unreachable0, any other value to
; unreachable1.
%SwitchLeaf2 = icmp eq i32 %tmp16, 2
br i1 %SwitchLeaf2, label %unreachable0, label %unreachable1
unreachable0: ; preds = %LeafBlock, %LeafBlock1
; Volatile store of 9 through an undef addrspace(1) pointer, then terminate
; with `unreachable`.
store volatile i32 9, i32 addrspace(1)* undef
unreachable
unreachable1: ; preds = %LeafBlock, %LeafBlock1
; Volatile store of 17 through an undef addrspace(3) pointer (note the
; different address space from unreachable0), then terminate with
; `unreachable`.
store volatile i32 17, i32 addrspace(3)* undef
unreachable
}
; Function Attrs: nounwind readnone
; Declaration of the intrinsic that the kernel's entry block calls to obtain
; its i32 work-item id value.
declare i32 @llvm.amdgcn.workitem.id.x() #1
; Attribute group #0 is referenced by the kernel definition; group #1 is
; referenced by the intrinsic declaration and by its call site.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
More information about the llvm-dev
mailing list