[llvm-dev] Structurizing multi-exit regions

Wed Mar 1 17:39:29 PST 2017

Hi,

I'm trying to solve a problem caused by StructurizeCFG not actually handling 
regions with multiple exits. Sample IR is attached below.

StructurizeCFG doesn't touch this function, exiting early on the 
isTopLevelRegion check. SIAnnotateControlFlow then gets confused and 
ends up inserting an if into one of the blocks, and the matching end.cf 
into one of the return/unreachable blocks. The input to the end.cf is 
then not dominated by the condition, which fails the verifier.

I'm not sure exactly how to go about fixing this. I see a few options:

- Try to make the annotator aware of multi-exit regions and insert the 
necessary phis for the input mask values for the end.cf calls. This 
seems undesirable, and I'm not sure it works in all cases.

- Make StructurizeCFG duplicate blocks to get simple regions. Is there 
already code to do this somewhere? CodeExtractor seems to do something 
similar, but not quite the same. Can this be done in the region pass, or 
does StructurizeCFG need to be converted to a function pass? RegionInfo 
mentions support for "extended" regions with multiple exits, but I don't 
think that helps here.

-Matt

-------------- next part --------------
; Reproducer: runs the StructurizeCFG pass followed by the
; SIAnnotateControlFlow pass over the kernel defined below and prints the
; resulting IR as text (-S).
; RUN: opt -S -structurizecfg -si-annotate-control-flow %s

; Target configuration for the module: an amdgcn triple, with a data layout
; that gives per-address-space pointer sizes (e.g. 32-bit for p/p3/p5, 64-bit
; for p1/p2/p4).
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn-amd-amdhsa-opencl"

; Kernel whose control flow has two distinct exit blocks (%unreachable0 and
; %unreachable1), each reachable from both %LeafBlock and %LeafBlock1. Both
; exits terminate in `unreachable`, so the function contains no `ret`.
;
; Every branch condition is computed from %tmp16, which is loaded through a
; chain of three dependent loads whose first index is derived from the
; workitem id. NOTE(review): the function name indicates these branches are
; meant to be divergent -- confirm against the target's divergence analysis.
;
; Function Attrs: nounwind
define amdgpu_kernel void @multi_divergent_region_exit(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
entry:
  ; Index computation starting from the workitem id. The two adds of 0 only
  ; forward their other operand; they are kept as-is in this reproducer.
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  ; shl by 32 followed by ashr by 32 sign-extends the low 32 bits of %tmp3.
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  ; Dependent load chain: the value read from %arg0 indexes %arg1, and the
  ; value read from %arg1 indexes %arg2.
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  ; First-level split: signed %tmp16 < 2 goes to %LeafBlock, otherwise to
  ; %LeafBlock1.
  %Pivot = icmp slt i32 %tmp16, 2
  br i1 %Pivot, label %LeafBlock, label %LeafBlock1

; Reached when %tmp16 < 2: value 1 goes to %unreachable0, anything else to
; %unreachable1.
LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %tmp16, 1
  br i1 %SwitchLeaf, label %unreachable0, label %unreachable1

; Reached when %tmp16 >= 2: value 2 goes to %unreachable0, anything else to
; %unreachable1.
LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %tmp16, 2
  br i1 %SwitchLeaf2, label %unreachable0, label %unreachable1

; First exit block: volatile store of 9 through an undef addrspace(1) pointer,
; then `unreachable`.
unreachable0:                                     ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 9, i32 addrspace(1)* undef
  unreachable

; Second exit block: volatile store of 17 through an undef addrspace(3)
; pointer, then `unreachable`. NOTE(review): the differing constant and
; address space presumably keep the two exit blocks from being merged --
; intent is not stated here.
unreachable1:                                     ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  unreachable
}

; Intrinsic called in the kernel's entry block; its i32 result seeds the
; index computation there.
; Function Attrs: nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.x() #1

; Attribute group #0 is attached to the kernel definition; group #1 is attached
; to the intrinsic declaration and to its call site.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }