[llvm-branch-commits] [llvm] 3216d98 - Reapply [CHR] Fix up phi nodes with unreachable predecessors (PR64594)

Tobias Hieta via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Aug 16 23:25:30 PDT 2023


Author: Nikita Popov
Date: 2023-08-17T08:24:29+02:00
New Revision: 3216d98bc618f139a9e39b848a2c2058a727a9cf

URL: https://github.com/llvm/llvm-project/commit/3216d98bc618f139a9e39b848a2c2058a727a9cf
DIFF: https://github.com/llvm/llvm-project/commit/3216d98bc618f139a9e39b848a2c2058a727a9cf.diff

LOG: Reapply [CHR] Fix up phi nodes with unreachable predecessors (PR64594)

Relative to the previous attempt, this also adjusts RegionInfo
verification to allow unreachable predecessors.

-----

If a block in the CHR region has an unreachable predecessor, then
there will be no edge from that predecessor to the newly cloned
block. However, a phi node entry for it will be left behind. Make
sure that these incoming blocks get dropped as well.

Fixes https://github.com/llvm/llvm-project/issues/64594.

Differential Revision: https://reviews.llvm.org/D157621

(cherry picked from commit 7e2f1ae7e0ebc7e71ffaa865175aef27fae3b034)

Added: 
    llvm/test/Transforms/PGOProfile/chr-dead-pred.ll

Modified: 
    llvm/include/llvm/Analysis/RegionInfoImpl.h
    llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
    llvm/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll

Removed: 
    llvm/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug-xfail.ll


################################################################################
diff  --git a/llvm/include/llvm/Analysis/RegionInfoImpl.h b/llvm/include/llvm/Analysis/RegionInfoImpl.h
index 74591ee25ae585..8b04393dd29492 100644
--- a/llvm/include/llvm/Analysis/RegionInfoImpl.h
+++ b/llvm/include/llvm/Analysis/RegionInfoImpl.h
@@ -254,7 +254,9 @@ void RegionBase<Tr>::verifyBBInRegion(BlockT *BB) const {
   if (entry != BB) {
     for (BlockT *Pred : make_range(InvBlockTraits::child_begin(BB),
                                    InvBlockTraits::child_end(BB))) {
-      if (!contains(Pred))
+      // Allow predecessors that are unreachable, as these are ignored during
+      // region analysis.
+      if (!contains(Pred) && DT->isReachableFromEntry(Pred))
         report_fatal_error("Broken region found: edges entering the region must "
                            "go to the entry node!");
     }

diff  --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index 3e3be536defc5d..597cec8e61c9e3 100644
--- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -1777,6 +1777,20 @@ void CHR::cloneScopeBlocks(CHRScope *Scope,
       BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
       NewBlocks.push_back(NewBB);
       VMap[BB] = NewBB;
+
+      // Unreachable predecessors will not be cloned and will not have an edge
+      // to the cloned block. As such, also remove them from any phi nodes.
+      // To avoid iterator invalidation, first collect the dead predecessors
+      // from the first phi node, and then perform the actual removal.
+      if (auto *FirstPN = dyn_cast<PHINode>(NewBB->begin())) {
+        SmallVector<BasicBlock *> DeadPreds;
+        for (BasicBlock *Pred : FirstPN->blocks())
+          if (!DT.isReachableFromEntry(Pred))
+            DeadPreds.push_back(Pred);
+        for (PHINode &PN : make_early_inc_range(NewBB->phis()))
+          for (BasicBlock *Pred : DeadPreds)
+            PN.removeIncomingValue(Pred);
+      }
     }
 
   // Place the cloned blocks right after the original blocks (right before the

diff  --git a/llvm/test/Transforms/PGOProfile/chr-dead-pred.ll b/llvm/test/Transforms/PGOProfile/chr-dead-pred.ll
new file mode 100644
index 00000000000000..6ce4b0cc658e78
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/chr-dead-pred.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -passes='require<profile-summary>,chr' -verify-region-info < %s | FileCheck %s
+
+define void @test(i1 %c, i1 %c2) !prof !29 {
+; CHECK-LABEL: define void @test
+; CHECK-SAME: (i1 [[C:%.*]], i1 [[C2:%.*]]) !prof [[PROF29:![0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 true, [[C]]
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i1 [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 true, i1 [[TMP1]], i1 false
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 true, [[C2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = freeze i1 [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = select i1 [[TMP2]], i1 [[TMP4]], i1 false
+; CHECK-NEXT:    br i1 [[TMP5]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof [[PROF30:![0-9]+]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    switch i8 0, label [[BB1:%.*]] [
+; CHECK-NEXT:    i8 1, label [[BB2:%.*]]
+; CHECK-NEXT:    i8 2, label [[BB3:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 false, i32 0, i32 1, !prof [[PROF31:![0-9]+]]
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[SELECT3:%.*]] = select i1 false, i32 0, i32 1, !prof [[PROF32:![0-9]+]]
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       dead:
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i64 [ 0, [[DEAD:%.*]] ], [ 1, [[ENTRY_SPLIT]] ]
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       entry.split.nonchr:
+; CHECK-NEXT:    switch i8 0, label [[BB1_NONCHR:%.*]] [
+; CHECK-NEXT:    i8 1, label [[BB2_NONCHR:%.*]]
+; CHECK-NEXT:    i8 2, label [[BB3_NONCHR:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       bb1.nonchr:
+; CHECK-NEXT:    [[SELECT_NONCHR:%.*]] = select i1 [[C]], i32 0, i32 1, !prof [[PROF31]]
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       bb2.nonchr:
+; CHECK-NEXT:    [[SELECT3_NONCHR:%.*]] = select i1 [[C2]], i32 0, i32 1, !prof [[PROF32]]
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       bb3.nonchr:
+; CHECK-NEXT:    [[PHI_NONCHR:%.*]] = phi i64 [ 1, [[ENTRY_SPLIT_NONCHR]] ]
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  switch i8 0, label %bb1 [
+  i8 1, label %bb2
+  i8 2, label %bb3
+  ]
+
+bb1:                                              ; preds = %entry
+  %select = select i1 %c, i32 0, i32 1, !prof !30
+  br label %exit
+
+bb2:                                              ; preds = %entry
+  %select3 = select i1 %c2, i32 0, i32 1, !prof !31
+  br label %exit
+
+dead:                                             ; No predecessors!
+  br label %bb3
+
+bb3:                                              ; preds = %dead, %entry
+  %phi = phi i64 [ 0, %dead ], [ 1, %entry ]
+  br label %exit
+
+exit:                                             ; preds = %bb3, %bb2, %bb1
+  ret void
+}
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9, !10, !11}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 597326977313}
+!4 = !{!"MaxCount", i64 12561793713}
+!5 = !{!"MaxInternalCount", i64 2509052618}
+!6 = !{!"MaxFunctionCount", i64 12561793713}
+!7 = !{!"NumCounts", i64 1694881}
+!8 = !{!"NumFunctions", i64 129214}
+!9 = !{!"IsPartialProfile", i64 0}
+!10 = !{!"PartialProfileRatio", double 0.000000e+00}
+!11 = !{!"DetailedSummary", !12}
+!12 = !{!13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28}
+!13 = !{i32 10000, i64 12561793713, i32 1}
+!14 = !{i32 100000, i64 1733566697, i32 20}
+!15 = !{i32 200000, i64 820928443, i32 71}
+!16 = !{i32 300000, i64 404967336, i32 182}
+!17 = !{i32 400000, i64 233162193, i32 376}
+!18 = !{i32 500000, i64 120552435, i32 741}
+!19 = !{i32 600000, i64 69388652, i32 1402}
+!20 = !{i32 700000, i64 33926336, i32 2643}
+!21 = !{i32 800000, i64 15635940, i32 5288}
+!22 = !{i32 900000, i64 5547105, i32 11637}
+!23 = !{i32 950000, i64 2224405, i32 20074}
+!24 = !{i32 990000, i64 359838, i32 44778}
+!25 = !{i32 999000, i64 37485, i32 81744}
+!26 = !{i32 999900, i64 3465, i32 119656}
+!27 = !{i32 999990, i64 529, i32 155440}
+!28 = !{i32 999999, i64 70, i32 178344}
+!29 = !{!"function_entry_count", i64 33781183}
+!30 = !{!"branch_weights", i32 0, i32 9263770}
+!31 = !{!"branch_weights", i32 0, i32 634318}

diff  --git a/llvm/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug-xfail.ll b/llvm/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug-xfail.ll
deleted file mode 100644
index 850696a9973920..00000000000000
--- a/llvm/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug-xfail.ll
+++ /dev/null
@@ -1,77 +0,0 @@
-; XFAIL: *
-; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -structurizecfg -verify-region-info %s
-
-; FIXME: Merge into backedge-id-bug
-; Variant which has an issue with region construction
-
-define amdgpu_kernel void @loop_backedge_misidentified_alt(ptr addrspace(1) %arg0) #0 {
-entry:
-  %tmp = load volatile <2 x i32>, ptr addrspace(1) undef, align 16
-  %load1 = load volatile <2 x float>, ptr addrspace(1) undef
-  %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr inbounds i32, ptr addrspace(1) %arg0, i32 %tid
-  %i.initial = load volatile i32, ptr addrspace(1) %gep, align 4
-  br label %LOOP.HEADER
-
-LOOP.HEADER:
-  %i = phi i32 [ %i.final, %END_ELSE_BLOCK ], [ %i.initial, %entry ]
-  call void asm sideeffect "s_nop 0x100b ; loop $0 ", "r,~{memory}"(i32 %i) #0
-  %tmp12 = zext i32 %i to i64
-  %tmp13 = getelementptr inbounds <4 x i32>, ptr addrspace(1) null, i64 %tmp12
-  %tmp14 = load <4 x i32>, ptr addrspace(1) %tmp13, align 16
-  %tmp15 = extractelement <4 x i32> %tmp14, i64 0
-  %tmp16 = and i32 %tmp15, 65535
-  %tmp17 = icmp eq i32 %tmp16, 1
-  br i1 %tmp17, label %bb18, label %bb62
-
-bb18:
-  %tmp19 = extractelement <2 x i32> %tmp, i64 0
-  %tmp22 = lshr i32 %tmp19, 16
-  %tmp24 = urem i32 %tmp22, 52
-  %tmp25 = mul nuw nsw i32 %tmp24, 52
-  br label %INNER_LOOP
-
-INNER_LOOP:
-  %inner.loop.j = phi i32 [ %tmp25, %bb18 ], [ %inner.loop.j.inc, %INNER_LOOP ]
-  call void asm sideeffect "; inner loop body", ""() #0
-  %inner.loop.j.inc = add nsw i32 %inner.loop.j, 1
-  %inner.loop.cmp = icmp eq i32 %inner.loop.j, 0
-  br i1 %inner.loop.cmp, label %INNER_LOOP_BREAK, label %INNER_LOOP
-
-INNER_LOOP_BREAK:
-  %tmp59 = extractelement <4 x i32> %tmp14, i64 2
-  call void asm sideeffect "s_nop 23 ", "~{memory}"() #0
-  br label %END_ELSE_BLOCK
-
-bb62:
-  %load13 = icmp ult i32 %tmp16, 271
-  ;br i1 %load13, label %bb64, label %INCREMENT_I
-  ; branching directly to the return avoids the bug
-  br i1 %load13, label %RETURN, label %INCREMENT_I
-
-
-bb64:
-  call void asm sideeffect "s_nop 42", "~{memory}"() #0
-  br label %RETURN
-
-INCREMENT_I:
-  %inc.i = add i32 %i, 1
-  call void asm sideeffect "s_nop 0x1336 ; increment $0", "v,~{memory}"(i32 %inc.i) #0
-  br label %END_ELSE_BLOCK
-
-END_ELSE_BLOCK:
-  %i.final = phi i32 [ %tmp59, %INNER_LOOP_BREAK ], [ %inc.i, %INCREMENT_I ]
-  call void asm sideeffect "s_nop 0x1337 ; end else block $0", "v,~{memory}"(i32 %i.final) #0
-  %cmp.end.else.block = icmp eq i32 %i.final, -1
-  br i1 %cmp.end.else.block, label %RETURN, label %LOOP.HEADER
-
-RETURN:
-  call void asm sideeffect "s_nop 0x99 ; ClosureEval return", "~{memory}"() #0
-  store volatile <2 x float> %load1, ptr addrspace(1) undef, align 8
-  ret void
-}
-
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { convergent nounwind }
-attributes #1 = { convergent nounwind readnone }

diff  --git a/llvm/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll b/llvm/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll
index 6f71ef7a21382c..011624ba9c045b 100644
--- a/llvm/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll
+++ b/llvm/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll
@@ -156,7 +156,72 @@ RETURN:
 
 ; The same function, except break to return block goes directly to the
 ; return, which managed to hide the bug.
-; FIXME: Merge variant from backedge-id-bug-xfail
+define amdgpu_kernel void @loop_backedge_misidentified_alt(ptr addrspace(1) %arg0) #0 {
+entry:
+  %tmp = load volatile <2 x i32>, ptr addrspace(1) undef, align 16
+  %load1 = load volatile <2 x float>, ptr addrspace(1) undef
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %gep = getelementptr inbounds i32, ptr addrspace(1) %arg0, i32 %tid
+  %i.initial = load volatile i32, ptr addrspace(1) %gep, align 4
+  br label %LOOP.HEADER
+
+LOOP.HEADER:
+  %i = phi i32 [ %i.final, %END_ELSE_BLOCK ], [ %i.initial, %entry ]
+  call void asm sideeffect "s_nop 0x100b ; loop $0 ", "r,~{memory}"(i32 %i) #0
+  %tmp12 = zext i32 %i to i64
+  %tmp13 = getelementptr inbounds <4 x i32>, ptr addrspace(1) null, i64 %tmp12
+  %tmp14 = load <4 x i32>, ptr addrspace(1) %tmp13, align 16
+  %tmp15 = extractelement <4 x i32> %tmp14, i64 0
+  %tmp16 = and i32 %tmp15, 65535
+  %tmp17 = icmp eq i32 %tmp16, 1
+  br i1 %tmp17, label %bb18, label %bb62
+
+bb18:
+  %tmp19 = extractelement <2 x i32> %tmp, i64 0
+  %tmp22 = lshr i32 %tmp19, 16
+  %tmp24 = urem i32 %tmp22, 52
+  %tmp25 = mul nuw nsw i32 %tmp24, 52
+  br label %INNER_LOOP
+
+INNER_LOOP:
+  %inner.loop.j = phi i32 [ %tmp25, %bb18 ], [ %inner.loop.j.inc, %INNER_LOOP ]
+  call void asm sideeffect "; inner loop body", ""() #0
+  %inner.loop.j.inc = add nsw i32 %inner.loop.j, 1
+  %inner.loop.cmp = icmp eq i32 %inner.loop.j, 0
+  br i1 %inner.loop.cmp, label %INNER_LOOP_BREAK, label %INNER_LOOP
+
+INNER_LOOP_BREAK:
+  %tmp59 = extractelement <4 x i32> %tmp14, i64 2
+  call void asm sideeffect "s_nop 23 ", "~{memory}"() #0
+  br label %END_ELSE_BLOCK
+
+bb62:
+  %load13 = icmp ult i32 %tmp16, 271
+  ;br i1 %load13, label %bb64, label %INCREMENT_I
+  ; branching directly to the return avoids the bug
+  br i1 %load13, label %RETURN, label %INCREMENT_I
+
+
+bb64:
+  call void asm sideeffect "s_nop 42", "~{memory}"() #0
+  br label %RETURN
+
+INCREMENT_I:
+  %inc.i = add i32 %i, 1
+  call void asm sideeffect "s_nop 0x1336 ; increment $0", "v,~{memory}"(i32 %inc.i) #0
+  br label %END_ELSE_BLOCK
+
+END_ELSE_BLOCK:
+  %i.final = phi i32 [ %tmp59, %INNER_LOOP_BREAK ], [ %inc.i, %INCREMENT_I ]
+  call void asm sideeffect "s_nop 0x1337 ; end else block $0", "v,~{memory}"(i32 %i.final) #0
+  %cmp.end.else.block = icmp eq i32 %i.final, -1
+  br i1 %cmp.end.else.block, label %RETURN, label %LOOP.HEADER
+
+RETURN:
+  call void asm sideeffect "s_nop 0x99 ; ClosureEval return", "~{memory}"() #0
+  store volatile <2 x float> %load1, ptr addrspace(1) undef, align 8
+  ret void
+}
 
 declare i32 @llvm.amdgcn.workitem.id.x() #1
 


        


More information about the llvm-branch-commits mailing list