[llvm-branch-commits] [llvm] 3216d98 - Reapply [CHR] Fix up phi nodes with unreachable predecessors (PR64594)
Tobias Hieta via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Aug 16 23:25:30 PDT 2023
Author: Nikita Popov
Date: 2023-08-17T08:24:29+02:00
New Revision: 3216d98bc618f139a9e39b848a2c2058a727a9cf
URL: https://github.com/llvm/llvm-project/commit/3216d98bc618f139a9e39b848a2c2058a727a9cf
DIFF: https://github.com/llvm/llvm-project/commit/3216d98bc618f139a9e39b848a2c2058a727a9cf.diff
LOG: Reapply [CHR] Fix up phi nodes with unreachable predecessors (PR64594)
Relative to the previous attempt, this also adjusts RegionInfo
verification to allow unreachable predecessors.
-----
If a block in the CHR region has an unreachable predecessor, then
there will be no edge from that predecessor to the newly cloned
block. However, a phi node entry for it will be left behind. Make
sure that these incoming blocks get dropped as well.
Fixes https://github.com/llvm/llvm-project/issues/64594.
Differential Revision: https://reviews.llvm.org/D157621
(cherry picked from commit 7e2f1ae7e0ebc7e71ffaa865175aef27fae3b034)
Added:
llvm/test/Transforms/PGOProfile/chr-dead-pred.ll
Modified:
llvm/include/llvm/Analysis/RegionInfoImpl.h
llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
llvm/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll
Removed:
llvm/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug-xfail.ll
################################################################################
diff --git a/llvm/include/llvm/Analysis/RegionInfoImpl.h b/llvm/include/llvm/Analysis/RegionInfoImpl.h
index 74591ee25ae585..8b04393dd29492 100644
--- a/llvm/include/llvm/Analysis/RegionInfoImpl.h
+++ b/llvm/include/llvm/Analysis/RegionInfoImpl.h
@@ -254,7 +254,9 @@ void RegionBase<Tr>::verifyBBInRegion(BlockT *BB) const {
if (entry != BB) {
for (BlockT *Pred : make_range(InvBlockTraits::child_begin(BB),
InvBlockTraits::child_end(BB))) {
- if (!contains(Pred))
+ // Allow predecessors that are unreachable, as these are ignored during
+ // region analysis.
+ if (!contains(Pred) && DT->isReachableFromEntry(Pred))
report_fatal_error("Broken region found: edges entering the region must "
"go to the entry node!");
}
diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index 3e3be536defc5d..597cec8e61c9e3 100644
--- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -1777,6 +1777,20 @@ void CHR::cloneScopeBlocks(CHRScope *Scope,
BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
NewBlocks.push_back(NewBB);
VMap[BB] = NewBB;
+
+ // Unreachable predecessors will not be cloned and will not have an edge
+ // to the cloned block. As such, also remove them from any phi nodes.
+ // To avoid iterator invalidation, first collect the dead predecessors
+ // from the first phi node, and then perform the actual removal.
+ if (auto *FirstPN = dyn_cast<PHINode>(NewBB->begin())) {
+ SmallVector<BasicBlock *> DeadPreds;
+ for (BasicBlock *Pred : FirstPN->blocks())
+ if (!DT.isReachableFromEntry(Pred))
+ DeadPreds.push_back(Pred);
+ for (PHINode &PN : make_early_inc_range(NewBB->phis()))
+ for (BasicBlock *Pred : DeadPreds)
+ PN.removeIncomingValue(Pred);
+ }
}
// Place the cloned blocks right after the original blocks (right before the
diff --git a/llvm/test/Transforms/PGOProfile/chr-dead-pred.ll b/llvm/test/Transforms/PGOProfile/chr-dead-pred.ll
new file mode 100644
index 00000000000000..6ce4b0cc658e78
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/chr-dead-pred.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -passes='require<profile-summary>,chr' -verify-region-info < %s | FileCheck %s
+
+define void @test(i1 %c, i1 %c2) !prof !29 {
+; CHECK-LABEL: define void @test
+; CHECK-SAME: (i1 [[C:%.*]], i1 [[C2:%.*]]) !prof [[PROF29:![0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = xor i1 true, [[C]]
+; CHECK-NEXT: [[TMP1:%.*]] = freeze i1 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 true, i1 [[TMP1]], i1 false
+; CHECK-NEXT: [[TMP3:%.*]] = xor i1 true, [[C2]]
+; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], i1 [[TMP4]], i1 false
+; CHECK-NEXT: br i1 [[TMP5]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof [[PROF30:![0-9]+]]
+; CHECK: entry.split:
+; CHECK-NEXT: switch i8 0, label [[BB1:%.*]] [
+; CHECK-NEXT: i8 1, label [[BB2:%.*]]
+; CHECK-NEXT: i8 2, label [[BB3:%.*]]
+; CHECK-NEXT: ]
+; CHECK: bb1:
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 false, i32 0, i32 1, !prof [[PROF31:![0-9]+]]
+; CHECK-NEXT: br label [[EXIT:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: [[SELECT3:%.*]] = select i1 false, i32 0, i32 1, !prof [[PROF32:![0-9]+]]
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: dead:
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ 0, [[DEAD:%.*]] ], [ 1, [[ENTRY_SPLIT]] ]
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: entry.split.nonchr:
+; CHECK-NEXT: switch i8 0, label [[BB1_NONCHR:%.*]] [
+; CHECK-NEXT: i8 1, label [[BB2_NONCHR:%.*]]
+; CHECK-NEXT: i8 2, label [[BB3_NONCHR:%.*]]
+; CHECK-NEXT: ]
+; CHECK: bb1.nonchr:
+; CHECK-NEXT: [[SELECT_NONCHR:%.*]] = select i1 [[C]], i32 0, i32 1, !prof [[PROF31]]
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: bb2.nonchr:
+; CHECK-NEXT: [[SELECT3_NONCHR:%.*]] = select i1 [[C2]], i32 0, i32 1, !prof [[PROF32]]
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: bb3.nonchr:
+; CHECK-NEXT: [[PHI_NONCHR:%.*]] = phi i64 [ 1, [[ENTRY_SPLIT_NONCHR]] ]
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ switch i8 0, label %bb1 [
+ i8 1, label %bb2
+ i8 2, label %bb3
+ ]
+
+bb1: ; preds = %entry
+ %select = select i1 %c, i32 0, i32 1, !prof !30
+ br label %exit
+
+bb2: ; preds = %entry
+ %select3 = select i1 %c2, i32 0, i32 1, !prof !31
+ br label %exit
+
+dead: ; No predecessors!
+ br label %bb3
+
+bb3: ; preds = %dead, %entry
+ %phi = phi i64 [ 0, %dead ], [ 1, %entry ]
+ br label %exit
+
+exit: ; preds = %bb3, %bb2, %bb1
+ ret void
+}
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9, !10, !11}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 597326977313}
+!4 = !{!"MaxCount", i64 12561793713}
+!5 = !{!"MaxInternalCount", i64 2509052618}
+!6 = !{!"MaxFunctionCount", i64 12561793713}
+!7 = !{!"NumCounts", i64 1694881}
+!8 = !{!"NumFunctions", i64 129214}
+!9 = !{!"IsPartialProfile", i64 0}
+!10 = !{!"PartialProfileRatio", double 0.000000e+00}
+!11 = !{!"DetailedSummary", !12}
+!12 = !{!13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28}
+!13 = !{i32 10000, i64 12561793713, i32 1}
+!14 = !{i32 100000, i64 1733566697, i32 20}
+!15 = !{i32 200000, i64 820928443, i32 71}
+!16 = !{i32 300000, i64 404967336, i32 182}
+!17 = !{i32 400000, i64 233162193, i32 376}
+!18 = !{i32 500000, i64 120552435, i32 741}
+!19 = !{i32 600000, i64 69388652, i32 1402}
+!20 = !{i32 700000, i64 33926336, i32 2643}
+!21 = !{i32 800000, i64 15635940, i32 5288}
+!22 = !{i32 900000, i64 5547105, i32 11637}
+!23 = !{i32 950000, i64 2224405, i32 20074}
+!24 = !{i32 990000, i64 359838, i32 44778}
+!25 = !{i32 999000, i64 37485, i32 81744}
+!26 = !{i32 999900, i64 3465, i32 119656}
+!27 = !{i32 999990, i64 529, i32 155440}
+!28 = !{i32 999999, i64 70, i32 178344}
+!29 = !{!"function_entry_count", i64 33781183}
+!30 = !{!"branch_weights", i32 0, i32 9263770}
+!31 = !{!"branch_weights", i32 0, i32 634318}
diff --git a/llvm/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug-xfail.ll b/llvm/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug-xfail.ll
deleted file mode 100644
index 850696a9973920..00000000000000
--- a/llvm/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug-xfail.ll
+++ /dev/null
@@ -1,77 +0,0 @@
-; XFAIL: *
-; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -structurizecfg -verify-region-info %s
-
-; FIXME: Merge into backedge-id-bug
-; Variant which has an issue with region construction
-
-define amdgpu_kernel void @loop_backedge_misidentified_alt(ptr addrspace(1) %arg0) #0 {
-entry:
- %tmp = load volatile <2 x i32>, ptr addrspace(1) undef, align 16
- %load1 = load volatile <2 x float>, ptr addrspace(1) undef
- %tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr inbounds i32, ptr addrspace(1) %arg0, i32 %tid
- %i.initial = load volatile i32, ptr addrspace(1) %gep, align 4
- br label %LOOP.HEADER
-
-LOOP.HEADER:
- %i = phi i32 [ %i.final, %END_ELSE_BLOCK ], [ %i.initial, %entry ]
- call void asm sideeffect "s_nop 0x100b ; loop $0 ", "r,~{memory}"(i32 %i) #0
- %tmp12 = zext i32 %i to i64
- %tmp13 = getelementptr inbounds <4 x i32>, ptr addrspace(1) null, i64 %tmp12
- %tmp14 = load <4 x i32>, ptr addrspace(1) %tmp13, align 16
- %tmp15 = extractelement <4 x i32> %tmp14, i64 0
- %tmp16 = and i32 %tmp15, 65535
- %tmp17 = icmp eq i32 %tmp16, 1
- br i1 %tmp17, label %bb18, label %bb62
-
-bb18:
- %tmp19 = extractelement <2 x i32> %tmp, i64 0
- %tmp22 = lshr i32 %tmp19, 16
- %tmp24 = urem i32 %tmp22, 52
- %tmp25 = mul nuw nsw i32 %tmp24, 52
- br label %INNER_LOOP
-
-INNER_LOOP:
- %inner.loop.j = phi i32 [ %tmp25, %bb18 ], [ %inner.loop.j.inc, %INNER_LOOP ]
- call void asm sideeffect "; inner loop body", ""() #0
- %inner.loop.j.inc = add nsw i32 %inner.loop.j, 1
- %inner.loop.cmp = icmp eq i32 %inner.loop.j, 0
- br i1 %inner.loop.cmp, label %INNER_LOOP_BREAK, label %INNER_LOOP
-
-INNER_LOOP_BREAK:
- %tmp59 = extractelement <4 x i32> %tmp14, i64 2
- call void asm sideeffect "s_nop 23 ", "~{memory}"() #0
- br label %END_ELSE_BLOCK
-
-bb62:
- %load13 = icmp ult i32 %tmp16, 271
- ;br i1 %load13, label %bb64, label %INCREMENT_I
- ; branching directly to the return avoids the bug
- br i1 %load13, label %RETURN, label %INCREMENT_I
-
-
-bb64:
- call void asm sideeffect "s_nop 42", "~{memory}"() #0
- br label %RETURN
-
-INCREMENT_I:
- %inc.i = add i32 %i, 1
- call void asm sideeffect "s_nop 0x1336 ; increment $0", "v,~{memory}"(i32 %inc.i) #0
- br label %END_ELSE_BLOCK
-
-END_ELSE_BLOCK:
- %i.final = phi i32 [ %tmp59, %INNER_LOOP_BREAK ], [ %inc.i, %INCREMENT_I ]
- call void asm sideeffect "s_nop 0x1337 ; end else block $0", "v,~{memory}"(i32 %i.final) #0
- %cmp.end.else.block = icmp eq i32 %i.final, -1
- br i1 %cmp.end.else.block, label %RETURN, label %LOOP.HEADER
-
-RETURN:
- call void asm sideeffect "s_nop 0x99 ; ClosureEval return", "~{memory}"() #0
- store volatile <2 x float> %load1, ptr addrspace(1) undef, align 8
- ret void
-}
-
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { convergent nounwind }
-attributes #1 = { convergent nounwind readnone }
diff --git a/llvm/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll b/llvm/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll
index 6f71ef7a21382c..011624ba9c045b 100644
--- a/llvm/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll
+++ b/llvm/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll
@@ -156,7 +156,72 @@ RETURN:
; The same function, except break to return block goes directly to the
; return, which managed to hide the bug.
-; FIXME: Merge variant from backedge-id-bug-xfail
+define amdgpu_kernel void @loop_backedge_misidentified_alt(ptr addrspace(1) %arg0) #0 {
+entry:
+ %tmp = load volatile <2 x i32>, ptr addrspace(1) undef, align 16
+ %load1 = load volatile <2 x float>, ptr addrspace(1) undef
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep = getelementptr inbounds i32, ptr addrspace(1) %arg0, i32 %tid
+ %i.initial = load volatile i32, ptr addrspace(1) %gep, align 4
+ br label %LOOP.HEADER
+
+LOOP.HEADER:
+ %i = phi i32 [ %i.final, %END_ELSE_BLOCK ], [ %i.initial, %entry ]
+ call void asm sideeffect "s_nop 0x100b ; loop $0 ", "r,~{memory}"(i32 %i) #0
+ %tmp12 = zext i32 %i to i64
+ %tmp13 = getelementptr inbounds <4 x i32>, ptr addrspace(1) null, i64 %tmp12
+ %tmp14 = load <4 x i32>, ptr addrspace(1) %tmp13, align 16
+ %tmp15 = extractelement <4 x i32> %tmp14, i64 0
+ %tmp16 = and i32 %tmp15, 65535
+ %tmp17 = icmp eq i32 %tmp16, 1
+ br i1 %tmp17, label %bb18, label %bb62
+
+bb18:
+ %tmp19 = extractelement <2 x i32> %tmp, i64 0
+ %tmp22 = lshr i32 %tmp19, 16
+ %tmp24 = urem i32 %tmp22, 52
+ %tmp25 = mul nuw nsw i32 %tmp24, 52
+ br label %INNER_LOOP
+
+INNER_LOOP:
+ %inner.loop.j = phi i32 [ %tmp25, %bb18 ], [ %inner.loop.j.inc, %INNER_LOOP ]
+ call void asm sideeffect "; inner loop body", ""() #0
+ %inner.loop.j.inc = add nsw i32 %inner.loop.j, 1
+ %inner.loop.cmp = icmp eq i32 %inner.loop.j, 0
+ br i1 %inner.loop.cmp, label %INNER_LOOP_BREAK, label %INNER_LOOP
+
+INNER_LOOP_BREAK:
+ %tmp59 = extractelement <4 x i32> %tmp14, i64 2
+ call void asm sideeffect "s_nop 23 ", "~{memory}"() #0
+ br label %END_ELSE_BLOCK
+
+bb62:
+ %load13 = icmp ult i32 %tmp16, 271
+ ;br i1 %load13, label %bb64, label %INCREMENT_I
+ ; branching directly to the return avoids the bug
+ br i1 %load13, label %RETURN, label %INCREMENT_I
+
+
+bb64:
+ call void asm sideeffect "s_nop 42", "~{memory}"() #0
+ br label %RETURN
+
+INCREMENT_I:
+ %inc.i = add i32 %i, 1
+ call void asm sideeffect "s_nop 0x1336 ; increment $0", "v,~{memory}"(i32 %inc.i) #0
+ br label %END_ELSE_BLOCK
+
+END_ELSE_BLOCK:
+ %i.final = phi i32 [ %tmp59, %INNER_LOOP_BREAK ], [ %inc.i, %INCREMENT_I ]
+ call void asm sideeffect "s_nop 0x1337 ; end else block $0", "v,~{memory}"(i32 %i.final) #0
+ %cmp.end.else.block = icmp eq i32 %i.final, -1
+ br i1 %cmp.end.else.block, label %RETURN, label %LOOP.HEADER
+
+RETURN:
+ call void asm sideeffect "s_nop 0x99 ; ClosureEval return", "~{memory}"() #0
+ store volatile <2 x float> %load1, ptr addrspace(1) undef, align 8
+ ret void
+}
declare i32 @llvm.amdgcn.workitem.id.x() #1
More information about the llvm-branch-commits
mailing list