[llvm] [SimplifyCFG] Skip threading if the target may have divergent branches (PR #100185)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 23 23:11:02 PDT 2024
https://github.com/darkbuck updated https://github.com/llvm/llvm-project/pull/100185
>From 2d469093d125683d09c2e97eeb9416dc903f2076 Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao at gmail.com>
Date: Tue, 23 Jul 2024 15:37:33 -0400
Subject: [PATCH] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.4
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 27 +++++++-----
.../SimplifyCFG/AMDGPU/convergent.ll | 44 +++++++++++++++++++
.../test/Transforms/SimplifyCFG/convergent.ll | 39 ++++++++++++++++
3 files changed, 98 insertions(+), 12 deletions(-)
create mode 100644 llvm/test/Transforms/SimplifyCFG/AMDGPU/convergent.ll
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index f23e28888931d..1a17524b826a1 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3246,7 +3246,12 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI,
}
/// Return true if we can thread a branch across this block.
-static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
+static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB,
+ const TargetTransformInfo &TTI) {
+ // Skip threading if the branch may be divergent.
+ if (TTI.hasBranchDivergence(BB->getParent()))
+ return false;
+
int Size = 0;
EphemeralValueTracker EphTracker;
@@ -3301,10 +3306,9 @@ static ConstantInt *getKnownValueOnEdge(Value *V, BasicBlock *From,
/// If we have a conditional branch on something for which we know the constant
/// value in predecessors (e.g. a phi node in the current block), thread edges
/// from the predecessor to their ultimate destination.
-static std::optional<bool>
-FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
- const DataLayout &DL,
- AssumptionCache *AC) {
+static std::optional<bool> FoldCondBranchOnValueKnownInPredecessorImpl(
+ BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL,
+ const TargetTransformInfo &TTI, AssumptionCache *AC) {
SmallMapVector<ConstantInt *, SmallSetVector<BasicBlock *, 2>, 2> KnownValues;
BasicBlock *BB = BI->getParent();
Value *Cond = BI->getCondition();
@@ -3332,7 +3336,7 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
// Now we know that this block has multiple preds and two succs.
// Check that the block is small enough and values defined in the block are
// not used outside of it.
- if (!BlockIsSimpleEnoughToThreadThrough(BB))
+ if (!BlockIsSimpleEnoughToThreadThrough(BB, TTI))
return false;
for (const auto &Pair : KnownValues) {
@@ -3459,15 +3463,14 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
return false;
}
-static bool FoldCondBranchOnValueKnownInPredecessor(BranchInst *BI,
- DomTreeUpdater *DTU,
- const DataLayout &DL,
- AssumptionCache *AC) {
+static bool FoldCondBranchOnValueKnownInPredecessor(
+ BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL,
+ const TargetTransformInfo &TTI, AssumptionCache *AC) {
std::optional<bool> Result;
bool EverChanged = false;
do {
// Note that None means "we changed things, but recurse further."
- Result = FoldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC);
+ Result = FoldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, TTI, AC);
EverChanged |= Result == std::nullopt || *Result;
} while (Result == std::nullopt);
return EverChanged;
@@ -7543,7 +7546,7 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// If this is a branch on something for which we know the constant value in
// predecessors (e.g. a phi node in the current block), thread control
// through this block.
- if (FoldCondBranchOnValueKnownInPredecessor(BI, DTU, DL, Options.AC))
+ if (FoldCondBranchOnValueKnownInPredecessor(BI, DTU, DL, TTI, Options.AC))
return requestResimplify();
// Scan predecessor blocks for conditional branches.
diff --git a/llvm/test/Transforms/SimplifyCFG/AMDGPU/convergent.ll b/llvm/test/Transforms/SimplifyCFG/AMDGPU/convergent.ll
new file mode 100644
index 0000000000000..b1262e294c6d0
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/AMDGPU/convergent.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=amdgcn -S -passes=simplifycfg < %s | FileCheck %s
+
+declare void @bar1()
+declare void @bar2()
+declare void @bar3()
+
+define i32 @test_01a(i32 %a) {
+; CHECK-LABEL: define i32 @test_01a(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A]], 0
+; CHECK-NEXT: br i1 [[COND]], label %[[MERGE:.*]], label %[[IF_FALSE:.*]]
+; CHECK: [[IF_FALSE]]:
+; CHECK-NEXT: call void @bar1()
+; CHECK-NEXT: br label %[[MERGE]]
+; CHECK: [[MERGE]]:
+; CHECK-NEXT: call void @bar2()
+; CHECK-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[IF_FALSE_2:.*]]
+; CHECK: [[IF_FALSE_2]]:
+; CHECK-NEXT: call void @bar3()
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret i32 [[A]]
+;
+entry:
+ %cond = icmp eq i32 %a, 0
+ br i1 %cond, label %merge, label %if.false
+
+if.false:
+ call void @bar1()
+ br label %merge
+
+merge:
+ call void @bar2()
+ br i1 %cond, label %exit, label %if.false.2
+
+if.false.2:
+ call void @bar3()
+ br label %exit
+
+exit:
+ ret i32 %a
+}
diff --git a/llvm/test/Transforms/SimplifyCFG/convergent.ll b/llvm/test/Transforms/SimplifyCFG/convergent.ll
index 6ba51e06460c2..d148063589de6 100644
--- a/llvm/test/Transforms/SimplifyCFG/convergent.ll
+++ b/llvm/test/Transforms/SimplifyCFG/convergent.ll
@@ -4,6 +4,9 @@
; RUN: opt -S -passes='simplifycfg<hoist-common-insts;sink-common-insts>' < %s | FileCheck -check-prefixes=CHECK,SINK %s
declare void @foo() convergent
+declare void @bar1()
+declare void @bar2()
+declare void @bar3()
declare i32 @tid()
declare i32 @mbcnt(i32 %a, i32 %b) convergent
declare i32 @bpermute(i32 %a, i32 %b) convergent
@@ -45,6 +48,42 @@ exit:
ret i32 %a
}
+define i32 @test_01a(i32 %a) {
+; CHECK-LABEL: @test_01a(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT: br i1 [[COND]], label [[EXIT_CRITEDGE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK: if.false:
+; CHECK-NEXT: call void @bar1()
+; CHECK-NEXT: call void @bar2()
+; CHECK-NEXT: call void @bar3()
+; CHECK-NEXT: br label [[EXIT:%.*]]
+; CHECK: exit.critedge:
+; CHECK-NEXT: call void @bar2()
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 [[A]]
+;
+entry:
+ %cond = icmp eq i32 %a, 0
+ br i1 %cond, label %merge, label %if.false
+
+if.false:
+ call void @bar1()
+ br label %merge
+
+merge:
+ call void @bar2()
+ br i1 %cond, label %exit, label %if.false.2
+
+if.false.2:
+ call void @bar3()
+ br label %exit
+
+exit:
+ ret i32 %a
+}
+
define void @test_02(ptr %y.coerce) convergent {
; NOSINK-LABEL: @test_02(
; NOSINK-NEXT: entry:
More information about the llvm-commits
mailing list