[llvm] fa90f6b - TTI: Pass function to hasBranchDivergence in a few passes
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 7 06:49:43 PDT 2023
Author: Matt Arsenault
Date: 2023-07-07T09:49:38-04:00
New Revision: fa90f6b9d0fa2742df4548156c498c48dc796ec4
URL: https://github.com/llvm/llvm-project/commit/fa90f6b9d0fa2742df4548156c498c48dc796ec4
DIFF: https://github.com/llvm/llvm-project/commit/fa90f6b9d0fa2742df4548156c498c48dc796ec4.diff
LOG: TTI: Pass function to hasBranchDivergence in a few passes
https://reviews.llvm.org/D152033
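
For context, the change threads the enclosing Function into the divergence query so a target can answer per function rather than globally. A minimal sketch of the call pattern, assuming only the public TargetTransformInfo API; the helper name below is illustrative and not taken from the patch:

  // Sketch only: how a pass can query branch divergence per function after
  // this change. shouldSkipForDivergence is an illustrative helper name.
  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/Function.h"

  static bool shouldSkipForDivergence(const llvm::Function &F,
                                      const llvm::TargetTransformInfo &TTI) {
    // Passing &F lets the target refine its answer for this specific
    // function; for example, a GPU target can report no branch divergence
    // for functions it knows execute with a single lane, such as the
    // "amdgpu-flat-work-group-size"="1,1" functions used in the tests below.
    return TTI.hasBranchDivergence(&F);
  }

Passes that bail out on divergent targets (JumpThreading, SimpleLoopUnswitch's non-trivial unswitching, SpeculativeExecution's divergent-target-only mode) can then keep running on functions where divergence is impossible, which is what the updated and newly added tests check.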
Added:
llvm/test/Transforms/SpeculativeExecution/single-lane-execution.ll
Modified:
llvm/lib/Transforms/Scalar/JumpThreading.cpp
llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
llvm/test/Transforms/JumpThreading/divergent-target-test.ll
llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/nontrivial-unswitch-divergent-target.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 5b880f9fcccc61..24390f1b54f605 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -246,7 +246,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
// Jump Threading has no sense for the targets with divergent CF
- if (TTI.hasBranchDivergence())
+ if (TTI.hasBranchDivergence(&F))
return PreservedAnalyses::all();
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &LVI = AM.getResult<LazyValueAnalysis>(F);
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index bae5846d5425fc..ad7d34b6147026 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -3552,6 +3552,8 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
return true;
}
+ const Function *F = L.getHeader()->getParent();
+
// Check whether we should continue with non-trivial conditions.
// EnableNonTrivialUnswitch: Global variable that forces non-trivial
// unswitching for testing and debugging.
@@ -3564,12 +3566,12 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
// branches even on targets that have divergence.
// https://bugs.llvm.org/show_bug.cgi?id=48819
bool ContinueWithNonTrivial =
- EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence());
+ EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence(F));
if (!ContinueWithNonTrivial)
return false;
// Skip non-trivial unswitching for optsize functions.
- if (L.getHeader()->getParent()->hasOptSize())
+ if (F->hasOptSize())
return false;
// Returns true if Loop L's loop nest is cold, i.e. if the headers of L,
diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index 65f8d760ede3a0..e866fe68112754 100644
--- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -152,7 +152,7 @@ bool SpeculativeExecutionLegacyPass::runOnFunction(Function &F) {
namespace llvm {
bool SpeculativeExecutionPass::runImpl(Function &F, TargetTransformInfo *TTI) {
- if (OnlyIfDivergentTarget && !TTI->hasBranchDivergence()) {
+ if (OnlyIfDivergentTarget && !TTI->hasBranchDivergence(&F)) {
LLVM_DEBUG(dbgs() << "Not running SpeculativeExecution because "
"TTI->hasBranchDivergence() is false.\n");
return false;
diff --git a/llvm/test/Transforms/JumpThreading/divergent-target-test.ll b/llvm/test/Transforms/JumpThreading/divergent-target-test.ll
index ec89302750b706..5e505aa695f071 100644
--- a/llvm/test/Transforms/JumpThreading/divergent-target-test.ll
+++ b/llvm/test/Transforms/JumpThreading/divergent-target-test.ll
@@ -45,3 +45,39 @@ F2:
; UNIFORM: ret i32 %v2
ret i32 %B
}
+
+; Check divergence check is skipped if there can't be divergence in
+; the function.
+define i32 @requires_single_lane_exec(i1 %cond) #0 {
+; CHECK: requires_single_lane_exec
+ br i1 %cond, label %T1, label %F1
+
+; CHECK-NOT: T1
+T1:
+ %v1 = call i32 @f1()
+ br label %Merge
+; CHECK-NOT: F1
+F1:
+ %v2 = call i32 @f2()
+ br label %Merge
+; CHECK-NOT: Merge
+Merge:
+ %A = phi i1 [true, %T1], [false, %F1]
+ %B = phi i32 [%v1, %T1], [%v2, %F1]
+ br i1 %A, label %T2, label %F2
+
+T2:
+; CHECK: T2:
+; CHECK: %v1 = call i32 @f1()
+; CHECK: call void @f3()
+; CHECK: ret i32 %v1
+ call void @f3()
+ ret i32 %B
+F2:
+; CHECK: F2:
+; CHECK: %v2 = call i32 @f2()
+; CHECK: ret i32 %v2
+ ret i32 %B
+}
+
+attributes #0 = { "amdgpu-flat-work-group-size"="1,1" }
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/nontrivial-unswitch-divergent-target.ll b/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/nontrivial-unswitch-divergent-target.ll
index 97d3d4f11e678a..8d3386031e75dd 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/nontrivial-unswitch-divergent-target.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/nontrivial-unswitch-divergent-target.ll
@@ -123,50 +123,101 @@ define void @test1_single_lane_execution(ptr %ptr, i1 %cond1, i1 %cond2) #0 {
entry:
br label %loop_begin
; CHECK-NEXT: entry:
-; CHECK-NEXT: br label %loop_begin
+; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split
loop_begin:
br i1 %cond1, label %loop_a, label %loop_b
-; CHECK: loop_begin:
-; CHECK-NEXT: br i1 %cond1, label %loop_a, label %loop_b
loop_a:
- %unused.a = call i32 @a()
+ call i32 @a()
br label %latch
-; CHECK: loop_a:
-; CHECK-NEXT: %unused.a = call i32 @a()
-; CHECK-NEXT: br label %latch
+; The 'loop_a' unswitched loop.
+;
+; CHECK: entry.split.us:
+; CHECK-NEXT: br label %loop_begin.us
+;
+; CHECK: loop_begin.us:
+; CHECK-NEXT: br label %loop_a.us
+;
+; CHECK: loop_a.us:
+; CHECK-NEXT: call i32 @a()
+; CHECK-NEXT: br label %latch.us
+;
+; CHECK: latch.us:
+; CHECK-NEXT: %[[V:.*]] = load i1, ptr %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
+;
+; CHECK: loop_exit.split.us:
+; CHECK-NEXT: br label %loop_exit
loop_b:
br i1 %cond2, label %loop_b_a, label %loop_b_b
-; CHECK: loop_b:
-; CHECK-NEXT: br i1 %cond2, label %loop_b_a, label %loop_b_b
+; The second unswitched condition.
+;
+; CHECK: entry.split:
+; CHECK-NEXT: br i1 %cond2, label %entry.split.split.us, label %entry.split.split
loop_b_a:
- %unused.b = call i32 @b()
+ call i32 @b()
br label %latch
-; CHECK: loop_b_a:
-; CHECK-NEXT: %unused.b = call i32 @b()
-; CHECK-NEXT: br label %latch
+; The 'loop_b_a' unswitched loop.
+;
+; CHECK: entry.split.split.us:
+; CHECK-NEXT: br label %loop_begin.us1
+;
+; CHECK: loop_begin.us1:
+; CHECK-NEXT: br label %loop_b.us
+;
+; CHECK: loop_b.us:
+; CHECK-NEXT: br label %loop_b_a.us
+;
+; CHECK: loop_b_a.us:
+; CHECK-NEXT: call i32 @b()
+; CHECK-NEXT: br label %latch.us2
+;
+; CHECK: latch.us2:
+; CHECK-NEXT: %[[V:.*]] = load i1, ptr %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us1, label %loop_exit.split.split.us
+;
+; CHECK: loop_exit.split.split.us:
+; CHECK-NEXT: br label %loop_exit.split
loop_b_b:
- %unused.c = call i32 @c()
+ call i32 @c()
br label %latch
-; CHECK: loop_b_b:
-; CHECK-NEXT: %unused.c = call i32 @c()
-; CHECK-NEXT: br label %latch
+; The 'loop_b_b' unswitched loop.
+;
+; CHECK: entry.split.split:
+; CHECK-NEXT: br label %loop_begin
+;
+; CHECK: loop_begin:
+; CHECK-NEXT: br label %loop_b
+;
+; CHECK: loop_b:
+; CHECK-NEXT: br label %loop_b_b
+;
+; CHECK: loop_b_b:
+; CHECK-NEXT: call i32 @c()
+; CHECK-NEXT: br label %latch
+;
+; CHECK: latch:
+; CHECK-NEXT: %[[V:.*]] = load i1, ptr %ptr
+; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split.split
+;
+; CHECK: loop_exit.split.split:
+; CHECK-NEXT: br label %loop_exit.split
latch:
%v = load i1, ptr %ptr
br i1 %v, label %loop_begin, label %loop_exit
-; CHECK: latch:
-; CHECK-NEXT: %v = load i1, ptr %ptr
-; CHECK-NEXT: br i1 %v, label %loop_begin, label %loop_exit
loop_exit:
ret void
-; CHECK: loop_exit:
-; CHECK-NEXT: ret void
+; CHECK: loop_exit.split:
+; CHECK-NEXT: br label %loop_exit
+;
+; CHECK: loop_exit:
+; CHECK-NEXT: ret
}
attributes #0 = { "amdgpu-flat-work-group-size"="1,1" }
diff --git a/llvm/test/Transforms/SpeculativeExecution/single-lane-execution.ll b/llvm/test/Transforms/SpeculativeExecution/single-lane-execution.ll
new file mode 100644
index 00000000000000..7f881d0342b5de
--- /dev/null
+++ b/llvm/test/Transforms/SpeculativeExecution/single-lane-execution.ll
@@ -0,0 +1,25 @@
+; REQUIRES: amdgpu-registered-target
+; RUN: opt -S -passes=speculative-execution -mtriple=amdgcn-- \
+; RUN: -spec-exec-only-if-divergent-target \
+; RUN: -spec-exec-max-speculation-cost 4 -spec-exec-max-not-hoisted 3 \
+; RUN: %s | FileCheck %s
+
+; Hoist in if-then pattern.
+define void @skip_single_lane_ifThen() #0 {
+; CHECK-LABEL: @skip_single_lane_ifThen(
+; CHECK: br i1 true
+
+br i1 true, label %a, label %b
+; CHECK: a:
+; CHECK: %x = add i32 2, 3
+a:
+ %x = add i32 2, 3
+; CHECK: br label
+ br label %b
+; CHECK: b:
+b:
+; CHECK: ret void
+ ret void
+}
+
+attributes #0 = { "amdgpu-flat-work-group-size"="1,1" }