[llvm] [LowerSwitch] Don't let pass manager handle the dependency (PR #68662)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 9 23:00:23 PDT 2023
https://github.com/ruiling created https://github.com/llvm/llvm-project/pull/68662
Some passes has limitation that only support simple terminators: branch/unreachable/return. Right now, they ask the pass manager to add LowerSwitch pass to eliminate `switch`. Let's manage such kind of pass dependency by ourselves. Also add the assertion in the related passes.
>From f3254d10a81cd4a6b6218552aa518d97d52408db Mon Sep 17 00:00:00 2001
From: Ruiling Song <ruiling.song at amd.com>
Date: Tue, 10 Oct 2023 13:41:00 +0800
Subject: [PATCH] [LowerSwitch] Don't let pass manager handle the dependency
Some passes has limitation that only support simple terminators:
branch/unreachable/return. Right now, they ask the pass manager to
add LowerSwitch pass to eliminate `switch`. Let's manage such kind of pass
dependency by ourselves. Also add the assertion in the related passes.
---
llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h | 2 ++
.../Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp | 6 ++++--
llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 6 ++++--
llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 8 ++++++++
llvm/lib/Transforms/Utils/FixIrreducible.cpp | 6 +++---
llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp | 2 --
llvm/lib/Transforms/Utils/UnifyLoopExits.cpp | 6 +++---
llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 10 ----------
.../test/CodeGen/AMDGPU/multi-divergent-exit-region.ll | 4 ++--
llvm/test/CodeGen/AMDGPU/multilevel-break.ll | 2 +-
.../CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll | 2 +-
llvm/test/Transforms/FixIrreducible/switch.ll | 2 +-
llvm/test/Transforms/StructurizeCFG/switch.ll | 2 +-
13 files changed, 30 insertions(+), 28 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 1ac0f053f67b509..c9feed08b90827e 100644
--- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -702,6 +702,8 @@ BasicBlock *CreateControlFlowHub(
// successors
void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder);
+// Check whether the block only has simple terminator: br/brcond/unreachable/ret
+bool hasOnlySimpleTerminator(const BasicBlock *BB);
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_BASICBLOCKUTILS_H
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index 9ad841c3c8a54a5..3cdaac69ac9e1d4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -46,6 +46,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -114,8 +115,6 @@ void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const {
// We preserve the non-critical-edgeness property
AU.addPreservedID(BreakCriticalEdgesID);
- // This is a cluster of orthogonal Transforms
- AU.addPreservedID(LowerSwitchID);
FunctionPass::getAnalysisUsage(AU);
AU.addRequired<TargetTransformInfoWrapperPass>();
@@ -192,6 +191,9 @@ BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet(
bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
const PostDominatorTree &PDT,
const UniformityInfo &UA) {
+ for (auto &BB : F)
+ assert(hasOnlySimpleTerminator(&BB));
+
if (PDT.root_size() == 0 ||
(PDT.root_size() == 1 &&
!isa<BranchInst>(PDT.getRoot()->getTerminator())))
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index fac5695c7beaf17..b5f003911b4e6f9 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -42,6 +42,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
@@ -353,7 +354,6 @@ class StructurizeCFGLegacyPass : public RegionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
if (SkipUniformRegions)
AU.addRequired<UniformityInfoWrapperPass>();
- AU.addRequiredID(LowerSwitchID);
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
@@ -368,7 +368,6 @@ char StructurizeCFGLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
"Structurize the CFG", false, false)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
@@ -1173,6 +1172,9 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
this->DT = DT;
Func = R->getEntry()->getParent();
+ for (auto &BB : *Func)
+ assert(hasOnlySimpleTerminator(&BB));
+
ParentRegion = R;
orderNodes();
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 389aab39dec3409..3297b446b1b7703 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -2076,3 +2076,11 @@ void llvm::InvertBranch(BranchInst *PBI, IRBuilderBase &Builder) {
PBI->setCondition(NewCond);
PBI->swapSuccessors();
}
+
+bool llvm::hasOnlySimpleTerminator(const BasicBlock *BB) {
+ auto *Term = BB->getTerminator();
+ if (isa<ReturnInst>(Term) || isa<UnreachableInst>(Term) ||
+ isa<BranchInst>(Term))
+ return true;
+ return false;
+}
diff --git a/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
index dda236167363db8..8192a63aacee977 100644
--- a/llvm/lib/Transforms/Utils/FixIrreducible.cpp
+++ b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
@@ -87,10 +87,8 @@ struct FixIrreducible : public FunctionPass {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequiredID(LowerSwitchID);
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreservedID(LowerSwitchID);
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
}
@@ -106,7 +104,6 @@ FunctionPass *llvm::createFixIrreduciblePass() { return new FixIrreducible(); }
INITIALIZE_PASS_BEGIN(FixIrreducible, "fix-irreducible",
"Convert irreducible control-flow into natural loops",
false /* Only looks at CFG */, false /* Analysis Pass */)
-INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(FixIrreducible, "fix-irreducible",
@@ -317,6 +314,9 @@ static bool FixIrreducibleImpl(Function &F, LoopInfo &LI, DominatorTree &DT) {
LLVM_DEBUG(dbgs() << "===== Fix irreducible control-flow in function: "
<< F.getName() << "\n");
+ for (auto &BB : F)
+ assert(hasOnlySimpleTerminator(&BB));
+
bool Changed = false;
SmallVector<Loop *, 8> WorkList;
diff --git a/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 2b706858cbedd25..e5adbe5133cf457 100644
--- a/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -39,8 +39,6 @@ void UnifyFunctionExitNodesLegacyPass::getAnalysisUsage(
AnalysisUsage &AU) const {
// We preserve the non-critical-edgeness property
AU.addPreservedID(BreakCriticalEdgesID);
- // This is a cluster of orthogonal Transforms
- AU.addPreservedID(LowerSwitchID);
}
namespace {
diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
index 8c781f59ff5a4b0..8c0db3bc771b1ea 100644
--- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
+++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
@@ -44,10 +44,8 @@ struct UnifyLoopExitsLegacyPass : public FunctionPass {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequiredID(LowerSwitchID);
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreservedID(LowerSwitchID);
AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
}
@@ -65,7 +63,6 @@ FunctionPass *llvm::createUnifyLoopExitsPass() {
INITIALIZE_PASS_BEGIN(UnifyLoopExitsLegacyPass, "unify-loop-exits",
"Fixup each natural loop to have a single exit block",
false /* Only looks at CFG */, false /* Analysis Pass */)
-INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(UnifyLoopExitsLegacyPass, "unify-loop-exits",
@@ -234,6 +231,9 @@ bool UnifyLoopExitsLegacyPass::runOnFunction(Function &F) {
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ for (auto &BB : F)
+ assert(hasOnlySimpleTerminator(&BB));
+
return runImpl(LI, DT);
}
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index b939c8d2e339de4..bf2f6e983b529f8 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -60,8 +60,6 @@
; GCN-O0-NEXT: Cycle Info Analysis
; GCN-O0-NEXT: Uniformity Analysis
; GCN-O0-NEXT: Unify divergent function exit nodes
-; GCN-O0-NEXT: Lazy Value Information Analysis
-; GCN-O0-NEXT: Lower SwitchInst's to branches
; GCN-O0-NEXT: Dominator Tree Construction
; GCN-O0-NEXT: Natural Loop Information
; GCN-O0-NEXT: Convert irreducible control-flow into natural loops
@@ -251,8 +249,6 @@
; GCN-O1-NEXT: Code sinking
; GCN-O1-NEXT: Post-Dominator Tree Construction
; GCN-O1-NEXT: Unify divergent function exit nodes
-; GCN-O1-NEXT: Lazy Value Information Analysis
-; GCN-O1-NEXT: Lower SwitchInst's to branches
; GCN-O1-NEXT: Dominator Tree Construction
; GCN-O1-NEXT: Natural Loop Information
; GCN-O1-NEXT: Convert irreducible control-flow into natural loops
@@ -537,8 +533,6 @@
; GCN-O1-OPTS-NEXT: Code sinking
; GCN-O1-OPTS-NEXT: Post-Dominator Tree Construction
; GCN-O1-OPTS-NEXT: Unify divergent function exit nodes
-; GCN-O1-OPTS-NEXT: Lazy Value Information Analysis
-; GCN-O1-OPTS-NEXT: Lower SwitchInst's to branches
; GCN-O1-OPTS-NEXT: Dominator Tree Construction
; GCN-O1-OPTS-NEXT: Natural Loop Information
; GCN-O1-OPTS-NEXT: Convert irreducible control-flow into natural loops
@@ -841,8 +835,6 @@
; GCN-O2-NEXT: Code sinking
; GCN-O2-NEXT: Post-Dominator Tree Construction
; GCN-O2-NEXT: Unify divergent function exit nodes
-; GCN-O2-NEXT: Lazy Value Information Analysis
-; GCN-O2-NEXT: Lower SwitchInst's to branches
; GCN-O2-NEXT: Dominator Tree Construction
; GCN-O2-NEXT: Natural Loop Information
; GCN-O2-NEXT: Convert irreducible control-flow into natural loops
@@ -1159,8 +1151,6 @@
; GCN-O3-NEXT: Code sinking
; GCN-O3-NEXT: Post-Dominator Tree Construction
; GCN-O3-NEXT: Unify divergent function exit nodes
-; GCN-O3-NEXT: Lazy Value Information Analysis
-; GCN-O3-NEXT: Lower SwitchInst's to branches
; GCN-O3-NEXT: Dominator Tree Construction
; GCN-O3-NEXT: Natural Loop Information
; GCN-O3-NEXT: Convert irreducible control-flow into natural loops
diff --git a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
index f58bed3b98e97c3..0a68820f902a1e8 100644
--- a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
+++ b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
@@ -1,5 +1,5 @@
-; RUN: opt -mtriple=amdgcn-- -mcpu=gfx600 -S -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=IR %s
-; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -S -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=IR %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx600 -S -lowerswitch -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=IR %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -S -lowerswitch -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=IR %s
; RUN: llc -march=amdgcn -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=GCN %s
; Add an extra verifier runs. There were some cases where invalid IR
diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
index f7479c847559254..8d2a312b3463272 100644
--- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
+++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -mtriple=amdgcn-- -structurizecfg -si-annotate-control-flow < %s | FileCheck -check-prefix=OPT %s
+; RUN: opt -S -mtriple=amdgcn-- -lowerswitch -structurizecfg -si-annotate-control-flow < %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; Ensure two if.break calls, for both the inner and outer loops
diff --git a/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll b/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll
index 56d7fc335911ec6..1eef7b967f6d998 100644
--- a/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
-; RUN: opt -mtriple=amdgcn-amd-amdhsa -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -verify -S %s -o - | FileCheck -check-prefix=IR %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -lowerswitch -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -verify -S %s -o - | FileCheck -check-prefix=IR %s
; A test with a divergent unreachable block and uniform return block. The
; compiler needs to create a regions that includes them so that
diff --git a/llvm/test/Transforms/FixIrreducible/switch.ll b/llvm/test/Transforms/FixIrreducible/switch.ll
index efa431091252287..57b92c08ed3115b 100644
--- a/llvm/test/Transforms/FixIrreducible/switch.ll
+++ b/llvm/test/Transforms/FixIrreducible/switch.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -fix-irreducible -S | FileCheck %s
+; RUN: opt < %s -passes='lowerswitch,fix-irreducible' -S | FileCheck %s
define void @loop_1(i32 %Value, i1 %PredEntry, i1 %PredD) {
; CHECK-LABEL: @loop_1(
diff --git a/llvm/test/Transforms/StructurizeCFG/switch.ll b/llvm/test/Transforms/StructurizeCFG/switch.ll
index 445c0ab0c2d8552..153b6d1914e67a2 100644
--- a/llvm/test/Transforms/StructurizeCFG/switch.ll
+++ b/llvm/test/Transforms/StructurizeCFG/switch.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
+; RUN: opt -S -passes='lowerswitch,structurizecfg' %s -o - | FileCheck %s
; The structurizecfg pass cannot handle switch instructions, so we need to
; make sure the lower switch pass is always run before structurizecfg.
More information about the llvm-commits
mailing list