[llvm] [LowerSwitch] Don't let pass manager handle the dependency (PR #68662)

via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 11 00:24:51 PDT 2023


https://github.com/ruiling updated https://github.com/llvm/llvm-project/pull/68662

>From 5cbd1015bcb652b36acd1b9e3ee68dad3c82929e Mon Sep 17 00:00:00 2001
From: Ruiling Song <ruiling.song at amd.com>
Date: Tue, 10 Oct 2023 13:41:00 +0800
Subject: [PATCH] [LowerSwitch] Don't let pass manager handle the dependency

Some passes has limitation that only support simple terminators:
branch/unreachable/return. Right now, they ask the pass manager to
add LowerSwitch pass to eliminate `switch`. Let's manage such kind of pass
dependency by ourselves. Also add the assertion in the related passes.
---
 llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h   |  2 ++
 .../Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp    |  6 ++++--
 llvm/lib/Transforms/Scalar/StructurizeCFG.cpp          |  6 ++++--
 llvm/lib/Transforms/Utils/BasicBlockUtils.cpp          |  6 ++++++
 llvm/lib/Transforms/Utils/FixIrreducible.cpp           |  6 +++---
 llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp   |  2 --
 llvm/lib/Transforms/Utils/UnifyLoopExits.cpp           |  6 +++---
 llvm/test/CodeGen/AMDGPU/llc-pipeline.ll               | 10 ----------
 .../test/CodeGen/AMDGPU/multi-divergent-exit-region.ll |  4 ++--
 llvm/test/CodeGen/AMDGPU/multilevel-break.ll           |  2 +-
 .../CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll |  2 +-
 llvm/test/Transforms/FixIrreducible/switch.ll          |  2 +-
 llvm/test/Transforms/StructurizeCFG/switch.ll          |  2 +-
 13 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 1ac0f053f67b509..c9feed08b90827e 100644
--- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -702,6 +702,8 @@ BasicBlock *CreateControlFlowHub(
 // successors
 void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder);
 
+// Check whether the block only has simple terminator: br/brcond/unreachable/ret
+bool hasOnlySimpleTerminator(const BasicBlock *BB);
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_UTILS_BASICBLOCKUTILS_H
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index 9ad841c3c8a54a5..3cdaac69ac9e1d4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -46,6 +46,7 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 
 using namespace llvm;
@@ -114,8 +115,6 @@ void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const {
   // We preserve the non-critical-edgeness property
   AU.addPreservedID(BreakCriticalEdgesID);
 
-  // This is a cluster of orthogonal Transforms
-  AU.addPreservedID(LowerSwitchID);
   FunctionPass::getAnalysisUsage(AU);
 
   AU.addRequired<TargetTransformInfoWrapperPass>();
@@ -192,6 +191,9 @@ BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet(
 bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
                                             const PostDominatorTree &PDT,
                                             const UniformityInfo &UA) {
+  for (auto &BB : F)
+    assert(hasOnlySimpleTerminator(&BB));
+
   if (PDT.root_size() == 0 ||
       (PDT.root_size() == 1 &&
        !isa<BranchInst>(PDT.getRoot()->getTerminator())))
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index fac5695c7beaf17..b5f003911b4e6f9 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -42,6 +42,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
 #include <algorithm>
@@ -353,7 +354,6 @@ class StructurizeCFGLegacyPass : public RegionPass {
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     if (SkipUniformRegions)
       AU.addRequired<UniformityInfoWrapperPass>();
-    AU.addRequiredID(LowerSwitchID);
     AU.addRequired<DominatorTreeWrapperPass>();
 
     AU.addPreserved<DominatorTreeWrapperPass>();
@@ -368,7 +368,6 @@ char StructurizeCFGLegacyPass::ID = 0;
 INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
                       "Structurize the CFG", false, false)
 INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
 INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
@@ -1173,6 +1172,9 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
   this->DT = DT;
 
   Func = R->getEntry()->getParent();
+  for (auto &BB : *Func)
+    assert(hasOnlySimpleTerminator(&BB));
+
   ParentRegion = R;
 
   orderNodes();
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 389aab39dec3409..5adc6fd6cc29e3b 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -2076,3 +2076,9 @@ void llvm::InvertBranch(BranchInst *PBI, IRBuilderBase &Builder) {
   PBI->setCondition(NewCond);
   PBI->swapSuccessors();
 }
+
+bool llvm::hasOnlySimpleTerminator(const BasicBlock *BB) {
+  auto *Term = BB->getTerminator();
+  return isa<ReturnInst>(Term) || isa<UnreachableInst>(Term) ||
+         isa<BranchInst>(Term);
+}
diff --git a/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
index dda236167363db8..8192a63aacee977 100644
--- a/llvm/lib/Transforms/Utils/FixIrreducible.cpp
+++ b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
@@ -87,10 +87,8 @@ struct FixIrreducible : public FunctionPass {
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequiredID(LowerSwitchID);
     AU.addRequired<DominatorTreeWrapperPass>();
     AU.addRequired<LoopInfoWrapperPass>();
-    AU.addPreservedID(LowerSwitchID);
     AU.addPreserved<DominatorTreeWrapperPass>();
     AU.addPreserved<LoopInfoWrapperPass>();
   }
@@ -106,7 +104,6 @@ FunctionPass *llvm::createFixIrreduciblePass() { return new FixIrreducible(); }
 INITIALIZE_PASS_BEGIN(FixIrreducible, "fix-irreducible",
                       "Convert irreducible control-flow into natural loops",
                       false /* Only looks at CFG */, false /* Analysis Pass */)
-INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
 INITIALIZE_PASS_END(FixIrreducible, "fix-irreducible",
@@ -317,6 +314,9 @@ static bool FixIrreducibleImpl(Function &F, LoopInfo &LI, DominatorTree &DT) {
   LLVM_DEBUG(dbgs() << "===== Fix irreducible control-flow in function: "
                     << F.getName() << "\n");
 
+  for (auto &BB : F)
+    assert(hasOnlySimpleTerminator(&BB));
+
   bool Changed = false;
   SmallVector<Loop *, 8> WorkList;
 
diff --git a/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 2b706858cbedd25..e5adbe5133cf457 100644
--- a/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -39,8 +39,6 @@ void UnifyFunctionExitNodesLegacyPass::getAnalysisUsage(
     AnalysisUsage &AU) const {
   // We preserve the non-critical-edgeness property
   AU.addPreservedID(BreakCriticalEdgesID);
-  // This is a cluster of orthogonal Transforms
-  AU.addPreservedID(LowerSwitchID);
 }
 
 namespace {
diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
index 8c781f59ff5a4b0..8c0db3bc771b1ea 100644
--- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
+++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
@@ -44,10 +44,8 @@ struct UnifyLoopExitsLegacyPass : public FunctionPass {
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequiredID(LowerSwitchID);
     AU.addRequired<LoopInfoWrapperPass>();
     AU.addRequired<DominatorTreeWrapperPass>();
-    AU.addPreservedID(LowerSwitchID);
     AU.addPreserved<LoopInfoWrapperPass>();
     AU.addPreserved<DominatorTreeWrapperPass>();
   }
@@ -65,7 +63,6 @@ FunctionPass *llvm::createUnifyLoopExitsPass() {
 INITIALIZE_PASS_BEGIN(UnifyLoopExitsLegacyPass, "unify-loop-exits",
                       "Fixup each natural loop to have a single exit block",
                       false /* Only looks at CFG */, false /* Analysis Pass */)
-INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
 INITIALIZE_PASS_END(UnifyLoopExitsLegacyPass, "unify-loop-exits",
@@ -234,6 +231,9 @@ bool UnifyLoopExitsLegacyPass::runOnFunction(Function &F) {
   auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
 
+  for (auto &BB : F)
+    assert(hasOnlySimpleTerminator(&BB));
+
   return runImpl(LI, DT);
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index b939c8d2e339de4..bf2f6e983b529f8 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -60,8 +60,6 @@
 ; GCN-O0-NEXT:        Cycle Info Analysis
 ; GCN-O0-NEXT:        Uniformity Analysis
 ; GCN-O0-NEXT:        Unify divergent function exit nodes
-; GCN-O0-NEXT:        Lazy Value Information Analysis
-; GCN-O0-NEXT:        Lower SwitchInst's to branches
 ; GCN-O0-NEXT:        Dominator Tree Construction
 ; GCN-O0-NEXT:        Natural Loop Information
 ; GCN-O0-NEXT:        Convert irreducible control-flow into natural loops
@@ -251,8 +249,6 @@
 ; GCN-O1-NEXT:        Code sinking
 ; GCN-O1-NEXT:        Post-Dominator Tree Construction
 ; GCN-O1-NEXT:        Unify divergent function exit nodes
-; GCN-O1-NEXT:        Lazy Value Information Analysis
-; GCN-O1-NEXT:        Lower SwitchInst's to branches
 ; GCN-O1-NEXT:        Dominator Tree Construction
 ; GCN-O1-NEXT:        Natural Loop Information
 ; GCN-O1-NEXT:        Convert irreducible control-flow into natural loops
@@ -537,8 +533,6 @@
 ; GCN-O1-OPTS-NEXT:        Code sinking
 ; GCN-O1-OPTS-NEXT:        Post-Dominator Tree Construction
 ; GCN-O1-OPTS-NEXT:        Unify divergent function exit nodes
-; GCN-O1-OPTS-NEXT:        Lazy Value Information Analysis
-; GCN-O1-OPTS-NEXT:        Lower SwitchInst's to branches
 ; GCN-O1-OPTS-NEXT:        Dominator Tree Construction
 ; GCN-O1-OPTS-NEXT:        Natural Loop Information
 ; GCN-O1-OPTS-NEXT:        Convert irreducible control-flow into natural loops
@@ -841,8 +835,6 @@
 ; GCN-O2-NEXT:        Code sinking
 ; GCN-O2-NEXT:        Post-Dominator Tree Construction
 ; GCN-O2-NEXT:        Unify divergent function exit nodes
-; GCN-O2-NEXT:        Lazy Value Information Analysis
-; GCN-O2-NEXT:        Lower SwitchInst's to branches
 ; GCN-O2-NEXT:        Dominator Tree Construction
 ; GCN-O2-NEXT:        Natural Loop Information
 ; GCN-O2-NEXT:        Convert irreducible control-flow into natural loops
@@ -1159,8 +1151,6 @@
 ; GCN-O3-NEXT:        Code sinking
 ; GCN-O3-NEXT:        Post-Dominator Tree Construction
 ; GCN-O3-NEXT:        Unify divergent function exit nodes
-; GCN-O3-NEXT:        Lazy Value Information Analysis
-; GCN-O3-NEXT:        Lower SwitchInst's to branches
 ; GCN-O3-NEXT:        Dominator Tree Construction
 ; GCN-O3-NEXT:        Natural Loop Information
 ; GCN-O3-NEXT:        Convert irreducible control-flow into natural loops
diff --git a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
index f58bed3b98e97c3..0a68820f902a1e8 100644
--- a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
+++ b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
@@ -1,5 +1,5 @@
-; RUN: opt -mtriple=amdgcn-- -mcpu=gfx600 -S -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=IR %s
-; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -S -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=IR %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx600 -S -lowerswitch -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=IR %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -S -lowerswitch -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=IR %s
 ; RUN: llc -march=amdgcn -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=GCN %s
 
 ; Add an extra verifier runs. There were some cases where invalid IR
diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
index f7479c847559254..8d2a312b3463272 100644
--- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
+++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -mtriple=amdgcn-- -structurizecfg -si-annotate-control-flow < %s | FileCheck -check-prefix=OPT %s
+; RUN: opt -S -mtriple=amdgcn-- -lowerswitch -structurizecfg -si-annotate-control-flow < %s | FileCheck -check-prefix=OPT %s
 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
 ; Ensure two if.break calls, for both the inner and outer loops
diff --git a/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll b/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll
index 56d7fc335911ec6..1eef7b967f6d998 100644
--- a/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
-; RUN: opt -mtriple=amdgcn-amd-amdhsa -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -verify -S %s -o - | FileCheck -check-prefix=IR %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -lowerswitch -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -verify -S %s -o - | FileCheck -check-prefix=IR %s
 
 ; A test with a divergent unreachable block and uniform return block. The
 ; compiler needs to create a regions that includes them so that
diff --git a/llvm/test/Transforms/FixIrreducible/switch.ll b/llvm/test/Transforms/FixIrreducible/switch.ll
index efa431091252287..57b92c08ed3115b 100644
--- a/llvm/test/Transforms/FixIrreducible/switch.ll
+++ b/llvm/test/Transforms/FixIrreducible/switch.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -fix-irreducible -S | FileCheck %s
+; RUN: opt < %s -passes='lowerswitch,fix-irreducible' -S | FileCheck %s
 
 define void @loop_1(i32 %Value, i1 %PredEntry, i1 %PredD) {
 ; CHECK-LABEL: @loop_1(
diff --git a/llvm/test/Transforms/StructurizeCFG/switch.ll b/llvm/test/Transforms/StructurizeCFG/switch.ll
index 445c0ab0c2d8552..153b6d1914e67a2 100644
--- a/llvm/test/Transforms/StructurizeCFG/switch.ll
+++ b/llvm/test/Transforms/StructurizeCFG/switch.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
+; RUN: opt -S -passes='lowerswitch,structurizecfg' %s -o - | FileCheck %s
 
 ; The structurizecfg pass cannot handle switch instructions, so we need to
 ; make sure the lower switch pass is always run before structurizecfg.



More information about the llvm-commits mailing list