[llvm] 11bf7da - [NewPM] Introduce (GPU)DivergenceAnalysis in the new pass manager
Sameer Sahasrabuddhe via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 15 20:57:51 PST 2021
Author: Sameer Sahasrabuddhe
Date: 2021-02-16T10:26:45+05:30
New Revision: 11bf7da64a11dbae422ff322f629be6950f9bfb7
URL: https://github.com/llvm/llvm-project/commit/11bf7da64a11dbae422ff322f629be6950f9bfb7
DIFF: https://github.com/llvm/llvm-project/commit/11bf7da64a11dbae422ff322f629be6950f9bfb7.diff
LOG: [NewPM] Introduce (GPU)DivergenceAnalysis in the new pass manager
The GPUDivergenceAnalysis is now renamed to just "DivergenceAnalysis"
since there is no conflict with LegacyDivergenceAnalysis. In the
legacy PM, this analysis can only be used through the legacy DA, which
serves as a wrapper. It is now made available as a pass in the new
PM, and has no relation to the legacy DA.
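As a rough sketch (not part of this patch), a new-PM client pass could
consume the result as shown below; the consumer pass itself is
hypothetical, and only DivergenceAnalysis/DivergenceInfo come from this
change:

  #include "llvm/Analysis/DivergenceAnalysis.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/InstIterator.h"
  #include "llvm/IR/PassManager.h"
  using namespace llvm;

  // Hypothetical consumer of the new analysis result.
  struct DivergenceConsumerPass : PassInfoMixin<DivergenceConsumerPass> {
    PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
      // DivergenceInfo is the Result type of the new DivergenceAnalysis.
      DivergenceInfo &DI = FAM.getResult<DivergenceAnalysis>(F);
      for (Instruction &I : instructions(F)) {
        if (DI.isDivergent(I)) {
          // ... handle divergent instructions here ...
        }
      }
      return PreservedAnalyses::all();
    }
  };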
The new DA currently cannot handle irreducible control flow; when it
is present, the analysis can run indefinitely. The analysis is now
modified to detect irreducible control flow and report all
instructions in the function as divergent. This is extremely
conservative, but it allows the analysis to be used without hanging
the compiler.
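Under the new PM, the analysis is registered as "divergence" and its
printer as "print<divergence>" (see PassRegistry.def below), so the
conservative result can be inspected directly from opt, e.g. (file
name is just a placeholder, mirroring the RUN lines added to the
tests):

  opt -passes='print<divergence>' -disable-output input.ll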
Reviewed By: aeubanks
Differential Revision: https://reviews.llvm.org/D96615
Added:
Modified:
llvm/include/llvm/Analysis/DivergenceAnalysis.h
llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h
llvm/lib/Analysis/DivergenceAnalysis.cpp
llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
llvm/lib/Passes/PassBuilder.cpp
llvm/lib/Passes/PassRegistry.def
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/interp_f16.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-exit.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/propagate-loop-live-out.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/temporal_diverge.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/unreachable-loop-block.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll
llvm/test/Analysis/DivergenceAnalysis/NVPTX/daorder.ll
llvm/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll
llvm/test/Analysis/DivergenceAnalysis/NVPTX/hidden_diverge.ll
llvm/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll
llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll
llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll
llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll
llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll
llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll
llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll
llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll
llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll
llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll
llvm/test/Analysis/LegacyDivergenceAnalysis/NVPTX/diverge.ll
llvm/tools/opt/opt.cpp
llvm/unittests/Analysis/DivergenceAnalysisTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/DivergenceAnalysis.h b/llvm/include/llvm/Analysis/DivergenceAnalysis.h
index a6530b2eb493..0b36ef35aa59 100644
--- a/llvm/include/llvm/Analysis/DivergenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/DivergenceAnalysis.h
@@ -34,7 +34,7 @@ class TargetTransformInfo;
/// This analysis propagates divergence in a data-parallel context from sources
/// of divergence to all users. It requires reducible CFGs. All assignments
/// should be in SSA form.
-class DivergenceAnalysis {
+class DivergenceAnalysisImpl {
public:
/// \brief This instance will analyze the whole function \p F or the loop \p
/// RegionLoop.
@@ -43,9 +43,9 @@ class DivergenceAnalysis {
/// Otherwise the whole function is analyzed.
/// \param IsLCSSAForm whether the analysis may assume that the IR in the
/// region is in LCSSA form.
- DivergenceAnalysis(const Function &F, const Loop *RegionLoop,
- const DominatorTree &DT, const LoopInfo &LI,
- SyncDependenceAnalysis &SDA, bool IsLCSSAForm);
+ DivergenceAnalysisImpl(const Function &F, const Loop *RegionLoop,
+ const DominatorTree &DT, const LoopInfo &LI,
+ SyncDependenceAnalysis &SDA, bool IsLCSSAForm);
/// \brief The loop that defines the analyzed region (if any).
const Loop *getRegionLoop() const { return RegionLoop; }
@@ -82,8 +82,6 @@ class DivergenceAnalysis {
/// divergent.
bool isDivergentUse(const Use &U) const;
- void print(raw_ostream &OS, const Module *) const;
-
private:
/// \brief Mark \p Term as divergent and push all Instructions that become
/// divergent as a result on the worklist.
@@ -152,28 +150,39 @@ class DivergenceAnalysis {
std::vector<const Instruction *> Worklist;
};
-/// \brief Divergence analysis frontend for GPU kernels.
-class GPUDivergenceAnalysis {
- SyncDependenceAnalysis SDA;
- DivergenceAnalysis DA;
+class DivergenceInfo {
+ Function &F;
+
+ // If the function contains an irreducible region the divergence
+ // analysis can run indefinitely. We set ContainsIrreducible and no
+ // analysis is actually performed on the function. All values in
+ // this function are conservatively reported as divergent instead.
+ bool ContainsIrreducible;
+ std::unique_ptr<SyncDependenceAnalysis> SDA;
+ std::unique_ptr<DivergenceAnalysisImpl> DA;
public:
- /// Runs the divergence analysis on @F, a GPU kernel
- GPUDivergenceAnalysis(Function &F, const DominatorTree &DT,
- const PostDominatorTree &PDT, const LoopInfo &LI,
- const TargetTransformInfo &TTI);
+ DivergenceInfo(Function &F, const DominatorTree &DT,
+ const PostDominatorTree &PDT, const LoopInfo &LI,
+ const TargetTransformInfo &TTI, bool KnownReducible);
/// Whether any divergence was detected.
- bool hasDivergence() const { return DA.hasDetectedDivergence(); }
+ bool hasDivergence() const {
+ return ContainsIrreducible || DA->hasDetectedDivergence();
+ }
/// The GPU kernel this analysis result is for
- const Function &getFunction() const { return DA.getFunction(); }
+ const Function &getFunction() const { return F; }
/// Whether \p V is divergent at its definition.
- bool isDivergent(const Value &V) const;
+ bool isDivergent(const Value &V) const {
+ return ContainsIrreducible || DA->isDivergent(V);
+ }
/// Whether \p U is divergent. Uses of a uniform value can be divergent.
- bool isDivergentUse(const Use &U) const;
+ bool isDivergentUse(const Use &U) const {
+ return ContainsIrreducible || DA->isDivergentUse(U);
+ }
/// Whether \p V is uniform/non-divergent.
bool isUniform(const Value &V) const { return !isDivergent(V); }
@@ -181,11 +190,32 @@ class GPUDivergenceAnalysis {
/// Whether \p U is uniform/non-divergent. Uses of a uniform value can be
/// divergent.
bool isUniformUse(const Use &U) const { return !isDivergentUse(U); }
+};
- /// Print all divergent values in the kernel.
- void print(raw_ostream &OS, const Module *) const;
+/// \brief Divergence analysis frontend for GPU kernels.
+class DivergenceAnalysis : public AnalysisInfoMixin<DivergenceAnalysis> {
+ friend AnalysisInfoMixin<DivergenceAnalysis>;
+
+ static AnalysisKey Key;
+
+public:
+ using Result = DivergenceInfo;
+
+ /// Runs the divergence analysis on @F, a GPU kernel
+ Result run(Function &F, FunctionAnalysisManager &AM);
};
+/// Printer pass to dump divergence analysis results.
+struct DivergenceAnalysisPrinterPass
+ : public PassInfoMixin<DivergenceAnalysisPrinterPass> {
+ DivergenceAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {}
+
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+
+private:
+ raw_ostream &OS;
+}; // class DivergenceAnalysisPrinterPass
+
} // namespace llvm
#endif // LLVM_ANALYSIS_DIVERGENCEANALYSIS_H
diff --git a/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h
index 6215af3d3236..0132c88077d2 100644
--- a/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h
@@ -20,8 +20,8 @@
#include <memory>
namespace llvm {
+class DivergenceInfo;
class Function;
-class GPUDivergenceAnalysis;
class Module;
class raw_ostream;
class TargetTransformInfo;
@@ -63,7 +63,7 @@ class LegacyDivergenceAnalysis : public FunctionPass {
const TargetTransformInfo &TTI) const;
// (optional) handle to new DivergenceAnalysis
- std::unique_ptr<GPUDivergenceAnalysis> gpuDA;
+ std::unique_ptr<DivergenceInfo> gpuDA;
// Stores all divergent values.
DenseSet<const Value *> DivergentValues;
diff --git a/llvm/lib/Analysis/DivergenceAnalysis.cpp b/llvm/lib/Analysis/DivergenceAnalysis.cpp
index 287c13278014..81120b3fe928 100644
--- a/llvm/lib/Analysis/DivergenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DivergenceAnalysis.cpp
@@ -31,10 +31,10 @@
// Ralf Karrenberg and Sebastian Hack
// CC '12
//
-// This DivergenceAnalysis implementation is generic in the sense that it does
+// This implementation is generic in the sense that it does
// not itself identify original sources of divergence.
// Instead specialized adapter classes, (LoopDivergenceAnalysis) for loops and
-// (GPUDivergenceAnalysis) for GPU programs, identify the sources of divergence
+// (DivergenceAnalysis) for functions, identify the sources of divergence
// (e.g., special variables that hold the thread ID or the iteration variable).
//
// The generic implementation propagates divergence to variables that are data
@@ -61,7 +61,7 @@
// The sync dependence detection (which branch induces divergence in which join
// points) is implemented in the SyncDependenceAnalysis.
//
-// The current DivergenceAnalysis implementation has the following limitations:
+// The current implementation has the following limitations:
// 1. intra-procedural. It conservatively considers the arguments of a
// non-kernel-entry function and the return value of a function call as
// divergent.
@@ -73,6 +73,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/PostDominators.h"
@@ -87,16 +88,15 @@
using namespace llvm;
-#define DEBUG_TYPE "divergence-analysis"
+#define DEBUG_TYPE "divergence"
-// class DivergenceAnalysis
-DivergenceAnalysis::DivergenceAnalysis(
+DivergenceAnalysisImpl::DivergenceAnalysisImpl(
const Function &F, const Loop *RegionLoop, const DominatorTree &DT,
const LoopInfo &LI, SyncDependenceAnalysis &SDA, bool IsLCSSAForm)
: F(F), RegionLoop(RegionLoop), DT(DT), LI(LI), SDA(SDA),
IsLCSSAForm(IsLCSSAForm) {}
-bool DivergenceAnalysis::markDivergent(const Value &DivVal) {
+bool DivergenceAnalysisImpl::markDivergent(const Value &DivVal) {
if (isAlwaysUniform(DivVal))
return false;
assert(isa<Instruction>(DivVal) || isa<Argument>(DivVal));
@@ -104,12 +104,12 @@ bool DivergenceAnalysis::markDivergent(const Value &DivVal) {
return DivergentValues.insert(&DivVal).second;
}
-void DivergenceAnalysis::addUniformOverride(const Value &UniVal) {
+void DivergenceAnalysisImpl::addUniformOverride(const Value &UniVal) {
UniformOverrides.insert(&UniVal);
}
-bool DivergenceAnalysis::isTemporalDivergent(const BasicBlock &ObservingBlock,
- const Value &Val) const {
+bool DivergenceAnalysisImpl::isTemporalDivergent(
+ const BasicBlock &ObservingBlock, const Value &Val) const {
const auto *Inst = dyn_cast<const Instruction>(&Val);
if (!Inst)
return false;
@@ -125,15 +125,15 @@ bool DivergenceAnalysis::isTemporalDivergent(const BasicBlock &ObservingBlock,
return false;
}
-bool DivergenceAnalysis::inRegion(const Instruction &I) const {
+bool DivergenceAnalysisImpl::inRegion(const Instruction &I) const {
return I.getParent() && inRegion(*I.getParent());
}
-bool DivergenceAnalysis::inRegion(const BasicBlock &BB) const {
+bool DivergenceAnalysisImpl::inRegion(const BasicBlock &BB) const {
return (!RegionLoop && BB.getParent() == &F) || RegionLoop->contains(&BB);
}
-void DivergenceAnalysis::pushUsers(const Value &V) {
+void DivergenceAnalysisImpl::pushUsers(const Value &V) {
const auto *I = dyn_cast<const Instruction>(&V);
if (I && I->isTerminator()) {
@@ -166,8 +166,8 @@ static const Instruction *getIfCarriedInstruction(const Use &U,
return I;
}
-void DivergenceAnalysis::analyzeTemporalDivergence(const Instruction &I,
- const Loop &OuterDivLoop) {
+void DivergenceAnalysisImpl::analyzeTemporalDivergence(
+ const Instruction &I, const Loop &OuterDivLoop) {
if (isAlwaysUniform(I))
return;
if (isDivergent(I))
@@ -188,8 +188,8 @@ void DivergenceAnalysis::analyzeTemporalDivergence(const Instruction &I,
// marks all users of loop-carried values of the loop headed by LoopHeader as
// divergent
-void DivergenceAnalysis::analyzeLoopExitDivergence(const BasicBlock &DivExit,
- const Loop &OuterDivLoop) {
+void DivergenceAnalysisImpl::analyzeLoopExitDivergence(
+ const BasicBlock &DivExit, const Loop &OuterDivLoop) {
// All users are in immediate exit blocks
if (IsLCSSAForm) {
for (const auto &Phi : DivExit.phis()) {
@@ -242,8 +242,8 @@ void DivergenceAnalysis::analyzeLoopExitDivergence(const BasicBlock &DivExit,
} while (!TaintStack.empty());
}
-void DivergenceAnalysis::propagateLoopExitDivergence(const BasicBlock &DivExit,
- const Loop &InnerDivLoop) {
+void DivergenceAnalysisImpl::propagateLoopExitDivergence(
+ const BasicBlock &DivExit, const Loop &InnerDivLoop) {
LLVM_DEBUG(dbgs() << "\tpropLoopExitDiv " << DivExit.getName() << "\n");
// Find outer-most loop that does not contain \p DivExit
@@ -265,7 +265,7 @@ void DivergenceAnalysis::propagateLoopExitDivergence(const BasicBlock &DivExit,
// this is a divergent join point - mark all phi nodes as divergent and push
// them onto the stack.
-void DivergenceAnalysis::taintAndPushPhiNodes(const BasicBlock &JoinBlock) {
+void DivergenceAnalysisImpl::taintAndPushPhiNodes(const BasicBlock &JoinBlock) {
LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << JoinBlock.getName()
<< "\n");
@@ -287,7 +287,7 @@ void DivergenceAnalysis::taintAndPushPhiNodes(const BasicBlock &JoinBlock) {
}
}
-void DivergenceAnalysis::analyzeControlDivergence(const Instruction &Term) {
+void DivergenceAnalysisImpl::analyzeControlDivergence(const Instruction &Term) {
LLVM_DEBUG(dbgs() << "analyzeControlDiv " << Term.getParent()->getName()
<< "\n");
@@ -310,7 +310,7 @@ void DivergenceAnalysis::analyzeControlDivergence(const Instruction &Term) {
}
}
-void DivergenceAnalysis::compute() {
+void DivergenceAnalysisImpl::compute() {
// Initialize worklist.
auto DivValuesCopy = DivergentValues;
for (const auto *DivVal : DivValuesCopy) {
@@ -330,63 +330,82 @@ void DivergenceAnalysis::compute() {
}
}
-bool DivergenceAnalysis::isAlwaysUniform(const Value &V) const {
+bool DivergenceAnalysisImpl::isAlwaysUniform(const Value &V) const {
return UniformOverrides.contains(&V);
}
-bool DivergenceAnalysis::isDivergent(const Value &V) const {
+bool DivergenceAnalysisImpl::isDivergent(const Value &V) const {
return DivergentValues.contains(&V);
}
-bool DivergenceAnalysis::isDivergentUse(const Use &U) const {
+bool DivergenceAnalysisImpl::isDivergentUse(const Use &U) const {
Value &V = *U.get();
Instruction &I = *cast<Instruction>(U.getUser());
return isDivergent(V) || isTemporalDivergent(*I.getParent(), V);
}
-void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
- if (DivergentValues.empty())
- return;
- // iterate instructions using instructions() to ensure a deterministic order.
- for (auto &I : instructions(F)) {
- if (isDivergent(I))
- OS << "DIVERGENT:" << I << '\n';
+DivergenceInfo::DivergenceInfo(Function &F, const DominatorTree &DT,
+ const PostDominatorTree &PDT, const LoopInfo &LI,
+ const TargetTransformInfo &TTI,
+ bool KnownReducible)
+ : F(F), ContainsIrreducible(false) {
+ if (!KnownReducible) {
+ using RPOTraversal = ReversePostOrderTraversal<const Function *>;
+ RPOTraversal FuncRPOT(&F);
+ if (containsIrreducibleCFG<const BasicBlock *, const RPOTraversal,
+ const LoopInfo>(FuncRPOT, LI)) {
+ ContainsIrreducible = true;
+ return;
+ }
}
-}
-
-// class GPUDivergenceAnalysis
-GPUDivergenceAnalysis::GPUDivergenceAnalysis(Function &F,
- const DominatorTree &DT,
- const PostDominatorTree &PDT,
- const LoopInfo &LI,
- const TargetTransformInfo &TTI)
- : SDA(DT, PDT, LI), DA(F, nullptr, DT, LI, SDA, /* LCSSA */ false) {
+ SDA = std::make_unique<SyncDependenceAnalysis>(DT, PDT, LI);
+ DA = std::make_unique<DivergenceAnalysisImpl>(F, nullptr, DT, LI, *SDA,
+ /* LCSSA */ false);
for (auto &I : instructions(F)) {
if (TTI.isSourceOfDivergence(&I)) {
- DA.markDivergent(I);
+ DA->markDivergent(I);
} else if (TTI.isAlwaysUniform(&I)) {
- DA.addUniformOverride(I);
+ DA->addUniformOverride(I);
}
}
for (auto &Arg : F.args()) {
if (TTI.isSourceOfDivergence(&Arg)) {
- DA.markDivergent(Arg);
+ DA->markDivergent(Arg);
}
}
- DA.compute();
+ DA->compute();
}
-bool GPUDivergenceAnalysis::isDivergent(const Value &val) const {
- return DA.isDivergent(val);
-}
+AnalysisKey DivergenceAnalysis::Key;
-bool GPUDivergenceAnalysis::isDivergentUse(const Use &use) const {
- return DA.isDivergentUse(use);
+DivergenceAnalysis::Result
+DivergenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+
+ return DivergenceInfo(F, DT, PDT, LI, TTI, /* KnownReducible = */ false);
}
-void GPUDivergenceAnalysis::print(raw_ostream &OS, const Module *mod) const {
- OS << "Divergence of kernel " << DA.getFunction().getName() << " {\n";
- DA.print(OS, mod);
- OS << "}\n";
+PreservedAnalyses
+DivergenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) {
+ auto &DI = FAM.getResult<DivergenceAnalysis>(F);
+ OS << "'Divergence Analysis' for function '" << F.getName() << "':\n";
+ if (DI.hasDivergence()) {
+ for (auto &Arg : F.args()) {
+ OS << (DI.isDivergent(Arg) ? "DIVERGENT: " : " ");
+ OS << Arg << "\n";
+ }
+ for (auto BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+ auto &BB = *BI;
+ OS << "\n " << BB.getName() << ":\n";
+ for (auto &I : BB.instructionsWithoutDebug()) {
+ OS << (DI.isDivergent(I) ? "DIVERGENT: " : " ");
+ OS << I << "\n";
+ }
+ }
+ }
+ return PreservedAnalyses::all();
}
diff --git a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
index be8a18a21f23..031bf3bae51d 100644
--- a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
+++ b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
@@ -339,7 +339,8 @@ bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
if (shouldUseGPUDivergenceAnalysis(F, TTI)) {
// run the new GPU divergence analysis
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- gpuDA = std::make_unique<GPUDivergenceAnalysis>(F, DT, PDT, LI, TTI);
+ gpuDA = std::make_unique<DivergenceInfo>(F, DT, PDT, LI, TTI,
+ /* KnownReducible = */ true);
} else {
// run LLVM's existing DivergenceAnalysis
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 92cabeaafa39..f0e9f475b9d6 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -32,6 +32,7 @@
#include "llvm/Analysis/Delinearization.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/DominanceFrontier.h"
#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 877cb9ed13b3..5d8a1a076343 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -172,6 +172,7 @@ FUNCTION_ANALYSIS("targetir",
TM ? TM->getTargetIRAnalysis() : TargetIRAnalysis())
FUNCTION_ANALYSIS("verify", VerifierAnalysis())
FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
+FUNCTION_ANALYSIS("divergence", DivergenceAnalysis())
#ifndef FUNCTION_ALIAS_ANALYSIS
#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
@@ -273,6 +274,7 @@ FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs()))
FUNCTION_PASS("print<block-freq>", BlockFrequencyPrinterPass(dbgs()))
FUNCTION_PASS("print<branch-prob>", BranchProbabilityPrinterPass(dbgs()))
FUNCTION_PASS("print<da>", DependenceAnalysisPrinterPass(dbgs()))
+FUNCTION_PASS("print<divergence>", DivergenceAnalysisPrinterPass(dbgs()))
FUNCTION_PASS("print<domtree>", DominatorTreePrinterPass(dbgs()))
FUNCTION_PASS("print<postdomtree>", PostDominatorTreePrinterPass(dbgs()))
FUNCTION_PASS("print<delinearization>", DelinearizationPrinterPass(dbgs()))
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll
index 3d9485534091..13c7ba78f915 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll
@@ -1,4 +1,5 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; CHECK-LABEL: for function 'readfirstlane':
define amdgpu_kernel void @readfirstlane() {
@@ -39,7 +40,7 @@ define i32 @asm_sgpr(i32 %divergent) {
ret i32 %sgpr
}
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'asm_mixed_sgpr_vgpr':
+; CHECK-LABEL: Divergence Analysis' for function 'asm_mixed_sgpr_vgpr':
; CHECK: DIVERGENT: %asm = call { i32, i32 } asm "; def $0, $1, $2", "=s,=v,v"(i32 %divergent)
; CHECK-NEXT: {{^[ \t]+}}%sgpr = extractvalue { i32, i32 } %asm, 0
; CHECK-NEXT: DIVERGENT: %vgpr = extractvalue { i32, i32 } %asm, 1
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll
index 521d528d7952..97ead49a81aa 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll
@@ -1,4 +1,5 @@
-; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; CHECK: DIVERGENT: %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst
define i32 @test1(i32* %ptr, i32 %val) #0 {
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll
index cb3e42de363a..8fc86e95bb03 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll
@@ -1,10 +1,11 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
declare i32 @gf2(i32)
declare i32 @gf1(i32)
define void @tw1(i32 addrspace(4)* noalias nocapture readonly %A, i32 addrspace(4)* noalias nocapture %B) local_unnamed_addr #2 {
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'tw1':
+; CHECK: Divergence Analysis' for function 'tw1':
; CHECK: DIVERGENT: i32 addrspace(4)* %A
; CHECK: DIVERGENT: i32 addrspace(4)* %B
entry:
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll
index 9446a7e8e9f0..88503d70ca4e 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll
@@ -1,8 +1,9 @@
-; RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-mesa-mesa3d -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; Tests control flow intrinsics that should be treated as uniform
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if_break':
+; CHECK: Divergence Analysis' for function 'test_if_break':
; CHECK: DIVERGENT: %cond = icmp eq i32 %arg0, 0
; CHECK-NOT: DIVERGENT
; CHECK: ret void
@@ -14,7 +15,7 @@ entry:
ret void
}
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if':
+; CHECK: Divergence Analysis' for function 'test_if':
; CHECK: DIVERGENT: %cond = icmp eq i32 %arg0, 0
; CHECK-NEXT: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond)
; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0
@@ -33,7 +34,7 @@ entry:
}
; The result should still be treated as divergent, even with a uniform source.
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if_uniform':
+; CHECK: Divergence Analysis' for function 'test_if_uniform':
; CHECK-NOT: DIVERGENT
; CHECK: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond)
; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0
@@ -51,7 +52,7 @@ entry:
ret void
}
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_loop_uniform':
+; CHECK: Divergence Analysis' for function 'test_loop_uniform':
; CHECK: DIVERGENT: %loop = call i1 @llvm.amdgcn.loop.i64(i64 %mask)
define amdgpu_ps void @test_loop_uniform(i64 inreg %mask) {
entry:
@@ -61,7 +62,7 @@ entry:
ret void
}
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_else':
+; CHECK: Divergence Analysis' for function 'test_else':
; CHECK: DIVERGENT: %else = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %mask)
; CHECK: DIVERGENT: %else.bool = extractvalue { i1, i64 } %else, 0
; CHECK: {{^[ \t]+}}%else.mask = extractvalue { i1, i64 } %else, 1
@@ -77,7 +78,7 @@ entry:
}
; This case is probably always broken
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_else_divergent_mask':
+; CHECK: Divergence Analysis' for function 'test_else_divergent_mask':
; CHECK: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %mask)
; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0
; CHECK-NOT: DIVERGENT
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll
index 889553d34712..767e2c1bd66e 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll
@@ -1,7 +1,7 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
define amdgpu_kernel void @hidden_diverge(i32 %n, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_diverge'
+; CHECK-LABEL: 'Divergence Analysis' for function 'hidden_diverge'
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%cond.var = icmp slt i32 %tid, 0
@@ -22,7 +22,7 @@ merge:
}
define amdgpu_kernel void @hidden_loop_ipd(i32 %n, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_loop_ipd'
+; CHECK-LABEL: 'Divergence Analysis' for function 'hidden_loop_ipd'
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%cond.var = icmp slt i32 %tid, 0
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll
index 774e995c7ca2..ee963dde686e 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll
@@ -1,9 +1,10 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; divergent loop (H<header><exiting to X>, B<exiting to Y>)
; the divergent join point in %exit is obscured by uniform control joining in %X
define amdgpu_kernel void @hidden_loop_diverge(i32 %n, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_loop_diverge':
+; CHECK-LABEL: Divergence Analysis' for function 'hidden_loop_diverge'
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.
@@ -45,7 +46,7 @@ exit:
; divergent loop (H<header><exiting to X>, B<exiting to Y>)
; the phi nodes in X and Y don't actually receive divergent values
define amdgpu_kernel void @unobserved_loop_diverge(i32 %n, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'unobserved_loop_diverge':
+; CHECK-LABEL: Divergence Analysis' for function 'unobserved_loop_diverge':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.
@@ -86,7 +87,7 @@ exit:
; the inner loop has no exit to top level.
; the outer loop becomes divergent as its exiting branch in C is control-dependent on the inner loop's divergent loop exit in D.
define amdgpu_kernel void @hidden_nestedloop_diverge(i32 %n, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_nestedloop_diverge':
+; CHECK-LABEL: Divergence Analysis' for function 'hidden_nestedloop_diverge':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.
@@ -137,7 +138,7 @@ exit:
; the outer loop has no immediately divergent exiting edge.
; the inner exiting edge is exiting to top-level through the outer loop causing both to become divergent.
define amdgpu_kernel void @hidden_doublebreak_diverge(i32 %n, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_doublebreak_diverge':
+; CHECK-LABEL: Divergence Analysis' for function 'hidden_doublebreak_diverge':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.
@@ -179,7 +180,7 @@ Y:
; divergent loop (G<header>, L<exiting to D>) contained inside a uniform loop (H<header>, B, G, L , D<exiting to x>)
define amdgpu_kernel void @hidden_containedloop_diverge(i32 %n, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_containedloop_diverge':
+; CHECK-LABEL: Divergence Analysis' for function 'hidden_containedloop_diverge':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll
index 8443b82f3888..b9af7fcd9ef5 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll
@@ -1,50 +1,52 @@
-; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=tahiti -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
-; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx908 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=tahiti -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx908 -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -mcpu=tahiti -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -mcpu=gfx908 -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; Make sure nothing crashes on targets with or without AGPRs
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_sgpr_virtreg_output':
+; CHECK: Divergence Analysis' for function 'inline_asm_1_sgpr_virtreg_output':
; CHECK-NOT: DIVERGENT
define i32 @inline_asm_1_sgpr_virtreg_output() {
%sgpr = call i32 asm "s_mov_b32 $0, 0", "=s"()
ret i32 %sgpr
}
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_sgpr_physreg_output':
+; CHECK: Divergence Analysis' for function 'inline_asm_1_sgpr_physreg_output':
; CHECK-NOT: DIVERGENT
define i32 @inline_asm_1_sgpr_physreg_output() {
%sgpr = call i32 asm "s_mov_b32 s0, 0", "={s0}"()
ret i32 %sgpr
}
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_vgpr_virtreg_output':
+; CHECK: Divergence Analysis' for function 'inline_asm_1_vgpr_virtreg_output':
; CHECK: DIVERGENT: %vgpr = call i32 asm "v_mov_b32 $0, 0", "=v"()
define i32 @inline_asm_1_vgpr_virtreg_output() {
%vgpr = call i32 asm "v_mov_b32 $0, 0", "=v"()
ret i32 %vgpr
}
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_vgpr_physreg_output':
+; CHECK: Divergence Analysis' for function 'inline_asm_1_vgpr_physreg_output':
; CHECK: DIVERGENT: %vgpr = call i32 asm "v_mov_b32 v0, 0", "={v0}"()
define i32 @inline_asm_1_vgpr_physreg_output() {
%vgpr = call i32 asm "v_mov_b32 v0, 0", "={v0}"()
ret i32 %vgpr
}
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_agpr_virtreg_output':
+; CHECK: Divergence Analysis' for function 'inline_asm_1_agpr_virtreg_output':
; CHECK: DIVERGENT: %vgpr = call i32 asm "; def $0", "=a"()
define i32 @inline_asm_1_agpr_virtreg_output() {
%vgpr = call i32 asm "; def $0", "=a"()
ret i32 %vgpr
}
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_agpr_physreg_output':
+; CHECK: Divergence Analysis' for function 'inline_asm_1_agpr_physreg_output':
; CHECK: DIVERGENT: %vgpr = call i32 asm "; def a0", "={a0}"()
define i32 @inline_asm_1_agpr_physreg_output() {
%vgpr = call i32 asm "; def a0", "={a0}"()
ret i32 %vgpr
}
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_2_sgpr_virtreg_output':
+; CHECK: Divergence Analysis' for function 'inline_asm_2_sgpr_virtreg_output':
; CHECK-NOT: DIVERGENT
define void @inline_asm_2_sgpr_virtreg_output() {
%asm = call { i32, i32 } asm "; def $0, $1", "=s,=s"()
@@ -56,7 +58,7 @@ define void @inline_asm_2_sgpr_virtreg_output() {
}
; One output is SGPR, one is VGPR. Infer divergent for the aggregate, but uniform on the SGPR extract
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_sgpr_vgpr_virtreg_output':
+; CHECK: Divergence Analysis' for function 'inline_asm_sgpr_vgpr_virtreg_output':
; CHECK: DIVERGENT: %asm = call { i32, i32 } asm "; def $0, $1", "=s,=v"()
; CHECK-NEXT: {{^[ \t]+}}%sgpr = extractvalue { i32, i32 } %asm, 0
; CHECK-NEXT: DIVERGENT: %vgpr = extractvalue { i32, i32 } %asm, 1
@@ -69,7 +71,7 @@ define void @inline_asm_sgpr_vgpr_virtreg_output() {
ret void
}
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_vgpr_sgpr_virtreg_output':
+; CHECK: Divergence Analysis' for function 'inline_asm_vgpr_sgpr_virtreg_output':
; CHECK: DIVERGENT: %asm = call { i32, i32 } asm "; def $0, $1", "=v,=s"()
; CHECK-NEXT: DIVERGENT: %vgpr = extractvalue { i32, i32 } %asm, 0
; CHECK-NEXT: {{^[ \t]+}}%sgpr = extractvalue { i32, i32 } %asm, 1
@@ -83,7 +85,7 @@ define void @inline_asm_vgpr_sgpr_virtreg_output() {
}
; Have an extra output constraint
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'multi_sgpr_inline_asm_output_input_constraint':
+; CHECK: Divergence Analysis' for function 'multi_sgpr_inline_asm_output_input_constraint':
; CHECK-NOT: DIVERGENT
define void @multi_sgpr_inline_asm_output_input_constraint() {
%asm = call { i32, i32 } asm "; def $0, $1", "=s,=s,s"(i32 1234)
@@ -94,7 +96,7 @@ define void @multi_sgpr_inline_asm_output_input_constraint() {
ret void
}
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_vgpr_sgpr_virtreg_output_input_constraint':
+; CHECK: Divergence Analysis' for function 'inline_asm_vgpr_sgpr_virtreg_output_input_constraint':
; CHECK: DIVERGENT: %asm = call { i32, i32 } asm "; def $0, $1", "=v,=s,v"(i32 1234)
; CHECK-NEXT: DIVERGENT: %vgpr = extractvalue { i32, i32 } %asm, 0
; CHECK-NEXT: {{^[ \t]+}}%sgpr = extractvalue { i32, i32 } %asm, 1
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/interp_f16.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/interp_f16.ll
index 174dd5679782..da5ba6774ec9 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/interp_f16.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/interp_f16.ll
@@ -1,4 +1,5 @@
-; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; CHECK: for function 'interp_p1_f16'
; CHECK: DIVERGENT: %p1 = call float @llvm.amdgcn.interp.p1.f16
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll
index e9c753f027ab..88b178cb9c04 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll
@@ -1,4 +1,5 @@
-; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; CHECK: DIVERGENT: %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
define amdgpu_kernel void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) #0 {
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll
index 9a94328be676..abe85e4e0457 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll
@@ -1,4 +1,14 @@
-; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt %s -mtriple amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt %s -mtriple amdgcn-- -passes='print<divergence>' -disable-output 2>&1 | FileCheck %s
+
+; NOTE: The new pass manager does not fall back on legacy divergence
+; analysis even when the function contains an irreducible loop. The
+; (new) divergence analysis conservatively reports all values as
+; divergent. This test does not check for this conservative
+; behaviour. Instead, it only checks for the values that are known to
+; be divergent according to the legacy analysis.
+
+; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; This test contains an unstructured loop.
; +-------------- entry ----------------+
@@ -14,7 +24,7 @@
; if (i3 == 5) // divergent
; because sync dependent on (tid / i3).
define i32 @unstructured_loop(i1 %entry_cond) {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'unstructured_loop'
+; CHECK-LABEL: Divergence Analysis' for function 'unstructured_loop'
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-exit.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-exit.ll
index 2b9bce7657ae..1ddb869f9850 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-exit.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-exit.ll
@@ -1,4 +1,5 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; CHECK: bb3:
; CHECK: DIVERGENT: %Guard.bb4 = phi i1 [ true, %bb1 ], [ false, %bb2 ]
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll
index bc9ed6fb879b..21379e8fbf73 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll
@@ -1,6 +1,7 @@
-; RUN: opt %s -mtriple amdgcn-- -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt %s -mtriple amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_amdgpu_ps':
+; CHECK-LABEL: Divergence Analysis' for function 'test_amdgpu_ps':
; CHECK: DIVERGENT: [4 x <16 x i8>] addrspace(4)* %arg0
; CHECK-NOT: DIVERGENT
; CHECK: DIVERGENT: <2 x i32> %arg3
@@ -12,7 +13,7 @@ define amdgpu_ps void @test_amdgpu_ps([4 x <16 x i8>] addrspace(4)* byref([4 x <
ret void
}
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_amdgpu_kernel':
+; CHECK-LABEL: Divergence Analysis' for function 'test_amdgpu_kernel':
; CHECK-NOT: %arg0
; CHECK-NOT: %arg1
; CHECK-NOT: %arg2
@@ -24,7 +25,7 @@ define amdgpu_kernel void @test_amdgpu_kernel([4 x <16 x i8>] addrspace(4)* byre
ret void
}
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_c':
+; CHECK-LABEL: Divergence Analysis' for function 'test_c':
; CHECK: DIVERGENT:
; CHECK: DIVERGENT:
; CHECK: DIVERGENT:
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll
index 5bc5fe34cdab..cdcc401e7a0f 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll
@@ -1,4 +1,5 @@
-;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+;RUN: opt -mtriple=amdgcn-mesa-mesa3d -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.swap.i32(
define float @buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
index 97ef984dc816..616bebd5cc9e 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
@@ -1,4 +1,5 @@
-;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+;RUN: opt -mtriple=amdgcn-mesa-mesa3d -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(
define float @image_atomic_swap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll
index fb7c041e2d18..dabded9955b7 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll
@@ -1,4 +1,5 @@
-; RUN: opt %s -mtriple amdgcn-- -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt %s -mtriple amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; CHECK: DIVERGENT: %tmp5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp2
; CHECK: DIVERGENT: %tmp10 = load volatile float, float addrspace(1)* %tmp5, align 4
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll
index 978bc4232b1d..69cfd3d971d3 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll
@@ -1,4 +1,5 @@
-; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; CHECK-LABEL: 'test1':
; CHECK-NEXT: DIVERGENT: i32 %bound
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/propagate-loop-live-out.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/propagate-loop-live-out.ll
index 9ed3b0df0d58..252b6ff73356 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/propagate-loop-live-out.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/propagate-loop-live-out.ll
@@ -1,4 +1,5 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; CHECK: bb6:
; CHECK: DIVERGENT: %.126.i355.i = phi i1 [ false, %bb5 ], [ true, %bb4 ]
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/temporal_diverge.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/temporal_diverge.ll
index 4211ca28ad66..1895b0d84b20 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/temporal_diverge.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/temporal_diverge.ll
@@ -1,8 +1,9 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; temporal-divergent use of value carried by divergent loop
define amdgpu_kernel void @temporal_diverge(i32 %n, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_diverge':
+; CHECK-LABEL: Divergence Analysis' for function 'temporal_diverge':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.
@@ -26,7 +27,7 @@ X:
; temporal-divergent use of value carried by divergent loop inside a top-level loop
define amdgpu_kernel void @temporal_diverge_inloop(i32 %n, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_diverge_inloop':
+; CHECK-LABEL: Divergence Analysis' for function 'temporal_diverge_inloop':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.
@@ -58,7 +59,7 @@ Y:
; temporal-uniform use of a value, definition and users are carried by a surrounding divergent loop
define amdgpu_kernel void @temporal_uniform_indivloop(i32 %n, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_uniform_indivloop':
+; CHECK-LABEL: Divergence Analysis' for function 'temporal_uniform_indivloop':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.
@@ -90,7 +91,7 @@ Y:
; temporal-divergent use of value carried by divergent loop, user is inside sibling loop
define amdgpu_kernel void @temporal_diverge_loopuser(i32 %n, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_diverge_loopuser':
+; CHECK-LABEL: Divergence Analysis' for function 'temporal_diverge_loopuser':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.
@@ -120,7 +121,7 @@ Y:
; temporal-divergent use of value carried by divergent loop, user is inside sibling loop, defs and use are carried by a uniform loop
define amdgpu_kernel void @temporal_diverge_loopuser_nested(i32 %n, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_diverge_loopuser_nested':
+; CHECK-LABEL: Divergence Analysis' for function 'temporal_diverge_loopuser_nested':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll
index b872dd8966bc..ee4167e90ae5 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll
@@ -1,4 +1,5 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; CHECK: bb2:
; CHECK-NOT: DIVERGENT: %Guard.bb2 = phi i1 [ true, %bb1 ], [ false, %bb0 ]
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/unreachable-loop-block.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/unreachable-loop-block.ll
index af3db4c88815..48294d714f26 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/unreachable-loop-block.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/unreachable-loop-block.ll
@@ -1,4 +1,5 @@
-; RUN: opt %s -mtriple amdgcn-- -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt %s -mtriple amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; CHECK: DIVERGENT: %tmp = cmpxchg volatile
define amdgpu_kernel void @unreachable_loop(i32 %tidx) #0 {
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll
index b22c5f11abe6..15f79a7ef61b 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll
@@ -1,4 +1,5 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
diff --git a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/daorder.ll b/llvm/test/Analysis/DivergenceAnalysis/NVPTX/daorder.ll
index 89954b6f7c06..eb0938e76aee 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/daorder.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/NVPTX/daorder.ll
@@ -1,10 +1,11 @@
-; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt %s -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt %s -passes='print<divergence>' -disable-output 2>&1 | FileCheck %s
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
define i32 @daorder(i32 %n) {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'daorder'
+; CHECK-LABEL: Divergence Analysis' for function 'daorder'
entry:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%cond = icmp slt i32 %tid, 0
diff --git a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll b/llvm/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll
index e2e547282205..10bcd106c8c3 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll
@@ -1,11 +1,12 @@
-; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt %s -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt %s -passes='print<divergence>' -disable-output 2>&1 | FileCheck %s
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
; return (n < 0 ? a + threadIdx.x : b + threadIdx.x)
define i32 @no_diverge(i32 %n, i32 %a, i32 %b) {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'no_diverge'
+; CHECK-LABEL: Divergence Analysis' for function 'no_diverge'
entry:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%cond = icmp slt i32 %n, 0
@@ -27,7 +28,7 @@ merge:
; c = b;
; return c; // c is divergent: sync dependent
define i32 @sync(i32 %a, i32 %b) {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'sync'
+; CHECK-LABEL: Divergence Analysis' for function 'sync'
bb1:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
%cond = icmp slt i32 %tid, 5
@@ -48,7 +49,7 @@ bb3:
; // c here is divergent because it is sync dependent on threadIdx.x >= 5
; return c;
define i32 @mixed(i32 %n, i32 %a, i32 %b) {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'mixed'
+; CHECK-LABEL: Divergence Analysis' for function 'mixed'
bb1:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
%cond = icmp slt i32 %tid, 5
@@ -73,7 +74,7 @@ bb6:
; We conservatively treat all parameters of a __device__ function as divergent.
define i32 @device(i32 %n, i32 %a, i32 %b) {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'device'
+; CHECK-LABEL: Divergence Analysis' for function 'device'
; CHECK: DIVERGENT: i32 %n
; CHECK: DIVERGENT: i32 %a
; CHECK: DIVERGENT: i32 %b
@@ -98,7 +99,7 @@ merge:
;
; The i defined in the loop is used outside.
define i32 @loop() {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'loop'
+; CHECK-LABEL: Divergence Analysis' for function 'loop'
entry:
%laneid = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
br label %loop
@@ -120,7 +121,7 @@ else:
; Same as @loop, but the loop is in the LCSSA form.
define i32 @lcssa() {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'lcssa'
+; CHECK-LABEL: Divergence Analysis' for function 'lcssa'
entry:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
br label %loop
diff --git a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/hidden_diverge.ll b/llvm/test/Analysis/DivergenceAnalysis/NVPTX/hidden_diverge.ll
index 3d61986657e0..ea15a7c86f48 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/hidden_diverge.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/NVPTX/hidden_diverge.ll
@@ -1,10 +1,11 @@
-; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt %s -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt %s -passes='print<divergence>' -disable-output 2>&1 | FileCheck %s
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
define i32 @hidden_diverge(i32 %n, i32 %a, i32 %b) {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_diverge'
+; CHECK-LABEL: Divergence Analysis' for function 'hidden_diverge'
entry:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%cond.var = icmp slt i32 %tid, 0
diff --git a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll b/llvm/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll
index 2e1686a446d4..1693d64fd1a9 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll
@@ -1,4 +1,12 @@
-; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt %s -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt %s -passes='print<divergence>' -disable-output 2>&1 | FileCheck %s
+
+; NOTE: The new pass manager does not fall back on legacy divergence
+; analysis even when the function contains an irreducible loop. The
+; (new) divergence analysis conservatively reports all values as
+; divergent. This test does not check for this conservative
+; behaviour. Instead, it only checks for the values that are known to
+; be divergent according to the legacy analysis.
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
@@ -17,7 +25,7 @@ target triple = "nvptx64-nvidia-cuda"
; if (i3 == 5) // divergent
; because sync dependent on (tid / i3).
define i32 @unstructured_loop(i1 %entry_cond) {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'unstructured_loop'
+; CHECK-LABEL: Divergence Analysis' for function 'unstructured_loop'
entry:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2
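The NOTE in the hunk above describes the new conservative handling of irreducible control flow. Purely as an illustration of the CFG shape involved (this snippet is not part of the commit, and every name in it is hypothetical), an irreducible region is a cycle that can be entered through more than one block; on such input the new DivergenceAnalysis gives up on precision and reports every value as divergent rather than looping forever:

  define void @irreducible_sketch(i1 %c, i1 %d) {
  entry:
    ; Two distinct entries into the A<->B cycle make the region irreducible.
    br i1 %c, label %A, label %B
  A:
    br i1 %d, label %B, label %exit
  B:
    br i1 %d, label %A, label %exit
  exit:
    ret void
  }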
diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll
index 6c4b24e114b6..965d9f22a244 100644
--- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll
+++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s
; CHECK: DIVERGENT: %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst
define i32 @test1(i32* %ptr, i32 %val) #0 {
diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll
index 596e8143633e..894de06c4bc8 100644
--- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll
+++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=amdgcn-- -analyze -amdgpu-use-legacy-divergence-analysis -divergence %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-- -enable-new-pm=0 -analyze -amdgpu-use-legacy-divergence-analysis -divergence %s | FileCheck %s
; CHECK: DIVERGENT: %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
define amdgpu_kernel void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) #0 {
diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll
index f06c9e2a315b..e2675f98015d 100644
--- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll
+++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence | FileCheck %s
+; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence | FileCheck %s
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_amdgpu_ps':
; CHECK: DIVERGENT: [4 x <16 x i8>] addrspace(4)* %arg0
diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll
index 9f82cd96ffe7..639a95575c42 100644
--- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll
+++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll
@@ -1,4 +1,4 @@
-;RUN: opt -mtriple=amdgcn-mesa-mesa3d -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s
+;RUN: opt -mtriple=amdgcn-mesa-mesa3d -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.swap.i32(
define float @buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
index 81489aaf74b6..c8b9e1dacafb 100644
--- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
+++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
@@ -1,4 +1,4 @@
-;RUN: opt -mtriple=amdgcn-mesa-mesa3d -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s
+;RUN: opt -mtriple=amdgcn-mesa-mesa3d -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(
define float @image_atomic_swap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll
index 122c14f389f5..903858bab375 100644
--- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll
+++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s
; Test that we consider loads from flat and private addrspaces to be divergent.
diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll
index 44eed1359850..e9a640f97c37 100644
--- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll
+++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence | FileCheck %s
+; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence | FileCheck %s
; CHECK: DIVERGENT: %tmp5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp2
; CHECK: DIVERGENT: %tmp10 = load volatile float, float addrspace(1)* %tmp5, align 4
diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll
index 6fffc811116a..5bc388cac0e4 100644
--- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll
+++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s
; CHECK-LABEL: 'test1':
; CHECK-NEXT: DIVERGENT: i32 %bound
diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll
index 5ee1a56cc41f..49657d253ba1 100644
--- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll
+++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence | FileCheck %s
+; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence | FileCheck %s
; CHECK: DIVERGENT: %tmp = cmpxchg volatile
define amdgpu_kernel void @unreachable_loop(i32 %tidx) #0 {
diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll
index d10b4be49aad..0fd25c85ff7c 100644
--- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll
+++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/NVPTX/diverge.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/NVPTX/diverge.ll
index 4e7163d3a51e..9ff837a11e8b 100644
--- a/llvm/test/Analysis/LegacyDivergenceAnalysis/NVPTX/diverge.ll
+++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/NVPTX/diverge.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -analyze -divergence | FileCheck %s
+; RUN: opt %s -enable-new-pm=0 -analyze -divergence | FileCheck %s
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp
index e7ba330bb617..bf2d59b5eed5 100644
--- a/llvm/tools/opt/opt.cpp
+++ b/llvm/tools/opt/opt.cpp
@@ -496,7 +496,7 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) {
"safe-stack", "cost-model",
"codegenprepare", "interleaved-load-combine",
"unreachableblockelim", "verify-safepoint-ir",
- "divergence", "atomic-expand",
+ "atomic-expand",
"hardware-loops", "type-promotion",
"mve-tail-predication", "interleaved-access",
"global-merge", "pre-isel-intrinsic-lowering",
diff --git a/llvm/unittests/Analysis/DivergenceAnalysisTest.cpp b/llvm/unittests/Analysis/DivergenceAnalysisTest.cpp
index 9416e592012d..0737e7773fb6 100644
--- a/llvm/unittests/Analysis/DivergenceAnalysisTest.cpp
+++ b/llvm/unittests/Analysis/DivergenceAnalysisTest.cpp
@@ -38,7 +38,7 @@ BasicBlock *GetBlockByName(StringRef BlockName, Function &F) {
return nullptr;
}
-// We use this fixture to ensure that we clean up DivergenceAnalysis before
+// We use this fixture to ensure that we clean up DivergenceAnalysisImpl before
// deleting the PassManager.
class DivergenceAnalysisTest : public testing::Test {
protected:
@@ -54,21 +54,21 @@ class DivergenceAnalysisTest : public testing::Test {
DivergenceAnalysisTest() : M("", Context), TLII(), TLI(TLII) {}
- DivergenceAnalysis buildDA(Function &F, bool IsLCSSA) {
+ DivergenceAnalysisImpl buildDA(Function &F, bool IsLCSSA) {
DT.reset(new DominatorTree(F));
PDT.reset(new PostDominatorTree(F));
LI.reset(new LoopInfo(*DT));
SDA.reset(new SyncDependenceAnalysis(*DT, *PDT, *LI));
- return DivergenceAnalysis(F, nullptr, *DT, *LI, *SDA, IsLCSSA);
+ return DivergenceAnalysisImpl(F, nullptr, *DT, *LI, *SDA, IsLCSSA);
}
void runWithDA(
Module &M, StringRef FuncName, bool IsLCSSA,
- function_ref<void(Function &F, LoopInfo &LI, DivergenceAnalysis &DA)>
+ function_ref<void(Function &F, LoopInfo &LI, DivergenceAnalysisImpl &DA)>
Test) {
auto *F = M.getFunction(FuncName);
ASSERT_NE(F, nullptr) << "Could not find " << FuncName;
- DivergenceAnalysis DA = buildDA(*F, IsLCSSA);
+ DivergenceAnalysisImpl DA = buildDA(*F, IsLCSSA);
Test(*F, *LI, DA);
}
};
@@ -82,7 +82,7 @@ TEST_F(DivergenceAnalysisTest, DAInitialState) {
BasicBlock *BB = BasicBlock::Create(Context, "entry", F);
ReturnInst::Create(Context, nullptr, BB);
- DivergenceAnalysis DA = buildDA(*F, false);
+ DivergenceAnalysisImpl DA = buildDA(*F, false);
// Whole function region
EXPECT_EQ(DA.getRegionLoop(), nullptr);
@@ -135,7 +135,7 @@ TEST_F(DivergenceAnalysisTest, DANoLCSSA) {
Err, C);
Function *F = M->getFunction("f_1");
- DivergenceAnalysis DA = buildDA(*F, false);
+ DivergenceAnalysisImpl DA = buildDA(*F, false);
EXPECT_FALSE(DA.hasDetectedDivergence());
auto ItArg = F->arg_begin();
@@ -189,7 +189,7 @@ TEST_F(DivergenceAnalysisTest, DALCSSA) {
Err, C);
Function *F = M->getFunction("f_lcssa");
- DivergenceAnalysis DA = buildDA(*F, true);
+ DivergenceAnalysisImpl DA = buildDA(*F, true);
EXPECT_FALSE(DA.hasDetectedDivergence());
auto ItArg = F->arg_begin();
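For reference, the printer pass registered by this change is invoked exactly as in the updated RUN lines above. A minimal standalone sketch of such a test follows; the function name is hypothetical and the CHECK patterns simply reuse the ones from the updated NVPTX tests, so this is illustrative rather than part of the commit:

  ; RUN: opt %s -passes='print<divergence>' -disable-output 2>&1 | FileCheck %s
  target triple = "nvptx64-nvidia-cuda"

  ; CHECK-LABEL: Divergence Analysis' for function 'tid_sketch'
  define i32 @tid_sketch() {
  entry:
    ; The thread-id read is a source of divergence.
    ; CHECK: DIVERGENT: %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
    %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
    ret i32 %x
  }

  declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()

The legacy wrapper remains reachable through the old pipeline as "-enable-new-pm=0 -analyze -divergence", which is why the existing RUN lines are kept with that flag rather than deleted.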