[llvm] ae77ace - [Analysis] Remove DA & LegacyDA
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 17 00:01:28 PDT 2023
Author: pvanhout
Date: 2023-04-17T09:01:22+02:00
New Revision: ae77aceba5ad6ee575d3d79eb0259624322b19f4
URL: https://github.com/llvm/llvm-project/commit/ae77aceba5ad6ee575d3d79eb0259624322b19f4
DIFF: https://github.com/llvm/llvm-project/commit/ae77aceba5ad6ee575d3d79eb0259624322b19f4.diff
LOG: [Analysis] Remove DA & LegacyDA
UniformityAnalysis offers all of the same features and much more, there is no reason left to use the legacy DAs.
See RFC: https://discourse.llvm.org/t/rfc-deprecate-divergenceanalysis-legacydivergenceanalysis/69538
- Remove LegacyDivergenceAnalysis.h/.cpp
- Remove DivergenceAnalysis.h/.cpp + Unit tests
- Remove SyncDependenceAnalysis - it was not a real registered analysis and was only used by DAs
- Remove/adjust references to the passes in the docs where applicable
- Remove TTI hook associated with those passes.
- Move tests to UniformityAnalysis folder.
- Remove RUN lines for the DA, leave only the UA ones.
- Some tests had to be adjusted/removed depending on how they used the legacy DAs.
Reviewed By: foad, sameerds
Differential Revision: https://reviews.llvm.org/D148116
Added:
llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/always-uniform-gmir.mir
llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/always-uniform.mir
llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/atomics-gmir.mir
llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/atomics.mir
llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir
llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge.mir
llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/branch-outside-gmir.mir
llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/diverged-entry-basic-gmir.mir
llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/exit-divergence-gmir.mir
llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/irreducible-1.mir
llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/irreducible-2-gmir.mir
llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/join-loopexit-gmir.mir
llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/loads-gmir.mir
llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/never-uniform.mir
llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/temporal-diverge-gmir.mir
llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/uses-value-from-cycle.mir
llvm/test/Analysis/UniformityAnalysis/AMDGPU/always_uniform.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/atomics.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/b42473-r1-crash.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/control-flow-intrinsics.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/hidden_diverge.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/hidden_loopdiverge.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/inline-asm.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/interp_f16.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/branch-outside.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-basic.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-headers-nested.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-headers.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/exit-divergence.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/irreducible-1.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/irreducible-2.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/reducible-headers.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/join-at-loop-exit.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/join-at-loop-heart.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/kernel-args.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/lit.local.cfg
llvm/test/Analysis/UniformityAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/no-return-blocks.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/phi-undef.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/propagate-loop-live-out.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/temporal_diverge.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/trivial-join-at-loop-exit.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/unreachable-loop-block.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll
llvm/test/Analysis/UniformityAnalysis/NVPTX/daorder.ll
llvm/test/Analysis/UniformityAnalysis/NVPTX/diverge.ll
llvm/test/Analysis/UniformityAnalysis/NVPTX/hidden_diverge.ll
llvm/test/Analysis/UniformityAnalysis/NVPTX/irreducible.ll
llvm/test/Analysis/UniformityAnalysis/NVPTX/lit.local.cfg
Modified:
llvm/docs/ConvergenceAndUniformity.rst
llvm/include/llvm/ADT/GenericUniformityInfo.h
llvm/include/llvm/Analysis/Passes.h
llvm/include/llvm/Analysis/TargetTransformInfo.h
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
llvm/include/llvm/CodeGen/BasicTTIImpl.h
llvm/include/llvm/InitializePasses.h
llvm/include/llvm/LinkAllPasses.h
llvm/lib/Analysis/Analysis.cpp
llvm/lib/Analysis/CMakeLists.txt
llvm/lib/Analysis/TargetTransformInfo.cpp
llvm/lib/Passes/PassBuilder.cpp
llvm/lib/Passes/PassRegistry.def
llvm/test/CodeGen/AMDGPU/always-uniform.ll
llvm/test/CodeGen/AMDGPU/smrd.ll
llvm/unittests/Analysis/CMakeLists.txt
llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn
llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn
Removed:
llvm/include/llvm/Analysis/DivergenceAnalysis.h
llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h
llvm/include/llvm/Analysis/SyncDependenceAnalysis.h
llvm/lib/Analysis/DivergenceAnalysis.cpp
llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
llvm/lib/Analysis/SyncDependenceAnalysis.cpp
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/always-uniform-gmir.mir
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/always-uniform.mir
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/atomics-gmir.mir
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/atomics.mir
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/hidden-diverge.mir
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/irreducible/branch-outside-gmir.mir
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/irreducible/diverged-entry-basic-gmir.mir
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/irreducible/exit-divergence-gmir.mir
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/irreducible/irreducible-1.mir
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/irreducible/irreducible-2-gmir.mir
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/join-loopexit-gmir.mir
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/loads-gmir.mir
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/never-uniform.mir
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/temporal-diverge-gmir.mir
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/uses-value-from-cycle.mir
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/interp_f16.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/branch-outside.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/diverged-entry-basic.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/diverged-entry-headers-nested.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/diverged-entry-headers.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/exit-divergence.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/irreducible-1.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/irreducible-2.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/reducible-headers.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-exit.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-heart.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/lit.local.cfg
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/propagate-loop-live-out.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/read_register.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/temporal_diverge.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/unreachable-loop-block.ll
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll
llvm/test/Analysis/DivergenceAnalysis/NVPTX/daorder.ll
llvm/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll
llvm/test/Analysis/DivergenceAnalysis/NVPTX/hidden_diverge.ll
llvm/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll
llvm/test/Analysis/DivergenceAnalysis/NVPTX/lit.local.cfg
llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll
llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll
llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll
llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/lit.local.cfg
llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll
llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll
llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll
llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll
llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll
llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll
llvm/test/Analysis/LegacyDivergenceAnalysis/NVPTX/diverge.ll
llvm/test/Analysis/LegacyDivergenceAnalysis/NVPTX/lit.local.cfg
llvm/unittests/Analysis/DivergenceAnalysisTest.cpp
################################################################################
diff --git a/llvm/docs/ConvergenceAndUniformity.rst b/llvm/docs/ConvergenceAndUniformity.rst
index ae78692b50103..97d374b3b6e6a 100644
--- a/llvm/docs/ConvergenceAndUniformity.rst
+++ b/llvm/docs/ConvergenceAndUniformity.rst
@@ -51,7 +51,7 @@ subgroups):
This document presents a definition of convergence that is reasonable
for real targets and is compatible with the currently implicit
semantics of convergent operations in LLVM IR. This is accompanied by
-a *uniformity analysis* that extends the existing divergence analysis
+a *uniformity analysis* that extends previous work on divergence analysis
[DivergenceSPMD]_ to cover irreducible control-flow.
.. [DivergenceSPMD] Julian Rosemann, Simon Moll, and Sebastian
diff --git a/llvm/include/llvm/ADT/GenericUniformityInfo.h b/llvm/include/llvm/ADT/GenericUniformityInfo.h
index add5f3c68225f..bbf49843673be 100644
--- a/llvm/include/llvm/ADT/GenericUniformityInfo.h
+++ b/llvm/include/llvm/ADT/GenericUniformityInfo.h
@@ -9,11 +9,7 @@
#ifndef LLVM_ADT_GENERICUNIFORMITYINFO_H
#define LLVM_ADT_GENERICUNIFORMITYINFO_H
-// #include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/GenericCycleInfo.h"
-// #include "llvm/ADT/SmallPtrSet.h"
-// #include "llvm/ADT/Uniformity.h"
-// #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
diff --git a/llvm/include/llvm/Analysis/DivergenceAnalysis.h b/llvm/include/llvm/Analysis/DivergenceAnalysis.h
deleted file mode 100644
index 4c2a5399ea544..0000000000000
--- a/llvm/include/llvm/Analysis/DivergenceAnalysis.h
+++ /dev/null
@@ -1,210 +0,0 @@
-//===- llvm/Analysis/DivergenceAnalysis.h - Divergence Analysis -*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// \file
-// The divergence analysis determines which instructions and branches are
-// divergent given a set of divergent source instructions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ANALYSIS_DIVERGENCEANALYSIS_H
-#define LLVM_ANALYSIS_DIVERGENCEANALYSIS_H
-
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/Analysis/SyncDependenceAnalysis.h"
-#include "llvm/IR/PassManager.h"
-#include <vector>
-
-namespace llvm {
-class Function;
-class Instruction;
-class Loop;
-class raw_ostream;
-class TargetTransformInfo;
-class Value;
-
-/// \brief Generic divergence analysis for reducible CFGs.
-///
-/// This analysis propagates divergence in a data-parallel context from sources
-/// of divergence to all users. It requires reducible CFGs. All assignments
-/// should be in SSA form.
-class DivergenceAnalysisImpl {
-public:
- /// \brief This instance will analyze the whole function \p F or the loop \p
- /// RegionLoop.
- ///
- /// \param RegionLoop if non-null the analysis is restricted to \p RegionLoop.
- /// Otherwise the whole function is analyzed.
- /// \param IsLCSSAForm whether the analysis may assume that the IR in the
- /// region in LCSSA form.
- DivergenceAnalysisImpl(const Function &F, const Loop *RegionLoop,
- const DominatorTree &DT, const LoopInfo &LI,
- SyncDependenceAnalysis &SDA, bool IsLCSSAForm);
-
- /// \brief The loop that defines the analyzed region (if any).
- const Loop *getRegionLoop() const { return RegionLoop; }
- const Function &getFunction() const { return F; }
-
- /// \brief Whether \p BB is part of the region.
- bool inRegion(const BasicBlock &BB) const;
- /// \brief Whether \p I is part of the region.
- bool inRegion(const Instruction &I) const;
-
- /// \brief Mark \p UniVal as a value that is always uniform.
- void addUniformOverride(const Value &UniVal);
-
- /// \brief Mark \p DivVal as a value that is always divergent. Will not do so
- /// if `isAlwaysUniform(DivVal)`.
- /// \returns Whether the tracked divergence state of \p DivVal changed.
- bool markDivergent(const Value &DivVal);
-
- /// \brief Propagate divergence to all instructions in the region.
- /// Divergence is seeded by calls to \p markDivergent.
- void compute();
-
- /// \brief Whether any value was marked or analyzed to be divergent.
- bool hasDetectedDivergence() const { return !DivergentValues.empty(); }
-
- /// \brief Whether \p Val will always return a uniform value regardless of its
- /// operands
- bool isAlwaysUniform(const Value &Val) const;
-
- /// \brief Whether \p Val is divergent at its definition.
- bool isDivergent(const Value &Val) const;
-
- /// \brief Whether \p U is divergent. Uses of a uniform value can be
- /// divergent.
- bool isDivergentUse(const Use &U) const;
-
-private:
- /// \brief Mark \p Term as divergent and push all Instructions that become
- /// divergent as a result on the worklist.
- void analyzeControlDivergence(const Instruction &Term);
- /// \brief Mark all phi nodes in \p JoinBlock as divergent and push them on
- /// the worklist.
- void taintAndPushPhiNodes(const BasicBlock &JoinBlock);
-
- /// \brief Identify all Instructions that become divergent because \p DivExit
- /// is a divergent loop exit of \p DivLoop. Mark those instructions as
- /// divergent and push them on the worklist.
- void propagateLoopExitDivergence(const BasicBlock &DivExit,
- const Loop &DivLoop);
-
- /// \brief Internal implementation function for propagateLoopExitDivergence.
- void analyzeLoopExitDivergence(const BasicBlock &DivExit,
- const Loop &OuterDivLoop);
-
- /// \brief Mark all instruction as divergent that use a value defined in \p
- /// OuterDivLoop. Push their users on the worklist.
- void analyzeTemporalDivergence(const Instruction &I,
- const Loop &OuterDivLoop);
-
- /// \brief Push all users of \p Val (in the region) to the worklist.
- void pushUsers(const Value &I);
-
- /// \brief Whether \p Val is divergent when read in \p ObservingBlock.
- bool isTemporalDivergent(const BasicBlock &ObservingBlock,
- const Value &Val) const;
-
-private:
- const Function &F;
- // If regionLoop != nullptr, analysis is only performed within \p RegionLoop.
- // Otherwise, analyze the whole function
- const Loop *RegionLoop;
-
- const DominatorTree &DT;
- const LoopInfo &LI;
-
- // Recognized divergent loops
- DenseSet<const Loop *> DivergentLoops;
-
- // The SDA links divergent branches to divergent control-flow joins.
- SyncDependenceAnalysis &SDA;
-
- // Use simplified code path for LCSSA form.
- bool IsLCSSAForm;
-
- // Set of known-uniform values.
- DenseSet<const Value *> UniformOverrides;
-
- // Detected/marked divergent values.
- DenseSet<const Value *> DivergentValues;
-
- // Internal worklist for divergence propagation.
- std::vector<const Instruction *> Worklist;
-};
-
-class DivergenceInfo {
- Function &F;
-
- // If the function contains an irreducible region the divergence
- // analysis can run indefinitely. We set ContainsIrreducible and no
- // analysis is actually performed on the function. All values in
- // this function are conservatively reported as divergent instead.
- bool ContainsIrreducible = false;
- std::unique_ptr<SyncDependenceAnalysis> SDA;
- std::unique_ptr<DivergenceAnalysisImpl> DA;
-
-public:
- DivergenceInfo(Function &F, const DominatorTree &DT,
- const PostDominatorTree &PDT, const LoopInfo &LI,
- const TargetTransformInfo &TTI, bool KnownReducible);
-
- /// Whether any divergence was detected.
- bool hasDivergence() const {
- return ContainsIrreducible || DA->hasDetectedDivergence();
- }
-
- /// The GPU kernel this analysis result is for
- const Function &getFunction() const { return F; }
-
- /// Whether \p V is divergent at its definition.
- bool isDivergent(const Value &V) const {
- return ContainsIrreducible || DA->isDivergent(V);
- }
-
- /// Whether \p U is divergent. Uses of a uniform value can be divergent.
- bool isDivergentUse(const Use &U) const {
- return ContainsIrreducible || DA->isDivergentUse(U);
- }
-
- /// Whether \p V is uniform/non-divergent.
- bool isUniform(const Value &V) const { return !isDivergent(V); }
-
- /// Whether \p U is uniform/non-divergent. Uses of a uniform value can be
- /// divergent.
- bool isUniformUse(const Use &U) const { return !isDivergentUse(U); }
-};
-
-/// \brief Divergence analysis frontend for GPU kernels.
-class DivergenceAnalysis : public AnalysisInfoMixin<DivergenceAnalysis> {
- friend AnalysisInfoMixin<DivergenceAnalysis>;
-
- static AnalysisKey Key;
-
-public:
- using Result = DivergenceInfo;
-
- /// Runs the divergence analysis on @F, a GPU kernel
- Result run(Function &F, FunctionAnalysisManager &AM);
-};
-
-/// Printer pass to dump divergence analysis results.
-struct DivergenceAnalysisPrinterPass
- : public PassInfoMixin<DivergenceAnalysisPrinterPass> {
- DivergenceAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {}
-
- PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
-
-private:
- raw_ostream &OS;
-}; // class DivergenceAnalysisPrinterPass
-
-} // namespace llvm
-
-#endif // LLVM_ANALYSIS_DIVERGENCEANALYSIS_H
diff --git a/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h
deleted file mode 100644
index 261935a378190..0000000000000
--- a/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h
+++ /dev/null
@@ -1,103 +0,0 @@
-//===- llvm/Analysis/LegacyDivergenceAnalysis.h - KernelDivergence Analysis -*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// The kernel divergence analysis is an LLVM pass which can be used to find out
-// if a branch instruction in a GPU program (kernel) is divergent or not. It can help
-// branch optimizations such as jump threading and loop unswitching to make
-// better decisions.
-//
-//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_LEGACYDIVERGENCEANALYSIS_H
-#define LLVM_ANALYSIS_LEGACYDIVERGENCEANALYSIS_H
-
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/PostDominators.h"
-#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
-#include <memory>
-
-namespace llvm {
-class DivergenceInfo;
-class Function;
-class Module;
-class raw_ostream;
-class TargetTransformInfo;
-class Use;
-class Value;
-
-class LegacyDivergenceAnalysisImpl {
-public:
- // Returns true if V is divergent at its definition.
- bool isDivergent(const Value *V) const;
-
- // Returns true if U is divergent. Uses of a uniform value can be divergent.
- bool isDivergentUse(const Use *U) const;
-
- // Returns true if V is uniform/non-divergent.
- bool isUniform(const Value *V) const { return !isDivergent(V); }
-
- // Returns true if U is uniform/non-divergent. Uses of a uniform value can be
- // divergent.
- bool isUniformUse(const Use *U) const { return !isDivergentUse(U); }
-
- // Keep the analysis results uptodate by removing an erased value.
- void removeValue(const Value *V) { DivergentValues.erase(V); }
-
- // Print all divergent branches in the function.
- void print(raw_ostream &OS, const Module *) const;
-
- // Whether analysis should be performed by GPUDivergenceAnalysis.
- bool shouldUseGPUDivergenceAnalysis(const Function &F,
- const TargetTransformInfo &TTI,
- const LoopInfo &LI);
-
- void run(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
- PostDominatorTree &PDT, const LoopInfo &LI);
-
-protected:
- // (optional) handle to new DivergenceAnalysis
- std::unique_ptr<DivergenceInfo> gpuDA;
-
- // Stores all divergent values.
- DenseSet<const Value *> DivergentValues;
-
- // Stores divergent uses of possibly uniform values.
- DenseSet<const Use *> DivergentUses;
-};
-
-class LegacyDivergenceAnalysis : public FunctionPass,
- public LegacyDivergenceAnalysisImpl {
-public:
- static char ID;
-
- LegacyDivergenceAnalysis();
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool runOnFunction(Function &F) override;
-};
-
-class LegacyDivergenceAnalysisPass
- : public PassInfoMixin<LegacyDivergenceAnalysisPass>,
- public LegacyDivergenceAnalysisImpl {
-public:
- PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
-
-private:
- // (optional) handle to new DivergenceAnalysis
- std::unique_ptr<DivergenceInfo> gpuDA;
-
- // Stores all divergent values.
- DenseSet<const Value *> DivergentValues;
-
- // Stores divergent uses of possibly uniform values.
- DenseSet<const Use *> DivergentUses;
-};
-
-} // end namespace llvm
-
-#endif // LLVM_ANALYSIS_LEGACYDIVERGENCEANALYSIS_H
diff --git a/llvm/include/llvm/Analysis/Passes.h b/llvm/include/llvm/Analysis/Passes.h
index 343c239dede69..4b4d8a71ef3f5 100644
--- a/llvm/include/llvm/Analysis/Passes.h
+++ b/llvm/include/llvm/Analysis/Passes.h
@@ -46,13 +46,6 @@ namespace llvm {
//
FunctionPass *createDelinearizationPass();
- //===--------------------------------------------------------------------===//
- //
- // createLegacyDivergenceAnalysisPass - This pass determines which branches in a GPU
- // program are divergent.
- //
- FunctionPass *createLegacyDivergenceAnalysisPass();
-
//===--------------------------------------------------------------------===//
//
// Minor pass prototypes, allowing us to expose them through bugpoint and
diff --git a/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h b/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h
deleted file mode 100644
index e6e3efbe0fcb6..0000000000000
--- a/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h
+++ /dev/null
@@ -1,92 +0,0 @@
-//===- SyncDependenceAnalysis.h - Divergent Branch Dependence -*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// \file
-// This file defines the SyncDependenceAnalysis class, which computes for
-// every divergent branch the set of phi nodes that the branch will make
-// divergent.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ANALYSIS_SYNCDEPENDENCEANALYSIS_H
-#define LLVM_ANALYSIS_SYNCDEPENDENCEANALYSIS_H
-
-#include "llvm/ADT/SmallPtrSet.h"
-#include <map>
-#include <memory>
-#include <unordered_map>
-#include <vector>
-
-namespace llvm {
-
-class BasicBlock;
-class DominatorTree;
-class Instruction;
-class LoopInfo;
-class PostDominatorTree;
-
-using ConstBlockSet = SmallPtrSet<const BasicBlock *, 4>;
-struct ControlDivergenceDesc {
- // Join points of divergent disjoint paths.
- ConstBlockSet JoinDivBlocks;
- // Divergent loop exits
- ConstBlockSet LoopDivBlocks;
-};
-
-struct ModifiedPO {
- std::vector<const BasicBlock *> LoopPO;
- std::unordered_map<const BasicBlock *, unsigned> POIndex;
- void appendBlock(const BasicBlock &BB) {
- POIndex[&BB] = LoopPO.size();
- LoopPO.push_back(&BB);
- }
- unsigned getIndexOf(const BasicBlock &BB) const {
- return POIndex.find(&BB)->second;
- }
- unsigned size() const { return LoopPO.size(); }
- const BasicBlock *getBlockAt(unsigned Idx) const { return LoopPO[Idx]; }
-};
-
-/// \brief Relates points of divergent control to join points in
-/// reducible CFGs.
-///
-/// This analysis relates points of divergent control to points of converging
-/// divergent control. The analysis requires all loops to be reducible.
-class SyncDependenceAnalysis {
-public:
- ~SyncDependenceAnalysis();
- SyncDependenceAnalysis(const DominatorTree &DT, const PostDominatorTree &PDT,
- const LoopInfo &LI);
-
- /// \brief Computes divergent join points and loop exits caused by branch
- /// divergence in \p Term.
- ///
- /// The set of blocks which are reachable by disjoint paths from \p Term.
- /// The set also contains loop exits if there two disjoint paths:
- /// one from \p Term to the loop exit and another from \p Term to the loop
- /// header. Those exit blocks are added to the returned set.
- /// If L is the parent loop of \p Term and an exit of L is in the returned
- /// set then L is a divergent loop.
- const ControlDivergenceDesc &getJoinBlocks(const Instruction &Term);
-
-private:
- static ControlDivergenceDesc EmptyDivergenceDesc;
-
- ModifiedPO LoopPO;
-
- const DominatorTree &DT;
- const PostDominatorTree &PDT;
- const LoopInfo &LI;
-
- std::map<const Instruction *, std::unique_ptr<ControlDivergenceDesc>>
- CachedControlDivDescs;
-};
-
-} // namespace llvm
-
-#endif // LLVM_ANALYSIS_SYNCDEPENDENCEANALYSIS_H
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 02465e2d2f040..1ced968ad5858 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -404,16 +404,10 @@ class TargetTransformInfo {
/// branches.
bool hasBranchDivergence() const;
- /// Return true if the target prefers to use GPU divergence analysis to
- /// replace the legacy version.
- bool useGPUDivergenceAnalysis() const;
-
/// Returns whether V is a source of divergence.
///
/// This function provides the target-dependent information for
- /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis
- /// first builds the dependency graph, and then runs the reachability
- /// algorithm starting with the sources of divergence.
+ /// the target-independent UniformityAnalysis.
bool isSourceOfDivergence(const Value *V) const;
// Returns true for the target specific
@@ -1687,7 +1681,6 @@ class TargetTransformInfo::Concept {
TargetCostKind CostKind) = 0;
virtual BranchProbability getPredictableBranchThreshold() = 0;
virtual bool hasBranchDivergence() = 0;
- virtual bool useGPUDivergenceAnalysis() = 0;
virtual bool isSourceOfDivergence(const Value *V) = 0;
virtual bool isAlwaysUniform(const Value *V) = 0;
virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
@@ -2057,9 +2050,6 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
return Impl.getPredictableBranchThreshold();
}
bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
- bool useGPUDivergenceAnalysis() override {
- return Impl.useGPUDivergenceAnalysis();
- }
bool isSourceOfDivergence(const Value *V) override {
return Impl.isSourceOfDivergence(V);
}
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index a01ea1ef0c291..9ccba7f9a0b13 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -89,8 +89,6 @@ class TargetTransformInfoImplBase {
bool hasBranchDivergence() const { return false; }
- bool useGPUDivergenceAnalysis() const { return false; }
-
bool isSourceOfDivergence(const Value *V) const { return false; }
bool isAlwaysUniform(const Value *V) const { return false; }
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index fdb6109882e08..04d63db4cf231 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -278,8 +278,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
bool hasBranchDivergence() { return false; }
- bool useGPUDivergenceAnalysis() { return false; }
-
bool isSourceOfDivergence(const Value *V) { return false; }
bool isAlwaysUniform(const Value *V) { return false; }
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index c37608d7ec75b..9d2bc7ae255ca 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -175,7 +175,6 @@ void initializeLazyBranchProbabilityInfoPassPass(PassRegistry&);
void initializeLazyMachineBlockFrequencyInfoPassPass(PassRegistry&);
void initializeLazyValueInfoPrinterPass(PassRegistry&);
void initializeLazyValueInfoWrapperPassPass(PassRegistry&);
-void initializeLegacyDivergenceAnalysisPass(PassRegistry&);
void initializeLegacyLICMPassPass(PassRegistry&);
void initializeLegacyLoopSinkPassPass(PassRegistry&);
void initializeLegalizerPass(PassRegistry&);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index a95e6bd639d14..1fb83b72ae047 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -94,7 +94,6 @@ namespace {
(void) llvm::createInstructionCombiningPass();
(void) llvm::createJMCInstrumenterPass();
(void) llvm::createLCSSAPass();
- (void) llvm::createLegacyDivergenceAnalysisPass();
(void) llvm::createLICMPass();
(void) llvm::createLoopSinkPass();
(void) llvm::createLazyValueInfoPass();
diff --git a/llvm/lib/Analysis/Analysis.cpp b/llvm/lib/Analysis/Analysis.cpp
index 9a9d268878db6..adb81ecb65e9d 100644
--- a/llvm/lib/Analysis/Analysis.cpp
+++ b/llvm/lib/Analysis/Analysis.cpp
@@ -53,7 +53,6 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeLazyBlockFrequencyInfoPassPass(Registry);
initializeLazyValueInfoWrapperPassPass(Registry);
initializeLazyValueInfoPrinterPass(Registry);
- initializeLegacyDivergenceAnalysisPass(Registry);
initializeLintLegacyPassPass(Registry);
initializeLoopInfoWrapperPassPass(Registry);
initializeMemDepPrinterPass(Registry);
diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
index d25eb5c702a7e..2f684e6ced3f7 100644
--- a/llvm/lib/Analysis/CMakeLists.txt
+++ b/llvm/lib/Analysis/CMakeLists.txt
@@ -56,7 +56,6 @@ add_llvm_component_library(LLVMAnalysis
DependenceAnalysis.cpp
DependenceGraphBuilder.cpp
DevelopmentModeInlineAdvisor.cpp
- DivergenceAnalysis.cpp
DomPrinter.cpp
DomTreeUpdater.cpp
DominanceFrontier.cpp
@@ -83,7 +82,6 @@ add_llvm_component_library(LLVMAnalysis
LazyBlockFrequencyInfo.cpp
LazyCallGraph.cpp
LazyValueInfo.cpp
- LegacyDivergenceAnalysis.cpp
Lint.cpp
Loads.cpp
Local.cpp
@@ -128,7 +126,6 @@ add_llvm_component_library(LLVMAnalysis
ScalarEvolutionNormalization.cpp
StackLifetime.cpp
StackSafetyAnalysis.cpp
- SyncDependenceAnalysis.cpp
SyntheticCountsUtils.cpp
TFLiteUtils.cpp
TargetLibraryInfo.cpp
diff --git a/llvm/lib/Analysis/DivergenceAnalysis.cpp b/llvm/lib/Analysis/DivergenceAnalysis.cpp
deleted file mode 100644
index 02c40d2640c18..0000000000000
--- a/llvm/lib/Analysis/DivergenceAnalysis.cpp
+++ /dev/null
@@ -1,409 +0,0 @@
-//===---- DivergenceAnalysis.cpp --- Divergence Analysis Implementation ----==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a general divergence analysis for loop vectorization
-// and GPU programs. It determines which branches and values in a loop or GPU
-// program are divergent. It can help branch optimizations such as jump
-// threading and loop unswitching to make better decisions.
-//
-// GPU programs typically use the SIMD execution model, where multiple threads
-// in the same execution group have to execute in lock-step. Therefore, if the
-// code contains divergent branches (i.e., threads in a group do not agree on
-// which path of the branch to take), the group of threads has to execute all
-// the paths from that branch with
diff erent subsets of threads enabled until
-// they re-converge.
-//
-// Due to this execution model, some optimizations such as jump
-// threading and loop unswitching can interfere with thread re-convergence.
-// Therefore, an analysis that computes which branches in a GPU program are
-// divergent can help the compiler to selectively run these optimizations.
-//
-// This implementation is derived from the Vectorization Analysis of the
-// Region Vectorizer (RV). The analysis is based on the approach described in
-//
-// An abstract interpretation for SPMD divergence
-// on reducible control flow graphs.
-// Julian Rosemann, Simon Moll and Sebastian Hack
-// POPL '21
-//
-// This implementation is generic in the sense that it does
-// not itself identify original sources of divergence.
-// Instead specialized adapter classes, (LoopDivergenceAnalysis) for loops and
-// (DivergenceAnalysis) for functions, identify the sources of divergence
-// (e.g., special variables that hold the thread ID or the iteration variable).
-//
-// The generic implementation propagates divergence to variables that are data
-// or sync dependent on a source of divergence.
-//
-// While data dependency is a well-known concept, the notion of sync dependency
-// is worth more explanation. Sync dependence characterizes the control flow
-// aspect of the propagation of branch divergence. For example,
-//
-// %cond = icmp slt i32 %tid, 10
-// br i1 %cond, label %then, label %else
-// then:
-// br label %merge
-// else:
-// br label %merge
-// merge:
-// %a = phi i32 [ 0, %then ], [ 1, %else ]
-//
-// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
-// because %tid is not on its use-def chains, %a is sync dependent on %tid
-// because the branch "br i1 %cond" depends on %tid and affects which value %a
-// is assigned to.
-//
-// The sync dependence detection (which branch induces divergence in which join
-// points) is implemented in the SyncDependenceAnalysis.
-//
-// The current implementation has the following limitations:
-// 1. intra-procedural. It conservatively considers the arguments of a
-// non-kernel-entry function and the return value of a function call as
-// divergent.
-// 2. memory as black box. It conservatively considers values loaded from
-// generic or local address as divergent. This can be improved by leveraging
-// pointer analysis and/or by modelling non-escaping memory objects in SSA
-// as done in RV.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/DivergenceAnalysis.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/PostDominators.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "divergence"
-
-DivergenceAnalysisImpl::DivergenceAnalysisImpl(
- const Function &F, const Loop *RegionLoop, const DominatorTree &DT,
- const LoopInfo &LI, SyncDependenceAnalysis &SDA, bool IsLCSSAForm)
- : F(F), RegionLoop(RegionLoop), DT(DT), LI(LI), SDA(SDA),
- IsLCSSAForm(IsLCSSAForm) {}
-
-bool DivergenceAnalysisImpl::markDivergent(const Value &DivVal) {
- if (isAlwaysUniform(DivVal))
- return false;
- assert(isa<Instruction>(DivVal) || isa<Argument>(DivVal));
- assert(!isAlwaysUniform(DivVal) && "cannot be a divergent");
- return DivergentValues.insert(&DivVal).second;
-}
-
-void DivergenceAnalysisImpl::addUniformOverride(const Value &UniVal) {
- UniformOverrides.insert(&UniVal);
-}
-
-bool DivergenceAnalysisImpl::isTemporalDivergent(
- const BasicBlock &ObservingBlock, const Value &Val) const {
- const auto *Inst = dyn_cast<const Instruction>(&Val);
- if (!Inst)
- return false;
- // check whether any divergent loop carrying Val terminates before control
- // proceeds to ObservingBlock
- for (const auto *Loop = LI.getLoopFor(Inst->getParent());
- Loop != RegionLoop && !Loop->contains(&ObservingBlock);
- Loop = Loop->getParentLoop()) {
- if (DivergentLoops.contains(Loop))
- return true;
- }
-
- return false;
-}
-
-bool DivergenceAnalysisImpl::inRegion(const Instruction &I) const {
- return I.getParent() && inRegion(*I.getParent());
-}
-
-bool DivergenceAnalysisImpl::inRegion(const BasicBlock &BB) const {
- return RegionLoop ? RegionLoop->contains(&BB) : (BB.getParent() == &F);
-}
-
-void DivergenceAnalysisImpl::pushUsers(const Value &V) {
- const auto *I = dyn_cast<const Instruction>(&V);
-
- if (I && I->isTerminator()) {
- analyzeControlDivergence(*I);
- return;
- }
-
- for (const auto *User : V.users()) {
- const auto *UserInst = dyn_cast<const Instruction>(User);
- if (!UserInst)
- continue;
-
- // only compute divergent inside loop
- if (!inRegion(*UserInst))
- continue;
-
- // All users of divergent values are immediate divergent
- if (markDivergent(*UserInst))
- Worklist.push_back(UserInst);
- }
-}
-
-static const Instruction *getIfCarriedInstruction(const Use &U,
- const Loop &DivLoop) {
- const auto *I = dyn_cast<const Instruction>(&U);
- if (!I)
- return nullptr;
- if (!DivLoop.contains(I))
- return nullptr;
- return I;
-}
-
-void DivergenceAnalysisImpl::analyzeTemporalDivergence(
- const Instruction &I, const Loop &OuterDivLoop) {
- if (isAlwaysUniform(I))
- return;
- if (isDivergent(I))
- return;
-
- LLVM_DEBUG(dbgs() << "Analyze temporal divergence: " << I.getName() << "\n");
- assert((isa<PHINode>(I) || !IsLCSSAForm) &&
- "In LCSSA form all users of loop-exiting defs are Phi nodes.");
- for (const Use &Op : I.operands()) {
- const auto *OpInst = getIfCarriedInstruction(Op, OuterDivLoop);
- if (!OpInst)
- continue;
- if (markDivergent(I))
- pushUsers(I);
- return;
- }
-}
-
-// marks all users of loop-carried values of the loop headed by LoopHeader as
-// divergent
-void DivergenceAnalysisImpl::analyzeLoopExitDivergence(
- const BasicBlock &DivExit, const Loop &OuterDivLoop) {
- // All users are in immediate exit blocks
- if (IsLCSSAForm) {
- for (const auto &Phi : DivExit.phis()) {
- analyzeTemporalDivergence(Phi, OuterDivLoop);
- }
- return;
- }
-
- // For non-LCSSA we have to follow all live out edges wherever they may lead.
- const BasicBlock &LoopHeader = *OuterDivLoop.getHeader();
- SmallVector<const BasicBlock *, 8> TaintStack;
- TaintStack.push_back(&DivExit);
-
- // Otherwise potential users of loop-carried values could be anywhere in the
- // dominance region of DivLoop (including its fringes for phi nodes)
- DenseSet<const BasicBlock *> Visited;
- Visited.insert(&DivExit);
-
- do {
- auto *UserBlock = TaintStack.pop_back_val();
-
- // don't spread divergence beyond the region
- if (!inRegion(*UserBlock))
- continue;
-
- assert(!OuterDivLoop.contains(UserBlock) &&
- "irreducible control flow detected");
-
- // phi nodes at the fringes of the dominance region
- if (!DT.dominates(&LoopHeader, UserBlock)) {
- // all PHI nodes of UserBlock become divergent
- for (const auto &Phi : UserBlock->phis()) {
- analyzeTemporalDivergence(Phi, OuterDivLoop);
- }
- continue;
- }
-
- // Taint outside users of values carried by OuterDivLoop.
- for (const auto &I : *UserBlock) {
- analyzeTemporalDivergence(I, OuterDivLoop);
- }
-
- // visit all blocks in the dominance region
- for (const auto *SuccBlock : successors(UserBlock)) {
- if (!Visited.insert(SuccBlock).second) {
- continue;
- }
- TaintStack.push_back(SuccBlock);
- }
- } while (!TaintStack.empty());
-}
-
-void DivergenceAnalysisImpl::propagateLoopExitDivergence(
- const BasicBlock &DivExit, const Loop &InnerDivLoop) {
- LLVM_DEBUG(dbgs() << "\tpropLoopExitDiv " << DivExit.getName() << "\n");
-
- // Find outer-most loop that does not contain \p DivExit
- const Loop *DivLoop = &InnerDivLoop;
- const Loop *OuterDivLoop = DivLoop;
- const Loop *ExitLevelLoop = LI.getLoopFor(&DivExit);
- const unsigned LoopExitDepth =
- ExitLevelLoop ? ExitLevelLoop->getLoopDepth() : 0;
- while (DivLoop && DivLoop->getLoopDepth() > LoopExitDepth) {
- DivergentLoops.insert(DivLoop); // all crossed loops are divergent
- OuterDivLoop = DivLoop;
- DivLoop = DivLoop->getParentLoop();
- }
- LLVM_DEBUG(dbgs() << "\tOuter-most left loop: " << OuterDivLoop->getName()
- << "\n");
-
- analyzeLoopExitDivergence(DivExit, *OuterDivLoop);
-}
-
-// this is a divergent join point - mark all phi nodes as divergent and push
-// them onto the stack.
-void DivergenceAnalysisImpl::taintAndPushPhiNodes(const BasicBlock &JoinBlock) {
- LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << JoinBlock.getName()
- << "\n");
-
- // ignore divergence outside the region
- if (!inRegion(JoinBlock)) {
- return;
- }
-
- // push non-divergent phi nodes in JoinBlock to the worklist
- for (const auto &Phi : JoinBlock.phis()) {
- if (isDivergent(Phi))
- continue;
- // FIXME Theoretically ,the 'undef' value could be replaced by any other
- // value causing spurious divergence.
- if (Phi.hasConstantOrUndefValue())
- continue;
- if (markDivergent(Phi))
- Worklist.push_back(&Phi);
- }
-}
-
-void DivergenceAnalysisImpl::analyzeControlDivergence(const Instruction &Term) {
- LLVM_DEBUG(dbgs() << "analyzeControlDiv " << Term.getParent()->getName()
- << "\n");
-
- // Don't propagate divergence from unreachable blocks.
- if (!DT.isReachableFromEntry(Term.getParent()))
- return;
-
- const auto *BranchLoop = LI.getLoopFor(Term.getParent());
-
- const auto &DivDesc = SDA.getJoinBlocks(Term);
-
- // Iterate over all blocks now reachable by a disjoint path join
- for (const auto *JoinBlock : DivDesc.JoinDivBlocks) {
- taintAndPushPhiNodes(*JoinBlock);
- }
-
- assert(DivDesc.LoopDivBlocks.empty() || BranchLoop);
- for (const auto *DivExitBlock : DivDesc.LoopDivBlocks) {
- propagateLoopExitDivergence(*DivExitBlock, *BranchLoop);
- }
-}
-
-void DivergenceAnalysisImpl::compute() {
- // Initialize worklist.
- auto DivValuesCopy = DivergentValues;
- for (const auto *DivVal : DivValuesCopy) {
- assert(isDivergent(*DivVal) && "Worklist invariant violated!");
- pushUsers(*DivVal);
- }
-
- // All values on the Worklist are divergent.
- // Their users may not have been updated yed.
- while (!Worklist.empty()) {
- const Instruction &I = *Worklist.back();
- Worklist.pop_back();
-
- // propagate value divergence to users
- assert(isDivergent(I) && "Worklist invariant violated!");
- pushUsers(I);
- }
-}
-
-bool DivergenceAnalysisImpl::isAlwaysUniform(const Value &V) const {
- return UniformOverrides.contains(&V);
-}
-
-bool DivergenceAnalysisImpl::isDivergent(const Value &V) const {
- return DivergentValues.contains(&V);
-}
-
-bool DivergenceAnalysisImpl::isDivergentUse(const Use &U) const {
- Value &V = *U.get();
- Instruction &I = *cast<Instruction>(U.getUser());
- return isDivergent(V) || isTemporalDivergent(*I.getParent(), V);
-}
-
-DivergenceInfo::DivergenceInfo(Function &F, const DominatorTree &DT,
- const PostDominatorTree &PDT, const LoopInfo &LI,
- const TargetTransformInfo &TTI,
- bool KnownReducible)
- : F(F) {
- if (!KnownReducible) {
- using RPOTraversal = ReversePostOrderTraversal<const Function *>;
- RPOTraversal FuncRPOT(&F);
- if (containsIrreducibleCFG<const BasicBlock *, const RPOTraversal,
- const LoopInfo>(FuncRPOT, LI)) {
- ContainsIrreducible = true;
- return;
- }
- }
- SDA = std::make_unique<SyncDependenceAnalysis>(DT, PDT, LI);
- DA = std::make_unique<DivergenceAnalysisImpl>(F, nullptr, DT, LI, *SDA,
- /* LCSSA */ false);
- for (auto &I : instructions(F)) {
- if (TTI.isSourceOfDivergence(&I)) {
- DA->markDivergent(I);
- } else if (TTI.isAlwaysUniform(&I)) {
- DA->addUniformOverride(I);
- }
- }
- for (auto &Arg : F.args()) {
- if (TTI.isSourceOfDivergence(&Arg)) {
- DA->markDivergent(Arg);
- }
- }
-
- DA->compute();
-}
-
-AnalysisKey DivergenceAnalysis::Key;
-
-DivergenceAnalysis::Result
-DivergenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
- auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
- auto &LI = AM.getResult<LoopAnalysis>(F);
- auto &TTI = AM.getResult<TargetIRAnalysis>(F);
-
- return DivergenceInfo(F, DT, PDT, LI, TTI, /* KnownReducible = */ false);
-}
-
-PreservedAnalyses
-DivergenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) {
- auto &DI = FAM.getResult<DivergenceAnalysis>(F);
- OS << "'Divergence Analysis' for function '" << F.getName() << "':\n";
- if (DI.hasDivergence()) {
- for (auto &Arg : F.args()) {
- OS << (DI.isDivergent(Arg) ? "DIVERGENT: " : " ");
- OS << Arg << "\n";
- }
- for (const BasicBlock &BB : F) {
- OS << "\n " << BB.getName() << ":\n";
- for (const auto &I : BB.instructionsWithoutDebug()) {
- OS << (DI.isDivergent(I) ? "DIVERGENT: " : " ");
- OS << I << "\n";
- }
- }
- }
- return PreservedAnalyses::all();
-}
diff --git a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
deleted file mode 100644
index baa7e9daa0ae9..0000000000000
--- a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
+++ /dev/null
@@ -1,435 +0,0 @@
-//===- LegacyDivergenceAnalysis.cpp --------- Legacy Divergence Analysis
-//Implementation -==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements divergence analysis which determines whether a branch
-// in a GPU program is divergent.It can help branch optimizations such as jump
-// threading and loop unswitching to make better decisions.
-//
-// GPU programs typically use the SIMD execution model, where multiple threads
-// in the same execution group have to execute in lock-step. Therefore, if the
-// code contains divergent branches (i.e., threads in a group do not agree on
-// which path of the branch to take), the group of threads has to execute all
-// the paths from that branch with
diff erent subsets of threads enabled until
-// they converge at the immediately post-dominating BB of the paths.
-//
-// Due to this execution model, some optimizations such as jump
-// threading and loop unswitching can be unfortunately harmful when performed on
-// divergent branches. Therefore, an analysis that computes which branches in a
-// GPU program are divergent can help the compiler to selectively run these
-// optimizations.
-//
-// This file defines divergence analysis which computes a conservative but
-// non-trivial approximation of all divergent branches in a GPU program. It
-// partially implements the approach described in
-//
-// Divergence Analysis
-// Sampaio, Souza, Collange, Pereira
-// TOPLAS '13
-//
-// The divergence analysis identifies the sources of divergence (e.g., special
-// variables that hold the thread ID), and recursively marks variables that are
-// data or sync dependent on a source of divergence as divergent.
-//
-// While data dependency is a well-known concept, the notion of sync dependency
-// is worth more explanation. Sync dependence characterizes the control flow
-// aspect of the propagation of branch divergence. For example,
-//
-// %cond = icmp slt i32 %tid, 10
-// br i1 %cond, label %then, label %else
-// then:
-// br label %merge
-// else:
-// br label %merge
-// merge:
-// %a = phi i32 [ 0, %then ], [ 1, %else ]
-//
-// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
-// because %tid is not on its use-def chains, %a is sync dependent on %tid
-// because the branch "br i1 %cond" depends on %tid and affects which value %a
-// is assigned to.
-//
-// The current implementation has the following limitations:
-// 1. intra-procedural. It conservatively considers the arguments of a
-// non-kernel-entry function and the return value of a function call as
-// divergent.
-// 2. memory as black box. It conservatively considers values loaded from
-// generic or local address as divergent. This can be improved by leveraging
-// pointer analysis.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/PostDominators.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <vector>
-using namespace llvm;
-
-#define DEBUG_TYPE "divergence"
-
-// transparently use the GPUDivergenceAnalysis
-static cl::opt<bool> UseGPUDA("use-gpu-divergence-analysis", cl::init(false),
- cl::Hidden,
- cl::desc("turn the LegacyDivergenceAnalysis into "
- "a wrapper for GPUDivergenceAnalysis"));
-
-namespace {
-
-class DivergencePropagator {
-public:
- DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
- PostDominatorTree &PDT, DenseSet<const Value *> &DV,
- DenseSet<const Use *> &DU)
- : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV), DU(DU) {}
- void populateWithSourcesOfDivergence();
- void propagate();
-
-private:
- // A helper function that explores data dependents of V.
- void exploreDataDependency(Value *V);
- // A helper function that explores sync dependents of TI.
- void exploreSyncDependency(Instruction *TI);
- // Computes the influence region from Start to End. This region includes all
- // basic blocks on any simple path from Start to End.
- void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End,
- DenseSet<BasicBlock *> &InfluenceRegion);
- // Finds all users of I that are outside the influence region, and add these
- // users to Worklist.
- void findUsersOutsideInfluenceRegion(
- Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion);
-
- Function &F;
- TargetTransformInfo &TTI;
- DominatorTree &DT;
- PostDominatorTree &PDT;
- std::vector<Value *> Worklist; // Stack for DFS.
- DenseSet<const Value *> &DV; // Stores all divergent values.
- DenseSet<const Use *> &DU; // Stores divergent uses of possibly uniform
- // values.
-};
-
-void DivergencePropagator::populateWithSourcesOfDivergence() {
- Worklist.clear();
- DV.clear();
- DU.clear();
- for (auto &I : instructions(F)) {
- if (TTI.isSourceOfDivergence(&I)) {
- Worklist.push_back(&I);
- DV.insert(&I);
- }
- }
- for (auto &Arg : F.args()) {
- if (TTI.isSourceOfDivergence(&Arg)) {
- Worklist.push_back(&Arg);
- DV.insert(&Arg);
- }
- }
-}
-
-void DivergencePropagator::exploreSyncDependency(Instruction *TI) {
- // Propagation rule 1: if branch TI is divergent, all PHINodes in TI's
- // immediate post dominator are divergent. This rule handles if-then-else
- // patterns. For example,
- //
- // if (tid < 5)
- // a1 = 1;
- // else
- // a2 = 2;
- // a = phi(a1, a2); // sync dependent on (tid < 5)
- BasicBlock *ThisBB = TI->getParent();
-
- // Unreachable blocks may not be in the dominator tree.
- if (!DT.isReachableFromEntry(ThisBB))
- return;
-
- // If the function has no exit blocks or doesn't reach any exit blocks, the
- // post dominator may be null.
- DomTreeNode *ThisNode = PDT.getNode(ThisBB);
- if (!ThisNode)
- return;
-
- BasicBlock *IPostDom = ThisNode->getIDom()->getBlock();
- if (IPostDom == nullptr)
- return;
-
- for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) {
- // A PHINode is uniform if it returns the same value no matter which path is
- // taken.
- if (!cast<PHINode>(I)->hasConstantOrUndefValue() && DV.insert(&*I).second)
- Worklist.push_back(&*I);
- }
-
- // Propagation rule 2: if a value defined in a loop is used outside, the user
- // is sync dependent on the condition of the loop exits that dominate the
- // user. For example,
- //
- // int i = 0;
- // do {
- // i++;
- // if (foo(i)) ... // uniform
- // } while (i < tid);
- // if (bar(i)) ... // divergent
- //
- // A program may contain unstructured loops. Therefore, we cannot leverage
- // LoopInfo, which only recognizes natural loops.
- //
- // The algorithm used here handles both natural and unstructured loops. Given
- // a branch TI, we first compute its influence region, the union of all simple
- // paths from TI to its immediate post dominator (IPostDom). Then, we search
- // for all the values defined in the influence region but used outside. All
- // these users are sync dependent on TI.
- DenseSet<BasicBlock *> InfluenceRegion;
- computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion);
- // An insight that can speed up the search process is that all the in-region
- // values that are used outside must dominate TI. Therefore, instead of
- // searching every basic blocks in the influence region, we search all the
- // dominators of TI until it is outside the influence region.
- BasicBlock *InfluencedBB = ThisBB;
- while (InfluenceRegion.count(InfluencedBB)) {
- for (auto &I : *InfluencedBB) {
- if (!DV.count(&I))
- findUsersOutsideInfluenceRegion(I, InfluenceRegion);
- }
- DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom();
- if (IDomNode == nullptr)
- break;
- InfluencedBB = IDomNode->getBlock();
- }
-}
-
-void DivergencePropagator::findUsersOutsideInfluenceRegion(
- Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) {
- for (Use &Use : I.uses()) {
- Instruction *UserInst = cast<Instruction>(Use.getUser());
- if (!InfluenceRegion.count(UserInst->getParent())) {
- DU.insert(&Use);
- if (DV.insert(UserInst).second)
- Worklist.push_back(UserInst);
- }
- }
-}
-
-// A helper function for computeInfluenceRegion that adds successors of "ThisBB"
-// to the influence region.
-static void
-addSuccessorsToInfluenceRegion(BasicBlock *ThisBB, BasicBlock *End,
- DenseSet<BasicBlock *> &InfluenceRegion,
- std::vector<BasicBlock *> &InfluenceStack) {
- for (BasicBlock *Succ : successors(ThisBB)) {
- if (Succ != End && InfluenceRegion.insert(Succ).second)
- InfluenceStack.push_back(Succ);
- }
-}
-
-void DivergencePropagator::computeInfluenceRegion(
- BasicBlock *Start, BasicBlock *End,
- DenseSet<BasicBlock *> &InfluenceRegion) {
- assert(PDT.properlyDominates(End, Start) &&
- "End does not properly dominate Start");
-
- // The influence region starts from the end of "Start" to the beginning of
- // "End". Therefore, "Start" should not be in the region unless "Start" is in
- // a loop that doesn't contain "End".
- std::vector<BasicBlock *> InfluenceStack;
- addSuccessorsToInfluenceRegion(Start, End, InfluenceRegion, InfluenceStack);
- while (!InfluenceStack.empty()) {
- BasicBlock *BB = InfluenceStack.back();
- InfluenceStack.pop_back();
- addSuccessorsToInfluenceRegion(BB, End, InfluenceRegion, InfluenceStack);
- }
-}
-
-void DivergencePropagator::exploreDataDependency(Value *V) {
- // Follow def-use chains of V.
- for (User *U : V->users()) {
- if (!TTI.isAlwaysUniform(U) && DV.insert(U).second)
- Worklist.push_back(U);
- }
-}
-
-void DivergencePropagator::propagate() {
- // Traverse the dependency graph using DFS.
- while (!Worklist.empty()) {
- Value *V = Worklist.back();
- Worklist.pop_back();
- if (Instruction *I = dyn_cast<Instruction>(V)) {
- // Terminators with less than two successors won't introduce sync
- // dependency. Ignore them.
- if (I->isTerminator() && I->getNumSuccessors() > 1)
- exploreSyncDependency(I);
- }
- exploreDataDependency(V);
- }
-}
-
-} // namespace
-
-// Register this pass.
-char LegacyDivergenceAnalysis::ID = 0;
-LegacyDivergenceAnalysis::LegacyDivergenceAnalysis() : FunctionPass(ID) {
- initializeLegacyDivergenceAnalysisPass(*PassRegistry::getPassRegistry());
-}
-INITIALIZE_PASS_BEGIN(LegacyDivergenceAnalysis, "divergence",
- "Legacy Divergence Analysis", false, true)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_END(LegacyDivergenceAnalysis, "divergence",
- "Legacy Divergence Analysis", false, true)
-
-FunctionPass *llvm::createLegacyDivergenceAnalysisPass() {
- return new LegacyDivergenceAnalysis();
-}
-
-bool LegacyDivergenceAnalysisImpl::shouldUseGPUDivergenceAnalysis(
- const Function &F, const TargetTransformInfo &TTI, const LoopInfo &LI) {
- if (!(UseGPUDA || TTI.useGPUDivergenceAnalysis()))
- return false;
-
- // GPUDivergenceAnalysis requires a reducible CFG.
- using RPOTraversal = ReversePostOrderTraversal<const Function *>;
- RPOTraversal FuncRPOT(&F);
- return !containsIrreducibleCFG<const BasicBlock *, const RPOTraversal,
- const LoopInfo>(FuncRPOT, LI);
-}
-
-void LegacyDivergenceAnalysisImpl::run(Function &F,
- llvm::TargetTransformInfo &TTI,
- llvm::DominatorTree &DT,
- llvm::PostDominatorTree &PDT,
- const llvm::LoopInfo &LI) {
- if (shouldUseGPUDivergenceAnalysis(F, TTI, LI)) {
- // run the new GPU divergence analysis
- gpuDA = std::make_unique<DivergenceInfo>(F, DT, PDT, LI, TTI,
- /* KnownReducible = */ true);
-
- } else {
- // run LLVM's existing DivergenceAnalysis
- DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues, DivergentUses);
- DP.populateWithSourcesOfDivergence();
- DP.propagate();
- }
-}
-
-bool LegacyDivergenceAnalysisImpl::isDivergent(const Value *V) const {
- if (gpuDA) {
- return gpuDA->isDivergent(*V);
- }
- return DivergentValues.count(V);
-}
-
-bool LegacyDivergenceAnalysisImpl::isDivergentUse(const Use *U) const {
- if (gpuDA) {
- return gpuDA->isDivergentUse(*U);
- }
- return DivergentValues.count(U->get()) || DivergentUses.count(U);
-}
-
-void LegacyDivergenceAnalysisImpl::print(raw_ostream &OS,
- const Module *) const {
- if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty())
- return;
-
- const Function *F = nullptr;
- if (!DivergentValues.empty()) {
- const Value *FirstDivergentValue = *DivergentValues.begin();
- if (const Argument *Arg = dyn_cast<Argument>(FirstDivergentValue)) {
- F = Arg->getParent();
- } else if (const Instruction *I =
- dyn_cast<Instruction>(FirstDivergentValue)) {
- F = I->getParent()->getParent();
- } else {
- llvm_unreachable("Only arguments and instructions can be divergent");
- }
- } else if (gpuDA) {
- F = &gpuDA->getFunction();
- }
- if (!F)
- return;
-
- // Dumps all divergent values in F, arguments and then instructions.
- for (const auto &Arg : F->args()) {
- OS << (isDivergent(&Arg) ? "DIVERGENT: " : " ");
- OS << Arg << "\n";
- }
- // Iterate instructions using instructions() to ensure a deterministic order.
- for (const BasicBlock &BB : *F) {
- OS << "\n " << BB.getName() << ":\n";
- for (const auto &I : BB.instructionsWithoutDebug()) {
- OS << (isDivergent(&I) ? "DIVERGENT: " : " ");
- OS << I << "\n";
- }
- }
- OS << "\n";
-}
-
-void LegacyDivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequiredTransitive<DominatorTreeWrapperPass>();
- AU.addRequiredTransitive<PostDominatorTreeWrapperPass>();
- AU.addRequiredTransitive<LoopInfoWrapperPass>();
- AU.setPreservesAll();
-}
-
-bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
- auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
- if (TTIWP == nullptr)
- return false;
-
- TargetTransformInfo &TTI = TTIWP->getTTI(F);
- // Fast path: if the target does not have branch divergence, we do not mark
- // any branch as divergent.
- if (!TTI.hasBranchDivergence())
- return false;
-
- DivergentValues.clear();
- DivergentUses.clear();
- gpuDA = nullptr;
-
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
- auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- LegacyDivergenceAnalysisImpl::run(F, TTI, DT, PDT, LI);
- LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName()
- << ":\n";
- LegacyDivergenceAnalysisImpl::print(dbgs(), F.getParent()));
-
- return false;
-}
-
-PreservedAnalyses
-LegacyDivergenceAnalysisPass::run(Function &F, FunctionAnalysisManager &AM) {
- auto &TTI = AM.getResult<TargetIRAnalysis>(F);
- if (!TTI.hasBranchDivergence())
- return PreservedAnalyses::all();
-
- DivergentValues.clear();
- DivergentUses.clear();
- gpuDA = nullptr;
-
- auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
- auto &LI = AM.getResult<LoopAnalysis>(F);
- LegacyDivergenceAnalysisImpl::run(F, TTI, DT, PDT, LI);
- LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName()
- << ":\n";
- LegacyDivergenceAnalysisImpl::print(dbgs(), F.getParent()));
- return PreservedAnalyses::all();
-}
diff --git a/llvm/lib/Analysis/SyncDependenceAnalysis.cpp b/llvm/lib/Analysis/SyncDependenceAnalysis.cpp
deleted file mode 100644
index 17d7676024a5d..0000000000000
--- a/llvm/lib/Analysis/SyncDependenceAnalysis.cpp
+++ /dev/null
@@ -1,478 +0,0 @@
-//===--- SyncDependenceAnalysis.cpp - Compute Control Divergence Effects --===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements an algorithm that returns for a divergent branch
-// the set of basic blocks whose phi nodes become divergent due to divergent
-// control. These are the blocks that are reachable by two disjoint paths from
-// the branch or loop exits that have a reaching path that is disjoint from a
-// path to the loop latch.
-//
-// The SyncDependenceAnalysis is used in the DivergenceAnalysis to model
-// control-induced divergence in phi nodes.
-//
-//
-// -- Reference --
-// The algorithm is presented in Section 5 of
-//
-// An abstract interpretation for SPMD divergence
-// on reducible control flow graphs.
-// Julian Rosemann, Simon Moll and Sebastian Hack
-// POPL '21
-//
-//
-// -- Sync dependence --
-// Sync dependence characterizes the control flow aspect of the
-// propagation of branch divergence. For example,
-//
-// %cond = icmp slt i32 %tid, 10
-// br i1 %cond, label %then, label %else
-// then:
-// br label %merge
-// else:
-// br label %merge
-// merge:
-// %a = phi i32 [ 0, %then ], [ 1, %else ]
-//
-// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
-// because %tid is not on its use-def chains, %a is sync dependent on %tid
-// because the branch "br i1 %cond" depends on %tid and affects which value %a
-// is assigned to.
-//
-//
-// -- Reduction to SSA construction --
-// There are two disjoint paths from A to X, if a certain variant of SSA
-// construction places a phi node in X under the following set-up scheme.
-//
-// This variant of SSA construction ignores incoming undef values.
-// That is paths from the entry without a definition do not result in
-// phi nodes.
-//
-// entry
-// / \
-// A \
-// / \ Y
-// B C /
-// \ / \ /
-// D E
-// \ /
-// F
-//
-// Assume that A contains a divergent branch. We are interested
-// in the set of all blocks where each block is reachable from A
-// via two disjoint paths. This would be the set {D, F} in this
-// case.
-// To generally reduce this query to SSA construction we introduce
-// a virtual variable x and assign to x
diff erent values in each
-// successor block of A.
-//
-// entry
-// / \
-// A \
-// / \ Y
-// x = 0 x = 1 /
-// \ / \ /
-// D E
-// \ /
-// F
-//
-// Our flavor of SSA construction for x will construct the following
-//
-// entry
-// / \
-// A \
-// / \ Y
-// x0 = 0 x1 = 1 /
-// \ / \ /
-// x2 = phi E
-// \ /
-// x3 = phi
-//
-// The blocks D and F contain phi nodes and are thus each reachable
-// by two disjoins paths from A.
-//
-// -- Remarks --
-// * In case of loop exits we need to check the disjoint path criterion for loops.
-// To this end, we check whether the definition of x
diff ers between the
-// loop exit and the loop header (_after_ SSA construction).
-//
-// -- Known Limitations & Future Work --
-// * The algorithm requires reducible loops because the implementation
-// implicitly performs a single iteration of the underlying data flow analysis.
-// This was done for pragmatism, simplicity and speed.
-//
-// Relevant related work for extending the algorithm to irreducible control:
-// A simple algorithm for global data flow analysis problems.
-// Matthew S. Hecht and Jeffrey D. Ullman.
-// SIAM Journal on Computing, 4(4):519–532, December 1975.
-//
-// * Another reason for requiring reducible loops is that points of
-// synchronization in irreducible loops aren't 'obvious' - there is no unique
-// header where threads 'should' synchronize when entering or coming back
-// around from the latch.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/SyncDependenceAnalysis.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-
-#include <functional>
-
-#define DEBUG_TYPE "sync-dependence"
-
-// The SDA algorithm operates on a modified CFG - we modify the edges leaving
-// loop headers as follows:
-//
-// * We remove all edges leaving all loop headers.
-// * We add additional edges from the loop headers to their exit blocks.
-//
-// The modification is virtual, that is whenever we visit a loop header we
-// pretend it had
diff erent successors.
-namespace {
-using namespace llvm;
-
-// Custom Post-Order Traveral
-//
-// We cannot use the vanilla (R)PO computation of LLVM because:
-// * We (virtually) modify the CFG.
-// * We want a loop-compact block enumeration, that is the numbers assigned to
-// blocks of a loop form an interval
-//
-using POCB = std::function<void(const BasicBlock &)>;
-using VisitedSet = std::set<const BasicBlock *>;
-using BlockStack = std::vector<const BasicBlock *>;
-
-// forward
-static void computeLoopPO(const LoopInfo &LI, Loop &Loop, POCB CallBack,
- VisitedSet &Finalized);
-
-// for a nested region (top-level loop or nested loop)
-static void computeStackPO(BlockStack &Stack, const LoopInfo &LI, Loop *Loop,
- POCB CallBack, VisitedSet &Finalized) {
- const auto *LoopHeader = Loop ? Loop->getHeader() : nullptr;
- while (!Stack.empty()) {
- const auto *NextBB = Stack.back();
-
- auto *NestedLoop = LI.getLoopFor(NextBB);
- bool IsNestedLoop = NestedLoop != Loop;
-
- // Treat the loop as a node
- if (IsNestedLoop) {
- SmallVector<BasicBlock *, 3> NestedExits;
- NestedLoop->getUniqueExitBlocks(NestedExits);
- bool PushedNodes = false;
- for (const auto *NestedExitBB : NestedExits) {
- if (NestedExitBB == LoopHeader)
- continue;
- if (Loop && !Loop->contains(NestedExitBB))
- continue;
- if (Finalized.count(NestedExitBB))
- continue;
- PushedNodes = true;
- Stack.push_back(NestedExitBB);
- }
- if (!PushedNodes) {
- // All loop exits finalized -> finish this node
- Stack.pop_back();
- computeLoopPO(LI, *NestedLoop, CallBack, Finalized);
- }
- continue;
- }
-
- // DAG-style
- bool PushedNodes = false;
- for (const auto *SuccBB : successors(NextBB)) {
- if (SuccBB == LoopHeader)
- continue;
- if (Loop && !Loop->contains(SuccBB))
- continue;
- if (Finalized.count(SuccBB))
- continue;
- PushedNodes = true;
- Stack.push_back(SuccBB);
- }
- if (!PushedNodes) {
- // Never push nodes twice
- Stack.pop_back();
- if (!Finalized.insert(NextBB).second)
- continue;
- CallBack(*NextBB);
- }
- }
-}
-
-static void computeTopLevelPO(Function &F, const LoopInfo &LI, POCB CallBack) {
- VisitedSet Finalized;
- BlockStack Stack;
- Stack.reserve(24); // FIXME made-up number
- Stack.push_back(&F.getEntryBlock());
- computeStackPO(Stack, LI, nullptr, CallBack, Finalized);
-}
-
-static void computeLoopPO(const LoopInfo &LI, Loop &Loop, POCB CallBack,
- VisitedSet &Finalized) {
- /// Call CallBack on all loop blocks.
- std::vector<const BasicBlock *> Stack;
- const auto *LoopHeader = Loop.getHeader();
-
- // Visit the header last
- Finalized.insert(LoopHeader);
- CallBack(*LoopHeader);
-
- // Initialize with immediate successors
- for (const auto *BB : successors(LoopHeader)) {
- if (!Loop.contains(BB))
- continue;
- if (BB == LoopHeader)
- continue;
- Stack.push_back(BB);
- }
-
- // Compute PO inside region
- computeStackPO(Stack, LI, &Loop, CallBack, Finalized);
-}
-
-} // namespace
-
-namespace llvm {
-
-ControlDivergenceDesc SyncDependenceAnalysis::EmptyDivergenceDesc;
-
-SyncDependenceAnalysis::SyncDependenceAnalysis(const DominatorTree &DT,
- const PostDominatorTree &PDT,
- const LoopInfo &LI)
- : DT(DT), PDT(PDT), LI(LI) {
- computeTopLevelPO(*DT.getRoot()->getParent(), LI,
- [&](const BasicBlock &BB) { LoopPO.appendBlock(BB); });
-}
-
-SyncDependenceAnalysis::~SyncDependenceAnalysis() = default;
-
-namespace {
-// divergence propagator for reducible CFGs
-struct DivergencePropagator {
- const ModifiedPO &LoopPOT;
- const DominatorTree &DT;
- const PostDominatorTree &PDT;
- const LoopInfo &LI;
- const BasicBlock &DivTermBlock;
-
- // * if BlockLabels[IndexOf(B)] == C then C is the dominating definition at
- // block B
- // * if BlockLabels[IndexOf(B)] ~ undef then we haven't seen B yet
- // * if BlockLabels[IndexOf(B)] == B then B is a join point of disjoint paths
- // from X or B is an immediate successor of X (initial value).
- using BlockLabelVec = std::vector<const BasicBlock *>;
- BlockLabelVec BlockLabels;
- // divergent join and loop exit descriptor.
- std::unique_ptr<ControlDivergenceDesc> DivDesc;
-
- DivergencePropagator(const ModifiedPO &LoopPOT, const DominatorTree &DT,
- const PostDominatorTree &PDT, const LoopInfo &LI,
- const BasicBlock &DivTermBlock)
- : LoopPOT(LoopPOT), DT(DT), PDT(PDT), LI(LI), DivTermBlock(DivTermBlock),
- BlockLabels(LoopPOT.size(), nullptr),
- DivDesc(new ControlDivergenceDesc) {}
-
- void printDefs(raw_ostream &Out) {
- Out << "Propagator::BlockLabels {\n";
- for (int BlockIdx = (int)BlockLabels.size() - 1; BlockIdx > 0; --BlockIdx) {
- const auto *Label = BlockLabels[BlockIdx];
- Out << LoopPOT.getBlockAt(BlockIdx)->getName().str() << "(" << BlockIdx
- << ") : ";
- if (!Label) {
- Out << "<null>\n";
- } else {
- Out << Label->getName() << "\n";
- }
- }
- Out << "}\n";
- }
-
- // Push a definition (\p PushedLabel) to \p SuccBlock and return whether this
- // causes a divergent join.
- bool computeJoin(const BasicBlock &SuccBlock, const BasicBlock &PushedLabel) {
- auto SuccIdx = LoopPOT.getIndexOf(SuccBlock);
-
- // unset or same reaching label
- const auto *OldLabel = BlockLabels[SuccIdx];
- if (!OldLabel || (OldLabel == &PushedLabel)) {
- BlockLabels[SuccIdx] = &PushedLabel;
- return false;
- }
-
- // Update the definition
- BlockLabels[SuccIdx] = &SuccBlock;
- return true;
- }
-
- // visiting a virtual loop exit edge from the loop header --> temporal
- // divergence on join
- bool visitLoopExitEdge(const BasicBlock &ExitBlock,
- const BasicBlock &DefBlock, bool FromParentLoop) {
- // Pushing from a non-parent loop cannot cause temporal divergence.
- if (!FromParentLoop)
- return visitEdge(ExitBlock, DefBlock);
-
- if (!computeJoin(ExitBlock, DefBlock))
- return false;
-
- // Identified a divergent loop exit
- DivDesc->LoopDivBlocks.insert(&ExitBlock);
- LLVM_DEBUG(dbgs() << "\tDivergent loop exit: " << ExitBlock.getName()
- << "\n");
- return true;
- }
-
- // process \p SuccBlock with reaching definition \p DefBlock
- bool visitEdge(const BasicBlock &SuccBlock, const BasicBlock &DefBlock) {
- if (!computeJoin(SuccBlock, DefBlock))
- return false;
-
- // Divergent, disjoint paths join.
- DivDesc->JoinDivBlocks.insert(&SuccBlock);
- LLVM_DEBUG(dbgs() << "\tDivergent join: " << SuccBlock.getName());
- return true;
- }
-
- std::unique_ptr<ControlDivergenceDesc> computeJoinPoints() {
- assert(DivDesc);
-
- LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints: " << DivTermBlock.getName()
- << "\n");
-
- const auto *DivBlockLoop = LI.getLoopFor(&DivTermBlock);
-
- // Early stopping criterion
- int FloorIdx = LoopPOT.size() - 1;
- const BasicBlock *FloorLabel = nullptr;
-
- // bootstrap with branch targets
- int BlockIdx = 0;
-
- for (const auto *SuccBlock : successors(&DivTermBlock)) {
- auto SuccIdx = LoopPOT.getIndexOf(*SuccBlock);
- BlockLabels[SuccIdx] = SuccBlock;
-
- // Find the successor with the highest index to start with
- BlockIdx = std::max<int>(BlockIdx, SuccIdx);
- FloorIdx = std::min<int>(FloorIdx, SuccIdx);
-
- // Identify immediate divergent loop exits
- if (!DivBlockLoop)
- continue;
-
- const auto *BlockLoop = LI.getLoopFor(SuccBlock);
- if (BlockLoop && DivBlockLoop->contains(BlockLoop))
- continue;
- DivDesc->LoopDivBlocks.insert(SuccBlock);
- LLVM_DEBUG(dbgs() << "\tImmediate divergent loop exit: "
- << SuccBlock->getName() << "\n");
- }
-
- // propagate definitions at the immediate successors of the node in RPO
- for (; BlockIdx >= FloorIdx; --BlockIdx) {
- LLVM_DEBUG(dbgs() << "Before next visit:\n"; printDefs(dbgs()));
-
- // Any label available here
- const auto *Label = BlockLabels[BlockIdx];
- if (!Label)
- continue;
-
- // Ok. Get the block
- const auto *Block = LoopPOT.getBlockAt(BlockIdx);
- LLVM_DEBUG(dbgs() << "SDA::joins. visiting " << Block->getName() << "\n");
-
- auto *BlockLoop = LI.getLoopFor(Block);
- bool IsLoopHeader = BlockLoop && BlockLoop->getHeader() == Block;
- bool CausedJoin = false;
- int LoweredFloorIdx = FloorIdx;
- if (IsLoopHeader) {
- // Disconnect from immediate successors and propagate directly to loop
- // exits.
- SmallVector<BasicBlock *, 4> BlockLoopExits;
- BlockLoop->getExitBlocks(BlockLoopExits);
-
- bool IsParentLoop = BlockLoop->contains(&DivTermBlock);
- for (const auto *BlockLoopExit : BlockLoopExits) {
- CausedJoin |= visitLoopExitEdge(*BlockLoopExit, *Label, IsParentLoop);
- LoweredFloorIdx = std::min<int>(LoweredFloorIdx,
- LoopPOT.getIndexOf(*BlockLoopExit));
- }
- } else {
- // Acyclic successor case
- for (const auto *SuccBlock : successors(Block)) {
- CausedJoin |= visitEdge(*SuccBlock, *Label);
- LoweredFloorIdx =
- std::min<int>(LoweredFloorIdx, LoopPOT.getIndexOf(*SuccBlock));
- }
- }
-
- // Floor update
- if (CausedJoin) {
- // 1. Different labels pushed to successors
- FloorIdx = LoweredFloorIdx;
- } else if (FloorLabel != Label) {
- // 2. No join caused BUT we pushed a label that is
diff erent than the
- // last pushed label
- FloorIdx = LoweredFloorIdx;
- FloorLabel = Label;
- }
- }
-
- LLVM_DEBUG(dbgs() << "SDA::joins. After propagation:\n"; printDefs(dbgs()));
-
- return std::move(DivDesc);
- }
-};
-} // end anonymous namespace
-
-#ifndef NDEBUG
-static void printBlockSet(ConstBlockSet &Blocks, raw_ostream &Out) {
- Out << "[";
- ListSeparator LS;
- for (const auto *BB : Blocks)
- Out << LS << BB->getName();
- Out << "]";
-}
-#endif
-
-const ControlDivergenceDesc &
-SyncDependenceAnalysis::getJoinBlocks(const Instruction &Term) {
- // trivial case
- if (Term.getNumSuccessors() <= 1) {
- return EmptyDivergenceDesc;
- }
-
- // already available in cache?
- auto ItCached = CachedControlDivDescs.find(&Term);
- if (ItCached != CachedControlDivDescs.end())
- return *ItCached->second;
-
- // compute all join points
- // Special handling of divergent loop exits is not needed for LCSSA
- const auto &TermBlock = *Term.getParent();
- DivergencePropagator Propagator(LoopPO, DT, PDT, LI, TermBlock);
- auto DivDesc = Propagator.computeJoinPoints();
-
- LLVM_DEBUG(dbgs() << "Result (" << Term.getParent()->getName() << "):\n";
- dbgs() << "JoinDivBlocks: ";
- printBlockSet(DivDesc->JoinDivBlocks, dbgs());
- dbgs() << "\nLoopDivBlocks: ";
- printBlockSet(DivDesc->LoopDivBlocks, dbgs()); dbgs() << "\n";);
-
- auto ItInserted = CachedControlDivDescs.emplace(&Term, std::move(DivDesc));
- assert(ItInserted.second);
- return *ItInserted.first->second;
-}
-
-} // namespace llvm
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 46e77d477c233..92199eb33c4cd 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -262,10 +262,6 @@ bool TargetTransformInfo::hasBranchDivergence() const {
return TTIImpl->hasBranchDivergence();
}
-bool TargetTransformInfo::useGPUDivergenceAnalysis() const {
- return TTIImpl->useGPUDivergenceAnalysis();
-}
-
bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
return TTIImpl->isSourceOfDivergence(V);
}
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index a04f8bbaa5dc0..a7d65aa8f5088 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -34,7 +34,6 @@
#include "llvm/Analysis/Delinearization.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/DependenceAnalysis.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/DomPrinter.h"
#include "llvm/Analysis/DominanceFrontier.h"
#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
@@ -46,7 +45,6 @@
#include "llvm/Analysis/InstCount.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LazyValueInfo.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/Lint.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopCacheAnalysis.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 82592a1ee9b55..891221d534526 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -249,7 +249,6 @@ FUNCTION_ANALYSIS("targetir",
TM ? TM->getTargetIRAnalysis() : TargetIRAnalysis())
FUNCTION_ANALYSIS("verify", VerifierAnalysis())
FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
-FUNCTION_ANALYSIS("divergence", DivergenceAnalysis())
FUNCTION_ANALYSIS("uniformity", UniformityInfoAnalysis())
#ifndef FUNCTION_ALIAS_ANALYSIS
@@ -317,7 +316,6 @@ FUNCTION_PASS("libcalls-shrinkwrap", LibCallsShrinkWrapPass())
FUNCTION_PASS("lint", LintPass())
FUNCTION_PASS("inject-tli-mappings", InjectTLIMappings())
FUNCTION_PASS("instnamer", InstructionNamerPass())
-FUNCTION_PASS("legacy-divergence-analysis", LegacyDivergenceAnalysisPass())
FUNCTION_PASS("loweratomic", LowerAtomicPass())
FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass())
FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass())
@@ -357,7 +355,6 @@ FUNCTION_PASS("print<branch-prob>", BranchProbabilityPrinterPass(dbgs()))
FUNCTION_PASS("print<cost-model>", CostModelPrinterPass(dbgs()))
FUNCTION_PASS("print<cycles>", CycleInfoPrinterPass(dbgs()))
FUNCTION_PASS("print<da>", DependenceAnalysisPrinterPass(dbgs()))
-FUNCTION_PASS("print<divergence>", DivergenceAnalysisPrinterPass(dbgs()))
FUNCTION_PASS("print<domtree>", DominatorTreePrinterPass(dbgs()))
FUNCTION_PASS("print<postdomtree>", PostDominatorTreePrinterPass(dbgs()))
FUNCTION_PASS("print<delinearization>", DelinearizationPrinterPass(dbgs()))
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/read_register.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/read_register.ll
deleted file mode 100644
index 91e5d588710ab..0000000000000
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/read_register.ll
+++ /dev/null
@@ -1,142 +0,0 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -mcpu=gfx90a -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
-
-; CHECK-LABEL: Divergence Analysis' for function 'read_register_exec':
-; CHECK-NOT: DIVERGENT
-define i64 @read_register_exec() {
- %reg = call i64 @llvm.read_register.i64(metadata !0)
- ret i64 %reg
-}
-
-; CHECK-LABEL: Divergence Analysis' for function 'read_register_m0':
-; CHECK-NOT: DIVERGENT
-define i32 @read_register_m0() {
- %reg = call i32 @llvm.read_register.i32(metadata !1)
- ret i32 %reg
-}
-
-; CHECK-LABEL: Divergence Analysis' for function 'read_register_s17':
-; CHECK-NOT: DIVERGENT
-define i32 @read_register_s17() {
- %reg = call i32 @llvm.read_register.i32(metadata !2)
- ret i32 %reg
-}
-
-; CHECK-LABEL: Divergence Analysis' for function 'read_register_s17_i17':
-; CHECK-NOT: DIVERGENT
-define i17 @read_register_s17_i17() {
- %reg = call i17 @llvm.read_register.i17(metadata !2)
- ret i17 %reg
-}
-
-; CHECK-LABEL: Divergence Analysis' for function 'read_register_v0':
-; CHECK: DIVERGENT
-define i32 @read_register_v0() {
- %reg = call i32 @llvm.read_register.i32(metadata !3)
- ret i32 %reg
-}
-
-; CHECK-LABEL: Divergence Analysis' for function 'read_register_v0_v1':
-; CHECK: DIVERGENT
-define i64 @read_register_v0_v1() {
- %reg = call i64 @llvm.read_register.i64(metadata !4)
- ret i64 %reg
-}
-
-; CHECK-LABEL: Divergence Analysis' for function 'read_register_a0':
-; CHECK: DIVERGENT
-define i32 @read_register_a0() {
- %reg = call i32 @llvm.read_register.i32(metadata !5)
- ret i32 %reg
-}
-
-; CHECK-LABEL: Divergence Analysis' for function 'read_register_a0_a1':
-; CHECK: DIVERGENT
-define i64 @read_register_a0_a1() {
- %reg = call i64 @llvm.read_register.i64(metadata !6)
- ret i64 %reg
-}
-
-; CHECK-LABEL: Divergence Analysis' for function 'read_register_vcc_i64':
-; CHECK-NOT: DIVERGENT
-define i64 @read_register_vcc_i64() {
- %reg = call i64 @llvm.read_register.i64(metadata !7)
- ret i64 %reg
-}
-
-; CHECK-LABEL: Divergence Analysis' for function 'read_register_vcc_i1':
-; CHECK: DIVERGENT
-define i1 @read_register_vcc_i1() {
- %reg = call i1 @llvm.read_register.i1(metadata !7)
- ret i1 %reg
-}
-
-; CHECK-LABEL: Divergence Analysis' for function 'read_register_invalid_reg':
-; CHECK-NOT: DIVERGENT
-define i64 @read_register_invalid_reg() {
- %reg = call i64 @llvm.read_register.i64(metadata !8)
- ret i64 %reg
-}
-
-; CHECK-LABEL: Divergence Analysis' for function 'read_register_flat_scratch':
-; CHECK-NOT: DIVERGENT
-define i32 @read_register_flat_scratch() {
- %reg = call i32 @llvm.read_register.i32(metadata !9)
- ret i32 %reg
-}
-
-; CHECK-LABEL: Divergence Analysis' for function 'read_register_vcc_lo_i32':
-; CHECK-NOT: DIVERGENT
-define i32 @read_register_vcc_lo_i32() {
- %reg = call i32 @llvm.read_register.i32(metadata !10)
- ret i32 %reg
-}
-
-; CHECK-LABEL: Divergence Analysis' for function 'read_register_vcc_hi_i32':
-; CHECK-NOT: DIVERGENT
-define i32 @read_register_vcc_hi_i32() {
- %reg = call i32 @llvm.read_register.i32(metadata !11)
- ret i32 %reg
-}
-
-; CHECK-LABEL: Divergence Analysis' for function 'read_register_exec_lo_i32':
-; CHECK-NOT: DIVERGENT
-define i32 @read_register_exec_lo_i32() {
- %reg = call i32 @llvm.read_register.i32(metadata !12)
- ret i32 %reg
-}
-
-; CHECK-LABEL: Divergence Analysis' for function 'read_register_exec_hi_i32':
-; CHECK-NOT: DIVERGENT
-define i32 @read_register_exec_hi_i32() {
- %reg = call i32 @llvm.read_register.i32(metadata !13)
- ret i32 %reg
-}
-
-; FIXME: Why does the verifier allow this?
-; CHECK-LABEL: Divergence Analysis' for function 'read_register_empty_str_i32':
-; CHECK-NOT: DIVERGENT
-define i32 @read_register_empty_str_i32() {
- %reg = call i32 @llvm.read_register.i32(metadata !14)
- ret i32 %reg
-}
-
-declare i64 @llvm.read_register.i64(metadata)
-declare i32 @llvm.read_register.i32(metadata)
-declare i17 @llvm.read_register.i17(metadata)
-declare i1 @llvm.read_register.i1(metadata)
-
-!0 = !{!"exec"}
-!1 = !{!"m0"}
-!2 = !{!"s17"}
-!3 = !{!"v0"}
-!4 = !{!"v[0:1]"}
-!5 = !{!"a0"}
-!6 = !{!"a[0:1]"}
-!7 = !{!"vcc"}
-!8 = !{!"not a register"}
-!9 = !{!"flat_scratch"}
-!10 = !{!"vcc_lo"}
-!11 = !{!"vcc_hi"}
-!12 = !{!"exec_lo"}
-!13 = !{!"exec_hi"}
-!14 = !{!""}
diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll
deleted file mode 100644
index 9aa482dfb899c..0000000000000
--- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll
+++ /dev/null
@@ -1,57 +0,0 @@
-; RUN: opt -mtriple=amdgcn-- -passes='print<divergence>' 2>&1 -disable-output %s | FileCheck %s
-
-; CHECK: DIVERGENT: %orig = atomicrmw xchg ptr %ptr, i32 %val seq_cst
-define amdgpu_kernel void @test1(ptr %ptr, i32 %val) #0 {
- %orig = atomicrmw xchg ptr %ptr, i32 %val seq_cst
- store i32 %orig, ptr %ptr
- ret void
-}
-
-; CHECK: DIVERGENT: %orig = cmpxchg ptr %ptr, i32 %cmp, i32 %new seq_cst seq_cst
-define amdgpu_kernel void @test2(ptr %ptr, i32 %cmp, i32 %new) {
- %orig = cmpxchg ptr %ptr, i32 %cmp, i32 %new seq_cst seq_cst
- %val = extractvalue { i32, i1 } %orig, 0
- store i32 %val, ptr %ptr
- ret void
-}
-
-; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr, i32 %val, i32 0, i32 0, i1 false)
-define i32 @test_atomic_inc_i32(ptr addrspace(1) %ptr, i32 %val) #0 {
- %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr, i32 %val, i32 0, i32 0, i1 false)
- ret i32 %ret
-}
-
-; CHECK: DIVERGENT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %ptr, i64 %val, i32 0, i32 0, i1 false)
-define i64 @test_atomic_inc_i64(ptr addrspace(1) %ptr, i64 %val) #0 {
- %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %ptr, i64 %val, i32 0, i32 0, i1 false)
- ret i64 %ret
-}
-
-; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr, i32 %val, i32 0, i32 0, i1 false)
-define i32 @test_atomic_dec_i32(ptr addrspace(1) %ptr, i32 %val) #0 {
- %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr, i32 %val, i32 0, i32 0, i1 false)
- ret i32 %ret
-}
-
-; CHECK: DIVERGENT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %ptr, i64 %val, i32 0, i32 0, i1 false)
-define i64 @test_atomic_dec_i64(ptr addrspace(1) %ptr, i64 %val) #0 {
- %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %ptr, i64 %val, i32 0, i32 0, i1 false)
- ret i64 %ret
-}
-
-declare i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) nocapture, i32, i32, i32, i1) #1
-declare i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) nocapture, i64, i32, i32, i1) #1
-declare i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) nocapture, i32, i32, i32, i1) #1
-declare i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) nocapture, i64, i32, i32, i1) #1
-
-; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %ptr, i32 %val)
-define amdgpu_kernel void @test_atomic_csub_i32(ptr addrspace(1) %ptr, i32 %val) #0 {
- %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %ptr, i32 %val)
- store i32 %ret, ptr addrspace(1) %ptr, align 4
- ret void
-}
-
-declare i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) nocapture, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { argmemonly nounwind willreturn }
diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll
deleted file mode 100644
index fffdd9dc1b790..0000000000000
--- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: opt -mtriple=amdgcn-- -passes='print<divergence>' 2>&1 -disable-output %s | FileCheck %s
-
-; CHECK: DIVERGENT: %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
-define amdgpu_kernel void @ds_swizzle(ptr addrspace(1) %out, i32 %src) #0 {
- %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
- store i32 %swizzle, ptr addrspace(1) %out, align 4
- ret void
-}
-
-declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #1
-
-attributes #0 = { nounwind convergent }
-attributes #1 = { nounwind readnone convergent }
diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll
deleted file mode 100644
index 4ecb5e5c4faae..0000000000000
--- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: opt %s -mtriple amdgcn-- -passes='print<divergence>' 2>&1 -disable-output | FileCheck %s
-
-; CHECK-LABEL: function 'test_amdgpu_ps':
-; CHECK: DIVERGENT: ptr addrspace(4) %arg0
-; CHECK-NOT: DIVERGENT
-; CHECK: DIVERGENT: <2 x i32> %arg3
-; CHECK: DIVERGENT: <3 x i32> %arg4
-; CHECK: DIVERGENT: float %arg5
-; CHECK: DIVERGENT: i32 %arg6
-
-define amdgpu_ps void @test_amdgpu_ps(ptr addrspace(4) byref([4 x <16 x i8>]) %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 {
- ret void
-}
-
-; CHECK-LABEL: function 'test_amdgpu_kernel':
-; CHECK-NOT: %arg0
-; CHECK-NOT: %arg1
-; CHECK-NOT: %arg2
-; CHECK-NOT: %arg3
-; CHECK-NOT: %arg4
-; CHECK-NOT: %arg5
-; CHECK-NOT: %arg6
-define amdgpu_kernel void @test_amdgpu_kernel(ptr addrspace(4) byref([4 x <16 x i8>]) %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 {
- ret void
-}
-
-; CHECK-LABEL: function 'test_c':
-; CHECK: DIVERGENT:
-; CHECK: DIVERGENT:
-; CHECK: DIVERGENT:
-; CHECK: DIVERGENT:
-; CHECK: DIVERGENT:
-; CHECK: DIVERGENT:
-; CHECK: DIVERGENT:
-define void @test_c(ptr addrspace(4) byval([4 x <16 x i8>]) %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 {
- ret void
-}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/lit.local.cfg b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/lit.local.cfg
deleted file mode 100644
index 2a665f06be72e..0000000000000
--- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/lit.local.cfg
+++ /dev/null
@@ -1,2 +0,0 @@
-if not 'AMDGPU' in config.root.targets:
- config.unsupported = True
diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll
deleted file mode 100644
index 62f046c2b2f29..0000000000000
--- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll
+++ /dev/null
@@ -1,103 +0,0 @@
-;RUN: opt -mtriple=amdgcn-mesa-mesa3d -passes='print<divergence>' 2>&1 -disable-output %s | FileCheck %s
-
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.swap.i32(
-define float @buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
-main_body:
- %orig = call i32 @llvm.amdgcn.buffer.atomic.swap.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
- %r = bitcast i32 %orig to float
- ret float %r
-}
-
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.add.i32(
-define float @buffer_atomic_add(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
-main_body:
- %orig = call i32 @llvm.amdgcn.buffer.atomic.add.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
- %r = bitcast i32 %orig to float
- ret float %r
-}
-
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.sub.i32(
-define float @buffer_atomic_sub(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
-main_body:
- %orig = call i32 @llvm.amdgcn.buffer.atomic.sub.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
- %r = bitcast i32 %orig to float
- ret float %r
-}
-
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.smin.i32(
-define float @buffer_atomic_smin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
-main_body:
- %orig = call i32 @llvm.amdgcn.buffer.atomic.smin.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
- %r = bitcast i32 %orig to float
- ret float %r
-}
-
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.umin.i32(
-define float @buffer_atomic_umin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
-main_body:
- %orig = call i32 @llvm.amdgcn.buffer.atomic.umin.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
- %r = bitcast i32 %orig to float
- ret float %r
-}
-
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.smax.i32(
-define float @buffer_atomic_smax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
-main_body:
- %orig = call i32 @llvm.amdgcn.buffer.atomic.smax.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
- %r = bitcast i32 %orig to float
- ret float %r
-}
-
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.umax.i32(
-define float @buffer_atomic_umax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
-main_body:
- %orig = call i32 @llvm.amdgcn.buffer.atomic.umax.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
- %r = bitcast i32 %orig to float
- ret float %r
-}
-
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.and.i32(
-define float @buffer_atomic_and(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
-main_body:
- %orig = call i32 @llvm.amdgcn.buffer.atomic.and.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
- %r = bitcast i32 %orig to float
- ret float %r
-}
-
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.or.i32(
-define float @buffer_atomic_or(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
-main_body:
- %orig = call i32 @llvm.amdgcn.buffer.atomic.or.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
- %r = bitcast i32 %orig to float
- ret float %r
-}
-
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.xor.i32(
-define float @buffer_atomic_xor(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
-main_body:
- %orig = call i32 @llvm.amdgcn.buffer.atomic.xor.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
- %r = bitcast i32 %orig to float
- ret float %r
-}
-
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(
-define float @buffer_atomic_cmpswap(<4 x i32> inreg %rsrc, i32 inreg %data, i32 inreg %cmp) #0 {
-main_body:
- %orig = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %data, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
- %r = bitcast i32 %orig to float
- ret float %r
-}
-
-declare i32 @llvm.amdgcn.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.smin.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.umin.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.smax.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.umax.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.and.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i1) #0
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
deleted file mode 100644
index b6ead36f70b94..0000000000000
--- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
+++ /dev/null
@@ -1,131 +0,0 @@
-;RUN: opt -mtriple=amdgcn-mesa-mesa3d -passes='print<divergence>' 2>&1 -disable-output %s | FileCheck %s
-
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(
-define float @image_atomic_swap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
-main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
- %r = bitcast i32 %orig to float
- ret float %r
-}
-
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(
-define float @image_atomic_add(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
-main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
- %r = bitcast i32 %orig to float
- ret float %r
-}
-
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(
-define float @image_atomic_sub(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
-main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
- %r = bitcast i32 %orig to float
- ret float %r
-}
-
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(
-define float @image_atomic_smin(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
-main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
- %r = bitcast i32 %orig to float
- ret float %r
-}
-
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(
-define float @image_atomic_umin(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
-main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
- %r = bitcast i32 %orig to float
- ret float %r
-}
-
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(
-define float @image_atomic_smax(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
-main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
- %r = bitcast i32 %orig to float
- ret float %r
-}
-
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(
-define float @image_atomic_umax(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
-main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
- %r = bitcast i32 %orig to float
- ret float %r
-}
-
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(
-define float @image_atomic_and(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
-main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
- %r = bitcast i32 %orig to float
- ret float %r
-}
-
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(
-define float @image_atomic_or(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
-main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
- %r = bitcast i32 %orig to float
- ret float %r
-}
-
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(
-define float @image_atomic_xor(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
-main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
- %r = bitcast i32 %orig to float
- ret float %r
-}
-
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(
-define float @image_atomic_inc(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
-main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
- %r = bitcast i32 %orig to float
- ret float %r
-}
-
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(
-define float @image_atomic_dec(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
-main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
- %r = bitcast i32 %orig to float
- ret float %r
-}
-
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(
-define float @image_atomic_cmpswap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data, i32 inreg %cmp) #0 {
-main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %data, i32 %cmp, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
- %r = bitcast i32 %orig to float
- ret float %r
-}
-
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(
-define float @image_atomic_add_2d(<8 x i32> inreg %rsrc, i32 inreg %s, i32 inreg %t, i32 inreg %data) #0 {
-main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
- %r = bitcast i32 %orig to float
- ret float %r
-}
-
-declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
-
-declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll
deleted file mode 100644
index f6e3e90ee63a1..0000000000000
--- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: opt -mtriple=amdgcn-- -passes='print<divergence>' 2>&1 -disable-output %s | FileCheck %s
-
-; Test that we consider loads from flat and private addrspaces to be divergent.
-
-; CHECK: DIVERGENT: %val = load i32, ptr %flat, align 4
-define amdgpu_kernel void @flat_load(ptr %flat) {
- %val = load i32, ptr %flat, align 4
- ret void
-}
-
-; CHECK: DIVERGENT: %val = load i32, ptr addrspace(5) %priv, align 4
-define amdgpu_kernel void @private_load(ptr addrspace(5) %priv) {
- %val = load i32, ptr addrspace(5) %priv, align 4
- ret void
-}
diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll
deleted file mode 100644
index 01aa24d813bce..0000000000000
--- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: opt %s -mtriple amdgcn-- -passes='print<divergence>' 2>&1 -disable-output | FileCheck %s
-
-; CHECK: DIVERGENT: %tmp5 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %tmp2
-; CHECK: DIVERGENT: %tmp10 = load volatile float, ptr addrspace(1) %tmp5, align 4
-; CHECK: DIVERGENT: %tmp11 = load volatile float, ptr addrspace(1) %tmp5, align 4
-
-; The post dominator tree does not have a root node in this case
-define amdgpu_kernel void @no_return_blocks(ptr addrspace(1) noalias nocapture readonly %arg, ptr addrspace(1) noalias nocapture readonly %arg1) #0 {
-bb0:
- %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
- %tmp2 = sext i32 %tmp to i64
- %tmp5 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %tmp2
- %tmp6 = load volatile float, ptr addrspace(1) %tmp5, align 4
- %tmp8 = fcmp olt float %tmp6, 0.000000e+00
- br i1 %tmp8, label %bb1, label %bb2
-
-bb1:
- %tmp10 = load volatile float, ptr addrspace(1) %tmp5, align 4
- br label %bb2
-
-bb2:
- %tmp11 = load volatile float, ptr addrspace(1) %tmp5, align 4
- br label %bb1
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll
deleted file mode 100644
index 0a8f4568b0249..0000000000000
--- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; RUN: opt -mtriple=amdgcn-- -passes='print<divergence>' 2>&1 -disable-output %s | FileCheck %s
-
-; CHECK-LABEL: 'test1':
-; CHECK-NEXT: DIVERGENT: i32 %bound
-; CHECK: {{^ *}}%counter =
-; CHECK-NEXT: DIVERGENT: %break = icmp sge i32 %counter, %bound
-; CHECK-NEXT: DIVERGENT: br i1 %break, label %footer, label %body
-; CHECK: {{^ *}}%counter.next =
-; CHECK: {{^ *}}%counter.footer =
-; CHECK: DIVERGENT: br i1 %break, label %end, label %header
-; Note: %counter is not divergent!
-define amdgpu_ps void @test1(i32 %bound) {
-entry:
- br label %header
-
-header:
- %counter = phi i32 [ 0, %entry ], [ %counter.footer, %footer ]
- %break = icmp sge i32 %counter, %bound
- br i1 %break, label %footer, label %body
-
-body:
- %counter.next = add i32 %counter, 1
- br label %footer
-
-footer:
- %counter.footer = phi i32 [ %counter.next, %body ], [ undef, %header ]
- br i1 %break, label %end, label %header
-
-end:
- ret void
-}
diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll
deleted file mode 100644
index 0feecc927882b..0000000000000
--- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: opt %s -mtriple amdgcn-- -passes='print<divergence>' 2>&1 -disable-output | FileCheck %s
-
-; CHECK: DIVERGENT: %tmp = cmpxchg volatile
-define amdgpu_kernel void @unreachable_loop(i32 %tidx) #0 {
-entry:
- unreachable
-
-unreachable_loop: ; preds = %do.body.i, %if.then11
- %tmp = cmpxchg volatile ptr addrspace(1) null, i32 0, i32 0 seq_cst seq_cst
- %cmp.i = extractvalue { i32, i1 } %tmp, 1
- br i1 %cmp.i, label %unreachable_loop, label %end
-
-end: ; preds = %do.body.i51, %atomicAdd_g_f.exit
- unreachable
-}
-
-attributes #0 = { norecurse nounwind }
diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll
deleted file mode 100644
index 41798fa0acd75..0000000000000
--- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll
+++ /dev/null
@@ -1,45 +0,0 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' 2>&1 -disable-output %s | FileCheck %s
-
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @llvm.amdgcn.workitem.id.y() #0
-declare i32 @llvm.amdgcn.workitem.id.z() #0
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
-declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
-
-; CHECK: DIVERGENT: %id.x = call i32 @llvm.amdgcn.workitem.id.x()
-define amdgpu_kernel void @workitem_id_x() #1 {
- %id.x = call i32 @llvm.amdgcn.workitem.id.x()
- store volatile i32 %id.x, ptr addrspace(1) undef
- ret void
-}
-
-; CHECK: DIVERGENT: %id.y = call i32 @llvm.amdgcn.workitem.id.y()
-define amdgpu_kernel void @workitem_id_y() #1 {
- %id.y = call i32 @llvm.amdgcn.workitem.id.y()
- store volatile i32 %id.y, ptr addrspace(1) undef
- ret void
-}
-
-; CHECK: DIVERGENT: %id.z = call i32 @llvm.amdgcn.workitem.id.z()
-define amdgpu_kernel void @workitem_id_z() #1 {
- %id.z = call i32 @llvm.amdgcn.workitem.id.z()
- store volatile i32 %id.z, ptr addrspace(1) undef
- ret void
-}
-
-; CHECK: DIVERGENT: %mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 0, i32 0)
-define amdgpu_kernel void @mbcnt_lo() #1 {
- %mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 0, i32 0)
- store volatile i32 %mbcnt.lo, ptr addrspace(1) undef
- ret void
-}
-
-; CHECK: DIVERGENT: %mbcnt.hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 0, i32 0)
-define amdgpu_kernel void @mbcnt_hi() #1 {
- %mbcnt.hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 0, i32 0)
- store volatile i32 %mbcnt.hi, ptr addrspace(1) undef
- ret void
-}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/NVPTX/diverge.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/NVPTX/diverge.ll
deleted file mode 100644
index cd965a5cb27fd..0000000000000
--- a/llvm/test/Analysis/LegacyDivergenceAnalysis/NVPTX/diverge.ll
+++ /dev/null
@@ -1,219 +0,0 @@
-; RUN: opt %s -passes='print<divergence>' 2>&1 -disable-output | FileCheck %s
-
-target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
-target triple = "nvptx64-nvidia-cuda"
-
-; return (n < 0 ? a + threadIdx.x : b + threadIdx.x)
-define i32 @no_diverge(i32 %n, i32 %a, i32 %b) {
-; CHECK-LABEL: function 'no_diverge'
-entry:
- %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
- %cond = icmp slt i32 %n, 0
- br i1 %cond, label %then, label %else ; uniform
-; CHECK-NOT: DIVERGENT: br i1 %cond,
-then:
- %a1 = add i32 %a, %tid
- br label %merge
-else:
- %b2 = add i32 %b, %tid
- br label %merge
-merge:
- %c = phi i32 [ %a1, %then ], [ %b2, %else ]
- ret i32 %c
-}
-
-; c = a;
-; if (threadIdx.x < 5) // divergent: data dependent
-; c = b;
-; return c; // c is divergent: sync dependent
-define i32 @sync(i32 %a, i32 %b) {
-; CHECK-LABEL: function 'sync'
-bb1:
- %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
- %cond = icmp slt i32 %tid, 5
- br i1 %cond, label %bb2, label %bb3
-; CHECK: DIVERGENT: br i1 %cond,
-bb2:
- br label %bb3
-bb3:
- %c = phi i32 [ %a, %bb1 ], [ %b, %bb2 ] ; sync dependent on tid
-; CHECK: DIVERGENT: %c =
- ret i32 %c
-}
-
-; c = 0;
-; if (threadIdx.x >= 5) { // divergent
-; c = (n < 0 ? a : b); // c here is uniform because n is uniform
-; }
-; // c here is divergent because it is sync dependent on threadIdx.x >= 5
-; return c;
-define i32 @mixed(i32 %n, i32 %a, i32 %b) {
-; CHECK-LABEL: function 'mixed'
-bb1:
- %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
- %cond = icmp slt i32 %tid, 5
- br i1 %cond, label %bb6, label %bb2
-; CHECK: DIVERGENT: br i1 %cond,
-bb2:
- %cond2 = icmp slt i32 %n, 0
- br i1 %cond2, label %bb4, label %bb3
-bb3:
- br label %bb5
-bb4:
- br label %bb5
-bb5:
- %c = phi i32 [ %a, %bb3 ], [ %b, %bb4 ]
-; CHECK-NOT: DIVERGENT: %c =
- br label %bb6
-bb6:
- %c2 = phi i32 [ 0, %bb1], [ %c, %bb5 ]
-; CHECK: DIVERGENT: %c2 =
- ret i32 %c2
-}
-
-; We conservatively treats all parameters of a __device__ function as divergent.
-define i32 @device(i32 %n, i32 %a, i32 %b) {
-; CHECK-LABEL: function 'device'
-; CHECK: DIVERGENT: i32 %n
-; CHECK: DIVERGENT: i32 %a
-; CHECK: DIVERGENT: i32 %b
-entry:
- %cond = icmp slt i32 %n, 0
- br i1 %cond, label %then, label %else
-; CHECK: DIVERGENT: br i1 %cond,
-then:
- br label %merge
-else:
- br label %merge
-merge:
- %c = phi i32 [ %a, %then ], [ %b, %else ]
- ret i32 %c
-}
-
-; int i = 0;
-; do {
-; i++; // i here is uniform
-; } while (i < laneid);
-; return i == 10 ? 0 : 1; // i here is divergent
-;
-; The i defined in the loop is used outside.
-define i32 @loop() {
-; CHECK-LABEL: function 'loop'
-entry:
- %laneid = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
- br label %loop
-loop:
- %i = phi i32 [ 0, %entry ], [ %i1, %loop ]
-; CHECK-NOT: DIVERGENT: %i =
- %i1 = add i32 %i, 1
- %exit_cond = icmp sge i32 %i1, %laneid
- br i1 %exit_cond, label %loop_exit, label %loop
-loop_exit:
- %cond = icmp eq i32 %i, 10
- br i1 %cond, label %then, label %else
-; CHECK: DIVERGENT: br i1 %cond,
-then:
- ret i32 0
-else:
- ret i32 1
-}
-
-; Same as @loop, but the loop is in the LCSSA form.
-define i32 @lcssa() {
-; CHECK-LABEL: function 'lcssa'
-entry:
- %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
- br label %loop
-loop:
- %i = phi i32 [ 0, %entry ], [ %i1, %loop ]
-; CHECK-NOT: DIVERGENT: %i =
- %i1 = add i32 %i, 1
- %exit_cond = icmp sge i32 %i1, %tid
- br i1 %exit_cond, label %loop_exit, label %loop
-loop_exit:
- %i.lcssa = phi i32 [ %i, %loop ]
-; CHECK: DIVERGENT: %i.lcssa =
- %cond = icmp eq i32 %i.lcssa, 10
- br i1 %cond, label %then, label %else
-; CHECK: DIVERGENT: br i1 %cond,
-then:
- ret i32 0
-else:
- ret i32 1
-}
-
-; This test contains an unstructured loop.
-; +-------------- entry ----------------+
-; | |
-; V V
-; i1 = phi(0, i3) i2 = phi(0, i3)
-; j1 = i1 + 1 ---> i3 = phi(j1, j2) <--- j2 = i2 + 2
-; ^ | ^
-; | V |
-; +-------- switch (tid / i3) ----------+
-; |
-; V
-; if (i3 == 5) // divergent
-; because sync dependent on (tid / i3).
-define i32 @unstructured_loop(i1 %entry_cond) {
-; CHECK-LABEL: function 'unstructured_loop'
-entry:
- %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
- br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2
-loop_entry_1:
- %i1 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ]
- %j1 = add i32 %i1, 1
- br label %loop_body
-loop_entry_2:
- %i2 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ]
- %j2 = add i32 %i2, 2
- br label %loop_body
-loop_body:
- %i3 = phi i32 [ %j1, %loop_entry_1 ], [ %j2, %loop_entry_2 ]
- br label %loop_latch
-loop_latch:
- %div = sdiv i32 %tid, %i3
- switch i32 %div, label %branch [ i32 1, label %loop_entry_1
- i32 2, label %loop_entry_2 ]
-branch:
- %cmp = icmp eq i32 %i3, 5
- br i1 %cmp, label %then, label %else
-; CHECK: DIVERGENT: br i1 %cmp,
-then:
- ret i32 0
-else:
- ret i32 1
-}
-
-; Verifies sync-dependence is computed correctly in the absense of loops.
-define i32 @sync_no_loop(i32 %arg) {
-entry:
- %0 = add i32 %arg, 1
- %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
- %1 = icmp sge i32 %tid, 10
- br i1 %1, label %bb1, label %bb2
-
-bb1:
- br label %bb3
-
-bb2:
- br label %bb3
-
-bb3:
- %2 = add i32 %0, 2
- ; CHECK-NOT: DIVERGENT: %2
- ret i32 %2
-}
-
-declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
-declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
-declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
-declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
-
-!nvvm.annotations = !{!0, !1, !2, !3, !4, !5}
-!0 = !{ptr @no_diverge, !"kernel", i32 1}
-!1 = !{ptr @sync, !"kernel", i32 1}
-!2 = !{ptr @mixed, !"kernel", i32 1}
-!3 = !{ptr @loop, !"kernel", i32 1}
-!4 = !{ptr @unstructured_loop, !"kernel", i32 1}
-!5 = !{ptr @sync_no_loop, !"kernel", i32 1}
diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/NVPTX/lit.local.cfg b/llvm/test/Analysis/LegacyDivergenceAnalysis/NVPTX/lit.local.cfg
deleted file mode 100644
index 2cb98eb371b21..0000000000000
--- a/llvm/test/Analysis/LegacyDivergenceAnalysis/NVPTX/lit.local.cfg
+++ /dev/null
@@ -1,2 +0,0 @@
-if not 'NVPTX' in config.root.targets:
- config.unsupported = True
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/always-uniform-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/always-uniform-gmir.mir
similarity index 97%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/always-uniform-gmir.mir
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/always-uniform-gmir.mir
index 49a7ff491830e..87af933f3eaec 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/always-uniform-gmir.mir
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/always-uniform-gmir.mir
@@ -1,4 +1,4 @@
-# NOTE: This file is Generic MIR translation of test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll test file
+# NOTE: This file is Generic MIR translation of test/Analysis/UniformityAnalysis/AMDGPU/always_uniform.ll test file
# RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
---
name: readfirstlane
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/always-uniform.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/always-uniform.mir
similarity index 100%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/always-uniform.mir
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/always-uniform.mir
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/atomics-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/atomics-gmir.mir
similarity index 100%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/atomics-gmir.mir
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/atomics-gmir.mir
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/atomics.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/atomics.mir
similarity index 100%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/atomics.mir
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/atomics.mir
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir
similarity index 100%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/hidden-diverge.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge.mir
similarity index 100%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/hidden-diverge.mir
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge.mir
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/irreducible/branch-outside-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/branch-outside-gmir.mir
similarity index 100%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/irreducible/branch-outside-gmir.mir
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/branch-outside-gmir.mir
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/irreducible/diverged-entry-basic-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/diverged-entry-basic-gmir.mir
similarity index 100%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/irreducible/diverged-entry-basic-gmir.mir
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/diverged-entry-basic-gmir.mir
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/irreducible/exit-divergence-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/exit-divergence-gmir.mir
similarity index 100%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/irreducible/exit-divergence-gmir.mir
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/exit-divergence-gmir.mir
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/irreducible/irreducible-1.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/irreducible-1.mir
similarity index 100%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/irreducible/irreducible-1.mir
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/irreducible-1.mir
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/irreducible/irreducible-2-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/irreducible-2-gmir.mir
similarity index 100%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/irreducible/irreducible-2-gmir.mir
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/irreducible-2-gmir.mir
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/join-loopexit-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/join-loopexit-gmir.mir
similarity index 100%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/join-loopexit-gmir.mir
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/join-loopexit-gmir.mir
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/loads-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/loads-gmir.mir
similarity index 100%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/loads-gmir.mir
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/loads-gmir.mir
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/never-uniform.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/never-uniform.mir
similarity index 100%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/never-uniform.mir
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/never-uniform.mir
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/temporal-diverge-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/temporal-diverge-gmir.mir
similarity index 100%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/temporal-diverge-gmir.mir
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/temporal-diverge-gmir.mir
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/uses-value-from-cycle.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/uses-value-from-cycle.mir
similarity index 100%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/uses-value-from-cycle.mir
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/uses-value-from-cycle.mir
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/always_uniform.ll
similarity index 95%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/always_uniform.ll
index 0f055183c7983..205d69eb8b3a1 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/always_uniform.ll
@@ -1,4 +1,3 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
; CHECK-LABEL: for function 'readfirstlane':
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/atomics.ll
similarity index 96%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/atomics.ll
index 7c7e5e17ef39b..bf6c6cdc6f10a 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/atomics.ll
@@ -1,4 +1,3 @@
-; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-- -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
; CHECK: DIVERGENT: %orig = atomicrmw xchg ptr %ptr, i32 %val seq_cst
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/b42473-r1-crash.ll
similarity index 97%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/b42473-r1-crash.ll
index 2b0a02cd6a5b8..a3707780cb29b 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/b42473-r1-crash.ll
@@ -1,4 +1,3 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
declare i32 @gf2(i32)
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/control-flow-intrinsics.ll
similarity index 97%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/control-flow-intrinsics.ll
index e2f3bfe46e716..b92daa64040e4 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/control-flow-intrinsics.ll
@@ -1,4 +1,3 @@
-; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
; Tests control flow intrinsics that should be treated as uniform
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/hidden_diverge.ll
similarity index 94%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/hidden_diverge.ll
index c2b707ac4251d..fba41cc3d2eda 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/hidden_diverge.ll
@@ -1,4 +1,3 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
define amdgpu_kernel void @hidden_diverge(i32 %n, i32 %a, i32 %b) #0 {
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/hidden_loopdiverge.ll
similarity index 88%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/hidden_loopdiverge.ll
index 8aa13c4bb5faa..a2467a5480940 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/hidden_loopdiverge.ll
@@ -1,4 +1,3 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
; divergent loop (H<header><exiting to X>, B<exiting to Y>)
@@ -17,15 +16,15 @@ H:
%uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc, %B ]
%div.exitx = icmp slt i32 %tid, 0
br i1 %div.exitx, label %X, label %B ; divergent branch
-; CHECK: DIVERGENT: %div.exitx =
-; CHECK: DIVERGENT: br i1 %div.exitx,
+; CHECK: DIVERGENT: %div.exitx =
+; CHECK: DIVERGENT: br i1 %div.exitx,
B:
%uni.inc = add i32 %uni.merge.h, 1
%div.exity = icmp sgt i32 %tid, 0
br i1 %div.exity, label %Y, label %H ; divergent branch
-; CHECK: DIVERGENT: %div.exity =
-; CHECK: DIVERGENT: br i1 %div.exity,
+; CHECK: DIVERGENT: %div.exity =
+; CHECK: DIVERGENT: br i1 %div.exity,
X:
%div.merge.x = phi i32 [ %a, %entry ], [ %uni.merge.h, %H ] ; temporal divergent phi
@@ -59,18 +58,18 @@ H:
%uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc, %B ]
%div.exitx = icmp slt i32 %tid, 0
br i1 %div.exitx, label %X, label %B ; divergent branch
-; CHECK: DIVERGENT: %div.exitx =
-; CHECK: DIVERGENT: br i1 %div.exitx,
+; CHECK: DIVERGENT: %div.exitx =
+; CHECK: DIVERGENT: br i1 %div.exitx,
B:
%uni.inc = add i32 %uni.merge.h, 1
%div.exity = icmp sgt i32 %tid, 0
br i1 %div.exity, label %Y, label %H ; divergent branch
-; CHECK: DIVERGENT: %div.exity =
-; CHECK: DIVERGENT: br i1 %div.exity,
+; CHECK: DIVERGENT: %div.exity =
+; CHECK: DIVERGENT: br i1 %div.exity,
X:
- %uni.merge.x = phi i32 [ %a, %entry ], [ %b, %H ]
+ %uni.merge.x = phi i32 [ %a, %entry ], [ %b, %H ]
br label %exit
Y:
@@ -100,10 +99,10 @@ entry:
H:
%uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc, %D ]
br i1 %uni.cond, label %G, label %B
-; CHECK: DIVERGENT: %div.exitx =
+; CHECK: DIVERGENT: %div.exitx =
B:
- br i1 %uni.cond, label %X, label %C
+ br i1 %uni.cond, label %X, label %C
C:
br i1 %uni.cond, label %Y, label %D
@@ -114,7 +113,7 @@ D:
G:
br i1 %div.exitx, label %C, label %L
-; CHECK: DIVERGENT: br i1 %div.exitx,
+; CHECK: DIVERGENT: br i1 %div.exitx,
L:
br i1 %uni.cond, label %D, label %G
@@ -151,10 +150,10 @@ entry:
H:
%uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc, %C ]
br i1 %uni.cond, label %G, label %B
-; CHECK: DIVERGENT: %div.exitx =
+; CHECK: DIVERGENT: %div.exitx =
B:
- br i1 %uni.cond, label %Y, label %C
+ br i1 %uni.cond, label %Y, label %C
C:
%uni.inc = add i32 %uni.merge.h, 1
@@ -162,7 +161,7 @@ C:
G:
br i1 %div.exitx, label %X, label %L ; two-level break
-; CHECK: DIVERGENT: br i1 %div.exitx,
+; CHECK: DIVERGENT: br i1 %div.exitx,
L:
br i1 %uni.cond, label %C, label %G
@@ -193,7 +192,7 @@ entry:
H:
%uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc.d, %D ]
br i1 %uni.cond, label %G, label %B
-; CHECK: DIVERGENT: %div.exitx =
+; CHECK: DIVERGENT: %div.exitx =
B:
%div.merge.b = phi i32 [ 42, %H ], [ %uni.merge.g, %G ]
@@ -203,7 +202,7 @@ B:
G:
%uni.merge.g = phi i32 [ 123, %H ], [ %uni.inc.l, %L ]
br i1 %div.exitx, label %B, label %L
-; CHECK: DIVERGENT: br i1 %div.exitx,
+; CHECK: DIVERGENT: br i1 %div.exitx,
L:
%uni.inc.l = add i32 %uni.merge.g, 1
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/inline-asm.ll
similarity index 94%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/inline-asm.ll
index 8d3df813bc140..8190137682be2 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/inline-asm.ll
@@ -1,5 +1,3 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -mcpu=tahiti -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -mcpu=gfx908 -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -mcpu=tahiti -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -mcpu=gfx908 -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
; Make sure nothing crashes on targets with or without AGPRs
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/interp_f16.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/interp_f16.ll
similarity index 92%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/interp_f16.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/interp_f16.ll
index 420c0299d2341..b63abc85cfd95 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/interp_f16.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/interp_f16.ll
@@ -1,4 +1,3 @@
-; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-- -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
; CHECK: for function 'interp_p1_f16'
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
similarity index 98%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
index 3a1c1aa41129a..1b3f7973c0d9e 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
@@ -1,4 +1,3 @@
-; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-- -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
; CHECK: DIVERGENT: %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/branch-outside.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/branch-outside.ll
similarity index 100%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/branch-outside.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/branch-outside.ll
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/diverged-entry-basic.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-basic.ll
similarity index 100%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/diverged-entry-basic.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-basic.ll
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/diverged-entry-headers-nested.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-headers-nested.ll
similarity index 100%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/diverged-entry-headers-nested.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-headers-nested.ll
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/diverged-entry-headers.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-headers.ll
similarity index 100%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/diverged-entry-headers.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-headers.ll
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/exit-divergence.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/exit-divergence.ll
similarity index 100%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/exit-divergence.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/exit-divergence.ll
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/irreducible-1.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/irreducible-1.ll
similarity index 95%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/irreducible-1.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/irreducible-1.ll
index 0f4916a87f0aa..a77a608b3ad28 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/irreducible-1.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/irreducible-1.ll
@@ -1,4 +1,3 @@
-; RUN: opt %s -mtriple amdgcn-- -passes='print<divergence>' -disable-output 2>&1 | FileCheck %s
; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s
; This test contains an unstructured loop.
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/irreducible-2.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/irreducible-2.ll
similarity index 100%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/irreducible-2.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/irreducible-2.ll
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/reducible-headers.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/reducible-headers.ll
similarity index 100%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/reducible-headers.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/reducible-headers.ll
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-exit.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/join-at-loop-exit.ll
similarity index 91%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-exit.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/join-at-loop-exit.ll
index f7cba8bb30eb5..1e8a1c3c75f94 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-exit.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/join-at-loop-exit.ll
@@ -1,4 +1,3 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
; CHECK: DIVERGENT: %Guard.bb4 = phi i1 [ true, %bb1 ], [ false, %bb2 ]
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-heart.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/join-at-loop-heart.ll
similarity index 91%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-heart.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/join-at-loop-heart.ll
index 9e659602333aa..1c60589a52540 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-heart.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/join-at-loop-heart.ll
@@ -1,4 +1,3 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
; CHECK: DIVERGENT: %phi.h = phi i32 [ 0, %entry ], [ %inc, %C ], [ %inc, %D ], [ %inc, %E ]
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/kernel-args.ll
similarity index 93%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/kernel-args.ll
index 36e5cde1bd4b7..7473353d054d1 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/kernel-args.ll
@@ -1,4 +1,3 @@
-; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-- -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
; CHECK-LABEL: for function 'test_amdgpu_ps':
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/lit.local.cfg b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/lit.local.cfg
similarity index 100%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/lit.local.cfg
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/lit.local.cfg
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll
similarity index 99%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll
index 2e8d352e29786..a9ad7b56c95de 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll
@@ -1,4 +1,3 @@
-; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.swap.i32(
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
similarity index 98%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
index 9cf2eec038940..f6ac8fbd99c58 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
@@ -1,4 +1,3 @@
-; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/no-return-blocks.ll
similarity index 92%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/no-return-blocks.ll
index c8498c4ed52ad..1fea5cbb47fe9 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/no-return-blocks.ll
@@ -1,4 +1,3 @@
-; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-- -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
; CHECK: DIVERGENT: %tmp5 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %tmp2
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/phi-undef.ll
similarity index 90%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/phi-undef.ll
index 780d03b987f01..f4826b29bc2b3 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/phi-undef.ll
@@ -1,4 +1,3 @@
-; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-- -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
; CHECK-LABEL: 'test1':
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/propagate-loop-live-out.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/propagate-loop-live-out.ll
similarity index 93%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/propagate-loop-live-out.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/propagate-loop-live-out.ll
index e2a07ae956488..996fee510eda5 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/propagate-loop-live-out.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/propagate-loop-live-out.ll
@@ -1,4 +1,3 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
; CHECK: DIVERGENT: %.126.i355.i = phi i1 [ false, %bb5 ], [ true, %bb4 ]
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/temporal_diverge.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/temporal_diverge.ll
similarity index 88%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/temporal_diverge.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/temporal_diverge.ll
index b6e7a32f71eaa..842636aa952f0 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/temporal_diverge.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/temporal_diverge.ll
@@ -1,4 +1,3 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
; temporal-divergent use of value carried by divergent loop
@@ -17,8 +16,8 @@ H:
%uni.inc = add i32 %uni.merge.h, 1
%div.exitx = icmp slt i32 %tid, 0
br i1 %div.exitx, label %X, label %H ; divergent branch
-; CHECK: DIVERGENT: %div.exitx =
-; CHECK: DIVERGENT: br i1 %div.exitx,
+; CHECK: DIVERGENT: %div.exitx =
+; CHECK: DIVERGENT: br i1 %div.exitx,
X:
%div.user = add i32 %uni.inc, 5
@@ -44,8 +43,8 @@ H:
%uni.inc = add i32 %uni.merge.h, 1
%div.exitx = icmp slt i32 %tid, 0
br i1 %div.exitx, label %X, label %H ; divergent branch
-; CHECK: DIVERGENT: %div.exitx =
-; CHECK: DIVERGENT: br i1 %div.exitx,
+; CHECK: DIVERGENT: %div.exitx =
+; CHECK: DIVERGENT: br i1 %div.exitx,
X:
%div.user = add i32 %uni.inc, 5
@@ -79,9 +78,9 @@ H:
X:
%uni.user = add i32 %uni.inc, 5
%div.exity = icmp slt i32 %tid, 0
-; CHECK: DIVERGENT: %div.exity =
+; CHECK: DIVERGENT: %div.exity =
br i1 %div.exity, label %G, label %Y
-; CHECK: DIVERGENT: br i1 %div.exity,
+; CHECK: DIVERGENT: br i1 %div.exity,
Y:
%div.alsouser = add i32 %uni.inc, 5
@@ -105,8 +104,8 @@ H:
%uni.inc = add i32 %uni.merge.h, 1
%div.exitx = icmp slt i32 %tid, 0
br i1 %div.exitx, label %X, label %H ; divergent branch
-; CHECK: DIVERGENT: %div.exitx =
-; CHECK: DIVERGENT: br i1 %div.exitx,
+; CHECK: DIVERGENT: %div.exitx =
+; CHECK: DIVERGENT: br i1 %div.exitx,
X:
br label %G
@@ -135,8 +134,8 @@ H:
%uni.inc = add i32 %uni.merge.h, 1
%div.exitx = icmp slt i32 %tid, 0
br i1 %div.exitx, label %X, label %H ; divergent branch
-; CHECK: DIVERGENT: %div.exitx =
-; CHECK: DIVERGENT: br i1 %div.exitx,
+; CHECK: DIVERGENT: %div.exitx =
+; CHECK: DIVERGENT: br i1 %div.exitx,
X:
br label %G
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/trivial-join-at-loop-exit.ll
similarity index 88%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/trivial-join-at-loop-exit.ll
index 9b32614ca624b..00c13b5a0372c 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/trivial-join-at-loop-exit.ll
@@ -1,4 +1,3 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
; CHECK-NOT: DIVERGENT: %Guard.bb2 = phi i1 [ true, %bb1 ], [ false, %bb0 ]
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/unreachable-loop-block.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/unreachable-loop-block.ll
similarity index 86%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/unreachable-loop-block.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/unreachable-loop-block.ll
index ef335c04c8b52..d9af81ab957e4 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/unreachable-loop-block.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/unreachable-loop-block.ll
@@ -1,4 +1,3 @@
-; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-- -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
; CHECK: DIVERGENT: %tmp = cmpxchg volatile
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll
similarity index 93%
rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll
rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll
index 608e21bbdeab6..ed05aaad7efd4 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll
@@ -1,4 +1,3 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
declare i32 @llvm.amdgcn.workitem.id.x() #0
diff --git a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/daorder.ll b/llvm/test/Analysis/UniformityAnalysis/NVPTX/daorder.ll
similarity index 88%
rename from llvm/test/Analysis/DivergenceAnalysis/NVPTX/daorder.ll
rename to llvm/test/Analysis/UniformityAnalysis/NVPTX/daorder.ll
index f43e9ff94132d..89d8c5aa90ab1 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/daorder.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/NVPTX/daorder.ll
@@ -1,4 +1,3 @@
-; RUN: opt %s -passes='print<divergence>' -disable-output 2>&1 | FileCheck %s
; RUN: opt %s -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
@@ -14,11 +13,11 @@ entry:
; CHECK: DIVERGENT: br i1 %cond,
A:
%defAtA = add i32 %n, 1 ; uniform
-; CHECK-NOT: DIVERGENT: %defAtA =
+; CHECK-NOT: DIVERGENT: %defAtA =
br label %C
B:
%defAtB = add i32 %n, 2 ; uniform
-; CHECK-NOT: DIVERGENT: %defAtB =
+; CHECK-NOT: DIVERGENT: %defAtB =
br label %C
C:
%defAtC = phi i32 [ %defAtA, %A ], [ %defAtB, %B ] ; divergent
diff --git a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll b/llvm/test/Analysis/UniformityAnalysis/NVPTX/diverge.ll
similarity index 98%
rename from llvm/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll
rename to llvm/test/Analysis/UniformityAnalysis/NVPTX/diverge.ll
index a2ac88ca7e2b6..0ac1b5f541471 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/NVPTX/diverge.ll
@@ -1,4 +1,3 @@
-; RUN: opt %s -passes='print<divergence>' -disable-output 2>&1 | FileCheck %s
; RUN: opt %s -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
diff --git a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/hidden_diverge.ll b/llvm/test/Analysis/UniformityAnalysis/NVPTX/hidden_diverge.ll
similarity index 92%
rename from llvm/test/Analysis/DivergenceAnalysis/NVPTX/hidden_diverge.ll
rename to llvm/test/Analysis/UniformityAnalysis/NVPTX/hidden_diverge.ll
index 54329e099d42c..e319211771c0c 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/hidden_diverge.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/NVPTX/hidden_diverge.ll
@@ -1,4 +1,3 @@
-; RUN: opt %s -passes='print<divergence>' -disable-output 2>&1 | FileCheck %s
; RUN: opt %s -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
diff --git a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll b/llvm/test/Analysis/UniformityAnalysis/NVPTX/irreducible.ll
similarity index 96%
rename from llvm/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll
rename to llvm/test/Analysis/UniformityAnalysis/NVPTX/irreducible.ll
index 5cfecbd12f4ae..cd729a918f814 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/NVPTX/irreducible.ll
@@ -1,4 +1,3 @@
-; RUN: opt %s -passes='print<divergence>' -disable-output 2>&1 | FileCheck %s
; RUN: opt %s -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s
; NOTE: The new pass manager does not fall back on legacy divergence
diff --git a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/lit.local.cfg b/llvm/test/Analysis/UniformityAnalysis/NVPTX/lit.local.cfg
similarity index 100%
rename from llvm/test/Analysis/DivergenceAnalysis/NVPTX/lit.local.cfg
rename to llvm/test/Analysis/UniformityAnalysis/NVPTX/lit.local.cfg
diff --git a/llvm/test/CodeGen/AMDGPU/always-uniform.ll b/llvm/test/CodeGen/AMDGPU/always-uniform.ll
index 5a927ded9d67c..51398ce61e9f1 100644
--- a/llvm/test/CodeGen/AMDGPU/always-uniform.ll
+++ b/llvm/test/CodeGen/AMDGPU/always-uniform.ll
@@ -1,19 +1,8 @@
-; RUN: opt -mtriple amdgcn-amdhsa -mcpu=gfx90a -passes=legacy-divergence-analysis < %s -S 2>&1 | FileCheck -check-prefix=OPT %s
; RUN: llc -mtriple amdgcn-amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
declare i32 @llvm.amdgcn.workitem.id.x()
declare i32 @llvm.amdgcn.readfirstlane(i32)
-; OPT-LABEL: define amdgpu_kernel void @readfirstlane_uniform(
-; OPT-NEXT: %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
-; OPT-NEXT: %scalar = tail call i32 @llvm.amdgcn.readfirstlane(i32 %tid)
-; OPT-NEXT: %idx = zext i32 %scalar to i64
-; OPT-NEXT: %gep0 = getelementptr inbounds float, ptr addrspace(1) %0, i64 %idx
-; OPT-NEXT: %val = load float, ptr addrspace(1) %gep0, align 4
-; OPT-NEXT: %gep1 = getelementptr inbounds float, ptr addrspace(1) %1, i64 10
-; OPT-NEXT: store float %val, ptr addrspace(1) %gep1, align 4
-; OPT-NEXT: ret void
-;
; GCN-LABEL: readfirstlane_uniform
; GCN: s_load_dwordx4 s[[[IN_ADDR:[0-9]+]]:3], s[4:5], 0x0
; GCN: v_readfirstlane_b32 s[[SCALAR:[0-9]+]], v0
diff --git a/llvm/test/CodeGen/AMDGPU/smrd.ll b/llvm/test/CodeGen/AMDGPU/smrd.ll
index 49c1bb378bcc6..517b23abd75a6 100644
--- a/llvm/test/CodeGen/AMDGPU/smrd.ll
+++ b/llvm/test/CodeGen/AMDGPU/smrd.ll
@@ -645,8 +645,7 @@ exit:
; GCN-LABEL: {{^}}smrd_uniform_loop2:
-; (this test
diff ers from smrd_uniform_loop by the more complex structure of phis,
-; which used to confuse the DivergenceAnalysis after structurization)
+; (this test
diff ers from smrd_uniform_loop by the more complex structure of phis)
;
; TODO: we should keep the loop counter in an SGPR and use an S_BUFFER_LOAD
;
diff --git a/llvm/unittests/Analysis/CMakeLists.txt b/llvm/unittests/Analysis/CMakeLists.txt
index 91a129edbc145..c21ad3afa18a3 100644
--- a/llvm/unittests/Analysis/CMakeLists.txt
+++ b/llvm/unittests/Analysis/CMakeLists.txt
@@ -22,7 +22,6 @@ set(ANALYSIS_TEST_SOURCES
CGSCCPassManagerTest.cpp
ConstraintSystemTest.cpp
DDGTest.cpp
- DivergenceAnalysisTest.cpp
DomTreeUpdaterTest.cpp
GlobalsModRefTest.cpp
FunctionPropertiesAnalysisTest.cpp
diff --git a/llvm/unittests/Analysis/DivergenceAnalysisTest.cpp b/llvm/unittests/Analysis/DivergenceAnalysisTest.cpp
deleted file mode 100644
index 0737e7773fb64..0000000000000
--- a/llvm/unittests/Analysis/DivergenceAnalysisTest.cpp
+++ /dev/null
@@ -1,430 +0,0 @@
-//===- DivergenceAnalysisTest.cpp - DivergenceAnalysis unit tests ---------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/PostDominators.h"
-#include "llvm/Analysis/SyncDependenceAnalysis.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/AsmParser/Parser.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Verifier.h"
-#include "llvm/Support/SourceMgr.h"
-#include "gtest/gtest.h"
-
-namespace llvm {
-namespace {
-
-BasicBlock *GetBlockByName(StringRef BlockName, Function &F) {
- for (auto &BB : F) {
- if (BB.getName() != BlockName)
- continue;
- return &BB;
- }
- return nullptr;
-}
-
-// We use this fixture to ensure that we clean up DivergenceAnalysisImpl before
-// deleting the PassManager.
-class DivergenceAnalysisTest : public testing::Test {
-protected:
- LLVMContext Context;
- Module M;
- TargetLibraryInfoImpl TLII;
- TargetLibraryInfo TLI;
-
- std::unique_ptr<DominatorTree> DT;
- std::unique_ptr<PostDominatorTree> PDT;
- std::unique_ptr<LoopInfo> LI;
- std::unique_ptr<SyncDependenceAnalysis> SDA;
-
- DivergenceAnalysisTest() : M("", Context), TLII(), TLI(TLII) {}
-
- DivergenceAnalysisImpl buildDA(Function &F, bool IsLCSSA) {
- DT.reset(new DominatorTree(F));
- PDT.reset(new PostDominatorTree(F));
- LI.reset(new LoopInfo(*DT));
- SDA.reset(new SyncDependenceAnalysis(*DT, *PDT, *LI));
- return DivergenceAnalysisImpl(F, nullptr, *DT, *LI, *SDA, IsLCSSA);
- }
-
- void runWithDA(
- Module &M, StringRef FuncName, bool IsLCSSA,
- function_ref<void(Function &F, LoopInfo &LI, DivergenceAnalysisImpl &DA)>
- Test) {
- auto *F = M.getFunction(FuncName);
- ASSERT_NE(F, nullptr) << "Could not find " << FuncName;
- DivergenceAnalysisImpl DA = buildDA(*F, IsLCSSA);
- Test(*F, *LI, DA);
- }
-};
-
-// Simple initial state test
-TEST_F(DivergenceAnalysisTest, DAInitialState) {
- IntegerType *IntTy = IntegerType::getInt32Ty(Context);
- FunctionType *FTy =
- FunctionType::get(Type::getVoidTy(Context), {IntTy}, false);
- Function *F = Function::Create(FTy, Function::ExternalLinkage, "f", M);
- BasicBlock *BB = BasicBlock::Create(Context, "entry", F);
- ReturnInst::Create(Context, nullptr, BB);
-
- DivergenceAnalysisImpl DA = buildDA(*F, false);
-
- // Whole function region
- EXPECT_EQ(DA.getRegionLoop(), nullptr);
-
- // No divergence in initial state
- EXPECT_FALSE(DA.hasDetectedDivergence());
-
- // No spurious divergence
- DA.compute();
- EXPECT_FALSE(DA.hasDetectedDivergence());
-
- // Detected divergence after marking
- Argument &arg = *F->arg_begin();
- DA.markDivergent(arg);
-
- EXPECT_TRUE(DA.hasDetectedDivergence());
- EXPECT_TRUE(DA.isDivergent(arg));
-
- DA.compute();
- EXPECT_TRUE(DA.hasDetectedDivergence());
- EXPECT_TRUE(DA.isDivergent(arg));
-}
-
-TEST_F(DivergenceAnalysisTest, DANoLCSSA) {
- LLVMContext C;
- SMDiagnostic Err;
-
- std::unique_ptr<Module> M = parseAssemblyString(
- "target datalayout = \"e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128\" "
- " "
- "define i32 @f_1(i8* nocapture %arr, i32 %n, i32* %A, i32* %B) "
- " local_unnamed_addr { "
- "entry: "
- " br label %loop.ph "
- " "
- "loop.ph: "
- " br label %loop "
- " "
- "loop: "
- " %iv0 = phi i32 [ %iv0.inc, %loop ], [ 0, %loop.ph ] "
- " %iv1 = phi i32 [ %iv1.inc, %loop ], [ -2147483648, %loop.ph ] "
- " %iv0.inc = add i32 %iv0, 1 "
- " %iv1.inc = add i32 %iv1, 3 "
- " %cond.cont = icmp slt i32 %iv0, %n "
- " br i1 %cond.cont, label %loop, label %for.end.loopexit "
- " "
- "for.end.loopexit: "
- " ret i32 %iv0 "
- "} ",
- Err, C);
-
- Function *F = M->getFunction("f_1");
- DivergenceAnalysisImpl DA = buildDA(*F, false);
- EXPECT_FALSE(DA.hasDetectedDivergence());
-
- auto ItArg = F->arg_begin();
- ItArg++;
- auto &NArg = *ItArg;
-
- // Seed divergence in argument %n
- DA.markDivergent(NArg);
-
- DA.compute();
- EXPECT_TRUE(DA.hasDetectedDivergence());
-
- // Verify that "ret %iv.0" is divergent
- auto ItBlock = F->begin();
- std::advance(ItBlock, 3);
- auto &ExitBlock = *GetBlockByName("for.end.loopexit", *F);
- auto &RetInst = *cast<ReturnInst>(ExitBlock.begin());
- EXPECT_TRUE(DA.isDivergent(RetInst));
-}
-
-TEST_F(DivergenceAnalysisTest, DALCSSA) {
- LLVMContext C;
- SMDiagnostic Err;
-
- std::unique_ptr<Module> M = parseAssemblyString(
- "target datalayout = \"e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128\" "
- " "
- "define i32 @f_lcssa(i8* nocapture %arr, i32 %n, i32* %A, i32* %B) "
- " local_unnamed_addr { "
- "entry: "
- " br label %loop.ph "
- " "
- "loop.ph: "
- " br label %loop "
- " "
- "loop: "
- " %iv0 = phi i32 [ %iv0.inc, %loop ], [ 0, %loop.ph ] "
- " %iv1 = phi i32 [ %iv1.inc, %loop ], [ -2147483648, %loop.ph ] "
- " %iv0.inc = add i32 %iv0, 1 "
- " %iv1.inc = add i32 %iv1, 3 "
- " %cond.cont = icmp slt i32 %iv0, %n "
- " br i1 %cond.cont, label %loop, label %for.end.loopexit "
- " "
- "for.end.loopexit: "
- " %val.ret = phi i32 [ %iv0, %loop ] "
- " br label %detached.return "
- " "
- "detached.return: "
- " ret i32 %val.ret "
- "} ",
- Err, C);
-
- Function *F = M->getFunction("f_lcssa");
- DivergenceAnalysisImpl DA = buildDA(*F, true);
- EXPECT_FALSE(DA.hasDetectedDivergence());
-
- auto ItArg = F->arg_begin();
- ItArg++;
- auto &NArg = *ItArg;
-
- // Seed divergence in argument %n
- DA.markDivergent(NArg);
-
- DA.compute();
- EXPECT_TRUE(DA.hasDetectedDivergence());
-
- // Verify that "ret %iv.0" is divergent
- auto ItBlock = F->begin();
- std::advance(ItBlock, 4);
- auto &ExitBlock = *GetBlockByName("detached.return", *F);
- auto &RetInst = *cast<ReturnInst>(ExitBlock.begin());
- EXPECT_TRUE(DA.isDivergent(RetInst));
-}
-
-TEST_F(DivergenceAnalysisTest, DAJoinDivergence) {
- LLVMContext C;
- SMDiagnostic Err;
-
- std::unique_ptr<Module> M = parseAssemblyString(
- "target datalayout = \"e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128\" "
- " "
- "define void @f_1(i1 %a, i1 %b, i1 %c) "
- " local_unnamed_addr { "
- "A: "
- " br i1 %a, label %B, label %C "
- " "
- "B: "
- " br i1 %b, label %C, label %D "
- " "
- "C: "
- " %c.join = phi i32 [ 0, %A ], [ 1, %B ] "
- " br i1 %c, label %D, label %E "
- " "
- "D: "
- " %d.join = phi i32 [ 0, %B ], [ 1, %C ] "
- " br label %E "
- " "
- "E: "
- " %e.join = phi i32 [ 0, %C ], [ 1, %D ] "
- " ret void "
- "} "
- " "
- "define void @f_2(i1 %a, i1 %b, i1 %c) "
- " local_unnamed_addr { "
- "A: "
- " br i1 %a, label %B, label %E "
- " "
- "B: "
- " br i1 %b, label %C, label %D "
- " "
- "C: "
- " br label %D "
- " "
- "D: "
- " %d.join = phi i32 [ 0, %B ], [ 1, %C ] "
- " br label %E "
- " "
- "E: "
- " %e.join = phi i32 [ 0, %A ], [ 1, %D ] "
- " ret void "
- "} "
- " "
- "define void @f_3(i1 %a, i1 %b, i1 %c)"
- " local_unnamed_addr { "
- "A: "
- " br i1 %a, label %B, label %C "
- " "
- "B: "
- " br label %C "
- " "
- "C: "
- " %c.join = phi i32 [ 0, %A ], [ 1, %B ] "
- " br i1 %c, label %D, label %E "
- " "
- "D: "
- " br label %E "
- " "
- "E: "
- " %e.join = phi i32 [ 0, %C ], [ 1, %D ] "
- " ret void "
- "} ",
- Err, C);
-
- // Maps divergent conditions to the basic blocks whose Phi nodes become
- // divergent. Blocks need to be listed in IR order.
- using SmallBlockVec = SmallVector<const BasicBlock *, 4>;
- using InducedDivJoinMap = std::map<const Value *, SmallBlockVec>;
-
- // Actual function performing the checks.
- auto CheckDivergenceFunc = [this](Function &F,
- InducedDivJoinMap &ExpectedDivJoins) {
- for (auto &ItCase : ExpectedDivJoins) {
- auto *DivVal = ItCase.first;
- auto DA = buildDA(F, false);
- DA.markDivergent(*DivVal);
- DA.compute();
-
- // List of basic blocks that shall host divergent Phi nodes.
- auto ItDivJoins = ItCase.second.begin();
-
- for (auto &BB : F) {
- auto *Phi = dyn_cast<PHINode>(BB.begin());
- if (!Phi)
- continue;
-
- if (ItDivJoins != ItCase.second.end() && &BB == *ItDivJoins) {
- EXPECT_TRUE(DA.isDivergent(*Phi));
- // Advance to next block with expected divergent PHI node.
- ++ItDivJoins;
- } else {
- EXPECT_FALSE(DA.isDivergent(*Phi));
- }
- }
- }
- };
-
- {
- auto *F = M->getFunction("f_1");
- auto ItBlocks = F->begin();
- ItBlocks++; // Skip A
- ItBlocks++; // Skip B
- auto *C = &*ItBlocks++;
- auto *D = &*ItBlocks++;
- auto *E = &*ItBlocks;
-
- auto ItArg = F->arg_begin();
- auto *AArg = &*ItArg++;
- auto *BArg = &*ItArg++;
- auto *CArg = &*ItArg;
-
- InducedDivJoinMap DivJoins;
- DivJoins.emplace(AArg, SmallBlockVec({C, D, E}));
- DivJoins.emplace(BArg, SmallBlockVec({D, E}));
- DivJoins.emplace(CArg, SmallBlockVec({E}));
-
- CheckDivergenceFunc(*F, DivJoins);
- }
-
- {
- auto *F = M->getFunction("f_2");
- auto ItBlocks = F->begin();
- ItBlocks++; // Skip A
- ItBlocks++; // Skip B
- ItBlocks++; // Skip C
- auto *D = &*ItBlocks++;
- auto *E = &*ItBlocks;
-
- auto ItArg = F->arg_begin();
- auto *AArg = &*ItArg++;
- auto *BArg = &*ItArg++;
- auto *CArg = &*ItArg;
-
- InducedDivJoinMap DivJoins;
- DivJoins.emplace(AArg, SmallBlockVec({E}));
- DivJoins.emplace(BArg, SmallBlockVec({D}));
- DivJoins.emplace(CArg, SmallBlockVec({}));
-
- CheckDivergenceFunc(*F, DivJoins);
- }
-
- {
- auto *F = M->getFunction("f_3");
- auto ItBlocks = F->begin();
- ItBlocks++; // Skip A
- ItBlocks++; // Skip B
- auto *C = &*ItBlocks++;
- ItBlocks++; // Skip D
- auto *E = &*ItBlocks;
-
- auto ItArg = F->arg_begin();
- auto *AArg = &*ItArg++;
- auto *BArg = &*ItArg++;
- auto *CArg = &*ItArg;
-
- InducedDivJoinMap DivJoins;
- DivJoins.emplace(AArg, SmallBlockVec({C}));
- DivJoins.emplace(BArg, SmallBlockVec({}));
- DivJoins.emplace(CArg, SmallBlockVec({E}));
-
- CheckDivergenceFunc(*F, DivJoins);
- }
-}
-
-TEST_F(DivergenceAnalysisTest, DASwitchUnreachableDefault) {
- LLVMContext C;
- SMDiagnostic Err;
-
- std::unique_ptr<Module> M = parseAssemblyString(
- "target datalayout = \"e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128\" "
- " "
- "define void @switch_unreachable_default(i32 %cond) local_unnamed_addr { "
- "entry: "
- " switch i32 %cond, label %sw.default [ "
- " i32 0, label %sw.bb0 "
- " i32 1, label %sw.bb1 "
- " ] "
- " "
- "sw.bb0: "
- " br label %sw.epilog "
- " "
- "sw.bb1: "
- " br label %sw.epilog "
- " "
- "sw.default: "
- " unreachable "
- " "
- "sw.epilog: "
- " %div.dbl = phi double [ 0.0, %sw.bb0], [ -1.0, %sw.bb1 ] "
- " ret void "
- "}",
- Err, C);
-
- auto *F = M->getFunction("switch_unreachable_default");
- auto &CondArg = *F->arg_begin();
- auto DA = buildDA(*F, false);
-
- EXPECT_FALSE(DA.hasDetectedDivergence());
-
- DA.markDivergent(CondArg);
- DA.compute();
-
- // Still %CondArg is divergent.
- EXPECT_TRUE(DA.hasDetectedDivergence());
-
- // The join uni.dbl is not divergent (see D52221)
- auto &ExitBlock = *GetBlockByName("sw.epilog", *F);
- auto &DivDblPhi = *cast<PHINode>(ExitBlock.begin());
- EXPECT_TRUE(DA.isDivergent(DivDblPhi));
-}
-
-} // end anonymous namespace
-} // end namespace llvm
diff --git a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn
index 23dd1cbd79c36..32bd748621976 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn
@@ -45,7 +45,6 @@ static_library("Analysis") {
"DependenceAnalysis.cpp",
"DependenceGraphBuilder.cpp",
"DevelopmentModeInlineAdvisor.cpp",
- "DivergenceAnalysis.cpp",
"DomPrinter.cpp",
"DomTreeUpdater.cpp",
"DominanceFrontier.cpp",
@@ -72,7 +71,6 @@ static_library("Analysis") {
"LazyBranchProbabilityInfo.cpp",
"LazyCallGraph.cpp",
"LazyValueInfo.cpp",
- "LegacyDivergenceAnalysis.cpp",
"Lint.cpp",
"Loads.cpp",
"Local.cpp",
@@ -118,7 +116,6 @@ static_library("Analysis") {
"ScopedNoAliasAA.cpp",
"StackLifetime.cpp",
"StackSafetyAnalysis.cpp",
- "SyncDependenceAnalysis.cpp",
"SyntheticCountsUtils.cpp",
"TFLiteUtils.cpp",
"TargetLibraryInfo.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn
index dbeed697ce497..c24f99555ce36 100644
--- a/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn
@@ -24,7 +24,6 @@ unittest("AnalysisTests") {
"CaptureTrackingTest.cpp",
"ConstraintSystemTest.cpp",
"DDGTest.cpp",
- "DivergenceAnalysisTest.cpp",
"DomTreeUpdaterTest.cpp",
"FunctionPropertiesAnalysisTest.cpp",
"GlobalsModRefTest.cpp",
More information about the llvm-commits
mailing list