[llvm] r348048 - [DA] GPUDivergenceAnalysis for unstructured GPU kernels
Nicolai Haehnle via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 30 14:55:20 PST 2018
Author: nha
Date: Fri Nov 30 14:55:20 2018
New Revision: 348048
URL: http://llvm.org/viewvc/llvm-project?rev=348048&view=rev
Log:
[DA] GPUDivergenceAnalysis for unstructured GPU kernels
Summary:
This is patch #3 of the new DivergenceAnalysis
<https://lists.llvm.org/pipermail/llvm-dev/2018-May/123606.html>
The GPUDivergenceAnalysis is intended to eventually supersede the existing
LegacyDivergenceAnalysis, which produces incorrect results on unstructured
control-flow graphs:
<https://bugs.llvm.org/show_bug.cgi?id=37185>
This patch adds the option -use-gpu-divergence-analysis to the
LegacyDivergenceAnalysis to turn it into a transparent wrapper for the
GPUDivergenceAnalysis.
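For illustration, a minimal sketch of how the new mode is exercised, written
in the style of the AMDGPU tests added below (the kernel @example and its body
are hypothetical; the RUN line mirrors the tests added by this commit):

; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s

; The work-item id is a source of divergence, so the branch on it is expected
; to be reported as divergent by the wrapped GPUDivergenceAnalysis.
define amdgpu_kernel void @example(i32 %n) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %cond = icmp slt i32 %tid, 0
  br i1 %cond, label %A, label %B
; CHECK: DIVERGENT: br i1 %cond,
A:
  br label %B
B:
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x()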
Reviewers: nhaehnle
Reviewed By: nhaehnle
Subscribers: jholewinski, jvesely, jfb, llvm-commits, alex-t, sameerds, arsenm, nhaehnle
Differential Revision: https://reviews.llvm.org/D53493
Added:
llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll
llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll
llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll
llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll
llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll
llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll
llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll
llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/lit.local.cfg
llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll
llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll
llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll
llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/temporal_diverge.ll
llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll
llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/daorder.ll
llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll
llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/hidden_diverge.ll
llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll
llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/lit.local.cfg
Modified:
llvm/trunk/include/llvm/Analysis/DivergenceAnalysis.h
llvm/trunk/include/llvm/Analysis/LegacyDivergenceAnalysis.h
llvm/trunk/lib/Analysis/DivergenceAnalysis.cpp
llvm/trunk/lib/Analysis/LegacyDivergenceAnalysis.cpp
Modified: llvm/trunk/include/llvm/Analysis/DivergenceAnalysis.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/DivergenceAnalysis.h?rev=348048&r1=348047&r2=348048&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/DivergenceAnalysis.h (original)
+++ llvm/trunk/include/llvm/Analysis/DivergenceAnalysis.h Fri Nov 30 14:55:20 2018
@@ -173,6 +173,33 @@ private:
std::vector<const Instruction *> Worklist;
};
+/// \brief Divergence analysis frontend for GPU kernels.
+class GPUDivergenceAnalysis {
+ SyncDependenceAnalysis SDA;
+ DivergenceAnalysis DA;
+
+public:
+ /// Runs the divergence analysis on @F, a GPU kernel
+ GPUDivergenceAnalysis(Function &F, const DominatorTree &DT,
+ const PostDominatorTree &PDT, const LoopInfo &LI,
+ const TargetTransformInfo &TTI);
+
+ /// Whether any divergence was detected.
+ bool hasDivergence() const { return DA.hasDetectedDivergence(); }
+
+ /// The GPU kernel this analysis result is for
+ const Function &getFunction() const { return DA.getFunction(); }
+
+ /// Whether \p V is divergent.
+ bool isDivergent(const Value &V) const;
+
+ /// Whether \p V is uniform/non-divergent
+ bool isUniform(const Value &V) const { return !isDivergent(V); }
+
+ /// Print all divergent values in the kernel.
+ void print(raw_ostream &OS, const Module *) const;
+};
+
} // namespace llvm
#endif // LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H
Modified: llvm/trunk/include/llvm/Analysis/LegacyDivergenceAnalysis.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/LegacyDivergenceAnalysis.h?rev=348048&r1=348047&r2=348048&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/LegacyDivergenceAnalysis.h (original)
+++ llvm/trunk/include/llvm/Analysis/LegacyDivergenceAnalysis.h Fri Nov 30 14:55:20 2018
@@ -19,9 +19,11 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
namespace llvm {
class Value;
+class GPUDivergenceAnalysis;
class LegacyDivergenceAnalysis : public FunctionPass {
public:
static char ID;
@@ -41,7 +43,7 @@ public:
//
// Even if this function returns false, V may still be divergent when used
// in a different basic block.
- bool isDivergent(const Value *V) const { return DivergentValues.count(V); }
+ bool isDivergent(const Value *V) const;
// Returns true if V is uniform/non-divergent.
//
@@ -53,6 +55,12 @@ public:
void removeValue(const Value *V) { DivergentValues.erase(V); }
private:
+ // Whether analysis should be performed by GPUDivergenceAnalysis.
+ bool shouldUseGPUDivergenceAnalysis(const Function &F) const;
+
+ // (optional) handle to new DivergenceAnalysis
+ std::unique_ptr<GPUDivergenceAnalysis> gpuDA;
+
// Stores all divergent values.
DenseSet<const Value *> DivergentValues;
};
Modified: llvm/trunk/lib/Analysis/DivergenceAnalysis.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/DivergenceAnalysis.cpp?rev=348048&r1=348047&r2=348048&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/DivergenceAnalysis.cpp (original)
+++ llvm/trunk/lib/Analysis/DivergenceAnalysis.cpp Fri Nov 30 14:55:20 2018
@@ -422,3 +422,36 @@ void DivergenceAnalysis::print(raw_ostre
OS << "DIVERGENT:" << I << '\n';
}
}
+
+// class GPUDivergenceAnalysis
+GPUDivergenceAnalysis::GPUDivergenceAnalysis(Function &F,
+ const DominatorTree &DT,
+ const PostDominatorTree &PDT,
+ const LoopInfo &LI,
+ const TargetTransformInfo &TTI)
+ : SDA(DT, PDT, LI), DA(F, nullptr, DT, LI, SDA, false) {
+ for (auto &I : instructions(F)) {
+ if (TTI.isSourceOfDivergence(&I)) {
+ DA.markDivergent(I);
+ } else if (TTI.isAlwaysUniform(&I)) {
+ DA.addUniformOverride(I);
+ }
+ }
+ for (auto &Arg : F.args()) {
+ if (TTI.isSourceOfDivergence(&Arg)) {
+ DA.markDivergent(Arg);
+ }
+ }
+
+ DA.compute();
+}
+
+bool GPUDivergenceAnalysis::isDivergent(const Value &val) const {
+ return DA.isDivergent(val);
+}
+
+void GPUDivergenceAnalysis::print(raw_ostream &OS, const Module *mod) const {
+ OS << "Divergence of kernel " << DA.getFunction().getName() << " {\n";
+ DA.print(OS, mod);
+ OS << "}\n";
+}
Modified: llvm/trunk/lib/Analysis/LegacyDivergenceAnalysis.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/LegacyDivergenceAnalysis.cpp?rev=348048&r1=348047&r2=348048&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/LegacyDivergenceAnalysis.cpp (original)
+++ llvm/trunk/lib/Analysis/LegacyDivergenceAnalysis.cpp Fri Nov 30 14:55:20 2018
@@ -1,4 +1,5 @@
-//===- LegacyDivergenceAnalysis.cpp --------- Legacy Divergence Analysis Implementation -==//
+//===- LegacyDivergenceAnalysis.cpp --------- Legacy Divergence Analysis
+//Implementation -==//
//
// The LLVM Compiler Infrastructure
//
@@ -64,6 +65,9 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/PostDominators.h"
@@ -79,6 +83,12 @@ using namespace llvm;
#define DEBUG_TYPE "divergence"
+// transparently use the GPUDivergenceAnalysis
+static cl::opt<bool> UseGPUDA("use-gpu-divergence-analysis", cl::init(false),
+ cl::Hidden,
+ cl::desc("turn the LegacyDivergenceAnalysis into "
+ "a wrapper for GPUDivergenceAnalysis"));
+
namespace {
class DivergencePropagator {
@@ -262,16 +272,17 @@ void DivergencePropagator::propagate() {
}
}
-} /// end namespace anonymous
+} // namespace
// Register this pass.
char LegacyDivergenceAnalysis::ID = 0;
-INITIALIZE_PASS_BEGIN(LegacyDivergenceAnalysis, "divergence", "Legacy Divergence Analysis",
- false, true)
+INITIALIZE_PASS_BEGIN(LegacyDivergenceAnalysis, "divergence",
+ "Legacy Divergence Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
-INITIALIZE_PASS_END(LegacyDivergenceAnalysis, "divergence", "Legacy Divergence Analysis",
- false, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(LegacyDivergenceAnalysis, "divergence",
+ "Legacy Divergence Analysis", false, true)
FunctionPass *llvm::createLegacyDivergenceAnalysisPass() {
return new LegacyDivergenceAnalysis();
@@ -280,9 +291,24 @@ FunctionPass *llvm::createLegacyDivergen
void LegacyDivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<PostDominatorTreeWrapperPass>();
+ if (UseGPUDA)
+ AU.addRequired<LoopInfoWrapperPass>();
AU.setPreservesAll();
}
+bool LegacyDivergenceAnalysis::shouldUseGPUDivergenceAnalysis(
+ const Function &F) const {
+ if (!UseGPUDA)
+ return false;
+
+ // GPUDivergenceAnalysis requires a reducible CFG.
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ using RPOTraversal = ReversePostOrderTraversal<const Function *>;
+ RPOTraversal FuncRPOT(&F);
+ return !containsIrreducibleCFG<const BasicBlock *, const RPOTraversal,
+ const LoopInfo>(FuncRPOT, LI);
+}
+
bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
if (TTIWP == nullptr)
@@ -295,36 +321,59 @@ bool LegacyDivergenceAnalysis::runOnFunc
return false;
DivergentValues.clear();
+ gpuDA = nullptr;
+
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
- DivergencePropagator DP(F, TTI,
- getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
- PDT, DivergentValues);
- DP.populateWithSourcesOfDivergence();
- DP.propagate();
- LLVM_DEBUG(
- dbgs() << "\nAfter divergence analysis on " << F.getName() << ":\n";
- print(dbgs(), F.getParent())
- );
+
+ if (shouldUseGPUDivergenceAnalysis(F)) {
+ // run the new GPU divergence analysis
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ gpuDA = llvm::make_unique<GPUDivergenceAnalysis>(F, DT, PDT, LI, TTI);
+
+ } else {
+ // run LLVM's existing DivergenceAnalysis
+ DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues);
+ DP.populateWithSourcesOfDivergence();
+ DP.propagate();
+ }
+
+ LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName()
+ << ":\n";
+ print(dbgs(), F.getParent()));
+
return false;
}
+bool LegacyDivergenceAnalysis::isDivergent(const Value *V) const {
+ if (gpuDA) {
+ return gpuDA->isDivergent(*V);
+ }
+ return DivergentValues.count(V);
+}
+
void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
- if (DivergentValues.empty())
+ if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty())
return;
- const Value *FirstDivergentValue = *DivergentValues.begin();
+
const Function *F;
- if (const Argument *Arg = dyn_cast<Argument>(FirstDivergentValue)) {
- F = Arg->getParent();
- } else if (const Instruction *I =
- dyn_cast<Instruction>(FirstDivergentValue)) {
- F = I->getParent()->getParent();
- } else {
- llvm_unreachable("Only arguments and instructions can be divergent");
+ if (!DivergentValues.empty()) {
+ const Value *FirstDivergentValue = *DivergentValues.begin();
+ if (const Argument *Arg = dyn_cast<Argument>(FirstDivergentValue)) {
+ F = Arg->getParent();
+ } else if (const Instruction *I =
+ dyn_cast<Instruction>(FirstDivergentValue)) {
+ F = I->getParent()->getParent();
+ } else {
+ llvm_unreachable("Only arguments and instructions can be divergent");
+ }
+ } else if (gpuDA) {
+ F = &gpuDA->getFunction();
}
// Dumps all divergent values in F, arguments and then instructions.
for (auto &Arg : F->args()) {
- OS << (DivergentValues.count(&Arg) ? "DIVERGENT: " : " ");
+ OS << (isDivergent(&Arg) ? "DIVERGENT: " : " ");
OS << Arg << "\n";
}
// Iterate instructions using instructions() to ensure a deterministic order.
@@ -332,7 +381,7 @@ void LegacyDivergenceAnalysis::print(raw
auto &BB = *BI;
OS << "\n " << BB.getName() << ":\n";
for (auto &I : BB.instructionsWithoutDebug()) {
- OS << (DivergentValues.count(&I) ? "DIVERGENT: " : " ");
+ OS << (isDivergent(&I) ? "DIVERGENT: " : " ");
OS << I << "\n";
}
}
Added: llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll?rev=348048&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll (added)
+++ llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll Fri Nov 30 14:55:20 2018
@@ -0,0 +1,14 @@
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+
+define amdgpu_kernel void @workitem_id_x() #1 {
+ %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK: DIVERGENT: %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+ %first.lane = call i32 @llvm.amdgcn.readfirstlane(i32 %id.x)
+; CHECK-NOT: DIVERGENT: %first.lane = call i32 @llvm.amdgcn.readfirstlane(i32 %id.x)
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.readfirstlane(i32) #0
+
+attributes #0 = { nounwind readnone }
Added: llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll?rev=348048&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll (added)
+++ llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll Fri Nov 30 14:55:20 2018
@@ -0,0 +1,45 @@
+; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+
+; CHECK: DIVERGENT: %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst
+define i32 @test1(i32* %ptr, i32 %val) #0 {
+ %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst
+ ret i32 %orig
+}
+
+; CHECK: DIVERGENT: %orig = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst
+define {i32, i1} @test2(i32* %ptr, i32 %cmp, i32 %new) {
+ %orig = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst
+ ret {i32, i1} %orig
+}
+
+; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val, i32 0, i32 0, i1 false)
+define i32 @test_atomic_inc_i32(i32 addrspace(1)* %ptr, i32 %val) #0 {
+ %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val, i32 0, i32 0, i1 false)
+ ret i32 %ret
+}
+
+; CHECK: DIVERGENT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 %val, i32 0, i32 0, i1 false)
+define i64 @test_atomic_inc_i64(i64 addrspace(1)* %ptr, i64 %val) #0 {
+ %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 %val, i32 0, i32 0, i1 false)
+ ret i64 %ret
+}
+
+; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val, i32 0, i32 0, i1 false)
+define i32 @test_atomic_dec_i32(i32 addrspace(1)* %ptr, i32 %val) #0 {
+ %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val, i32 0, i32 0, i1 false)
+ ret i32 %ret
+}
+
+; CHECK: DIVERGENT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 %val, i32 0, i32 0, i1 false)
+define i64 @test_atomic_dec_i64(i64 addrspace(1)* %ptr, i64 %val) #0 {
+ %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 %val, i32 0, i32 0, i1 false)
+ ret i64 %ret
+}
+
+declare i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #1
+declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #1
+declare i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #1
+declare i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind argmemonly }
Added: llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll?rev=348048&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll (added)
+++ llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll Fri Nov 30 14:55:20 2018
@@ -0,0 +1,26 @@
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+
+define amdgpu_kernel void @hidden_diverge(i32 %n, i32 %a, i32 %b) #0 {
+; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_diverge'
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %cond.var = icmp slt i32 %tid, 0
+ br i1 %cond.var, label %B, label %C ; divergent
+; CHECK: DIVERGENT: br i1 %cond.var,
+B:
+ %cond.uni = icmp slt i32 %n, 0
+ br i1 %cond.uni, label %C, label %merge ; uniform
+; CHECK-NOT: DIVERGENT: br i1 %cond.uni,
+C:
+ %phi.var.hidden = phi i32 [ 1, %entry ], [ 2, %B ]
+; CHECK: DIVERGENT: %phi.var.hidden = phi i32
+ br label %merge
+merge:
+ %phi.ipd = phi i32 [ %a, %B ], [ %b, %C ]
+; CHECK: DIVERGENT: %phi.ipd = phi i32
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+attributes #0 = { nounwind readnone }
Added: llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll?rev=348048&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll (added)
+++ llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll Fri Nov 30 14:55:20 2018
@@ -0,0 +1,223 @@
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+
+; divergent loop (H<header><exiting to X>, B<exiting to Y>)
+; the divergent join point in %exit is obscured by uniform control joining in %X
+define amdgpu_kernel void @hidden_loop_diverge(i32 %n, i32 %a, i32 %b) #0 {
+; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_loop_diverge':
+; CHECK-NOT: DIVERGENT: %uni.
+; CHECK-NOT: DIVERGENT: br i1 %uni.
+
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %uni.cond = icmp slt i32 %a, 0
+ br i1 %uni.cond, label %X, label %H ; uniform
+
+H:
+ %uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc, %B ]
+ %div.exitx = icmp slt i32 %tid, 0
+ br i1 %div.exitx, label %X, label %B ; divergent branch
+; CHECK: DIVERGENT: %div.exitx =
+; CHECK: DIVERGENT: br i1 %div.exitx,
+
+B:
+ %uni.inc = add i32 %uni.merge.h, 1
+ %div.exity = icmp sgt i32 %tid, 0
+ br i1 %div.exity, label %Y, label %H ; divergent branch
+; CHECK: DIVERGENT: %div.exity =
+; CHECK: DIVERGENT: br i1 %div.exity,
+
+X:
+ %div.merge.x = phi i32 [ %a, %entry ], [ %uni.merge.h, %H ] ; temporal divergent phi
+ br i1 %uni.cond, label %Y, label %exit
+; CHECK: DIVERGENT: %div.merge.x =
+
+Y:
+ %div.merge.y = phi i32 [ 42, %X ], [ %b, %B ]
+ br label %exit
+; CHECK: DIVERGENT: %div.merge.y =
+
+exit:
+ %div.merge.exit = phi i32 [ %a, %X ], [ %b, %Y ]
+ ret void
+; CHECK: DIVERGENT: %div.merge.exit =
+}
+
+; divergent loop (H<header><exiting to X>, B<exiting to Y>)
+; the phi nodes in X and Y don't actually receive divergent values
+define amdgpu_kernel void @unobserved_loop_diverge(i32 %n, i32 %a, i32 %b) #0 {
+; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'unobserved_loop_diverge':
+; CHECK-NOT: DIVERGENT: %uni.
+; CHECK-NOT: DIVERGENT: br i1 %uni.
+
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %uni.cond = icmp slt i32 %a, 0
+ br i1 %uni.cond, label %X, label %H ; uniform
+
+H:
+ %uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc, %B ]
+ %div.exitx = icmp slt i32 %tid, 0
+ br i1 %div.exitx, label %X, label %B ; divergent branch
+; CHECK: DIVERGENT: %div.exitx =
+; CHECK: DIVERGENT: br i1 %div.exitx,
+
+B:
+ %uni.inc = add i32 %uni.merge.h, 1
+ %div.exity = icmp sgt i32 %tid, 0
+ br i1 %div.exity, label %Y, label %H ; divergent branch
+; CHECK: DIVERGENT: %div.exity =
+; CHECK: DIVERGENT: br i1 %div.exity,
+
+X:
+ %uni.merge.x = phi i32 [ %a, %entry ], [ %b, %H ]
+ br label %exit
+
+Y:
+ %uni.merge.y = phi i32 [ %b, %B ]
+ br label %exit
+
+exit:
+ %div.merge.exit = phi i32 [ %a, %X ], [ %b, %Y ]
+ ret void
+; CHECK: DIVERGENT: %div.merge.exit =
+}
+
+; divergent loop (G<header>, L<exiting to D>) inside divergent loop (H<header>, B<exiting to X>, C<exiting to Y>, D, G, L)
+; the inner loop has no exit to top level.
+; the outer loop becomes divergent as its exiting branch in C is control-dependent on the inner loop's divergent loop exit in D.
+define amdgpu_kernel void @hidden_nestedloop_diverge(i32 %n, i32 %a, i32 %b) #0 {
+; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_nestedloop_diverge':
+; CHECK-NOT: DIVERGENT: %uni.
+; CHECK-NOT: DIVERGENT: br i1 %uni.
+
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %uni.cond = icmp slt i32 %a, 0
+ %div.exitx = icmp slt i32 %tid, 0
+ br i1 %uni.cond, label %X, label %H
+
+H:
+ %uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc, %D ]
+ br i1 %uni.cond, label %G, label %B
+; CHECK: DIVERGENT: %div.exitx =
+
+B:
+ br i1 %uni.cond, label %X, label %C
+
+C:
+ br i1 %uni.cond, label %Y, label %D
+
+D:
+ %uni.inc = add i32 %uni.merge.h, 1
+ br label %H
+
+G:
+ br i1 %div.exitx, label %C, label %L
+; CHECK: DIVERGENT: br i1 %div.exitx,
+
+L:
+ br i1 %uni.cond, label %D, label %G
+
+X:
+ %div.merge.x = phi i32 [ %a, %entry ], [ %uni.merge.h, %B ] ; temporal divergent phi
+ br i1 %uni.cond, label %Y, label %exit
+; CHECK: DIVERGENT: %div.merge.x =
+
+Y:
+ %div.merge.y = phi i32 [ 42, %X ], [ %b, %C ]
+ br label %exit
+; CHECK: DIVERGENT: %div.merge.y =
+
+exit:
+ %div.merge.exit = phi i32 [ %a, %X ], [ %b, %Y ]
+ ret void
+; CHECK: DIVERGENT: %div.merge.exit =
+}
+
+; divergent loop (G<header>, L<exiting to X>) in divergent loop (H<header>, B<exiting to C>, C, G, L)
+; the outer loop has no immediately divergent exiting edge.
+; the inner exiting edge is exiting to top-level through the outer loop causing both to become divergent.
+define amdgpu_kernel void @hidden_doublebreak_diverge(i32 %n, i32 %a, i32 %b) #0 {
+; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_doublebreak_diverge':
+; CHECK-NOT: DIVERGENT: %uni.
+; CHECK-NOT: DIVERGENT: br i1 %uni.
+
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %uni.cond = icmp slt i32 %a, 0
+ %div.exitx = icmp slt i32 %tid, 0
+ br i1 %uni.cond, label %X, label %H
+
+H:
+ %uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc, %C ]
+ br i1 %uni.cond, label %G, label %B
+; CHECK: DIVERGENT: %div.exitx =
+
+B:
+ br i1 %uni.cond, label %Y, label %C
+
+C:
+ %uni.inc = add i32 %uni.merge.h, 1
+ br label %H
+
+G:
+ br i1 %div.exitx, label %X, label %L ; two-level break
+; CHECK: DIVERGENT: br i1 %div.exitx,
+
+L:
+ br i1 %uni.cond, label %C, label %G
+
+X:
+ %div.merge.x = phi i32 [ %a, %entry ], [ %uni.merge.h, %G ] ; temporal divergence
+ br label %Y
+; CHECK: DIVERGENT: %div.merge.x =
+
+Y:
+ %div.merge.y = phi i32 [ 42, %X ], [ %b, %B ]
+ ret void
+; CHECK: DIVERGENT: %div.merge.y =
+}
+
+; divergent loop (G<header>, L<exiting to D>) contained inside a uniform loop (H<header>, B, G, L, D<exiting to X>)
+define amdgpu_kernel void @hidden_containedloop_diverge(i32 %n, i32 %a, i32 %b) #0 {
+; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_containedloop_diverge':
+; CHECK-NOT: DIVERGENT: %uni.
+; CHECK-NOT: DIVERGENT: br i1 %uni.
+
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %uni.cond = icmp slt i32 %a, 0
+ %div.exitx = icmp slt i32 %tid, 0
+ br i1 %uni.cond, label %X, label %H
+
+H:
+ %uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc.d, %D ]
+ br i1 %uni.cond, label %G, label %B
+; CHECK: DIVERGENT: %div.exitx =
+
+B:
+ %div.merge.b = phi i32 [ 42, %H ], [ %uni.merge.g, %G ]
+ br label %D
+; CHECK: DIVERGENT: %div.merge.b =
+
+G:
+ %uni.merge.g = phi i32 [ 123, %H ], [ %uni.inc.l, %L ]
+ br i1 %div.exitx, label %B, label %L
+; CHECK: DIVERGENT: br i1 %div.exitx,
+
+L:
+ %uni.inc.l = add i32 %uni.merge.g, 1
+ br i1 %uni.cond, label %G, label %D
+
+D:
+ %uni.inc.d = add i32 %uni.merge.h, 1
+ br i1 %uni.cond, label %X, label %H
+
+X:
+ %uni.merge.x = phi i32 [ %a, %entry ], [ %uni.inc.d, %D ]
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+attributes #0 = { nounwind readnone }
Added: llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll?rev=348048&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll (added)
+++ llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll Fri Nov 30 14:55:20 2018
@@ -0,0 +1,13 @@
+; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+
+; CHECK: DIVERGENT: %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
+define amdgpu_kernel void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) #0 {
+ %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
+ store i32 %swizzle, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #1
+
+attributes #0 = { nounwind convergent }
+attributes #1 = { nounwind readnone convergent }
Added: llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll?rev=348048&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll (added)
+++ llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll Fri Nov 30 14:55:20 2018
@@ -0,0 +1,48 @@
+; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+
+; This test contains an unstructured loop.
+; +-------------- entry ----------------+
+; | |
+; V V
+; i1 = phi(0, i3) i2 = phi(0, i3)
+; j1 = i1 + 1 ---> i3 = phi(j1, j2) <--- j2 = i2 + 2
+; ^ | ^
+; | V |
+; +-------- switch (tid / i3) ----------+
+; |
+; V
+; if (i3 == 5) // divergent
+; because sync dependent on (tid / i3).
+define i32 @unstructured_loop(i1 %entry_cond) {
+; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'unstructured_loop'
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2
+loop_entry_1:
+ %i1 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ]
+ %j1 = add i32 %i1, 1
+ br label %loop_body
+loop_entry_2:
+ %i2 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ]
+ %j2 = add i32 %i2, 2
+ br label %loop_body
+loop_body:
+ %i3 = phi i32 [ %j1, %loop_entry_1 ], [ %j2, %loop_entry_2 ]
+ br label %loop_latch
+loop_latch:
+ %div = sdiv i32 %tid, %i3
+ switch i32 %div, label %branch [ i32 1, label %loop_entry_1
+ i32 2, label %loop_entry_2 ]
+branch:
+ %cmp = icmp eq i32 %i3, 5
+ br i1 %cmp, label %then, label %else
+; CHECK: DIVERGENT: br i1 %cmp,
+then:
+ ret i32 0
+else:
+ ret i32 1
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+attributes #0 = { nounwind readnone }
Added: llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll?rev=348048&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll (added)
+++ llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll Fri Nov 30 14:55:20 2018
@@ -0,0 +1,41 @@
+; RUN: opt %s -mtriple amdgcn-- -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+
+; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_amdgpu_ps':
+; CHECK: DIVERGENT:
+; CHECK-NOT: %arg0
+; CHECK-NOT: %arg1
+; CHECK-NOT: %arg2
+; CHECK: <2 x i32> %arg3
+; CHECK: DIVERGENT: <3 x i32> %arg4
+; CHECK: DIVERGENT: float %arg5
+; CHECK: DIVERGENT: i32 %arg6
+
+define amdgpu_ps void @test_amdgpu_ps([4 x <16 x i8>] addrspace(2)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 {
+ ret void
+}
+
+; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_amdgpu_kernel':
+; CHECK-NOT: %arg0
+; CHECK-NOT: %arg1
+; CHECK-NOT: %arg2
+; CHECK-NOT: %arg3
+; CHECK-NOT: %arg4
+; CHECK-NOT: %arg5
+; CHECK-NOT: %arg6
+define amdgpu_kernel void @test_amdgpu_kernel([4 x <16 x i8>] addrspace(2)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 {
+ ret void
+}
+
+; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_c':
+; CHECK: DIVERGENT:
+; CHECK: DIVERGENT:
+; CHECK: DIVERGENT:
+; CHECK: DIVERGENT:
+; CHECK: DIVERGENT:
+; CHECK: DIVERGENT:
+; CHECK: DIVERGENT:
+define void @test_c([4 x <16 x i8>] addrspace(2)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 {
+ ret void
+}
+
+attributes #0 = { nounwind }
Added: llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/lit.local.cfg?rev=348048&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/lit.local.cfg (added)
+++ llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/lit.local.cfg Fri Nov 30 14:55:20 2018
@@ -0,0 +1,2 @@
+if not 'AMDGPU' in config.root.targets:
+ config.unsupported = True
Added: llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll?rev=348048&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll (added)
+++ llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll Fri Nov 30 14:55:20 2018
@@ -0,0 +1,103 @@
+;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.swap(
+define float @buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+ %orig = call i32 @llvm.amdgcn.buffer.atomic.swap(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+ %r = bitcast i32 %orig to float
+ ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.add(
+define float @buffer_atomic_add(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+ %orig = call i32 @llvm.amdgcn.buffer.atomic.add(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+ %r = bitcast i32 %orig to float
+ ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.sub(
+define float @buffer_atomic_sub(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+ %orig = call i32 @llvm.amdgcn.buffer.atomic.sub(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+ %r = bitcast i32 %orig to float
+ ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.smin(
+define float @buffer_atomic_smin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+ %orig = call i32 @llvm.amdgcn.buffer.atomic.smin(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+ %r = bitcast i32 %orig to float
+ ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.umin(
+define float @buffer_atomic_umin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+ %orig = call i32 @llvm.amdgcn.buffer.atomic.umin(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+ %r = bitcast i32 %orig to float
+ ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.smax(
+define float @buffer_atomic_smax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+ %orig = call i32 @llvm.amdgcn.buffer.atomic.smax(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+ %r = bitcast i32 %orig to float
+ ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.umax(
+define float @buffer_atomic_umax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+ %orig = call i32 @llvm.amdgcn.buffer.atomic.umax(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+ %r = bitcast i32 %orig to float
+ ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.and(
+define float @buffer_atomic_and(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+ %orig = call i32 @llvm.amdgcn.buffer.atomic.and(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+ %r = bitcast i32 %orig to float
+ ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.or(
+define float @buffer_atomic_or(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+ %orig = call i32 @llvm.amdgcn.buffer.atomic.or(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+ %r = bitcast i32 %orig to float
+ ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.xor(
+define float @buffer_atomic_xor(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+main_body:
+ %orig = call i32 @llvm.amdgcn.buffer.atomic.xor(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+ %r = bitcast i32 %orig to float
+ ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(
+define float @buffer_atomic_cmpswap(<4 x i32> inreg %rsrc, i32 inreg %data, i32 inreg %cmp) #0 {
+main_body:
+ %orig = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %data, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
+ %r = bitcast i32 %orig to float
+ ret float %r
+}
+
+declare i32 @llvm.amdgcn.buffer.atomic.swap(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.add(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.sub(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.smin(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.umin(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.smax(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.umax(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.and(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.or(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.xor(i32, <4 x i32>, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i1) #0
+
+attributes #0 = { nounwind }
Added: llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll?rev=348048&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll (added)
+++ llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll Fri Nov 30 14:55:20 2018
@@ -0,0 +1,131 @@
+;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(
+define float @image_atomic_swap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
+main_body:
+ %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
+ %r = bitcast i32 %orig to float
+ ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(
+define float @image_atomic_add(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
+main_body:
+ %orig = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
+ %r = bitcast i32 %orig to float
+ ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(
+define float @image_atomic_sub(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
+main_body:
+ %orig = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
+ %r = bitcast i32 %orig to float
+ ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(
+define float @image_atomic_smin(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
+main_body:
+ %orig = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
+ %r = bitcast i32 %orig to float
+ ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(
+define float @image_atomic_umin(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
+main_body:
+ %orig = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
+ %r = bitcast i32 %orig to float
+ ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(
+define float @image_atomic_smax(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
+main_body:
+ %orig = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
+ %r = bitcast i32 %orig to float
+ ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(
+define float @image_atomic_umax(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
+main_body:
+ %orig = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
+ %r = bitcast i32 %orig to float
+ ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(
+define float @image_atomic_and(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
+main_body:
+ %orig = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
+ %r = bitcast i32 %orig to float
+ ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(
+define float @image_atomic_or(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
+main_body:
+ %orig = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
+ %r = bitcast i32 %orig to float
+ ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(
+define float @image_atomic_xor(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
+main_body:
+ %orig = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
+ %r = bitcast i32 %orig to float
+ ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(
+define float @image_atomic_inc(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
+main_body:
+ %orig = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
+ %r = bitcast i32 %orig to float
+ ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(
+define float @image_atomic_dec(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
+main_body:
+ %orig = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
+ %r = bitcast i32 %orig to float
+ ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(
+define float @image_atomic_cmpswap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data, i32 inreg %cmp) #0 {
+main_body:
+ %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %data, i32 %cmp, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
+ %r = bitcast i32 %orig to float
+ ret float %r
+}
+
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(
+define float @image_atomic_add_2d(<8 x i32> inreg %rsrc, i32 inreg %s, i32 inreg %t, i32 inreg %data) #0 {
+main_body:
+ %orig = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+ %r = bitcast i32 %orig to float
+ ret float %r
+}
+
+declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
+
+declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
+
+attributes #0 = { nounwind }
Added: llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll?rev=348048&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll (added)
+++ llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll Fri Nov 30 14:55:20 2018
@@ -0,0 +1,30 @@
+; RUN: opt %s -mtriple amdgcn-- -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+
+; CHECK: DIVERGENT: %tmp5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp2
+; CHECK: DIVERGENT: %tmp10 = load volatile float, float addrspace(1)* %tmp5, align 4
+; CHECK: DIVERGENT: %tmp11 = load volatile float, float addrspace(1)* %tmp5, align 4
+
+; The post dominator tree does not have a root node in this case
+define amdgpu_kernel void @no_return_blocks(float addrspace(1)* noalias nocapture readonly %arg, float addrspace(1)* noalias nocapture readonly %arg1) #0 {
+bb0:
+ %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tmp2 = sext i32 %tmp to i64
+ %tmp5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp2
+ %tmp6 = load volatile float, float addrspace(1)* %tmp5, align 4
+ %tmp8 = fcmp olt float %tmp6, 0.000000e+00
+ br i1 %tmp8, label %bb1, label %bb2
+
+bb1:
+ %tmp10 = load volatile float, float addrspace(1)* %tmp5, align 4
+ br label %bb2
+
+bb2:
+ %tmp11 = load volatile float, float addrspace(1)* %tmp5, align 4
+ br label %bb1
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
Added: llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll?rev=348048&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll (added)
+++ llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll Fri Nov 30 14:55:20 2018
@@ -0,0 +1,31 @@
+; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+
+; CHECK-LABEL: 'test1':
+; CHECK-NEXT: DIVERGENT: i32 %bound
+; CHECK: {{^ *}}%counter =
+; CHECK-NEXT: DIVERGENT: %break = icmp sge i32 %counter, %bound
+; CHECK-NEXT: DIVERGENT: br i1 %break, label %footer, label %body
+; CHECK: {{^ *}}%counter.next =
+; CHECK: {{^ *}}%counter.footer =
+; CHECK: DIVERGENT: br i1 %break, label %end, label %header
+; Note: %counter is not divergent!
+define amdgpu_ps void @test1(i32 %bound) {
+entry:
+ br label %header
+
+header:
+ %counter = phi i32 [ 0, %entry ], [ %counter.footer, %footer ]
+ %break = icmp sge i32 %counter, %bound
+ br i1 %break, label %footer, label %body
+
+body:
+ %counter.next = add i32 %counter, 1
+ br label %footer
+
+footer:
+ %counter.footer = phi i32 [ %counter.next, %body ], [ undef, %header ]
+ br i1 %break, label %end, label %header
+
+end:
+ ret void
+}
Added: llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/temporal_diverge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/temporal_diverge.ll?rev=348048&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/temporal_diverge.ll (added)
+++ llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/temporal_diverge.ll Fri Nov 30 14:55:20 2018
@@ -0,0 +1,154 @@
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+
+; temporal-divergent use of value carried by divergent loop
+define amdgpu_kernel void @temporal_diverge(i32 %n, i32 %a, i32 %b) #0 {
+; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_diverge':
+; CHECK-NOT: DIVERGENT: %uni.
+; CHECK-NOT: DIVERGENT: br i1 %uni.
+
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %uni.cond = icmp slt i32 %a, 0
+ br label %H
+
+H:
+ %uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc, %H ]
+ %uni.inc = add i32 %uni.merge.h, 1
+ %div.exitx = icmp slt i32 %tid, 0
+ br i1 %div.exitx, label %X, label %H ; divergent branch
+; CHECK: DIVERGENT: %div.exitx =
+; CHECK: DIVERGENT: br i1 %div.exitx,
+
+X:
+ %div.user = add i32 %uni.inc, 5
+ ret void
+}
+
+; temporal-divergent use of value carried by divergent loop inside a top-level loop
+define amdgpu_kernel void @temporal_diverge_inloop(i32 %n, i32 %a, i32 %b) #0 {
+; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_diverge_inloop':
+; CHECK-NOT: DIVERGENT: %uni.
+; CHECK-NOT: DIVERGENT: br i1 %uni.
+
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %uni.cond = icmp slt i32 %a, 0
+ br label %G
+
+G:
+ br label %H
+
+H:
+ %uni.merge.h = phi i32 [ 0, %G ], [ %uni.inc, %H ]
+ %uni.inc = add i32 %uni.merge.h, 1
+ %div.exitx = icmp slt i32 %tid, 0
+ br i1 %div.exitx, label %X, label %H ; divergent branch
+; CHECK: DIVERGENT: %div.exitx =
+; CHECK: DIVERGENT: br i1 %div.exitx,
+
+X:
+ %div.user = add i32 %uni.inc, 5
+ br i1 %uni.cond, label %G, label %Y
+
+Y:
+ %div.alsouser = add i32 %uni.inc, 5
+ ret void
+}
+
+
+; temporal-uniform use of a value; definition and users are carried by a surrounding divergent loop
+define amdgpu_kernel void @temporal_uniform_indivloop(i32 %n, i32 %a, i32 %b) #0 {
+; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_uniform_indivloop':
+; CHECK-NOT: DIVERGENT: %uni.
+; CHECK-NOT: DIVERGENT: br i1 %uni.
+
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %uni.cond = icmp slt i32 %a, 0
+ br label %G
+
+G:
+ br label %H
+
+H:
+ %uni.merge.h = phi i32 [ 0, %G ], [ %uni.inc, %H ]
+ %uni.inc = add i32 %uni.merge.h, 1
+ br i1 %uni.cond, label %X, label %H ; divergent branch
+
+X:
+ %uni.user = add i32 %uni.inc, 5
+ %div.exity = icmp slt i32 %tid, 0
+; CHECK: DIVERGENT: %div.exity =
+ br i1 %div.exity, label %G, label %Y
+; CHECK: DIVERGENT: br i1 %div.exity,
+
+Y:
+ %div.alsouser = add i32 %uni.inc, 5
+ ret void
+}
+
+
+; temporal-divergent use of value carried by divergent loop, user is inside sibling loop
+define amdgpu_kernel void @temporal_diverge_loopuser(i32 %n, i32 %a, i32 %b) #0 {
+; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_diverge_loopuser':
+; CHECK-NOT: DIVERGENT: %uni.
+; CHECK-NOT: DIVERGENT: br i1 %uni.
+
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %uni.cond = icmp slt i32 %a, 0
+ br label %H
+
+H:
+ %uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc, %H ]
+ %uni.inc = add i32 %uni.merge.h, 1
+ %div.exitx = icmp slt i32 %tid, 0
+ br i1 %div.exitx, label %X, label %H ; divergent branch
+; CHECK: DIVERGENT: %div.exitx =
+; CHECK: DIVERGENT: br i1 %div.exitx,
+
+X:
+ br label %G
+
+G:
+ %div.user = add i32 %uni.inc, 5
+ br i1 %uni.cond, label %G, label %Y
+
+Y:
+ ret void
+}
+
+; temporal-divergent use of value carried by divergent loop, user is inside sibling loop, defs and use are carried by a uniform loop
+define amdgpu_kernel void @temporal_diverge_loopuser_nested(i32 %n, i32 %a, i32 %b) #0 {
+; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_diverge_loopuser_nested':
+; CHECK-NOT: DIVERGENT: %uni.
+; CHECK-NOT: DIVERGENT: br i1 %uni.
+
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %uni.cond = icmp slt i32 %a, 0
+ br label %H
+
+H:
+ %uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc, %H ]
+ %uni.inc = add i32 %uni.merge.h, 1
+ %div.exitx = icmp slt i32 %tid, 0
+ br i1 %div.exitx, label %X, label %H ; divergent branch
+; CHECK: DIVERGENT: %div.exitx =
+; CHECK: DIVERGENT: br i1 %div.exitx,
+
+X:
+ br label %G
+
+G:
+ %div.user = add i32 %uni.inc, 5
+ br i1 %uni.cond, label %G, label %Y
+
+Y:
+ ret void
+}
+
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+attributes #0 = { nounwind readnone }
Added: llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll?rev=348048&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll (added)
+++ llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll Fri Nov 30 14:55:20 2018
@@ -0,0 +1,45 @@
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workitem.id.y() #0
+declare i32 @llvm.amdgcn.workitem.id.z() #0
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
+declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
+
+; CHECK: DIVERGENT: %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+define amdgpu_kernel void @workitem_id_x() #1 {
+ %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+ store volatile i32 %id.x, i32 addrspace(1)* undef
+ ret void
+}
+
+; CHECK: DIVERGENT: %id.y = call i32 @llvm.amdgcn.workitem.id.y()
+define amdgpu_kernel void @workitem_id_y() #1 {
+ %id.y = call i32 @llvm.amdgcn.workitem.id.y()
+ store volatile i32 %id.y, i32 addrspace(1)* undef
+ ret void
+}
+
+; CHECK: DIVERGENT: %id.z = call i32 @llvm.amdgcn.workitem.id.z()
+define amdgpu_kernel void @workitem_id_z() #1 {
+ %id.z = call i32 @llvm.amdgcn.workitem.id.z()
+ store volatile i32 %id.z, i32 addrspace(1)* undef
+ ret void
+}
+
+; CHECK: DIVERGENT: %mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 0, i32 0)
+define amdgpu_kernel void @mbcnt_lo() #1 {
+ %mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 0, i32 0)
+ store volatile i32 %mbcnt.lo, i32 addrspace(1)* undef
+ ret void
+}
+
+; CHECK: DIVERGENT: %mbcnt.hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 0, i32 0)
+define amdgpu_kernel void @mbcnt_hi() #1 {
+ %mbcnt.hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 0, i32 0)
+ store volatile i32 %mbcnt.hi, i32 addrspace(1)* undef
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
Added: llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/daorder.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/daorder.ll?rev=348048&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/daorder.ll (added)
+++ llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/daorder.ll Fri Nov 30 14:55:20 2018
@@ -0,0 +1,47 @@
+; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+define i32 @daorder(i32 %n) {
+; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'daorder'
+entry:
+ %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ %cond = icmp slt i32 %tid, 0
+ br i1 %cond, label %A, label %B ; divergent
+; CHECK: DIVERGENT: br i1 %cond,
+A:
+ %defAtA = add i32 %n, 1 ; uniform
+; CHECK-NOT: DIVERGENT: %defAtA =
+ br label %C
+B:
+ %defAtB = add i32 %n, 2 ; uniform
+; CHECK-NOT: DIVERGENT: %defAtB =
+ br label %C
+C:
+ %defAtC = phi i32 [ %defAtA, %A ], [ %defAtB, %B ] ; divergent
+; CHECK: DIVERGENT: %defAtC =
+ br label %D
+
+D:
+ %i = phi i32 [0, %C], [ %i.inc, %E ] ; uniform
+; CHECK-NOT: DIVERGENT: %i = phi
+ br label %E
+
+E:
+ %i.inc = add i32 %i, 1
+ %loopCnt = icmp slt i32 %i.inc, %n
+; CHECK-NOT: DIVERGENT: %loopCnt =
+ br i1 %loopCnt, label %D, label %exit
+
+exit:
+ ret i32 %n
+}
+
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
+
+!nvvm.annotations = !{!0}
+!0 = !{i32 (i32)* @daorder, !"kernel", i32 1}
Added: llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll?rev=348048&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll (added)
+++ llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll Fri Nov 30 14:55:20 2018
@@ -0,0 +1,175 @@
+; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; return (n < 0 ? a + threadIdx.x : b + threadIdx.x)
+define i32 @no_diverge(i32 %n, i32 %a, i32 %b) {
+; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'no_diverge'
+entry:
+ %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ %cond = icmp slt i32 %n, 0
+ br i1 %cond, label %then, label %else ; uniform
+; CHECK-NOT: DIVERGENT: br i1 %cond,
+then:
+ %a1 = add i32 %a, %tid
+ br label %merge
+else:
+ %b2 = add i32 %b, %tid
+ br label %merge
+merge:
+ %c = phi i32 [ %a1, %then ], [ %b2, %else ]
+ ret i32 %c
+}
+
+; c = a;
+; if (threadIdx.x < 5) // divergent: data dependent
+; c = b;
+; return c; // c is divergent: sync dependent
+define i32 @sync(i32 %a, i32 %b) {
+; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'sync'
+bb1:
+ %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+ %cond = icmp slt i32 %tid, 5
+ br i1 %cond, label %bb2, label %bb3
+; CHECK: DIVERGENT: br i1 %cond,
+bb2:
+ br label %bb3
+bb3:
+ %c = phi i32 [ %a, %bb1 ], [ %b, %bb2 ] ; sync dependent on tid
+; CHECK: DIVERGENT: %c =
+ ret i32 %c
+}
+
+; c = 0;
+; if (threadIdx.x >= 5) { // divergent
+; c = (n < 0 ? a : b); // c here is uniform because n is uniform
+; }
+; // c here is divergent because it is sync dependent on threadIdx.x >= 5
+; return c;
+define i32 @mixed(i32 %n, i32 %a, i32 %b) {
+; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'mixed'
+bb1:
+ %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+ %cond = icmp slt i32 %tid, 5
+ br i1 %cond, label %bb6, label %bb2
+; CHECK: DIVERGENT: br i1 %cond,
+bb2:
+ %cond2 = icmp slt i32 %n, 0
+ br i1 %cond2, label %bb4, label %bb3
+bb3:
+ br label %bb5
+bb4:
+ br label %bb5
+bb5:
+ %c = phi i32 [ %a, %bb3 ], [ %b, %bb4 ]
+; CHECK-NOT: DIVERGENT: %c =
+ br label %bb6
+bb6:
+ %c2 = phi i32 [ 0, %bb1], [ %c, %bb5 ]
+; CHECK: DIVERGENT: %c2 =
+ ret i32 %c2
+}
+
+; We conservatively treat all parameters of a __device__ function as divergent.
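+; A rough source-level analog (for illustration only, not part of the test):
+;   __device__ int device(int n, int a, int b) { return n < 0 ? a : b; }
+; Because a __device__ function can be called with thread-dependent arguments,
+; %n, %a, and %b are all reported as divergent, and so is the branch on %cond.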
+define i32 @device(i32 %n, i32 %a, i32 %b) {
+; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'device'
+; CHECK: DIVERGENT: i32 %n
+; CHECK: DIVERGENT: i32 %a
+; CHECK: DIVERGENT: i32 %b
+entry:
+ %cond = icmp slt i32 %n, 0
+ br i1 %cond, label %then, label %else
+; CHECK: DIVERGENT: br i1 %cond,
+then:
+ br label %merge
+else:
+ br label %merge
+merge:
+ %c = phi i32 [ %a, %then ], [ %b, %else ]
+ ret i32 %c
+}
+
+; int i = 0;
+; do {
+; i++; // i here is uniform
+; } while (i < laneid);
+; return i == 10 ? 0 : 1; // i here is divergent
+;
+; The i defined in the loop is used outside.
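+; Each thread leaves the loop after a different number of iterations (its
+; laneid), so the value of i observed after the loop is thread-dependent even
+; though the phi inside the loop is uniform on every iteration.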
+define i32 @loop() {
+; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'loop'
+entry:
+ %laneid = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
+ br label %loop
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i1, %loop ]
+; CHECK-NOT: DIVERGENT: %i =
+ %i1 = add i32 %i, 1
+ %exit_cond = icmp sge i32 %i1, %laneid
+ br i1 %exit_cond, label %loop_exit, label %loop
+loop_exit:
+ %cond = icmp eq i32 %i, 10
+ br i1 %cond, label %then, label %else
+; CHECK: DIVERGENT: br i1 %cond,
+then:
+ ret i32 0
+else:
+ ret i32 1
+}
+
+; Same as @loop, but the loop is in the LCSSA form.
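+; The LCSSA phi %i.lcssa materializes the loop-exit value of %i. Since threads
+; exit the loop at different iterations (depending on tid), that exit value is
+; divergent, while the phi %i inside the loop remains uniform.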
+define i32 @lcssa() {
+; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'lcssa'
+entry:
+ %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ br label %loop
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i1, %loop ]
+; CHECK-NOT: DIVERGENT: %i =
+ %i1 = add i32 %i, 1
+ %exit_cond = icmp sge i32 %i1, %tid
+ br i1 %exit_cond, label %loop_exit, label %loop
+loop_exit:
+ %i.lcssa = phi i32 [ %i, %loop ]
+; CHECK: DIVERGENT: %i.lcssa =
+ %cond = icmp eq i32 %i.lcssa, 10
+ br i1 %cond, label %then, label %else
+; CHECK: DIVERGENT: br i1 %cond,
+then:
+ ret i32 0
+else:
+ ret i32 1
+}
+
+; Verifies that sync dependence is computed correctly in the absence of loops.
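+; The branch on %1 is divergent and its paths rejoin at bb3, but bb3 contains
+; no phi, and %2 depends only on the uniform kernel argument %arg (via %0), so
+; %2 must not be reported as divergent.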
+define i32 @sync_no_loop(i32 %arg) {
+entry:
+ %0 = add i32 %arg, 1
+ %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ %1 = icmp sge i32 %tid, 10
+ br i1 %1, label %bb1, label %bb2
+
+bb1:
+ br label %bb3
+
+bb2:
+ br label %bb3
+
+bb3:
+ %2 = add i32 %0, 2
+ ; CHECK-NOT: DIVERGENT: %2
+ ret i32 %2
+}
+
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
+
+!nvvm.annotations = !{!0, !1, !2, !3, !4}
+!0 = !{i32 (i32, i32, i32)* @no_diverge, !"kernel", i32 1}
+!1 = !{i32 (i32, i32)* @sync, !"kernel", i32 1}
+!2 = !{i32 (i32, i32, i32)* @mixed, !"kernel", i32 1}
+!3 = !{i32 ()* @loop, !"kernel", i32 1}
+!4 = !{i32 (i32)* @sync_no_loop, !"kernel", i32 1}
Added: llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/hidden_diverge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/hidden_diverge.ll?rev=348048&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/hidden_diverge.ll (added)
+++ llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/hidden_diverge.ll Fri Nov 30 14:55:20 2018
@@ -0,0 +1,30 @@
+; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
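+; Threads diverge at the branch in the entry block (it depends on tid): some
+; go to B, others directly to C. C and merge are both join points of that
+; divergent branch, so the phis %phi.var.hidden and %phi.ipd are divergent,
+; even though the branch inside B is uniform.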
+define i32 @hidden_diverge(i32 %n, i32 %a, i32 %b) {
+; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_diverge'
+entry:
+ %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ %cond.var = icmp slt i32 %tid, 0
+ br i1 %cond.var, label %B, label %C ; divergent
+; CHECK: DIVERGENT: br i1 %cond.var,
+B:
+ %cond.uni = icmp slt i32 %n, 0
+ br i1 %cond.uni, label %C, label %merge ; uniform
+; CHECK-NOT: DIVERGENT: br i1 %cond.uni,
+C:
+ %phi.var.hidden = phi i32 [ 1, %entry ], [ 2, %B ]
+; CHECK: DIVERGENT: %phi.var.hidden = phi i32
+ br label %merge
+merge:
+ %phi.ipd = phi i32 [ %a, %B ], [ %b, %C ]
+; CHECK: DIVERGENT: %phi.ipd = phi i32
+ ret i32 %phi.ipd
+}
+
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+
+!nvvm.annotations = !{!0}
+!0 = !{i32 (i32, i32, i32)* @hidden_diverge, !"kernel", i32 1}
Added: llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll?rev=348048&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll (added)
+++ llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll Fri Nov 30 14:55:20 2018
@@ -0,0 +1,55 @@
+; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; This test contains an unstructured loop.
+; +-------------- entry ----------------+
+; | |
+; V V
+; i1 = phi(0, i3) i2 = phi(0, i3)
+; j1 = i1 + 1 ---> i3 = phi(j1, j2) <--- j2 = i2 + 2
+; ^ | ^
+; | V |
+; +-------- switch (tid / i3) ----------+
+; |
+; V
+; if (i3 == 5) // divergent
+; because it is sync dependent on (tid / i3).
+define i32 @unstructured_loop(i1 %entry_cond) {
+; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'unstructured_loop'
+entry:
+ %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2
+loop_entry_1:
+ %i1 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ]
+ %j1 = add i32 %i1, 1
+ br label %loop_body
+loop_entry_2:
+ %i2 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ]
+ %j2 = add i32 %i2, 2
+ br label %loop_body
+loop_body:
+ %i3 = phi i32 [ %j1, %loop_entry_1 ], [ %j2, %loop_entry_2 ]
+ br label %loop_latch
+loop_latch:
+ %div = sdiv i32 %tid, %i3
+ switch i32 %div, label %branch [ i32 1, label %loop_entry_1
+ i32 2, label %loop_entry_2 ]
+branch:
+ %cmp = icmp eq i32 %i3, 5
+ br i1 %cmp, label %then, label %else
+; CHECK: DIVERGENT: br i1 %cmp,
+then:
+ ret i32 0
+else:
+ ret i32 1
+}
+
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
+
+!nvvm.annotations = !{!0}
+!0 = !{i32 (i1)* @unstructured_loop, !"kernel", i32 1}
Added: llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/lit.local.cfg?rev=348048&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/lit.local.cfg (added)
+++ llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/lit.local.cfg Fri Nov 30 14:55:20 2018
@@ -0,0 +1,2 @@
+if not 'NVPTX' in config.root.targets:
+ config.unsupported = True