[llvm] r298104 - Only unswitch loops with uniform conditions
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 17 10:13:42 PDT 2017
Author: rampitec
Date: Fri Mar 17 12:13:41 2017
New Revision: 298104
URL: http://llvm.org/viewvc/llvm-project?rev=298104&view=rev
Log:
Only unswitch loops with uniform conditions
Loop unswitching can be extremely harmful for a SIMT target. If the
hoisted condition is not uniform, a SIMT machine will execute both
clones of the loop sequentially. Therefore LoopUnswitch checks that the
condition is non-divergent.
Since DivergenceAnalysis adds an expensive PostDominatorTree analysis
not needed for non-SIMT targets, a new option is added to avoid unneeded
analysis initialization. The method getAnalysisUsage is called when
TargetTransformInfo is not yet available, so we cannot use it there.
For that reason a new field DivergentTarget is added to PassManagerBuilder
to control the behavior, and the field is set by the target.
Differential Revision: https://reviews.llvm.org/D30796
Added:
llvm/trunk/test/Transforms/LoopUnswitch/AMDGPU/
llvm/trunk/test/Transforms/LoopUnswitch/AMDGPU/divergent-unswitch.ll
llvm/trunk/test/Transforms/LoopUnswitch/AMDGPU/lit.local.cfg
Modified:
llvm/trunk/include/llvm/Transforms/IPO/PassManagerBuilder.h
llvm/trunk/include/llvm/Transforms/Scalar.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp
llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp
Modified: llvm/trunk/include/llvm/Transforms/IPO/PassManagerBuilder.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/IPO/PassManagerBuilder.h?rev=298104&r1=298103&r2=298104&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/IPO/PassManagerBuilder.h (original)
+++ llvm/trunk/include/llvm/Transforms/IPO/PassManagerBuilder.h Fri Mar 17 12:13:41 2017
@@ -153,6 +153,7 @@ public:
bool PrepareForLTO;
bool PrepareForThinLTO;
bool PerformThinLTO;
+ bool DivergentTarget;
/// Enable profile instrumentation pass.
bool EnablePGOInstrGen;
Modified: llvm/trunk/include/llvm/Transforms/Scalar.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Scalar.h?rev=298104&r1=298103&r2=298104&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Scalar.h (original)
+++ llvm/trunk/include/llvm/Transforms/Scalar.h Fri Mar 17 12:13:41 2017
@@ -169,7 +169,8 @@ Pass *createLoopStrengthReducePass();
//
// LoopUnswitch - This pass is a simple loop unswitching pass.
//
-Pass *createLoopUnswitchPass(bool OptimizeForSize = false);
+Pass *createLoopUnswitchPass(bool OptimizeForSize = false,
+ bool hasBranchDivergence = false);
//===----------------------------------------------------------------------===//
//
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=298104&r1=298103&r2=298104&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Fri Mar 17 12:13:41 2017
@@ -216,6 +216,8 @@ StringRef AMDGPUTargetMachine::getFeatur
}
void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
+ Builder.DivergentTarget = true;
+
bool Internalize = InternalizeSymbols &&
(getOptLevel() > CodeGenOpt::None) &&
(getTargetTriple().getArch() == Triple::amdgcn);
Modified: llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp?rev=298104&r1=298103&r2=298104&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp Fri Mar 17 12:13:41 2017
@@ -168,6 +168,7 @@ PassManagerBuilder::PassManagerBuilder()
PGOInstrUse = RunPGOInstrUse;
PrepareForThinLTO = EnablePrepareForThinLTO;
PerformThinLTO = false;
+ DivergentTarget = false;
}
PassManagerBuilder::~PassManagerBuilder() {
@@ -307,7 +308,7 @@ void PassManagerBuilder::addFunctionSimp
// Rotate Loop - disable header duplication at -Oz
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
MPM.add(createLICMPass()); // Hoist loop invariants
- MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
+ MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
MPM.add(createCFGSimplificationPass());
addInstructionCombiningPass(MPM);
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
@@ -588,7 +589,7 @@ void PassManagerBuilder::populateModuleP
MPM.add(createCorrelatedValuePropagationPass());
addInstructionCombiningPass(MPM);
MPM.add(createLICMPass());
- MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
+ MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
MPM.add(createCFGSimplificationPass());
addInstructionCombiningPass(MPM);
}
Modified: llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp?rev=298104&r1=298103&r2=298104&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp Fri Mar 17 12:13:41 2017
@@ -33,6 +33,7 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -180,12 +181,14 @@ namespace {
// NewBlocks contained cloned copy of basic blocks from LoopBlocks.
std::vector<BasicBlock*> NewBlocks;
+ bool hasBranchDivergence;
+
public:
static char ID; // Pass ID, replacement for typeid
- explicit LoopUnswitch(bool Os = false) :
+ explicit LoopUnswitch(bool Os = false, bool hasBranchDivergence = false) :
LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
currentLoop(nullptr), DT(nullptr), loopHeader(nullptr),
- loopPreheader(nullptr) {
+ loopPreheader(nullptr), hasBranchDivergence(hasBranchDivergence) {
initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
}
@@ -198,6 +201,8 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ if (hasBranchDivergence)
+ AU.addRequired<DivergenceAnalysis>();
getLoopAnalysisUsage(AU);
}
@@ -367,11 +372,12 @@ INITIALIZE_PASS_BEGIN(LoopUnswitch, "loo
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
false, false)
-Pass *llvm::createLoopUnswitchPass(bool Os) {
- return new LoopUnswitch(Os);
+Pass *llvm::createLoopUnswitchPass(bool Os, bool hasBranchDivergence) {
+ return new LoopUnswitch(Os, hasBranchDivergence);
}
/// Operator chain lattice.
@@ -808,6 +814,15 @@ bool LoopUnswitch::UnswitchIfProfitable(
<< ". Cost too high.\n");
return false;
}
+ if (hasBranchDivergence &&
+ getAnalysis<DivergenceAnalysis>().isDivergent(LoopCond)) {
+ DEBUG(dbgs() << "NOT unswitching loop %"
+ << currentLoop->getHeader()->getName()
+ << " at non-trivial condition '" << *Val
+ << "' == " << *LoopCond << "\n"
+ << ". Condition is divergent.\n");
+ return false;
+ }
UnswitchNontrivialCondition(LoopCond, Val, currentLoop, TI);
return true;
Added: llvm/trunk/test/Transforms/LoopUnswitch/AMDGPU/divergent-unswitch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/AMDGPU/divergent-unswitch.ll?rev=298104&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/AMDGPU/divergent-unswitch.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/AMDGPU/divergent-unswitch.ll Fri Mar 17 12:13:41 2017
@@ -0,0 +1,85 @@
+; RUN: opt -mtriple=amdgcn-- -O3 -S %s | FileCheck %s
+
+; Check that loop unswitch happened and condition hoisted out of the loop.
+; Condition is uniform so all targets should perform unswitching.
+
+; CHECK-LABEL: {{^}}define void @uniform_unswitch
+; CHECK: entry:
+; CHECK-NEXT: [[LOOP_COND:%[a-z0-9]+]] = icmp
+; CHECK-NEXT: [[IF_COND:%[a-z0-9]+]] = icmp eq i32 %x, 123456
+; CHECK-NEXT: and i1 [[LOOP_COND]], [[IF_COND]]
+; CHECK-NEXT: br i1
+
+define void @uniform_unswitch(i32 * nocapture %out, i32 %n, i32 %x) {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body.lr.ph, label %for.cond.cleanup
+
+for.body.lr.ph: ; preds = %entry
+ %cmp1 = icmp eq i32 %x, 123456
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %for.inc
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ ret void
+
+for.body: ; preds = %for.inc, %for.body.lr.ph
+ %i.07 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+ br i1 %cmp1, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %arrayidx = getelementptr inbounds i32, i32 * %out, i32 %i.07
+ store i32 %i.07, i32 * %arrayidx, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %inc = add nuw nsw i32 %i.07, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+; Check that loop unswitch does not happen if condition is divergent.
+
+; CHECK-LABEL: {{^}}define void @divergent_unswitch
+; CHECK: entry:
+; CHECK: icmp
+; CHECK: [[IF_COND:%[a-z0-9]+]] = icmp {{.*}} 567890
+; CHECK: br label
+; CHECK: br i1 [[IF_COND]]
+
+define void @divergent_unswitch(i32 * nocapture %out, i32 %n) {
+entry:
+ %cmp9 = icmp sgt i32 %n, 0
+ br i1 %cmp9, label %for.body.lr.ph, label %for.cond.cleanup
+
+for.body.lr.ph: ; preds = %entry
+ %call = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %cmp2 = icmp eq i32 %call, 567890
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %for.inc
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ ret void
+
+for.body: ; preds = %for.inc, %for.body.lr.ph
+ %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+ br i1 %cmp2, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %arrayidx = getelementptr inbounds i32, i32 * %out, i32 %i.010
+ store i32 %i.010, i32 * %arrayidx, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %inc = add nuw nsw i32 %i.010, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+attributes #0 = { nounwind readnone }
Added: llvm/trunk/test/Transforms/LoopUnswitch/AMDGPU/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/AMDGPU/lit.local.cfg?rev=298104&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/AMDGPU/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/AMDGPU/lit.local.cfg Fri Mar 17 12:13:41 2017
@@ -0,0 +1,2 @@
+if not 'AMDGPU' in config.root.targets:
+ config.unsupported = True
More information about the llvm-commits
mailing list