[llvm] r299779 - [AMDGPU] Unroll more to eliminate phis and conditions
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 7 09:26:29 PDT 2017
Author: rampitec
Date: Fri Apr 7 11:26:28 2017
New Revision: 299779
URL: http://llvm.org/viewvc/llvm-project?rev=299779&view=rev
Log:
[AMDGPU] Unroll more to eliminate phis and conditions
Increase the threshold to unroll a loop which contains an "if" statement
whose condition is defined by a PHI belonging to the loop. This may help
to eliminate the if region and potentially even the PHI itself, saving on
both divergence and registers used for the PHI.
Add a small bonus for each such "if" statement.
Differential Revision: https://reviews.llvm.org/D31693
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
llvm/trunk/test/CodeGen/AMDGPU/unroll.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp?rev=299779&r1=299778&r2=299779&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp Fri Apr 7 11:26:28 2017
@@ -32,13 +32,37 @@ using namespace llvm;
static cl::opt<unsigned> UnrollThresholdPrivate(
"amdgpu-unroll-threshold-private",
cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"),
- cl::init(2000), cl::Hidden);
+ cl::init(2500), cl::Hidden);
static cl::opt<unsigned> UnrollThresholdLocal(
"amdgpu-unroll-threshold-local",
cl::desc("Unroll threshold for AMDGPU if local memory used in a loop"),
cl::init(1000), cl::Hidden);
+static cl::opt<unsigned> UnrollThresholdIf(
+ "amdgpu-unroll-threshold-if",
+ cl::desc("Unroll threshold increment for AMDGPU for each if statement inside loop"),
+ cl::init(150), cl::Hidden);
+
+static bool dependsOnLocalPhi(const Loop *L, const Value *Cond, // Searches Cond's operand tree for a PHI operand not contained in any subloop of L.
+ unsigned Depth = 0) { // Depth: current recursion level; the call site in this patch relies on the default 0.
+ const Instruction *I = dyn_cast<Instruction>(Cond);
+ if (!I) // Cond is not an Instruction, so there are no operands to walk.
+ return false;
+
+ for (const Value *V : I->operand_values()) {
+ if (!L->contains(I)) // NOTE(review): tests I rather than V, so the outcome is identical for every operand; confirm whether V was intended.
+ continue;
+ if (const PHINode *PHI = dyn_cast<PHINode>(V)) { // Operand is itself a PHI.
+ if (none_of(L->getSubLoops(), [PHI](const Loop* SubLoop) { // Accept the PHI only when no subloop of L contains it.
+ return SubLoop->contains(PHI); }))
+ return true;
+ } else if (Depth < 10 && dependsOnLocalPhi(L, V, Depth+1)) // Non-PHI operand: recurse into it, but only while Depth < 10.
+ return true;
+ }
+ return false; // No qualifying PHI was reached.
+}
+
void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,
TTI::UnrollingPreferences &UP) {
UP.Threshold = 300; // Twice the default.
@@ -57,7 +81,33 @@ void AMDGPUTTIImpl::getUnrollingPreferen
const DataLayout &DL = BB->getModule()->getDataLayout();
unsigned LocalGEPsSeen = 0;
+ if (any_of(L->getSubLoops(), [BB](const Loop* SubLoop) {
+ return SubLoop->contains(BB); }))
+ continue; // Block belongs to an inner loop.
+
for (const Instruction &I : *BB) {
+
+ // Unroll a loop which contains an "if" statement whose condition is
+ // defined by a PHI belonging to the loop. This may help to eliminate the
+ // if region and potentially even the PHI itself, saving on both divergence
+ // and registers used for the PHI.
+ // Add a small bonus for each such "if" statement.
+ if (const BranchInst *Br = dyn_cast<BranchInst>(&I)) {
+ if (UP.Threshold < MaxBoost && Br->isConditional()) {
+ if (L->isLoopExiting(Br->getSuccessor(0)) ||
+ L->isLoopExiting(Br->getSuccessor(1)))
+ continue;
+ if (dependsOnLocalPhi(L, Br->getCondition())) {
+ UP.Threshold += UnrollThresholdIf;
+ DEBUG(dbgs() << "Set unroll threshold " << UP.Threshold
+ << " for loop:\n" << *L << " due to " << *Br << '\n');
+ if (UP.Threshold >= MaxBoost)
+ return;
+ }
+ }
+ continue;
+ }
+
const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
if (!GEP)
continue;
@@ -128,7 +178,7 @@ void AMDGPUTTIImpl::getUnrollingPreferen
UP.Threshold = Threshold;
DEBUG(dbgs() << "Set unroll threshold " << Threshold << " for loop:\n"
<< *L << " due to " << *GEP << '\n');
- if (UP.Threshold == MaxBoost)
+ if (UP.Threshold >= MaxBoost)
return;
}
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/unroll.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/unroll.ll?rev=299779&r1=299778&r2=299779&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/unroll.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/unroll.ll Fri Apr 7 11:26:28 2017
@@ -64,3 +64,37 @@ loop.inc:
exit:
ret void
}
+
+; Check that a loop with an if inside is completely unrolled to eliminate the phi and the if
+
+; CHECK-LABEL: @unroll_for_if
+; CHECK: entry:
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: store
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: store
+; CHECK-NOT: br
+define amdgpu_kernel void @unroll_for_if(i32* %a) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.inc
+ %i1 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %and = and i32 %i1, 1
+ %tobool = icmp eq i32 %and, 0
+ br i1 %tobool, label %for.inc, label %if.then ; skip the store when the low bit of %i1 is clear
+
+if.then: ; preds = %for.body
+ %0 = sext i32 %i1 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %0
+ store i32 0, i32* %arrayidx, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %inc = add nuw nsw i32 %i1, 1
+ %cmp = icmp ult i32 %inc, 48
+ br i1 %cmp, label %for.body, label %for.end ; loop back while %inc < 48
+
+for.end: ; preds = %for.inc
+ ret void
+}
More information about the llvm-commits
mailing list