[polly] r187728 - ScopInfo: Split start value from SCEVAddRecExpr to enable parameter sharing.
Tobias Grosser
tobias at grosser.es
Mon Aug 5 08:14:16 PDT 2013
Author: grosser
Date: Mon Aug 5 10:14:15 2013
New Revision: 187728
URL: http://llvm.org/viewvc/llvm-project?rev=187728&view=rev
Log:
ScopInfo: Split start value from SCEVAddRecExpr to enable parameter sharing.
SCoP-invariant parameters that differ only in their start value prevent parameter
sharing. For example, when compiling the following C code:
void foo(float *input) {
for (long j = 0; j < 8; j++) {
// SCoP begin
for (long i = 0; i < 8; i++) {
float x = input[j * 64 + i + 1];
input[j * 64 + i] = x * x;
}
}
}
Polly would create two parameters for these memory accesses:
p_0: {0,+,256}
p_1: {4,+,256}
[j * 64 + i + 1] => MemRef_input[o0] : 4o0 = p_1 + 4i0
[j * 64 + i] => MemRef_input[o0] : 4o0 = p_0 + 4i0
These parameters differ only in their start value. To enable parameter sharing,
we split the start value off the SCEVAddRecExpr, so that both accesses share a
single parameter that always has a zero start value:
p0: {0,+,256}<%for.cond1.preheader>
[j * 64 + i + 1] => MemRef_input[o0] : 4o0 = 4 + p_0 + 4i0
[j * 64 + i] => MemRef_input[o0] : 4o0 = p_0 + 4i0
This translation can make Polly's dependence analysis much faster.
Contributed-by: Star Tan <tanmx_star at yeah.net>
Added:
polly/trunk/test/ScopInfo/constant_start_integer.ll
polly/trunk/test/ScopInfo/multidim_nested_start_integer.ll
Modified:
polly/trunk/lib/Analysis/ScopInfo.cpp
polly/trunk/lib/Support/SCEVValidator.cpp
polly/trunk/test/ScopInfo/20111108-Parameter-not-detected.ll
Modified: polly/trunk/lib/Analysis/ScopInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Analysis/ScopInfo.cpp?rev=187728&r1=187727&r2=187728&view=diff
==============================================================================
--- polly/trunk/lib/Analysis/ScopInfo.cpp (original)
+++ polly/trunk/lib/Analysis/ScopInfo.cpp Mon Aug 5 10:14:15 2013
@@ -182,22 +182,38 @@ public:
__isl_give isl_pw_aff *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
assert(Expr->isAffine() && "Only affine AddRecurrences allowed");
- assert(S->getRegion().contains(Expr->getLoop()) &&
- "Scop does not contain the loop referenced in this AddRec");
+ // Directly generate isl_pw_aff for Expr if 'start' is zero.
+ if (Expr->getStart()->isZero()) {
+ assert(S->getRegion().contains(Expr->getLoop()) &&
+ "Scop does not contain the loop referenced in this AddRec");
+
+ isl_pw_aff *Start = visit(Expr->getStart());
+ isl_pw_aff *Step = visit(Expr->getOperand(1));
+ isl_space *Space = isl_space_set_alloc(Ctx, 0, NbLoopSpaces);
+ isl_local_space *LocalSpace = isl_local_space_from_space(Space);
+
+ int loopDimension = getLoopDepth(Expr->getLoop());
+
+ isl_aff *LAff = isl_aff_set_coefficient_si(
+ isl_aff_zero_on_domain(LocalSpace), isl_dim_in, loopDimension, 1);
+ isl_pw_aff *LPwAff = isl_pw_aff_from_aff(LAff);
+
+ // TODO: Do we need to check for NSW and NUW?
+ return isl_pw_aff_add(Start, isl_pw_aff_mul(Step, LPwAff));
+ }
+
+ // Translate AddRecExpr from '{start, +, inc}' into 'start + {0, +, inc}'
+ // if 'start' is not zero.
+ ScalarEvolution &SE = *S->getSE();
+ const SCEV *ZeroStartExpr = SE.getAddRecExpr(
+ SE.getConstant(Expr->getStart()->getType(), 0),
+ Expr->getStepRecurrence(SE), Expr->getLoop(), SCEV::FlagAnyWrap);
+
+ isl_pw_aff *ZeroStartResult = visit(ZeroStartExpr);
isl_pw_aff *Start = visit(Expr->getStart());
- isl_pw_aff *Step = visit(Expr->getOperand(1));
- isl_space *Space = isl_space_set_alloc(Ctx, 0, NbLoopSpaces);
- isl_local_space *LocalSpace = isl_local_space_from_space(Space);
-
- int loopDimension = getLoopDepth(Expr->getLoop());
-
- isl_aff *LAff = isl_aff_set_coefficient_si(
- isl_aff_zero_on_domain(LocalSpace), isl_dim_in, loopDimension, 1);
- isl_pw_aff *LPwAff = isl_pw_aff_from_aff(LAff);
- // TODO: Do we need to check for NSW and NUW?
- return isl_pw_aff_add(Start, isl_pw_aff_mul(Step, LPwAff));
+ return isl_pw_aff_add(ZeroStartResult, Start);
}
__isl_give isl_pw_aff *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
Modified: polly/trunk/lib/Support/SCEVValidator.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Support/SCEVValidator.cpp?rev=187728&r1=187727&r2=187728&view=diff
==============================================================================
--- polly/trunk/lib/Support/SCEVValidator.cpp (original)
+++ polly/trunk/lib/Support/SCEVValidator.cpp Mon Aug 5 10:14:15 2013
@@ -278,7 +278,22 @@ public:
assert(Start.isConstant() && Recurrence.isConstant() &&
"Expected 'Start' and 'Recurrence' to be constant");
- return ValidatorResult(SCEVType::PARAM, Expr);
+
+ // Directly generate ValidatorResult for Expr if 'start' is zero.
+ if (Expr->getStart()->isZero())
+ return ValidatorResult(SCEVType::PARAM, Expr);
+
+ // Translate AddRecExpr from '{start, +, inc}' into 'start + {0, +, inc}'
+ // if 'start' is not zero.
+ const SCEV *ZeroStartExpr = SE.getAddRecExpr(
+ SE.getConstant(Expr->getStart()->getType(), 0),
+ Expr->getStepRecurrence(SE), Expr->getLoop(), SCEV::FlagAnyWrap);
+
+ ValidatorResult ZeroStartResult =
+ ValidatorResult(SCEVType::PARAM, ZeroStartExpr);
+ ZeroStartResult.addParamsFrom(Start);
+
+ return ZeroStartResult;
}
class ValidatorResult visitSMaxExpr(const SCEVSMaxExpr *Expr) {
Modified: polly/trunk/test/ScopInfo/20111108-Parameter-not-detected.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScopInfo/20111108-Parameter-not-detected.ll?rev=187728&r1=187727&r2=187728&view=diff
==============================================================================
--- polly/trunk/test/ScopInfo/20111108-Parameter-not-detected.ll (original)
+++ polly/trunk/test/ScopInfo/20111108-Parameter-not-detected.ll Mon Aug 5 10:14:15 2013
@@ -51,6 +51,9 @@ for.end7:
ret i32 0
}
+; CHECK: Context:
+; CHECK: p0: {0,+,1}<%for.cond>
+
; CHECK: Domain :=
-; CHECK: [p_0] -> { Stmt_if_then[i0] : i0 >= 0 and i0 <= 1022 and i0 >= 1001 - p_0 };
+; CHECK: [p_0] -> { Stmt_if_then[i0] : i0 >= 0 and i0 <= 1022 and i0 >= 999 - p_0 };
Added: polly/trunk/test/ScopInfo/constant_start_integer.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScopInfo/constant_start_integer.ll?rev=187728&view=auto
==============================================================================
--- polly/trunk/test/ScopInfo/constant_start_integer.ll (added)
+++ polly/trunk/test/ScopInfo/constant_start_integer.ll Mon Aug 5 10:14:15 2013
@@ -0,0 +1,66 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; void foo(float *input) {
+; for (int j = 0; j < 8; j++) {
+; //SCoP begin
+; for (int i = 0; i < 63; i++) {
+; float x = input[j * 64 + i + 1];
+; input[j * 64 + i + 0] = x * x;
+; }
+; }
+; }
+;
+; Access functions:
+;
+; input[j * 64 + i + 1] => {4,+,256}<%for.cond1.preheader>
+; input[j * 64 + i + 0] => {0,+,256}<%for.cond1.preheader>
+;
+; They should share the same zero-start parameter:
+;
+; p0: {0,+,256}<%for.cond1.preheader>
+; input[j * 64 + i + 1] => p0 + 4
+; input[j * 64 + i + 0] => p0
+;
+
+; Function Attrs: nounwind
+define void @foo(float* nocapture %input) {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.inc10, %entry
+ %j.021 = phi i64 [ 0, %entry ], [ %inc11, %for.inc10 ]
+ %mul = shl nsw i64 %j.021, 6
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.cond1.preheader
+ %i.020 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
+ %add = add nsw i64 %i.020, %mul
+ %add4 = add nsw i64 %add, 1
+ %arrayidx = getelementptr inbounds float* %input, i64 %add4
+ %0 = load float* %arrayidx, align 8
+ %mul5 = fmul float %0, %0
+ %arrayidx9 = getelementptr inbounds float* %input, i64 %add
+ store float %mul5, float* %arrayidx9, align 8
+ %inc = add nsw i64 %i.020, 1
+ %exitcond = icmp eq i64 %inc, 63
+ br i1 %exitcond, label %for.inc10, label %for.body3
+
+for.inc10: ; preds = %for.body3
+ %inc11 = add nsw i64 %j.021, 1
+ %exitcond22 = icmp eq i64 %inc11, 8
+ fence seq_cst
+ br i1 %exitcond22, label %for.end12, label %for.cond1.preheader
+
+for.end12: ; preds = %for.inc10
+ ret void
+}
+
+; CHECK p0: {0,+,256}<%for.cond1.preheader>
+; CHECK-NOT: p1
+
+; CHECK: ReadAccess :=
+; CHECK: [p_0] -> { Stmt_for_body3[i0] -> MemRef_input[o0] : 4o0 = 4 + p_0 + 4i0 };
+; CHECK: MustWriteAccess :=
+; CHECK: [p_0] -> { Stmt_for_body3[i0] -> MemRef_input[o0] : 4o0 = p_0 + 4i0 };
Added: polly/trunk/test/ScopInfo/multidim_nested_start_integer.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScopInfo/multidim_nested_start_integer.ll?rev=187728&view=auto
==============================================================================
--- polly/trunk/test/ScopInfo/multidim_nested_start_integer.ll (added)
+++ polly/trunk/test/ScopInfo/multidim_nested_start_integer.ll Mon Aug 5 10:14:15 2013
@@ -0,0 +1,78 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; void foo(long n, long m, long o, double A[n][m][o]) {
+; for (long i = 0; i < n; i++)
+; for (long j = 0; j < m; j++)
+; for (long k = 0; k < o; k++)
+; A[i+3][j-4][k+7] = 1.0;
+; }
+;
+; Access function:
+;
+; {{{(56 + (8 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,
+; (8 * %o)}<%for.j>,+,8}<%for.k>
+;
+; The nested 'start' should be split into three parameters:
+; p1: {0,+,(8 * %o)}<%for.j>
+; p2: {0,+,(8 * %m * %o)}<%for.i>
+; p3: (8 * (-4 + (3 * %m)) * %o)
+;
+
+define void @foo(i64 %n, i64 %m, i64 %o, double* %A) {
+entry:
+ br label %for.i
+
+for.i:
+ %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ]
+ br label %for.j
+
+for.j:
+ %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j.inc ]
+ br label %for.k
+
+for.k:
+ %k = phi i64 [ 0, %for.j ], [ %k.inc, %for.k.inc ]
+ %offset0 = add nsw i64 %i, 3
+ %subscript0 = mul i64 %offset0, %m
+ %offset1 = add nsw i64 %j, -4
+ %subscript1 = add i64 %offset1, %subscript0
+ %subscript2 = mul i64 %subscript1, %o
+ %offset2 = add nsw i64 %k, 7
+ %subscript = add i64 %subscript2, %offset2
+ %idx = getelementptr inbounds double* %A, i64 %subscript
+ store double 1.0, double* %idx
+ br label %for.k.inc
+
+for.k.inc:
+ %k.inc = add nsw i64 %k, 1
+ %k.exitcond = icmp eq i64 %k.inc, %o
+ br i1 %k.exitcond, label %for.j.inc, label %for.k
+
+for.j.inc:
+ %j.inc = add nsw i64 %j, 1
+ %j.exitcond = icmp eq i64 %j.inc, %m
+ br i1 %j.exitcond, label %for.i.inc, label %for.j
+
+for.i.inc:
+ %i.inc = add nsw i64 %i, 1
+ %i.exitcond = icmp eq i64 %i.inc, %n
+ br i1 %i.exitcond, label %end, label %for.i
+
+end:
+ ret void
+}
+
+; CHECK: p0: %o
+; CHECK: p1: {0,+,(8 * %o)}<%for.j>
+; CHECK: p2: {0,+,(8 * %m * %o)}<%for.i>
+; CHECK: p3: (8 * (-4 + (3 * %m)) * %o)
+; CHECK-NOT: p4
+
+; CHECK: Domain
+; CHECK: [o, p_1, p_2, p_3] -> { Stmt_for_k[i0] : i0 >= 0 and i0 <= -1 + o };
+; CHECK: Scattering
+; CHECK: [o, p_1, p_2, p_3] -> { Stmt_for_k[i0] -> scattering[0, i0, 0] };
+; CHECK: MustWriteAccess
+; CHECK: [o, p_1, p_2, p_3] -> { Stmt_for_k[i0] -> MemRef_A[o0] : 8o0 = 56 + p_1 + p_2 + p_3 + 8i0 };
More information about the llvm-commits
mailing list