[polly] r264514 - Generalize the domain complexity restrictions
Johannes Doerfert via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 26 09:17:00 PDT 2016
Author: jdoerfert
Date: Sat Mar 26 11:17:00 2016
New Revision: 264514
URL: http://llvm.org/viewvc/llvm-project?rev=264514&view=rev
Log:
Generalize the domain complexity restrictions
This patch applies the restrictions on the number of domain conjuncts
also to the domain parts of piecewise affine expressions we generate.
To this end the wording is change slightly. It was needed to support
complex additions featuring zext-instructions but it also fixes PR27045.
lnt profitable runs reports only little changes that might be noise:
Compile Time:
Polybench/[...]/2mm +4.34%
SingleSource/[...]/stepanov_container -2.43%
Execution Time:
External/[...]/186_crafty -2.32%
External/[...]/188_ammp -1.89%
External/[...]/473_astar -1.87%
Added:
polly/trunk/test/ScopInfo/complex-expression.ll
Modified:
polly/trunk/include/polly/ScopInfo.h
polly/trunk/lib/Analysis/ScopInfo.cpp
polly/trunk/lib/Support/SCEVAffinator.cpp
polly/trunk/test/ScopInfo/complex-branch-structure.ll
polly/trunk/test/ScopInfo/complex-successor-structure.ll
Modified: polly/trunk/include/polly/ScopInfo.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/ScopInfo.h?rev=264514&r1=264513&r2=264514&view=diff
==============================================================================
--- polly/trunk/include/polly/ScopInfo.h (original)
+++ polly/trunk/include/polly/ScopInfo.h Sat Mar 26 11:17:00 2016
@@ -76,10 +76,10 @@ enum AssumptionKind {
INBOUNDS,
WRAPPING,
ERRORBLOCK,
+ COMPLEXITY,
INFINITELOOP,
INVARIANTLOAD,
DELINEARIZATION,
- ERROR_DOMAINCONJUNCTS,
};
/// @brief Enum to distinguish between assumptions and restrictions.
@@ -1981,6 +1981,11 @@ public:
/// @param BB An (optional) basic block in which the isl_pw_aff is computed.
/// SCEVs known to not reference any loops in the SCoP can be
/// passed without a @p BB.
+ ///
+ /// Note that this function will always return a valid isl_pw_aff. However, if
+ /// the translation of @p E was deemed to complex the SCoP is invalidated and
+ /// a dummy value of appropriate dimension is returned. This allows to bail
+ /// for complex cases without "error handling code" needed on the users side.
__isl_give isl_pw_aff *getPwAff(const SCEV *E, BasicBlock *BB = nullptr);
/// @brief Return the non-loop carried conditions on the domain of @p Stmt.
Modified: polly/trunk/lib/Analysis/ScopInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Analysis/ScopInfo.cpp?rev=264514&r1=264513&r2=264514&view=diff
==============================================================================
--- polly/trunk/lib/Analysis/ScopInfo.cpp (original)
+++ polly/trunk/lib/Analysis/ScopInfo.cpp Sat Mar 26 11:17:00 2016
@@ -2293,7 +2293,7 @@ void Scop::buildDomainsWithBranchConstra
isl_set_free(SuccDomain);
SuccDomain = Empty;
HasComplexCFG = true;
- invalidate(ERROR_DOMAINCONJUNCTS, DebugLoc());
+ invalidate(COMPLEXITY, DebugLoc());
}
}
}
@@ -3213,6 +3213,8 @@ static std::string toString(AssumptionKi
return "Inbounds";
case WRAPPING:
return "No-overflows";
+ case COMPLEXITY:
+ return "Low complexity";
case ERRORBLOCK:
return "No-error";
case INFINITELOOP:
@@ -3221,8 +3223,6 @@ static std::string toString(AssumptionKi
return "Invariant load";
case DELINEARIZATION:
return "Delinearization";
- case ERROR_DOMAINCONJUNCTS:
- return "Low number of domain conjuncts";
}
llvm_unreachable("Unknown AssumptionKind!");
}
@@ -3381,7 +3381,18 @@ void Scop::dump() const { print(dbgs());
isl_ctx *Scop::getIslCtx() const { return IslCtx.get(); }
__isl_give isl_pw_aff *Scop::getPwAff(const SCEV *E, BasicBlock *BB) {
- return Affinator.getPwAff(E, BB);
+ // First try to use the SCEVAffinator to generate a piecewise defined
+ // affine function from @p E in the context of @p BB. If that tasks becomes to
+ // complex the affinator might return a nullptr. In such a case we invalidate
+ // the SCoP and return a dummy value. This way we do not need to add error
+ // handling cdoe to all users of this function.
+ auto *PWA = Affinator.getPwAff(E, BB);
+ if (PWA)
+ return PWA;
+
+ auto DL = BB ? BB->getTerminator()->getDebugLoc() : DebugLoc();
+ invalidate(COMPLEXITY, DL);
+ return Affinator.getPwAff(SE->getZero(E->getType()), BB);
}
__isl_give isl_union_set *Scop::getDomains() const {
Modified: polly/trunk/lib/Support/SCEVAffinator.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Support/SCEVAffinator.cpp?rev=264514&r1=264513&r2=264514&view=diff
==============================================================================
--- polly/trunk/lib/Support/SCEVAffinator.cpp (original)
+++ polly/trunk/lib/Support/SCEVAffinator.cpp Sat Mar 26 11:17:00 2016
@@ -24,6 +24,33 @@
using namespace llvm;
using namespace polly;
+// The maximal number of basic sets we allow during the construction of a
+// piecewise affine function. More complex ones will result in very high
+// compile time.
+static int const MaxConjunctsInPwAff = 100;
+
+/// @brief Add the number of basic sets in @p Domain to @p User
+static isl_stat addNumBasicSets(isl_set *Domain, isl_aff *Aff, void *User) {
+ auto *NumBasicSets = static_cast<unsigned *>(User);
+ *NumBasicSets += isl_set_n_basic_set(Domain);
+ isl_set_free(Domain);
+ isl_aff_free(Aff);
+ return isl_stat_ok;
+}
+
+/// @brief Determine if @p PWA is to complex to continue
+///
+/// Note that @p PWA will be "free" (deallocated) if this function returns true,
+/// but not if this function returns false.
+static bool isToComplex(isl_pw_aff *PWA) {
+ unsigned NumBasicSets = 0;
+ isl_pw_aff_foreach_piece(PWA, addNumBasicSets, &NumBasicSets);
+ if (NumBasicSets <= MaxConjunctsInPwAff)
+ return false;
+ isl_pw_aff_free(PWA);
+ return true;
+}
+
SCEVAffinator::SCEVAffinator(Scop *S, LoopInfo &LI)
: S(S), Ctx(S->getIslCtx()), R(S->getRegion()), SE(*S->getSE()), LI(LI),
TD(R.getEntry()->getParent()->getParent()->getDataLayout()) {}
@@ -233,6 +260,8 @@ __isl_give isl_pw_aff *SCEVAffinator::vi
for (int i = 1, e = Expr->getNumOperands(); i < e; ++i) {
isl_pw_aff *NextSummand = visit(Expr->getOperand(i));
Sum = isl_pw_aff_add(Sum, NextSummand);
+ if (isToComplex(Sum))
+ return nullptr;
}
return Sum;
@@ -292,6 +321,8 @@ __isl_give isl_pw_aff *SCEVAffinator::vi
for (int i = 1, e = Expr->getNumOperands(); i < e; ++i) {
isl_pw_aff *NextOperand = visit(Expr->getOperand(i));
Max = isl_pw_aff_max(Max, NextOperand);
+ if (isToComplex(Max))
+ return nullptr;
}
return Max;
Modified: polly/trunk/test/ScopInfo/complex-branch-structure.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScopInfo/complex-branch-structure.ll?rev=264514&r1=264513&r2=264514&view=diff
==============================================================================
--- polly/trunk/test/ScopInfo/complex-branch-structure.ll (original)
+++ polly/trunk/test/ScopInfo/complex-branch-structure.ll Sat Mar 26 11:17:00 2016
@@ -20,7 +20,7 @@
; \ / /
; loop backedge
-; CHECK: Low number of domain conjuncts assumption: { : 1 = 0 }
+; CHECK: Low complexity assumption: { : 1 = 0 }
define void @foo(float* %A, float* %B, float* %C, float* %D, float* %E,
i64 %A1.p, i64 %A2.p, i64 %A3.p,
Added: polly/trunk/test/ScopInfo/complex-expression.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScopInfo/complex-expression.ll?rev=264514&view=auto
==============================================================================
--- polly/trunk/test/ScopInfo/complex-expression.ll (added)
+++ polly/trunk/test/ScopInfo/complex-expression.ll Sat Mar 26 11:17:00 2016
@@ -0,0 +1,142 @@
+; RUN: opt %loadPolly -pass-remarks-analysis="polly-scops" -polly-scops \
+; RUN: < %s 2>&1 | FileCheck %s
+;
+; This test case has an SCEVSMax expression with a very high arity. The
+; piecewise affine function we would create for it would have a huge amount of
+; conjuncts, thus it would take a lot of time creating and handling it.
+;
+; This ensures we bail out for really complex expressions:
+;
+; CHECK: Low complexity assumption: { : 1 = 0 }
+;
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; Function Attrs: norecurse nounwind
+define i32 @foo(i32* nocapture readonly %src1, i32* nocapture readonly %src2, i32* nocapture %score, i32* nocapture %max, i32 %n) #0 {
+entry:
+ %cmp33 = icmp sgt i32 %n, 0
+ br i1 %cmp33, label %for.body.preheader, label %for.body7.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body7.preheader.loopexit: ; preds = %for.body
+ br label %for.body7.preheader
+
+for.body7.preheader: ; preds = %for.body7.preheader.loopexit, %entry
+ %0 = load i32, i32* %score, align 4, !tbaa !3
+ %cmp9 = icmp sgt i32 %0, -1
+ %.scoreMax.0 = select i1 %cmp9, i32 %0, i32 -1
+ %arrayidx8.1 = getelementptr inbounds i32, i32* %score, i32 1
+ %1 = load i32, i32* %arrayidx8.1, align 4, !tbaa !3
+ %cmp9.1 = icmp sgt i32 %1, %.scoreMax.0
+ %.scoreMax.0.1 = select i1 %cmp9.1, i32 %1, i32 %.scoreMax.0
+ %arrayidx8.2 = getelementptr inbounds i32, i32* %score, i32 2
+ %2 = load i32, i32* %arrayidx8.2, align 4, !tbaa !3
+ %cmp9.2 = icmp sgt i32 %2, %.scoreMax.0.1
+ %.scoreMax.0.2 = select i1 %cmp9.2, i32 %2, i32 %.scoreMax.0.1
+ %arrayidx8.3 = getelementptr inbounds i32, i32* %score, i32 3
+ %3 = load i32, i32* %arrayidx8.3, align 4, !tbaa !3
+ %cmp9.3 = icmp sgt i32 %3, %.scoreMax.0.2
+ %.scoreMax.0.3 = select i1 %cmp9.3, i32 %3, i32 %.scoreMax.0.2
+ %arrayidx8.4 = getelementptr inbounds i32, i32* %score, i32 4
+ %4 = load i32, i32* %arrayidx8.4, align 4, !tbaa !3
+ %cmp9.4 = icmp sgt i32 %4, %.scoreMax.0.3
+ %.scoreMax.0.4 = select i1 %cmp9.4, i32 %4, i32 %.scoreMax.0.3
+ %arrayidx8.5 = getelementptr inbounds i32, i32* %score, i32 5
+ %5 = load i32, i32* %arrayidx8.5, align 4, !tbaa !3
+ %cmp9.5 = icmp sgt i32 %5, %.scoreMax.0.4
+ %.scoreMax.0.5 = select i1 %cmp9.5, i32 %5, i32 %.scoreMax.0.4
+ %arrayidx8.6 = getelementptr inbounds i32, i32* %score, i32 6
+ %6 = load i32, i32* %arrayidx8.6, align 4, !tbaa !3
+ %cmp9.6 = icmp sgt i32 %6, %.scoreMax.0.5
+ %.scoreMax.0.6 = select i1 %cmp9.6, i32 %6, i32 %.scoreMax.0.5
+ %arrayidx8.7 = getelementptr inbounds i32, i32* %score, i32 7
+ %7 = load i32, i32* %arrayidx8.7, align 4, !tbaa !3
+ %cmp9.7 = icmp sgt i32 %7, %.scoreMax.0.6
+ %.scoreMax.0.7 = select i1 %cmp9.7, i32 %7, i32 %.scoreMax.0.6
+ %arrayidx8.8 = getelementptr inbounds i32, i32* %score, i32 8
+ %8 = load i32, i32* %arrayidx8.8, align 4, !tbaa !3
+ %cmp9.8 = icmp sgt i32 %8, %.scoreMax.0.7
+ %.scoreMax.0.8 = select i1 %cmp9.8, i32 %8, i32 %.scoreMax.0.7
+ %arrayidx8.9 = getelementptr inbounds i32, i32* %score, i32 9
+ %9 = load i32, i32* %arrayidx8.9, align 4, !tbaa !3
+ %cmp9.9 = icmp sgt i32 %9, %.scoreMax.0.8
+ %.scoreMax.0.9 = select i1 %cmp9.9, i32 %9, i32 %.scoreMax.0.8
+ %arrayidx8.10 = getelementptr inbounds i32, i32* %score, i32 10
+ %10 = load i32, i32* %arrayidx8.10, align 4, !tbaa !3
+ %cmp9.10 = icmp sgt i32 %10, %.scoreMax.0.9
+ %.scoreMax.0.10 = select i1 %cmp9.10, i32 %10, i32 %.scoreMax.0.9
+ %arrayidx8.11 = getelementptr inbounds i32, i32* %score, i32 11
+ %11 = load i32, i32* %arrayidx8.11, align 4, !tbaa !3
+ %cmp9.11 = icmp sgt i32 %11, %.scoreMax.0.10
+ %.scoreMax.0.11 = select i1 %cmp9.11, i32 %11, i32 %.scoreMax.0.10
+ %arrayidx8.12 = getelementptr inbounds i32, i32* %score, i32 12
+ %12 = load i32, i32* %arrayidx8.12, align 4, !tbaa !3
+ %cmp9.12 = icmp sgt i32 %12, %.scoreMax.0.11
+ %.scoreMax.0.12 = select i1 %cmp9.12, i32 %12, i32 %.scoreMax.0.11
+ %arrayidx8.13 = getelementptr inbounds i32, i32* %score, i32 13
+ %13 = load i32, i32* %arrayidx8.13, align 4, !tbaa !3
+ %cmp9.13 = icmp sgt i32 %13, %.scoreMax.0.12
+ %.scoreMax.0.13 = select i1 %cmp9.13, i32 %13, i32 %.scoreMax.0.12
+ %arrayidx8.14 = getelementptr inbounds i32, i32* %score, i32 14
+ %14 = load i32, i32* %arrayidx8.14, align 4, !tbaa !3
+ %cmp9.14 = icmp sgt i32 %14, %.scoreMax.0.13
+ %.scoreMax.0.14 = select i1 %cmp9.14, i32 %14, i32 %.scoreMax.0.13
+ %arrayidx8.15 = getelementptr inbounds i32, i32* %score, i32 15
+ %15 = load i32, i32* %arrayidx8.15, align 4, !tbaa !3
+ %cmp9.15 = icmp sgt i32 %15, %.scoreMax.0.14
+ %.scoreMax.0.15 = select i1 %cmp9.15, i32 %15, i32 %.scoreMax.0.14
+ %arrayidx8.16 = getelementptr inbounds i32, i32* %score, i32 16
+ %16 = load i32, i32* %arrayidx8.16, align 4, !tbaa !3
+ %cmp9.16 = icmp sgt i32 %16, %.scoreMax.0.15
+ %.scoreMax.0.16 = select i1 %cmp9.16, i32 %16, i32 %.scoreMax.0.15
+ %arrayidx8.17 = getelementptr inbounds i32, i32* %score, i32 17
+ %17 = load i32, i32* %arrayidx8.17, align 4, !tbaa !3
+ %cmp9.17 = icmp sgt i32 %17, %.scoreMax.0.16
+ %.scoreMax.0.17 = select i1 %cmp9.17, i32 %17, i32 %.scoreMax.0.16
+ %arrayidx8.18 = getelementptr inbounds i32, i32* %score, i32 18
+ %18 = load i32, i32* %arrayidx8.18, align 4, !tbaa !3
+ %cmp9.18 = icmp sgt i32 %18, %.scoreMax.0.17
+ %.scoreMax.0.18 = select i1 %cmp9.18, i32 %18, i32 %.scoreMax.0.17
+ %arrayidx8.19 = getelementptr inbounds i32, i32* %score, i32 19
+ %19 = load i32, i32* %arrayidx8.19, align 4, !tbaa !3
+ %cmp9.19 = icmp sgt i32 %19, %.scoreMax.0.18
+ %.scoreMax.0.19 = select i1 %cmp9.19, i32 %19, i32 %.scoreMax.0.18
+ %cmp14 = icmp eq i32 %.scoreMax.0.19, -1
+ br i1 %cmp14, label %cleanup, label %if.end16
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.034 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %src1, i32 %i.034
+ %20 = load i32, i32* %arrayidx, align 4, !tbaa !3
+ %arrayidx1 = getelementptr inbounds i32, i32* %src2, i32 %i.034
+ %21 = load i32, i32* %arrayidx1, align 4, !tbaa !3
+ %add = add nsw i32 %21, %20
+ %arrayidx2 = getelementptr inbounds i32, i32* %score, i32 %i.034
+ store i32 %add, i32* %arrayidx2, align 4, !tbaa !3
+ %inc = add nuw nsw i32 %i.034, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %for.body7.preheader.loopexit, label %for.body
+
+if.end16: ; preds = %for.body7.preheader
+ store i32 %.scoreMax.0.19, i32* %max, align 4, !tbaa !3
+ br label %cleanup
+
+cleanup: ; preds = %for.body7.preheader, %if.end16
+ %retval.0 = phi i32 [ 1, %if.end16 ], [ 0, %for.body7.preheader ]
+ ret i32 %retval.0
+}
+
+attributes #0 = { norecurse nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="arm7tdmi" "target-features"="+strict-align" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, !"min_enum_size", i32 4}
+!2 = !{!"clang version 3.9.0 (http://llvm.org/git/clang.git 24028ac5d033be4c1e87ce65affb4ea61deb1897) (http://llvm.org/git/llvm.git b88337289681d6ede5bba5da77007c83509e54ae)"}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
Modified: polly/trunk/test/ScopInfo/complex-successor-structure.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScopInfo/complex-successor-structure.ll?rev=264514&r1=264513&r2=264514&view=diff
==============================================================================
--- polly/trunk/test/ScopInfo/complex-successor-structure.ll (original)
+++ polly/trunk/test/ScopInfo/complex-successor-structure.ll Sat Mar 26 11:17:00 2016
@@ -5,7 +5,7 @@
; of following form and check that the domain construction does not take a huge
; amount of time.
;
-; CHECK: Low number of domain conjuncts assumption: { : 1 = 0 }
+; CHECK: Low complexity assumption: { : 1 = 0 }
; |
; for.body <--+
More information about the llvm-commits
mailing list