[polly] r218566 - [RTC] Bail if too many parameters are involved in a RTC access.

Johannes Doerfert doerfert at cs.uni-saarland.de
Sat Sep 27 04:02:42 PDT 2014


Author: jdoerfert
Date: Sat Sep 27 06:02:39 2014
New Revision: 218566

URL: http://llvm.org/viewvc/llvm-project?rev=218566&view=rev
Log:
[RTC] Bail if too many parameters are involved in a RTC access.

  If too many parameters are involved in accesses used to create RTCs
  we might end up with enormous compile times and RTC expressions.
  The reason is that the lexmin/lexmax is dependent on all these
  parameters and isl might need to create a case for every "ordering"
  of them (e.g., p0 <= p1 <= p2, p1 <= p0 <= p2, ...).

  The exact number of parameters allowed in accesses is defined by the
  command line option -polly-rtc-max-parameters=XXX and set by default
  to 8.

  Differential Revision: http://reviews.llvm.org/D5500

Added:
    polly/trunk/test/ScopInfo/aliasing_many_parameters_not_all_involved.ll
    polly/trunk/test/ScopInfo/run-time-check-many-parameters.ll
Modified:
    polly/trunk/include/polly/ScopInfo.h
    polly/trunk/lib/Analysis/ScopInfo.cpp

Modified: polly/trunk/include/polly/ScopInfo.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/ScopInfo.h?rev=218566&r1=218565&r2=218566&view=diff
==============================================================================
--- polly/trunk/include/polly/ScopInfo.h (original)
+++ polly/trunk/include/polly/ScopInfo.h Sat Sep 27 06:02:39 2014
@@ -692,7 +692,9 @@ public:
   void addAssumption(__isl_take isl_set *Set);
 
   /// @brief Build all alias groups for this SCoP.
-  void buildAliasGroups(AliasAnalysis &AA);
+  ///
+  /// @returns True if __no__ error occurred, false otherwise.
+  bool buildAliasGroups(AliasAnalysis &AA);
 
   /// @brief Return all alias groups for this SCoP.
   const MinMaxVectorVectorTy &getAliasGroups() const {

Modified: polly/trunk/lib/Analysis/ScopInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Analysis/ScopInfo.cpp?rev=218566&r1=218565&r2=218566&view=diff
==============================================================================
--- polly/trunk/lib/Analysis/ScopInfo.cpp (original)
+++ polly/trunk/lib/Analysis/ScopInfo.cpp Sat Sep 27 06:02:39 2014
@@ -64,6 +64,11 @@ static cl::opt<bool> DisableMultiplicati
     cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
     cl::init(false), cl::cat(PollyCategory));
 
+static cl::opt<unsigned> RunTimeChecksMaxParameters(
+    "polly-rtc-max-parameters",
+    cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
+    cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));
+
 /// Translate a 'const SCEV *' expression in an isl_pw_aff.
 struct SCEVAffinator : public SCEVVisitor<SCEVAffinator, isl_pw_aff *> {
 public:
@@ -1137,6 +1142,32 @@ static int buildMinMaxAccess(__isl_take
   isl_aff *OneAff;
   unsigned Pos;
 
+  // Restrict the number of parameters involved in the access as the lexmin/
+  // lexmax computation will take too long if this number is high.
+  //
+  // Experiments with a simple test case using an i7 4800MQ:
+  //
+  //  #Parameters involved | Time (in sec)
+  //            6          |     0.01
+  //            7          |     0.04
+  //            8          |     0.12
+  //            9          |     0.40
+  //           10          |     1.54
+  //           11          |     6.78
+  //           12          |    30.38
+  //
+  if (isl_set_n_param(Set) > RunTimeChecksMaxParameters) {
+    unsigned InvolvedParams = 0;
+    for (unsigned u = 0, e = isl_set_n_param(Set); u < e; u++)
+      if (isl_set_involves_dims(Set, isl_dim_param, u, 1))
+        InvolvedParams++;
+
+    if (InvolvedParams > RunTimeChecksMaxParameters) {
+      isl_set_free(Set);
+      return -1;
+    }
+  }
+
   MinPMA = isl_set_lexmin_pw_multi_aff(isl_set_copy(Set));
   MaxPMA = isl_set_lexmax_pw_multi_aff(isl_set_copy(Set));
 
@@ -1160,7 +1191,7 @@ static int buildMinMaxAccess(__isl_take
   return 0;
 }
 
-void Scop::buildAliasGroups(AliasAnalysis &AA) {
+bool Scop::buildAliasGroups(AliasAnalysis &AA) {
   // To create sound alias checks we perform the following steps:
   //   o) Use the alias analysis and an alias set tracker to build alias sets
   //      for all memory accesses inside the SCoP.
@@ -1207,6 +1238,7 @@ void Scop::buildAliasGroups(AliasAnalysi
       I = AliasGroups.erase(I);
   }
 
+  bool Valid = true;
   for (AliasGroupTy &AG : AliasGroups) {
     MinMaxVectorTy *MinMaxAccesses = new MinMaxVectorTy();
     MinMaxAccesses->reserve(AG.size());
@@ -1220,11 +1252,16 @@ void Scop::buildAliasGroups(AliasAnalysi
     Locations = isl_union_set_intersect_params(Locations, getAssumedContext());
     Locations = isl_union_set_coalesce(Locations);
     Locations = isl_union_set_detect_equalities(Locations);
-    isl_union_set_foreach_set(Locations, buildMinMaxAccess, MinMaxAccesses);
+    Valid = (0 == isl_union_set_foreach_set(Locations, buildMinMaxAccess,
+                                            MinMaxAccesses));
     isl_union_set_free(Locations);
-
     MinMaxAliasGroups.push_back(MinMaxAccesses);
+
+    if (!Valid)
+      break;
   }
+
+  return Valid;
 }
 
 Scop::Scop(TempScop &tempScop, LoopInfo &LI, ScalarEvolution &ScalarEvolution,
@@ -1566,9 +1603,28 @@ bool ScopInfo::runOnRegion(Region *R, RG
 
   scop = new Scop(*tempScop, LI, SE, ctx);
 
-  if (PollyUseRuntimeAliasChecks)
-    scop->buildAliasGroups(AA);
+  if (!PollyUseRuntimeAliasChecks)
+    return false;
+
+  // If a problem occurs while building the alias groups we need to delete
+  // this SCoP and pretend it wasn't valid in the first place.
+  if (scop->buildAliasGroups(AA))
+    return false;
+
+  --ScopFound;
+  if (tempScop->getMaxLoopDepth() > 0)
+    --RichScopFound;
+
+  DEBUG(dbgs()
+        << "\n\nNOTE: Run time checks for " << scop->getNameStr()
+        << " could not be created as the number of parameters involved is too "
+           "high. The SCoP will be "
+           "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust the "
+           "maximal number of parameters but be advised that the compile time "
+           "might increase exponentially.\n\n");
 
+  delete scop;
+  scop = nullptr;
   return false;
 }
 

Added: polly/trunk/test/ScopInfo/aliasing_many_parameters_not_all_involved.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScopInfo/aliasing_many_parameters_not_all_involved.ll?rev=218566&view=auto
==============================================================================
--- polly/trunk/test/ScopInfo/aliasing_many_parameters_not_all_involved.ll (added)
+++ polly/trunk/test/ScopInfo/aliasing_many_parameters_not_all_involved.ll Sat Sep 27 06:02:39 2014
@@ -0,0 +1,91 @@
+; RUN: opt %loadPolly -polly-scops -polly-code-generator=isl -polly-rtc-max-parameters=8 -analyze < %s | FileCheck %s --check-prefix=MAX8
+; RUN: opt %loadPolly -polly-scops -polly-code-generator=isl -polly-rtc-max-parameters=7 -analyze < %s | FileCheck %s --check-prefix=MAX7
+;
+; Check that we allow this SCoP even though it has 10 parameters involved in possibly aliasing accesses.
+; However, only 7 are involved in accesses through B, 8 through C and none in accesses through A.
+;
+; MAX8:       Printing analysis 'Polly - Create polyhedral description of Scops' for region: 'for.cond => for.end' in function 'jd':
+; MAX8-NEXT:  Function: jd
+
+; MAX7:       Printing analysis 'Polly - Create polyhedral description of Scops' for region: 'for.cond => for.end' in function 'jd':
+; MAX7-NEXT:  Invalid Scop!
+;
+;    void jd(int *A, int *B, int *C, long p1, long p2, long p3, long p4, long p5,
+;            long p6, long p7, long p8, long p9, long p10) {
+;      for (int i = 0; i < 1024; i++)
+;        A[i] = B[p1] - B[p2] + B[-p3] - B[p4] + B[p5] - B[-p6] + B[p7] - C[p3] +
+;               C[-p4] - C[p5] + C[p6] - C[-p7] + C[p8] - C[p9] + C[-p10];
+;    }
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @jd(i32* %A, i32* %B, i32* %C, i64 %p1, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6, i64 %p7, i64 %p8, i64 %p9, i64 %p10) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %exitcond = icmp ne i64 %indvars.iv, 1024
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32* %B, i64 %p1
+  %tmp = load i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32* %B, i64 %p2
+  %tmp1 = load i32* %arrayidx1, align 4
+  %sub = sub nsw i32 %tmp, %tmp1
+  %sub2 = sub nsw i64 0, %p3
+  %arrayidx3 = getelementptr inbounds i32* %B, i64 %sub2
+  %tmp2 = load i32* %arrayidx3, align 4
+  %add = add nsw i32 %sub, %tmp2
+  %arrayidx4 = getelementptr inbounds i32* %B, i64 %p4
+  %tmp3 = load i32* %arrayidx4, align 4
+  %sub5 = sub nsw i32 %add, %tmp3
+  %arrayidx6 = getelementptr inbounds i32* %B, i64 %p5
+  %tmp4 = load i32* %arrayidx6, align 4
+  %add7 = add nsw i32 %sub5, %tmp4
+  %sub8 = sub nsw i64 0, %p6
+  %arrayidx9 = getelementptr inbounds i32* %B, i64 %sub8
+  %tmp5 = load i32* %arrayidx9, align 4
+  %sub10 = sub nsw i32 %add7, %tmp5
+  %arrayidx11 = getelementptr inbounds i32* %B, i64 %p7
+  %tmp6 = load i32* %arrayidx11, align 4
+  %add12 = add nsw i32 %sub10, %tmp6
+  %arrayidx13 = getelementptr inbounds i32* %C, i64 %p3
+  %tmp7 = load i32* %arrayidx13, align 4
+  %sub14 = sub nsw i32 %add12, %tmp7
+  %sub15 = sub nsw i64 0, %p4
+  %arrayidx16 = getelementptr inbounds i32* %C, i64 %sub15
+  %tmp8 = load i32* %arrayidx16, align 4
+  %add17 = add nsw i32 %sub14, %tmp8
+  %arrayidx18 = getelementptr inbounds i32* %C, i64 %p5
+  %tmp9 = load i32* %arrayidx18, align 4
+  %sub19 = sub nsw i32 %add17, %tmp9
+  %arrayidx20 = getelementptr inbounds i32* %C, i64 %p6
+  %tmp10 = load i32* %arrayidx20, align 4
+  %add21 = add nsw i32 %sub19, %tmp10
+  %sub22 = sub nsw i64 0, %p7
+  %arrayidx23 = getelementptr inbounds i32* %C, i64 %sub22
+  %tmp11 = load i32* %arrayidx23, align 4
+  %sub24 = sub nsw i32 %add21, %tmp11
+  %arrayidx25 = getelementptr inbounds i32* %C, i64 %p8
+  %tmp12 = load i32* %arrayidx25, align 4
+  %add26 = add nsw i32 %sub24, %tmp12
+  %arrayidx27 = getelementptr inbounds i32* %C, i64 %p9
+  %tmp13 = load i32* %arrayidx27, align 4
+  %sub28 = sub nsw i32 %add26, %tmp13
+  %sub29 = sub nsw i64 0, %p10
+  %arrayidx30 = getelementptr inbounds i32* %C, i64 %sub29
+  %tmp14 = load i32* %arrayidx30, align 4
+  %add31 = add nsw i32 %sub28, %tmp14
+  %arrayidx32 = getelementptr inbounds i32* %A, i64 %indvars.iv
+  store i32 %add31, i32* %arrayidx32, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}

Added: polly/trunk/test/ScopInfo/run-time-check-many-parameters.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScopInfo/run-time-check-many-parameters.ll?rev=218566&view=auto
==============================================================================
--- polly/trunk/test/ScopInfo/run-time-check-many-parameters.ll (added)
+++ polly/trunk/test/ScopInfo/run-time-check-many-parameters.ll Sat Sep 27 06:02:39 2014
@@ -0,0 +1,130 @@
+; RUN: opt %loadPolly -polly-scops -polly-code-generator=isl -analyze < %s | FileCheck %s
+;
+; A valid Scop would print the list of its statements; we check that we do not
+; see that list.
+;
+; CHECK-NOT: Statements
+;
+; FIXME: Handling this is an open problem, at the moment we just bail out.
+;
+; void foo(float *A, float *B,
+; 	long p1,
+; 	long p2,
+; 	long p3,
+; 	long p4,
+; 	long p5,
+; 	long p6,
+; 	long p7,
+; 	long p8,
+; 	long p9,
+; 	long p10,
+; 	long p11,
+; 	long p12) {
+;   for (long i = 0; i < 100; i++) {
+;     A[i] =
+; 	B[i + p1] +
+; 	B[i + p2] +
+; 	B[i + p3] +
+; 	B[i + p4] +
+; 	B[i + p5] +
+; 	B[i + p6] +
+; 	B[i + p7] +
+; 	B[i + p8] +
+; 	B[i + p9] +
+; 	B[i + p10] +
+; 	B[i + p11] +
+; 	B[i + p12];
+;   }
+; }
+;
+; Computing the minimal and maximal element accessed in B is very expensive.
+; Expressing the minimal element itself yields a rather complex isl_pw_aff which
+; looks as follows:
+; { ...
+;   MemRef_B[(100 + p11)] : p2 <= -1 + p1 and p3 <= -1 + p1 and p4 <= -1 + p1
+;                           and p5 <= -1 + p1 and p6 <= -1 + p1 and
+;                           p7 <= -1 + p1 and p8 <= -1 + p1 and p9 <= -1 + p1
+;                           and p10 <= -1 + p1 and p11 >= p1 and
+;                           p12 <= -1 + p11;
+;   MemRef_B[(100 + p12)] : p2 <= -1 + p1 and p3 <= -1 + p1 and p4 <= -1 + p1
+;                           and p5 <= -1 + p1 and p6 <= -1 + p1 and
+;                           p7 <= -1 + p1 and p8 <= -1 + p1 and p9 <= -1 + p1
+;                           and p10 <= -1 + p1 and p11 <= -1 + p1 and p12 >= p1;
+;
+; and this isl_pw_aff is then 1:1 translated into an isl AST expression.
+;
+; In the best case, we would create a run-time check such as:
+;
+; if (B[99 + max(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12)] < A[0]
+;     || A[99] < B[min(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12)])
+;
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define void @foo(float* %A, float* %B, i64 %p1, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6, i64 %p7, i64 %p8, i64 %p9, i64 %p10, i64 %p11, i64 %p12) #0 {
+entry:
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %entry.split, %for.body
+  %i.01 = phi i64 [ 0, %entry.split ], [ %tmp25, %for.body ]
+  %tmp = add i64 %p1, %i.01
+  %arrayidx = getelementptr float* %B, i64 %tmp
+  %tmp2 = add i64 %p2, %i.01
+  %arrayidx2 = getelementptr float* %B, i64 %tmp2
+  %tmp3 = add i64 %p3, %i.01
+  %arrayidx5 = getelementptr float* %B, i64 %tmp3
+  %tmp4 = add i64 %p4, %i.01
+  %arrayidx8 = getelementptr float* %B, i64 %tmp4
+  %tmp5 = add i64 %p5, %i.01
+  %arrayidx11 = getelementptr float* %B, i64 %tmp5
+  %tmp6 = add i64 %p6, %i.01
+  %arrayidx14 = getelementptr float* %B, i64 %tmp6
+  %tmp7 = add i64 %p7, %i.01
+  %arrayidx17 = getelementptr float* %B, i64 %tmp7
+  %tmp8 = add i64 %p8, %i.01
+  %arrayidx20 = getelementptr float* %B, i64 %tmp8
+  %tmp9 = add i64 %p9, %i.01
+  %arrayidx23 = getelementptr float* %B, i64 %tmp9
+  %tmp10 = add i64 %p10, %i.01
+  %arrayidx26 = getelementptr float* %B, i64 %tmp10
+  %tmp11 = add i64 %p11, %i.01
+  %arrayidx29 = getelementptr float* %B, i64 %tmp11
+  %tmp12 = add i64 %p12, %i.01
+  %arrayidx32 = getelementptr float* %B, i64 %tmp12
+  %arrayidx34 = getelementptr float* %A, i64 %i.01
+  %tmp13 = load float* %arrayidx, align 4
+  %tmp14 = load float* %arrayidx2, align 4
+  %add3 = fadd float %tmp13, %tmp14
+  %tmp15 = load float* %arrayidx5, align 4
+  %add6 = fadd float %add3, %tmp15
+  %tmp16 = load float* %arrayidx8, align 4
+  %add9 = fadd float %add6, %tmp16
+  %tmp17 = load float* %arrayidx11, align 4
+  %add12 = fadd float %add9, %tmp17
+  %tmp18 = load float* %arrayidx14, align 4
+  %add15 = fadd float %add12, %tmp18
+  %tmp19 = load float* %arrayidx17, align 4
+  %add18 = fadd float %add15, %tmp19
+  %tmp20 = load float* %arrayidx20, align 4
+  %add21 = fadd float %add18, %tmp20
+  %tmp21 = load float* %arrayidx23, align 4
+  %add24 = fadd float %add21, %tmp21
+  %tmp22 = load float* %arrayidx26, align 4
+  %add27 = fadd float %add24, %tmp22
+  %tmp23 = load float* %arrayidx29, align 4
+  %add30 = fadd float %add27, %tmp23
+  %tmp24 = load float* %arrayidx32, align 4
+  %add33 = fadd float %add30, %tmp24
+  store float %add33, float* %arrayidx34, align 4
+  %tmp25 = add nsw i64 %i.01, 1
+  %exitcond = icmp ne i64 %tmp25, 100
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret void
+}





More information about the llvm-commits mailing list