[polly] r262327 - [FIX] Prevent compile time problems due to complex invariant loads

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 1 05:05:14 PST 2016


Author: jdoerfert
Date: Tue Mar  1 07:05:14 2016
New Revision: 262327

URL: http://llvm.org/viewvc/llvm-project?rev=262327&view=rev
Log:
[FIX] Prevent compile time problems due to complex invariant loads

  This cures the symptoms we see in h264 of SPEC2006 but not the cause.


Added:
    polly/trunk/test/Isl/CodeGen/invariant_load_complex_condition.ll
Modified:
    polly/trunk/lib/CodeGen/IslNodeBuilder.cpp

Modified: polly/trunk/lib/CodeGen/IslNodeBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/IslNodeBuilder.cpp?rev=262327&r1=262326&r2=262327&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/IslNodeBuilder.cpp (original)
+++ polly/trunk/lib/CodeGen/IslNodeBuilder.cpp Tue Mar  1 07:05:14 2016
@@ -48,6 +48,12 @@
 using namespace polly;
 using namespace llvm;
 
+// The maximal number of basic sets we allow during invariant load construction.
+// More complex access ranges will result in very high compile time and are also
+// unlikely to result in good code. This value is very high and should only
+// trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
+static int const MaxConjunctsInAccessRange = 80;
+
 __isl_give isl_ast_expr *
 IslNodeBuilder::getUpperBound(__isl_keep isl_ast_node *For,
                               ICmpInst::Predicate &Predicate) {
@@ -915,6 +921,11 @@ bool IslNodeBuilder::materializeParamete
 Value *IslNodeBuilder::preloadUnconditionally(isl_set *AccessRange,
                                               isl_ast_build *Build,
                                               Instruction *AccInst) {
+  if (isl_set_n_basic_set(AccessRange) > MaxConjunctsInAccessRange) {
+    isl_set_free(AccessRange);
+    return nullptr;
+  }
+
   isl_pw_multi_aff *PWAccRel = isl_pw_multi_aff_from_set(AccessRange);
   PWAccRel = isl_pw_multi_aff_gist_params(PWAccRel, S.getContext());
   isl_ast_expr *Access =
@@ -1005,9 +1016,15 @@ Value *IslNodeBuilder::preloadInvariantL
   Builder.SetInsertPoint(MergeBB->getTerminator());
   auto *MergePHI = Builder.CreatePHI(
       AccInstTy, 2, "polly.preload." + AccInst->getName() + ".merge");
+  PreloadVal = MergePHI;
+
+  if (!PreAccInst) {
+    PreloadVal = nullptr;
+    PreAccInst = UndefValue::get(AccInstTy);
+  }
+
   MergePHI->addIncoming(PreAccInst, ExecBB);
   MergePHI->addIncoming(Constant::getNullValue(AccInstTy), CondBB);
-  PreloadVal = MergePHI;
 
   isl_ast_build_free(Build);
   return PreloadVal;

Added: polly/trunk/test/Isl/CodeGen/invariant_load_complex_condition.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/invariant_load_complex_condition.ll?rev=262327&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/invariant_load_complex_condition.ll (added)
+++ polly/trunk/test/Isl/CodeGen/invariant_load_complex_condition.ll Tue Mar  1 07:05:14 2016
@@ -0,0 +1,70 @@
+; RUN: opt %loadPolly -S -polly-codegen < %s | FileCheck %s
+;
+; Extracted from h264 in SPEC 2006.
+;
+; TODO: We check that we do compile this benchmark in reasonable time.
+;       To do so we currently bail out due to the complex access range
+;       (multiple modulos) of the invariant load.
+;
+; FIXME: We should not bail with a false RTC here.
+;
+; CHECK-LABEL: polly.preload.begin:
+; CHECK-NOT:     br i1
+; CHECK-NOT:     br label
+; CHECK:         br i1 false, label %polly.start, label %entry.split
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%struct.IP = type { i32****, i32***, %struct.P, %struct.S, %struct.m }
+%struct.P = type { i32 }
+%struct.S = type { i32 }
+%struct.D = type { i32 }
+%struct.B = type { i32 }
+%struct.E = type { i32 }
+%struct.s = type { i32 }
+%struct.M = type { i32 }
+%struct.C = type { i32 }
+%struct.T = type { i32 }
+%struct.R = type { i32 }
+%struct.m = type { i32 }
+%struct.d = type { i32 }
+
+@img = external global %struct.IP*, align 8
+
+; Function Attrs: nounwind uwtable
+define void @dct_luma(i32 %block_x, i32 %block_y) #0 {
+entry:
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  %div = sdiv i32 %block_x, 4
+  %div1 = sdiv i32 %block_y, 4
+  %rem = srem i32 %div1, 2
+  %mul4 = shl nsw i32 %rem, 1
+  %rem5 = srem i32 %div, 2
+  %add6 = add nsw i32 %mul4, %rem5
+  %idxprom = sext i32 %add6 to i64
+  %0 = load %struct.IP*, %struct.IP** @img, align 8
+  %cofAC = getelementptr inbounds %struct.IP, %struct.IP* %0, i32 0, i32 0
+  %1 = load i32****, i32***** %cofAC, align 8
+  %arrayidx = getelementptr inbounds i32***, i32**** %1, i64 0
+  %2 = load i32***, i32**** %arrayidx, align 8
+  %arrayidx8 = getelementptr inbounds i32**, i32*** %2, i64 %idxprom
+  %3 = load i32**, i32*** %arrayidx8, align 8
+  %mb_data = getelementptr inbounds %struct.IP, %struct.IP* %0, i64 0, i32 4
+  %4 = load %struct.m, %struct.m* %mb_data, align 8
+  br i1 false, label %land.rhs, label %land.end
+
+land.rhs:                                         ; preds = %entry.split
+  br label %land.end
+
+land.end:                                         ; preds = %land.rhs, %entry.split
+  %5 = phi i1 [ false, %entry.split ], [ undef, %land.rhs ]
+  br i1 %5, label %for.cond104.preheader, label %for.cond34.preheader
+
+for.cond34.preheader:                             ; preds = %land.end
+  unreachable
+
+for.cond104.preheader:                            ; preds = %land.end
+  unreachable
+}




More information about the llvm-commits mailing list