[polly] r248881 - [FIX] Use escape logic for invariant loads

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 30 02:43:20 PDT 2015


Author: jdoerfert
Date: Wed Sep 30 04:43:20 2015
New Revision: 248881

URL: http://llvm.org/viewvc/llvm-project?rev=248881&view=rev
Log:
[FIX] Use escape logic for invariant loads

  Previously, we unconditionally forced all users outside the SCoP to use
  the preloaded value. However, if the SCoP is not executed due to the
  runtime checks, we need to use the original value because it might not
  be invariant in the first place.

Added:
    polly/trunk/test/Isl/CodeGen/invariant_load_escaping.ll
    polly/trunk/test/Isl/CodeGen/two-loops-right-after-each-other-2.ll
Modified:
    polly/trunk/lib/CodeGen/IslNodeBuilder.cpp

Modified: polly/trunk/lib/CodeGen/IslNodeBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/IslNodeBuilder.cpp?rev=248881&r1=248880&r2=248881&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/IslNodeBuilder.cpp (original)
+++ polly/trunk/lib/CodeGen/IslNodeBuilder.cpp Wed Sep 30 04:43:20 2015
@@ -890,6 +890,7 @@ void IslNodeBuilder::preloadInvariantLoa
     return;
 
   const Region &R = S.getRegion();
+  BasicBlock *EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
 
   BasicBlock *PreLoadBB =
       SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI);
@@ -915,17 +916,27 @@ void IslNodeBuilder::preloadInvariantLoa
       isl_id_free(ParamId);
     }
 
-    SmallVector<Instruction *, 4> Users;
+    auto *SAI = S.getScopArrayInfo(MA->getBaseAddr());
+    for (auto *DerivedSAI : SAI->getDerivedSAIs())
+      DerivedSAI->setBasePtr(PreloadVal);
+
+    // Use the escape system to get the correct value to users outside
+    // the SCoP.
+    BlockGenerator::EscapeUserVectorTy EscapeUsers;
     for (auto *U : AccInst->users())
       if (Instruction *UI = dyn_cast<Instruction>(U))
         if (!R.contains(UI))
-          Users.push_back(UI);
-    for (auto *U : Users)
-      U->replaceUsesOfWith(AccInst, PreloadVal);
+          EscapeUsers.push_back(UI);
 
-    auto *SAI = S.getScopArrayInfo(MA->getBaseAddr());
-    for (auto *DerivedSAI : SAI->getDerivedSAIs())
-      DerivedSAI->setBasePtr(PreloadVal);
+    if (EscapeUsers.empty())
+      continue;
+
+    auto *Ty = AccInst->getType();
+    auto *Alloca = new AllocaInst(Ty, AccInst->getName() + ".preload.s2a");
+    Alloca->insertBefore(EntryBB->getFirstInsertionPt());
+    Builder.CreateStore(PreloadVal, Alloca);
+
+    EscapeMap[AccInst] = std::make_pair(Alloca, std::move(EscapeUsers));
   }
 
   isl_ast_build_free(Build);

Added: polly/trunk/test/Isl/CodeGen/invariant_load_escaping.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/invariant_load_escaping.ll?rev=248881&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/invariant_load_escaping.ll (added)
+++ polly/trunk/test/Isl/CodeGen/invariant_load_escaping.ll Wed Sep 30 04:43:20 2015
@@ -0,0 +1,55 @@
+; RUN: opt %loadPolly -polly-codegen -polly-detect-unprofitable -S < %s | FileCheck %s
+;
+;    int f(int *A, int *B) {
+;      // Possible aliasing between A and B but if not then *B would be
+;      // invariant. We assume this and hoist *B but need to use a merged 
+;      // version in the return.
+;      int i = 0;
+;      int x = 0;
+;
+;      do {
+;        x = *B;
+;        A[i] += x;
+;      } while (i++ < 100);
+;
+;      return x;
+;    }
+;
+; CHECK: polly.preload.begin:
+; CHECK:   %polly.access.B = getelementptr i32, i32* %B, i64 0
+; CHECK:   %polly.access.B.load = load i32, i32* %polly.access.B
+; CHECK:   store i32 %polly.access.B.load, i32* %tmp.preload.s2a
+;
+; CHECK: polly.merge_new_and_old:
+; CHECK:   %tmp.merge = phi i32 [ %tmp.final_reload, %polly.loop_exit ], [ %tmp, %do.cond ]
+; CHECK:   br label %do.end
+;
+; CHECK: do.end:
+; CHECK:   ret i32 %tmp.merge
+;
+; CHECK: polly.loop_exit:
+; CHECK:   %tmp.final_reload = load i32, i32* %tmp.preload.s2a
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define i32 @f(i32* %A, i32* %B) {
+entry:
+  br label %do.body
+
+do.body:                                          ; preds = %do.cond, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %do.cond ], [ 0, %entry ]
+  %tmp = load i32, i32* %B, align 4
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %tmp1 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %tmp1, %tmp
+  store i32 %add, i32* %arrayidx, align 4
+  br label %do.cond
+
+do.cond:                                          ; preds = %do.body
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, 101
+  br i1 %exitcond, label %do.body, label %do.end
+
+do.end:                                           ; preds = %do.cond
+  ret i32 %tmp
+}

Added: polly/trunk/test/Isl/CodeGen/two-loops-right-after-each-other-2.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/two-loops-right-after-each-other-2.ll?rev=248881&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/two-loops-right-after-each-other-2.ll (added)
+++ polly/trunk/test/Isl/CodeGen/two-loops-right-after-each-other-2.ll Wed Sep 30 04:43:20 2015
@@ -0,0 +1,59 @@
+; RUN: opt %loadPolly -polly-detect-unprofitable -polly-codegen -S < %s | FileCheck %s
+
+; CHECK:       polly.merge_new_and_old:
+; CHECK-NEXT:    merge = phi
+
+%struct.ImageParameters = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i8**, i32, i32***, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [9 x [16 x [16 x i16]]], [5 x [16 x [16 x i16]]], [9 x [8 x [8 x i16]]], [2 x [4 x [16 x [16 x i16]]]], [16 x [16 x i16]], [16 x [16 x i32]], i32****, i32***, i32***, i32***, i32****, i32****, %struct.Picture*, %struct.Slice*, %struct.macroblock*, i32*, i32*, i32, i32, i32, i32, [4 x [4 x i32]], i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i16******, i16******, i16******, i16******, [15 x i16], i32, i32, i32, i32, i32, i32, i32, i32, [6 x [32 x i32]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [1 x i32], i32, i32, [2 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.DecRefPicMarking*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
  i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, double**, double***, i32***, double**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x [2 x i32]], [2 x i32], i32, i32, i16, i32, i32, i32, i32, i32 }
+%struct.Picture = type { i32, i32, [100 x %struct.Slice*], i32, float, float, float }
+%struct.Slice = type { i32, i32, i32, i32, i32, i32, %struct.datapartition*, %struct.MotionInfoContexts*, %struct.TextureInfoContexts*, i32, i32*, i32*, i32*, i32, i32*, i32*, i32*, i32 (i32)*, [3 x [2 x i32]] }
+%struct.datapartition = type { %struct.Bitstream*, %struct.EncodingEnvironment, %struct.EncodingEnvironment }
+%struct.Bitstream = type { i32, i32, i8, i32, i32, i8, i8, i32, i32, i8*, i32 }
+%struct.EncodingEnvironment = type { i32, i32, i32, i32, i32, i8*, i32*, i32, i32 }
+%struct.MotionInfoContexts = type { [3 x [11 x %struct.BiContextType]], [2 x [9 x %struct.BiContextType]], [2 x [10 x %struct.BiContextType]], [2 x [6 x %struct.BiContextType]], [4 x %struct.BiContextType], [4 x %struct.BiContextType], [3 x %struct.BiContextType] }
+%struct.BiContextType = type { i16, i8, i64 }
+%struct.TextureInfoContexts = type { [2 x %struct.BiContextType], [3 x [4 x %struct.BiContextType]], [10 x [4 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [5 x %struct.BiContextType]], [10 x [5 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]] }
+%struct.macroblock = type { i32, i32, i32, [2 x i32], i32, [8 x i32], %struct.macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+%struct.DecRefPicMarking = type { i32, i32, i32, i32, i32, %struct.DecRefPicMarking* }
+
+@img = external global %struct.ImageParameters*, align 8
+
+define void @intrapred_luma() {
+entry:
+  %PredPel = alloca [13 x i16], align 16
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  br i1 undef, label %for.body, label %for.body.262
+
+for.body.262:                                     ; preds = %for.body
+  %0 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8
+  br label %for.body.280
+
+for.body.280:                                     ; preds = %for.body.280, %for.body.262
+  %indvars.iv66 = phi i64 [ 0, %for.body.262 ], [ %indvars.iv.next67, %for.body.280 ]
+  %arrayidx282 = getelementptr inbounds [13 x i16], [13 x i16]* %PredPel, i64 0, i64 1
+  %arrayidx283 = getelementptr inbounds i16, i16* %arrayidx282, i64 %indvars.iv66
+  %1 = load i16, i16* %arrayidx283, align 2
+  %arrayidx289 = getelementptr inbounds %struct.ImageParameters, %struct.ImageParameters* %0, i64 0, i32 47, i64 0, i64 2, i64 %indvars.iv66
+  store i16 %1, i16* %arrayidx289, align 2
+  %indvars.iv.next67 = add nuw nsw i64 %indvars.iv66, 1
+  br i1 false, label %for.body.280, label %for.end.298
+
+for.end.298:                                      ; preds = %for.body.280
+  %2 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8
+  br label %for.body.310
+
+for.body.310:                                     ; preds = %for.body.310, %for.end.298
+  %indvars.iv = phi i64 [ 0, %for.end.298 ], [ %indvars.iv.next, %for.body.310 ]
+  %InterScopSext = sext i16 %1 to i64
+  %arrayidx312 = getelementptr inbounds [13 x i16], [13 x i16]* %PredPel, i64 0, i64 %InterScopSext
+  %arrayidx313 = getelementptr inbounds i16, i16* %arrayidx312, i64 %indvars.iv
+  %3 = load i16, i16* %arrayidx313, align 2
+  %arrayidx322 = getelementptr inbounds %struct.ImageParameters, %struct.ImageParameters* %2, i64 0, i32 47, i64 1, i64 %indvars.iv, i64 1
+  store i16 %3, i16* %arrayidx322, align 2
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br i1 false, label %for.body.310, label %for.end.328
+
+for.end.328:                                      ; preds = %for.body.310
+  ret void
+}




More information about the llvm-commits mailing list