[polly] r260886 - [FIX] Check the next base pointer for possible invariant loads

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 15 04:42:06 PST 2016


Author: jdoerfert
Date: Mon Feb 15 06:42:05 2016
New Revision: 260886

URL: http://llvm.org/viewvc/llvm-project?rev=260886&view=rev
Log:
[FIX] Check the next base pointer for possible invariant loads

  A load can only be invariant if its base pointer is invariant too. To
  this end, we check if the base pointer is defined inside the region or
  outside. In the former case we recursively check if we can (and
  therefore will) hoist the base pointer too. Only if that happens can
  we hoist the load.


Added:
    polly/trunk/test/Isl/CodeGen/hoisting_1.ll
    polly/trunk/test/Isl/CodeGen/hoisting_2.ll
Modified:
    polly/trunk/lib/Analysis/ScopInfo.cpp

Modified: polly/trunk/lib/Analysis/ScopInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Analysis/ScopInfo.cpp?rev=260886&r1=260885&r2=260886&view=diff
==============================================================================
--- polly/trunk/lib/Analysis/ScopInfo.cpp (original)
+++ polly/trunk/lib/Analysis/ScopInfo.cpp Mon Feb 15 06:42:05 2016
@@ -2969,12 +2969,18 @@ bool Scop::isHoistableAccess(MemoryAcces
   // no base pointer origin we check that the base pointer is defined
   // outside the region.
   const ScopArrayInfo *SAI = Access->getScopArrayInfo();
-  while (auto *BasePtrOriginSAI = SAI->getBasePtrOriginSAI())
-    SAI = BasePtrOriginSAI;
-
-  if (auto *BasePtrInst = dyn_cast<Instruction>(SAI->getBasePtr()))
-    if (R.contains(BasePtrInst))
+  auto *BasePtrInst = dyn_cast<Instruction>(SAI->getBasePtr());
+  if (SAI->getBasePtrOriginSAI()) {
+    assert(BasePtrInst && R.contains(BasePtrInst));
+    if (!isa<LoadInst>(BasePtrInst))
+      return false;
+    auto *BasePtrStmt = getStmtForBasicBlock(BasePtrInst->getParent());
+    assert(BasePtrStmt);
+    auto *BasePtrMA = BasePtrStmt->getArrayAccessOrNULLFor(BasePtrInst);
+    if (BasePtrMA && !isHoistableAccess(BasePtrMA, Writes))
       return false;
+  } else if (BasePtrInst && R.contains(BasePtrInst))
+    return false;
 
   // Skip accesses in non-affine subregions as they might not be executed
   // under the same condition as the entry of the non-affine subregion.

Added: polly/trunk/test/Isl/CodeGen/hoisting_1.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/hoisting_1.ll?rev=260886&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/hoisting_1.ll (added)
+++ polly/trunk/test/Isl/CodeGen/hoisting_1.ll Mon Feb 15 06:42:05 2016
@@ -0,0 +1,68 @@
+; RUN: opt %loadPolly -tbaa -polly-codegen -polly-allow-differing-element-types -disable-output %s
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%struct.hoge = type { %struct.widget*, %struct.barney*, %struct.foo*, i32, i32, %struct.wibble*, i32, i32, i32, i32, double, i32, i32, i32, %struct.foo.1*, [4 x %struct.hoge.2*], [4 x %struct.blam*], [4 x %struct.blam*], [16 x i8], [16 x i8], [16 x i8], i32, %struct.barney.3*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8, i16, i16, i32, i32, i32, i32, i32, i32, i32, [4 x %struct.foo.1*], i32, i32, i32, [10 x i32], i32, i32, i32, i32, %struct.foo.4*, %struct.wombat.5*, %struct.blam.6*, %struct.foo.7*, %struct.bar*, %struct.wibble.8*, %struct.barney.9*, %struct.hoge.10*, %struct.bar.11* }
+%struct.widget = type { void (%struct.quux*)*, void (%struct.quux*, i32)*, void (%struct.quux*)*, void (%struct.quux*, i8*)*, void (%struct.quux*)*, i32, %struct.hoge.0, i32, i64, i8**, i32, i8**, i32, i32 }
+%struct.quux = type { %struct.widget*, %struct.barney*, %struct.foo*, i32, i32 }
+%struct.hoge.0 = type { [8 x i32], [48 x i8] }
+%struct.barney = type { i8* (%struct.quux*, i32, i64)*, i8* (%struct.quux*, i32, i64)*, i8** (%struct.quux*, i32, i32, i32)*, [64 x i16]** (%struct.quux*, i32, i32, i32)*, %struct.ham* (%struct.quux*, i32, i32, i32, i32, i32)*, %struct.wombat* (%struct.quux*, i32, i32, i32, i32, i32)*, {}*, i8** (%struct.quux*, %struct.ham*, i32, i32, i32)*, [64 x i16]** (%struct.quux*, %struct.wombat*, i32, i32, i32)*, void (%struct.quux*, i32)*, {}*, i64 }
+%struct.ham = type opaque
+%struct.wombat = type opaque
+%struct.foo = type { {}*, i64, i64, i32, i32 }
+%struct.wibble = type { i8*, i64, void (%struct.hoge*)*, i32 (%struct.hoge*)*, void (%struct.hoge*)* }
+%struct.foo.1 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.hoge.2*, i8* }
+%struct.hoge.2 = type { [64 x i16], i32 }
+%struct.blam = type { [17 x i8], [256 x i8], i32 }
+%struct.barney.3 = type { i32, [4 x i32], i32, i32, i32, i32 }
+%struct.foo.4 = type { void (%struct.hoge*)*, void (%struct.hoge*)*, void (%struct.hoge*)*, i32, i32 }
+%struct.wombat.5 = type { void (%struct.hoge*, i32)*, void (%struct.hoge*, i8**, i32*, i32)* }
+%struct.blam.6 = type { void (%struct.hoge*, i32)*, void (%struct.hoge*, i8**, i32*, i32, i8***, i32*, i32)* }
+%struct.foo.7 = type { void (%struct.hoge*, i32)*, i32 (%struct.hoge*, i8***)* }
+%struct.bar = type { void (%struct.hoge*, i32, i8*, i32)*, void (%struct.hoge*)*, void (%struct.hoge*)*, void (%struct.hoge*)*, void (%struct.hoge*)*, void (%struct.hoge*)* }
+%struct.wibble.8 = type { void (%struct.hoge*)*, void (%struct.hoge*, i8**, i8***, i32, i32)* }
+%struct.barney.9 = type { void (%struct.hoge*)*, void (%struct.hoge*, i8***, i32, i8***, i32)*, i32 }
+%struct.hoge.10 = type { void (%struct.hoge*)*, void (%struct.hoge*, %struct.foo.1*, i8**, [64 x i16]*, i32, i32, i32)* }
+%struct.bar.11 = type { {}*, i32 (%struct.hoge*, [64 x i16]**)*, void (%struct.hoge*)* }
+
+; Function Attrs: nounwind uwtable
+define void @foo(%struct.hoge* %arg) #0 {
+bb:
+  br label %bb2
+
+bb2:                                              ; preds = %bb
+  %tmp3 = getelementptr inbounds %struct.hoge, %struct.hoge* %arg, i32 0, i32 42
+  %tmp4 = getelementptr inbounds [4 x %struct.foo.1*], [4 x %struct.foo.1*]* %tmp3, i64 0, i64 0
+  %tmp = load %struct.foo.1*, %struct.foo.1** %tmp4, align 8, !tbaa !1
+  %tmp5 = getelementptr inbounds %struct.foo.1, %struct.foo.1* %tmp, i32 0, i32 7
+  %tmp6 = load i32, i32* %tmp5, align 4, !tbaa !5
+  %tmp7 = getelementptr inbounds %struct.hoge, %struct.hoge* %arg, i32 0, i32 43
+  store i32 %tmp6, i32* %tmp7, align 8, !tbaa !8
+  br i1 false, label %bb8, label %bb9
+
+bb8:                                              ; preds = %bb2
+  br label %bb9
+
+bb9:                                              ; preds = %bb8, %bb2
+  br label %bb10
+
+bb10:                                             ; preds = %bb9
+  ret void
+}
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.9.0 (trunk 259751) (llvm/trunk 259869)"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"any pointer", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!6, !7, i64 28}
+!6 = !{!"", !7, i64 0, !7, i64 4, !7, i64 8, !7, i64 12, !7, i64 16, !7, i64 20, !7, i64 24, !7, i64 28, !7, i64 32, !7, i64 36, !7, i64 40, !7, i64 44, !7, i64 48, !7, i64 52, !7, i64 56, !7, i64 60, !7, i64 64, !7, i64 68, !7, i64 72, !2, i64 80, !2, i64 88}
+!7 = !{!"int", !3, i64 0}
+!8 = !{!9, !7, i64 352}
+!9 = !{!"jpeg_compress_struct", !2, i64 0, !2, i64 8, !2, i64 16, !7, i64 24, !7, i64 28, !2, i64 32, !7, i64 40, !7, i64 44, !7, i64 48, !3, i64 52, !10, i64 56, !7, i64 64, !7, i64 68, !3, i64 72, !2, i64 80, !3, i64 88, !3, i64 120, !3, i64 152, !3, i64 184, !3, i64 200, !3, i64 216, !7, i64 232, !2, i64 240, !7, i64 248, !7, i64 252, !7, i64 256, !7, i64 260, !7, i64 264, !3, i64 268, !7, i64 272, !7, i64 276, !7, i64 280, !3, i64 284, !11, i64 286, !11, i64 288, !7, i64 292, !7, i64 296, !7, i64 300, !7, i64 304, !7, i64 308, !7, i64 312, !7, i64 316, !3, i64 320, !7, i64 352, !7, i64 356, !7, i64 360, !3, i64 364, !7, i64 404, !7, i64 408, !7, i64 412, !7, i64 416, !2, i64 424, !2, i64 432, !2, i64 440, !2, i64 448, !2, i64 456, !2, i64 464, !2, i64 472, !2, i64 480, !2, i64 488}
+!10 = !{!"double", !3, i64 0}
+!11 = !{!"short", !3, i64 0}

Added: polly/trunk/test/Isl/CodeGen/hoisting_2.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/hoisting_2.ll?rev=260886&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/hoisting_2.ll (added)
+++ polly/trunk/test/Isl/CodeGen/hoisting_2.ll Mon Feb 15 06:42:05 2016
@@ -0,0 +1,87 @@
+; RUN: opt %loadPolly -tbaa -polly-codegen -polly-allow-differing-element-types -disable-output %s
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%struct.hoge = type { %struct.widget*, %struct.barney*, %struct.foo*, i32, i32, %struct.wibble*, i32, i32, i32, i32, double, i32, i32, i32, %struct.foo.1*, [4 x %struct.hoge.2*], [4 x %struct.blam*], [4 x %struct.blam*], [16 x i8], [16 x i8], [16 x i8], i32, %struct.barney.3*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8, i16, i16, i32, i32, i32, i32, i32, i32, i32, [4 x %struct.foo.1*], i32, i32, i32, [10 x i32], i32, i32, i32, i32, %struct.foo.4*, %struct.wombat.5*, %struct.blam.6*, %struct.foo.7*, %struct.bar*, %struct.wibble.8*, %struct.barney.9*, %struct.hoge.10*, %struct.bar.11* }
+%struct.widget = type { void (%struct.quux*)*, void (%struct.quux*, i32)*, void (%struct.quux*)*, void (%struct.quux*, i8*)*, void (%struct.quux*)*, i32, %struct.hoge.0, i32, i64, i8**, i32, i8**, i32, i32 }
+%struct.quux = type { %struct.widget*, %struct.barney*, %struct.foo*, i32, i32 }
+%struct.hoge.0 = type { [8 x i32], [48 x i8] }
+%struct.barney = type { i8* (%struct.quux*, i32, i64)*, i8* (%struct.quux*, i32, i64)*, i8** (%struct.quux*, i32, i32, i32)*, [64 x i16]** (%struct.quux*, i32, i32, i32)*, %struct.ham* (%struct.quux*, i32, i32, i32, i32, i32)*, %struct.wombat* (%struct.quux*, i32, i32, i32, i32, i32)*, {}*, i8** (%struct.quux*, %struct.ham*, i32, i32, i32)*, [64 x i16]** (%struct.quux*, %struct.wombat*, i32, i32, i32)*, void (%struct.quux*, i32)*, {}*, i64 }
+%struct.ham = type opaque
+%struct.wombat = type opaque
+%struct.foo = type { {}*, i64, i64, i32, i32 }
+%struct.wibble = type { i8*, i64, void (%struct.hoge*)*, i32 (%struct.hoge*)*, void (%struct.hoge*)* }
+%struct.foo.1 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.hoge.2*, i8* }
+%struct.hoge.2 = type { [64 x i16], i32 }
+%struct.blam = type { [17 x i8], [256 x i8], i32 }
+%struct.barney.3 = type { i32, [4 x i32], i32, i32, i32, i32 }
+%struct.foo.4 = type { void (%struct.hoge*)*, void (%struct.hoge*)*, void (%struct.hoge*)*, i32, i32 }
+%struct.wombat.5 = type { void (%struct.hoge*, i32)*, void (%struct.hoge*, i8**, i32*, i32)* }
+%struct.blam.6 = type { void (%struct.hoge*, i32)*, void (%struct.hoge*, i8**, i32*, i32, i8***, i32*, i32)* }
+%struct.foo.7 = type { void (%struct.hoge*, i32)*, i32 (%struct.hoge*, i8***)* }
+%struct.bar = type { void (%struct.hoge*, i32, i8*, i32)*, void (%struct.hoge*)*, void (%struct.hoge*)*, void (%struct.hoge*)*, void (%struct.hoge*)*, void (%struct.hoge*)* }
+%struct.wibble.8 = type { void (%struct.hoge*)*, void (%struct.hoge*, i8**, i8***, i32, i32)* }
+%struct.barney.9 = type { void (%struct.hoge*)*, void (%struct.hoge*, i8***, i32, i8***, i32)*, i32 }
+%struct.hoge.10 = type { void (%struct.hoge*)*, void (%struct.hoge*, %struct.foo.1*, i8**, [64 x i16]*, i32, i32, i32)* }
+%struct.bar.11 = type { {}*, i32 (%struct.hoge*, [64 x i16]**)*, void (%struct.hoge*)* }
+%struct.foo.12 = type { %struct.foo.4, i32, i32, i32, i32 }
+
+; Function Attrs: nounwind uwtable
+define void @eggs(%struct.hoge* %arg) #0 {
+bb:
+  %tmp = load %struct.barney.3*, %struct.barney.3** undef, align 8, !tbaa !1
+  br label %bb5
+
+bb5:                                              ; preds = %bb
+  %tmp6 = getelementptr inbounds %struct.hoge, %struct.hoge* %arg, i32 0, i32 51
+  %tmp7 = load %struct.foo.4*, %struct.foo.4** %tmp6, align 8, !tbaa !9
+  %tmp8 = bitcast %struct.foo.4* %tmp7 to %struct.foo.12*
+  %tmp9 = getelementptr inbounds %struct.foo.12, %struct.foo.12* %tmp8, i32 0, i32 4
+  %tmp10 = load i32, i32* %tmp9, align 4, !tbaa !10
+  %tmp11 = getelementptr inbounds %struct.barney.3, %struct.barney.3* %tmp, i64 0
+  %tmp12 = getelementptr inbounds %struct.barney.3, %struct.barney.3* %tmp11, i32 0, i32 0
+  %tmp151 = load i32, i32* %tmp12, align 4, !tbaa !13
+  %tmp162 = icmp slt i32 0, %tmp151
+  br i1 %tmp162, label %bb17.lr.ph, label %bb22
+
+bb17.lr.ph:                                       ; preds = %bb5
+  br label %bb17
+
+bb17:                                             ; preds = %bb17.lr.ph, %bb17
+  %tmp143 = phi i32 [ 0, %bb17.lr.ph ], [ %tmp21, %bb17 ]
+  %tmp18 = sext i32 %tmp143 to i64
+  %tmp19 = getelementptr inbounds %struct.hoge, %struct.hoge* %arg, i32 0, i32 42
+  %tmp20 = getelementptr inbounds [4 x %struct.foo.1*], [4 x %struct.foo.1*]* %tmp19, i64 0, i64 %tmp18
+  store %struct.foo.1* undef, %struct.foo.1** %tmp20, align 8, !tbaa !15
+  %tmp21 = add nsw i32 %tmp143, 1
+  %tmp15 = load i32, i32* %tmp12, align 4, !tbaa !13
+  %tmp16 = icmp slt i32 %tmp21, %tmp15
+  br i1 %tmp16, label %bb17, label %bb13.bb22_crit_edge
+
+bb13.bb22_crit_edge:                              ; preds = %bb17
+  br label %bb22
+
+bb22:                                             ; preds = %bb13.bb22_crit_edge, %bb5
+  ret void
+}
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.9.0 (trunk 259751) (llvm/trunk 259869)"}
+!1 = !{!2, !3, i64 240}
+!2 = !{!"jpeg_compress_struct", !3, i64 0, !3, i64 8, !3, i64 16, !6, i64 24, !6, i64 28, !3, i64 32, !6, i64 40, !6, i64 44, !6, i64 48, !4, i64 52, !7, i64 56, !6, i64 64, !6, i64 68, !4, i64 72, !3, i64 80, !4, i64 88, !4, i64 120, !4, i64 152, !4, i64 184, !4, i64 200, !4, i64 216, !6, i64 232, !3, i64 240, !6, i64 248, !6, i64 252, !6, i64 256, !6, i64 260, !6, i64 264, !4, i64 268, !6, i64 272, !6, i64 276, !6, i64 280, !4, i64 284, !8, i64 286, !8, i64 288, !6, i64 292, !6, i64 296, !6, i64 300, !6, i64 304, !6, i64 308, !6, i64 312, !6, i64 316, !4, i64 320, !6, i64 352, !6, i64 356, !6, i64 360, !4, i64 364, !6, i64 404, !6, i64 408, !6, i64 412, !6, i64 416, !3, i64 424, !3, i64 432, !3, i64 440, !3, i64 448, !3, i64 456, !3, i64 464, !3, i64 472, !3, i64 480, !3, i64 488}
+!3 = !{!"any pointer", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+!6 = !{!"int", !4, i64 0}
+!7 = !{!"double", !4, i64 0}
+!8 = !{!"short", !4, i64 0}
+!9 = !{!2, !3, i64 424}
+!10 = !{!11, !6, i64 44}
+!11 = !{!"", !12, i64 0, !4, i64 32, !6, i64 36, !6, i64 40, !6, i64 44}
+!12 = !{!"jpeg_comp_master", !3, i64 0, !3, i64 8, !3, i64 16, !6, i64 24, !6, i64 28}
+!13 = !{!14, !6, i64 0}
+!14 = !{!"", !6, i64 0, !4, i64 4, !6, i64 20, !6, i64 24, !6, i64 28, !6, i64 32}
+!15 = !{!3, !3, i64 0}




More information about the llvm-commits mailing list