[llvm-branch-commits] [llvm-branch] r287779 - Merging r279930:

Tom Stellard via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Nov 23 09:26:29 PST 2016


Author: tstellar
Date: Wed Nov 23 11:26:28 2016
New Revision: 287779

URL: http://llvm.org/viewvc/llvm-project?rev=287779&view=rev
Log:
Merging r279930:

------------------------------------------------------------------------
r279930 | elena.demikhovsky | 2016-08-28 01:53:53 -0700 (Sun, 28 Aug 2016) | 7 lines

[Loop Vectorizer] Fixed memory conflict checks.

Fixed a bug in the run-time checks for possible memory conflicts inside a loop.
The bug is in the calculation of the Low <-> High boundaries. The High boundary
should be calculated as "last memory access pointer + element size".

Differential revision: https://reviews.llvm.org/D23176

------------------------------------------------------------------------

Added:
    llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/memcheck-off-by-one-error.ll
Modified:
    llvm/branches/release_39/include/llvm/Analysis/LoopAccessAnalysis.h
    llvm/branches/release_39/lib/Analysis/LoopAccessAnalysis.cpp
    llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll
    llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll
    llvm/branches/release_39/test/Transforms/LoopVectorize/runtime-check-readonly.ll
    llvm/branches/release_39/test/Transforms/LoopVectorize/tbaa-nodep.ll
    llvm/branches/release_39/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll

Modified: llvm/branches/release_39/include/llvm/Analysis/LoopAccessAnalysis.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/include/llvm/Analysis/LoopAccessAnalysis.h?rev=287779&r1=287778&r2=287779&view=diff
==============================================================================
--- llvm/branches/release_39/include/llvm/Analysis/LoopAccessAnalysis.h (original)
+++ llvm/branches/release_39/include/llvm/Analysis/LoopAccessAnalysis.h Wed Nov 23 11:26:28 2016
@@ -334,9 +334,11 @@ public:
   struct PointerInfo {
     /// Holds the pointer value that we need to check.
     TrackingVH<Value> PointerValue;
-    /// Holds the pointer value at the beginning of the loop.
+    /// Holds the smallest byte address accessed by the pointer throughout all
+    /// iterations of the loop.
     const SCEV *Start;
-    /// Holds the pointer value at the end of the loop.
+    /// Holds the largest byte address accessed by the pointer throughout all
+    /// iterations of the loop, plus 1.
     const SCEV *End;
     /// Holds the information if this pointer is used for writing to memory.
     bool IsWritePtr;

Modified: llvm/branches/release_39/lib/Analysis/LoopAccessAnalysis.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/lib/Analysis/LoopAccessAnalysis.cpp?rev=287779&r1=287778&r2=287779&view=diff
==============================================================================
--- llvm/branches/release_39/lib/Analysis/LoopAccessAnalysis.cpp (original)
+++ llvm/branches/release_39/lib/Analysis/LoopAccessAnalysis.cpp Wed Nov 23 11:26:28 2016
@@ -148,6 +148,19 @@ const SCEV *llvm::replaceSymbolicStrideS
   return OrigSCEV;
 }
 
+/// Calculate Start and End points of memory access.
+/// Let's assume A is the first access and B is a memory access on N-th loop
+/// iteration. Then B is calculated as:  
+///   B = A + Step*N . 
+/// Step value may be positive or negative.
+/// N is a calculated back-edge taken count:
+///     N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0
+/// Start and End points are calculated in the following way:
+/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
+/// where SizeOfElt is the size of single memory access in bytes.
+///
+/// There is no conflict when the intervals are disjoint:
+/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
 void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
                                     unsigned DepSetId, unsigned ASId,
                                     const ValueToValueMap &Strides,
@@ -176,12 +189,17 @@ void RuntimePointerChecking::insert(Loop
       if (CStep->getValue()->isNegative())
         std::swap(ScStart, ScEnd);
     } else {
-      // Fallback case: the step is not constant, but the we can still
+      // Fallback case: the step is not constant, but we can still
       // get the upper and lower bounds of the interval by using min/max
       // expressions.
       ScStart = SE->getUMinExpr(ScStart, ScEnd);
       ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
     }
+    // Add the size of the pointed element to ScEnd.
+    unsigned EltSize =
+      Ptr->getType()->getPointerElementType()->getScalarSizeInBits() / 8;
+    const SCEV *EltSizeSCEV = SE->getConstant(ScEnd->getType(), EltSize);
+    ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
   }
 
   Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
@@ -1863,9 +1881,17 @@ std::pair<Instruction *, Instruction *>
     Value *End0 =   ChkBuilder.CreateBitCast(A.End,   PtrArithTy1, "bc");
     Value *End1 =   ChkBuilder.CreateBitCast(B.End,   PtrArithTy0, "bc");
 
-    Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
+    // [A|B].Start points to the first accessed byte under base [A|B].
+    // [A|B].End points to the last accessed byte, plus one.
+    // There is no conflict when the intervals are disjoint:
+    // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
+    //
+    // bound0 = (B.Start < A.End)
+    // bound1 = (A.Start < B.End)
+    //  IsConflict = bound0 & bound1
+    Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0");
     FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
-    Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
+    Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1");
     FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
     Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
     FirstInst = getFirstInst(FirstInst, IsConflict, Loc);

Added: llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/memcheck-off-by-one-error.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/memcheck-off-by-one-error.ll?rev=287779&view=auto
==============================================================================
--- llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/memcheck-off-by-one-error.ll (added)
+++ llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/memcheck-off-by-one-error.ll Wed Nov 23 11:26:28 2016
@@ -0,0 +1,51 @@
+; RUN: opt -analyze --loop-accesses %s | FileCheck %s
+
+; This test verifies run-time boundary check of memory accesses.
+; The original loop:
+;   void fastCopy(const char* src, char* op) {
+;     int len = 32;
+;     while (len > 0) {
+;       *(reinterpret_cast<long long*>(op)) = *(reinterpret_cast<const long long*>(src));
+;       src += 8;
+;       op += 8;
+;       len -= 8;
+;     }
+;   }
+; Boundaries calculations before this patch:
+; (Low: %src High: (24 + %src))
+; and the actual distance between two pointers was 31,  (%op - %src = 31)
+; IsConflict = (24 > 31) = false -> execution is directed to the vectorized loop.
+; The loop was vectorized with VF = 4, i.e. 32-byte memory accesses ( <4 x i64> ),
+; so the store to *%op touched memory under *%src.
+
+;CHECK: Printing analysis 'Loop Access Analysis' for function 'fastCopy'
+;CHECK: (Low: %op High: (32 + %op))
+;CHECK: (Low: %src High: (32 + %src))
+
+define void @fastCopy(i8* nocapture readonly %src, i8* nocapture %op) {
+entry:
+  br label %while.body.preheader
+
+while.body.preheader:                             ; preds = %entry
+  br label %while.body
+
+while.body:                                       ; preds = %while.body.preheader, %while.body
+  %len.addr.07 = phi i32 [ %sub, %while.body ], [ 32, %while.body.preheader ]
+  %op.addr.06 = phi i8* [ %add.ptr1, %while.body ], [ %op, %while.body.preheader ]
+  %src.addr.05 = phi i8* [ %add.ptr, %while.body ], [ %src, %while.body.preheader ]
+  %0 = bitcast i8* %src.addr.05 to i64*
+  %1 = load i64, i64* %0, align 8
+  %2 = bitcast i8* %op.addr.06 to i64*
+  store i64 %1, i64* %2, align 8
+  %add.ptr = getelementptr inbounds i8, i8* %src.addr.05, i64 8
+  %add.ptr1 = getelementptr inbounds i8, i8* %op.addr.06, i64 8
+  %sub = add nsw i32 %len.addr.07, -8
+  %cmp = icmp sgt i32 %len.addr.07, 8
+  br i1 %cmp, label %while.body, label %while.end.loopexit
+
+while.end.loopexit:                               ; preds = %while.body
+  br label %while.end
+
+while.end:                                        ; preds = %while.end.loopexit, %entry
+  ret void
+}

Modified: llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll?rev=287779&r1=287778&r2=287779&view=diff
==============================================================================
--- llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll (original)
+++ llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll Wed Nov 23 11:26:28 2016
@@ -96,15 +96,15 @@ for.end:
 ; CHECK-NEXT:       %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
 ; CHECK-NEXT:   Grouped accesses:
 ; CHECK-NEXT:    Group {{.*}}[[ZERO]]:
-; CHECK-NEXT:       (Low: %c High: (78 + %c))
+; CHECK-NEXT:       (Low: %c High: (80 + %c))
 ; CHECK-NEXT:         Member: {(2 + %c)<nsw>,+,4}
 ; CHECK-NEXT:         Member: {%c,+,4}
 ; CHECK-NEXT:     Group {{.*}}[[ONE]]:
-; CHECK-NEXT:       (Low: %a High: (40 + %a))
+; CHECK-NEXT:       (Low: %a High: (42 + %a))
 ; CHECK-NEXT:         Member: {(2 + %a)<nsw>,+,2}
 ; CHECK-NEXT:         Member: {%a,+,2}
 ; CHECK-NEXT:     Group {{.*}}[[TWO]]:
-; CHECK-NEXT:       (Low: %b High: (38 + %b))
+; CHECK-NEXT:       (Low: %b High: (40 + %b))
 ; CHECK-NEXT:         Member: {%b,+,2}
 
 define void @testg(i16* %a,
@@ -168,15 +168,15 @@ for.end:
 ; CHECK-NEXT:         %arrayidxB = getelementptr i16, i16* %b, i64 %ind
 ; CHECK-NEXT:   Grouped accesses:
 ; CHECK-NEXT:     Group {{.*}}[[ZERO]]:
-; CHECK-NEXT:       (Low: %c High: (78 + %c))
+; CHECK-NEXT:       (Low: %c High: (80 + %c))
 ; CHECK-NEXT:         Member: {(2 + %c)<nsw>,+,4}
 ; CHECK-NEXT:         Member: {%c,+,4}
 ; CHECK-NEXT:     Group {{.*}}[[ONE]]:
-; CHECK-NEXT:       (Low: %a High: (40 + %a))
+; CHECK-NEXT:       (Low: %a High: (42 + %a))
 ; CHECK-NEXT:         Member: {(2 + %a),+,2}
 ; CHECK-NEXT:         Member: {%a,+,2}
 ; CHECK-NEXT:     Group {{.*}}[[TWO]]:
-; CHECK-NEXT:       (Low: %b High: (38 + %b))
+; CHECK-NEXT:       (Low: %b High: (40 + %b))
 ; CHECK-NEXT:         Member: {%b,+,2}
 
 define void @testh(i16* %a,
@@ -247,13 +247,13 @@ for.end:
 ; CHECK-NEXT:       %arrayidxA2 = getelementptr i16, i16* %a, i64 %ind2
 ; CHECK-NEXT:   Grouped accesses:
 ; CHECK-NEXT:     Group {{.*}}[[ZERO]]:
-; CHECK-NEXT:       (Low: ((2 * %offset) + %a)<nsw> High: (9998 + (2 * %offset) + %a))
+; CHECK-NEXT:       (Low: ((2 * %offset) + %a)<nsw> High: (10000 + (2 * %offset) + %a))
 ; CHECK-NEXT:         Member: {((2 * %offset) + %a)<nsw>,+,2}<nsw><%for.body>
 ; CHECK-NEXT:     Group {{.*}}[[ONE]]:
-; CHECK-NEXT:       (Low: %a High: (9998 + %a))
+; CHECK-NEXT:       (Low: %a High: (10000 + %a))
 ; CHECK-NEXT:         Member: {%a,+,2}<%for.body>
 ; CHECK-NEXT:     Group {{.*}}[[TWO]]:
-; CHECK-NEXT:       (Low: (20000 + %a) High: (29998 + %a))
+; CHECK-NEXT:       (Low: (20000 + %a) High: (30000 + %a))
 ; CHECK-NEXT:         Member: {(20000 + %a),+,2}<%for.body>
 
 define void @testi(i16* %a,

Modified: llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll?rev=287779&r1=287778&r2=287779&view=diff
==============================================================================
--- llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll (original)
+++ llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll Wed Nov 23 11:26:28 2016
@@ -16,7 +16,7 @@ target datalayout = "e-m:e-i64:64-i128:1
 target triple = "aarch64--linux-gnueabi"
 
 ; CHECK: function 'f':
-; CHECK: (Low: (20000 + %a) High: (60000 + %a)<nsw>)
+; CHECK: (Low: (20000 + %a) High: (60004 + %a))
 
 @B = common global i32* null, align 8
 @A = common global i32* null, align 8
@@ -59,7 +59,7 @@ for.end:
 ; Here it is not obvious what the limits are, since 'step' could be negative.
 
 ; CHECK: Low: (-1 + (-1 * ((-60001 + (-1 * %a)) umax (-60001 + (40000 * %step) + (-1 * %a)))))
-; CHECK: High: ((60000 + %a)<nsw> umax (60000 + (-40000 * %step) + %a))
+; CHECK: High: (4 + ((60000 + %a)<nsw> umax (60000 + (-40000 * %step) + %a)))
 
 define void @g(i64 %step) {
 entry:

Modified: llvm/branches/release_39/test/Transforms/LoopVectorize/runtime-check-readonly.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/test/Transforms/LoopVectorize/runtime-check-readonly.ll?rev=287779&r1=287778&r2=287779&view=diff
==============================================================================
--- llvm/branches/release_39/test/Transforms/LoopVectorize/runtime-check-readonly.ll (original)
+++ llvm/branches/release_39/test/Transforms/LoopVectorize/runtime-check-readonly.ll Wed Nov 23 11:26:28 2016
@@ -8,10 +8,10 @@ target triple = "x86_64-apple-macosx10.8
 ;CHECK: br
 ;CHECK: getelementptr
 ;CHECK-DAG: getelementptr
-;CHECK-DAG: icmp uge
-;CHECK-DAG: icmp uge
-;CHECK-DAG: icmp uge
-;CHECK-DAG: icmp uge
+;CHECK-DAG: icmp ugt
+;CHECK-DAG: icmp ugt
+;CHECK-DAG: icmp ugt
+;CHECK-DAG: icmp ugt
 ;CHECK-DAG: and
 ;CHECK-DAG: and
 ;CHECK: br

Modified: llvm/branches/release_39/test/Transforms/LoopVectorize/tbaa-nodep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/test/Transforms/LoopVectorize/tbaa-nodep.ll?rev=287779&r1=287778&r2=287779&view=diff
==============================================================================
--- llvm/branches/release_39/test/Transforms/LoopVectorize/tbaa-nodep.ll (original)
+++ llvm/branches/release_39/test/Transforms/LoopVectorize/tbaa-nodep.ll Wed Nov 23 11:26:28 2016
@@ -36,7 +36,7 @@ for.end:
 ; CHECK: ret i32 0
 
 ; CHECK-NOTBAA-LABEL: @test1
-; CHECK-NOTBAA: icmp uge i32*
+; CHECK-NOTBAA: icmp ugt i32*
 
 ; CHECK-NOTBAA: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
 ; CHECK-NOTBAA: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa
@@ -70,8 +70,8 @@ for.end:
 ; required. Without TBAA, however, two checks are required.
 
 ; CHECK-LABEL: @test2
-; CHECK: icmp uge float*
-; CHECK: icmp uge float*
+; CHECK: icmp ugt float*
+; CHECK: icmp ugt float*
 ; CHECK-NOT: icmp uge i32*
 
 ; CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
@@ -80,10 +80,10 @@ for.end:
 ; CHECK: ret i32 0
 
 ; CHECK-NOTBAA-LABEL: @test2
-; CHECK-NOTBAA: icmp uge float*
-; CHECK-NOTBAA: icmp uge float*
-; CHECK-NOTBAA-DAG: icmp uge float*
-; CHECK-NOTBAA-DAG: icmp uge i32*
+; CHECK-NOTBAA: icmp ugt float*
+; CHECK-NOTBAA: icmp ugt float*
+; CHECK-NOTBAA-DAG: icmp ugt float*
+; CHECK-NOTBAA-DAG: icmp ugt i32*
 
 ; CHECK-NOTBAA: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
 ; CHECK-NOTBAA: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 4, !tbaa

Modified: llvm/branches/release_39/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll?rev=287779&r1=287778&r2=287779&view=diff
==============================================================================
--- llvm/branches/release_39/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll (original)
+++ llvm/branches/release_39/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll Wed Nov 23 11:26:28 2016
@@ -8,15 +8,15 @@
 ; CHECK-NEXT:   Loop Versioning found to be beneficial
 ;
 ; CHECK: for.body3:
-; CHECK-NEXT: %add86 = phi i32 [ %arrayidx7.promoted, %for.body3.ph ], [ %add8, %for.body3 ]
+; CHECK-NEXT: %[[induction:.*]] = phi i32 [ %arrayidx7.promoted, %for.body3.ph ], [ %add8, %for.body3 ]
 ; CHECK-NEXT: %j.113 = phi i32 [ %j.016, %for.body3.ph ], [ %inc, %for.body3 ]
 ; CHECK-NEXT: %idxprom = zext i32 %j.113 to i64
 ; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom
 ; CHECK-NEXT: store i32 %add, i32* %arrayidx, align 4, !alias.scope !6, !noalias !6
-; CHECK-NEXT: %add8 = add nsw i32 %add86, %add
+; CHECK-NEXT: %add8 = add nsw i32 %[[induction]], %add
 ; CHECK-NEXT: %inc = add nuw i32 %j.113, 1
 ; CHECK-NEXT: %cmp2 = icmp ult i32 %inc, %itr
-; CHECK-NEXT: br i1 %cmp2, label %for.body3, label %for.inc11.loopexit.loopexit5, !llvm.loop !7
+; CHECK-NEXT: br i1 %cmp2, label %for.body3, label %for.inc11.loopexit.loopexit6, !llvm.loop !7
 define i32 @foo(i32* nocapture %var1, i32* nocapture readnone %var2, i32* nocapture %var3, i32 %itr) #0 {
 entry:
   %cmp14 = icmp eq i32 %itr, 0




More information about the llvm-branch-commits mailing list