[llvm-branch-commits] [llvm-branch] r287779 - Merging r279930:
Tom Stellard via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Nov 23 09:26:29 PST 2016
Author: tstellar
Date: Wed Nov 23 11:26:28 2016
New Revision: 287779
URL: http://llvm.org/viewvc/llvm-project?rev=287779&view=rev
Log:
Merging r279930:
------------------------------------------------------------------------
r279930 | elena.demikhovsky | 2016-08-28 01:53:53 -0700 (Sun, 28 Aug 2016) | 7 lines
[Loop Vectorizer] Fixed memory conflict checks.
Fixed a bug in the run-time checks for possible memory conflicts inside a loop.
The bug is in Low <-> High boundaries calculation. The High boundary
should be calculated as "last memory access pointer + element size".
Differential revision: https://reviews.llvm.org/D23176
------------------------------------------------------------------------
Added:
llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/memcheck-off-by-one-error.ll
Modified:
llvm/branches/release_39/include/llvm/Analysis/LoopAccessAnalysis.h
llvm/branches/release_39/lib/Analysis/LoopAccessAnalysis.cpp
llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll
llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll
llvm/branches/release_39/test/Transforms/LoopVectorize/runtime-check-readonly.ll
llvm/branches/release_39/test/Transforms/LoopVectorize/tbaa-nodep.ll
llvm/branches/release_39/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll
Modified: llvm/branches/release_39/include/llvm/Analysis/LoopAccessAnalysis.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/include/llvm/Analysis/LoopAccessAnalysis.h?rev=287779&r1=287778&r2=287779&view=diff
==============================================================================
--- llvm/branches/release_39/include/llvm/Analysis/LoopAccessAnalysis.h (original)
+++ llvm/branches/release_39/include/llvm/Analysis/LoopAccessAnalysis.h Wed Nov 23 11:26:28 2016
@@ -334,9 +334,11 @@ public:
struct PointerInfo {
/// Holds the pointer value that we need to check.
TrackingVH<Value> PointerValue;
- /// Holds the pointer value at the beginning of the loop.
+ /// Holds the smallest byte address accessed by the pointer throughout all
+ /// iterations of the loop.
const SCEV *Start;
- /// Holds the pointer value at the end of the loop.
+ /// Holds the largest byte address accessed by the pointer throughout all
+ /// iterations of the loop, plus 1.
const SCEV *End;
/// Holds the information if this pointer is used for writing to memory.
bool IsWritePtr;
Modified: llvm/branches/release_39/lib/Analysis/LoopAccessAnalysis.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/lib/Analysis/LoopAccessAnalysis.cpp?rev=287779&r1=287778&r2=287779&view=diff
==============================================================================
--- llvm/branches/release_39/lib/Analysis/LoopAccessAnalysis.cpp (original)
+++ llvm/branches/release_39/lib/Analysis/LoopAccessAnalysis.cpp Wed Nov 23 11:26:28 2016
@@ -148,6 +148,19 @@ const SCEV *llvm::replaceSymbolicStrideS
return OrigSCEV;
}
+/// Calculate Start and End points of memory access.
+/// Let's assume A is the first access and B is a memory access on N-th loop
+/// iteration. Then B is calculated as:
+/// B = A + Step * N.
+/// Step value may be positive or negative.
+/// N is a calculated back-edge taken count:
+/// N = (TripCount > 0) ? RoundDown(TripCount - 1, VF) : 0
+/// Start and End points are calculated in the following way:
+/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
+/// where SizeOfElt is the size of single memory access in bytes.
+///
+/// There is no conflict when the intervals are disjoint:
+/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
unsigned DepSetId, unsigned ASId,
const ValueToValueMap &Strides,
@@ -176,12 +189,17 @@ void RuntimePointerChecking::insert(Loop
if (CStep->getValue()->isNegative())
std::swap(ScStart, ScEnd);
} else {
- // Fallback case: the step is not constant, but the we can still
+ // Fallback case: the step is not constant, but we can still
// get the upper and lower bounds of the interval by using min/max
// expressions.
ScStart = SE->getUMinExpr(ScStart, ScEnd);
ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
}
+ // Add the size of the pointed element to ScEnd.
+ unsigned EltSize =
+ Ptr->getType()->getPointerElementType()->getScalarSizeInBits() / 8;
+ const SCEV *EltSizeSCEV = SE->getConstant(ScEnd->getType(), EltSize);
+ ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
}
Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
@@ -1863,9 +1881,17 @@ std::pair<Instruction *, Instruction *>
Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc");
Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc");
- Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
+ // [A|B].Start points to the first accessed byte under base [A|B].
+ // [A|B].End points to the last accessed byte, plus one.
+ // There is no conflict when the intervals are disjoint:
+ // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
+ //
+ // bound0 = (B.Start < A.End)
+ // bound1 = (A.Start < B.End)
+ // IsConflict = bound0 & bound1
+ Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0");
FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
- Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
+ Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1");
FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
Added: llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/memcheck-off-by-one-error.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/memcheck-off-by-one-error.ll?rev=287779&view=auto
==============================================================================
--- llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/memcheck-off-by-one-error.ll (added)
+++ llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/memcheck-off-by-one-error.ll Wed Nov 23 11:26:28 2016
@@ -0,0 +1,51 @@
+; RUN: opt -analyze --loop-accesses %s | FileCheck %s
+
+; This test verifies run-time boundary check of memory accesses.
+; The original loop:
+; void fastCopy(const char* src, char* op) {
+; int len = 32;
+; while (len > 0) {
+; *(reinterpret_cast<long long*>(op)) = *(reinterpret_cast<const long long*>(src));
+; src += 8;
+; op += 8;
+; len -= 8;
+; }
+; }
+; Boundary calculation before this patch:
+; (Low: %src High: (24 + %src))
+; while the actual distance between the two pointers was 31 (%op - %src = 31).
+; IsConflict = (24 > 31) = false -> execution is directed to the vectorized loop.
+; The loop was vectorized with a vector width of 4, i.e. a 32-byte memory access (<4 x i64>),
+; so the store of a value at *%op touched memory under *%src.
+
+;CHECK: Printing analysis 'Loop Access Analysis' for function 'fastCopy'
+;CHECK: (Low: %op High: (32 + %op))
+;CHECK: (Low: %src High: (32 + %src))
+
+define void @fastCopy(i8* nocapture readonly %src, i8* nocapture %op) {
+entry:
+ br label %while.body.preheader
+
+while.body.preheader: ; preds = %entry
+ br label %while.body
+
+while.body: ; preds = %while.body.preheader, %while.body
+ %len.addr.07 = phi i32 [ %sub, %while.body ], [ 32, %while.body.preheader ]
+ %op.addr.06 = phi i8* [ %add.ptr1, %while.body ], [ %op, %while.body.preheader ]
+ %src.addr.05 = phi i8* [ %add.ptr, %while.body ], [ %src, %while.body.preheader ]
+ %0 = bitcast i8* %src.addr.05 to i64*
+ %1 = load i64, i64* %0, align 8
+ %2 = bitcast i8* %op.addr.06 to i64*
+ store i64 %1, i64* %2, align 8
+ %add.ptr = getelementptr inbounds i8, i8* %src.addr.05, i64 8
+ %add.ptr1 = getelementptr inbounds i8, i8* %op.addr.06, i64 8
+ %sub = add nsw i32 %len.addr.07, -8
+ %cmp = icmp sgt i32 %len.addr.07, 8
+ br i1 %cmp, label %while.body, label %while.end.loopexit
+
+while.end.loopexit: ; preds = %while.body
+ br label %while.end
+
+while.end: ; preds = %while.end.loopexit, %entry
+ ret void
+}
Modified: llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll?rev=287779&r1=287778&r2=287779&view=diff
==============================================================================
--- llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll (original)
+++ llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll Wed Nov 23 11:26:28 2016
@@ -96,15 +96,15 @@ for.end:
; CHECK-NEXT: %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group {{.*}}[[ZERO]]:
-; CHECK-NEXT: (Low: %c High: (78 + %c))
+; CHECK-NEXT: (Low: %c High: (80 + %c))
; CHECK-NEXT: Member: {(2 + %c)<nsw>,+,4}
; CHECK-NEXT: Member: {%c,+,4}
; CHECK-NEXT: Group {{.*}}[[ONE]]:
-; CHECK-NEXT: (Low: %a High: (40 + %a))
+; CHECK-NEXT: (Low: %a High: (42 + %a))
; CHECK-NEXT: Member: {(2 + %a)<nsw>,+,2}
; CHECK-NEXT: Member: {%a,+,2}
; CHECK-NEXT: Group {{.*}}[[TWO]]:
-; CHECK-NEXT: (Low: %b High: (38 + %b))
+; CHECK-NEXT: (Low: %b High: (40 + %b))
; CHECK-NEXT: Member: {%b,+,2}
define void @testg(i16* %a,
@@ -168,15 +168,15 @@ for.end:
; CHECK-NEXT: %arrayidxB = getelementptr i16, i16* %b, i64 %ind
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group {{.*}}[[ZERO]]:
-; CHECK-NEXT: (Low: %c High: (78 + %c))
+; CHECK-NEXT: (Low: %c High: (80 + %c))
; CHECK-NEXT: Member: {(2 + %c)<nsw>,+,4}
; CHECK-NEXT: Member: {%c,+,4}
; CHECK-NEXT: Group {{.*}}[[ONE]]:
-; CHECK-NEXT: (Low: %a High: (40 + %a))
+; CHECK-NEXT: (Low: %a High: (42 + %a))
; CHECK-NEXT: Member: {(2 + %a),+,2}
; CHECK-NEXT: Member: {%a,+,2}
; CHECK-NEXT: Group {{.*}}[[TWO]]:
-; CHECK-NEXT: (Low: %b High: (38 + %b))
+; CHECK-NEXT: (Low: %b High: (40 + %b))
; CHECK-NEXT: Member: {%b,+,2}
define void @testh(i16* %a,
@@ -247,13 +247,13 @@ for.end:
; CHECK-NEXT: %arrayidxA2 = getelementptr i16, i16* %a, i64 %ind2
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group {{.*}}[[ZERO]]:
-; CHECK-NEXT: (Low: ((2 * %offset) + %a)<nsw> High: (9998 + (2 * %offset) + %a))
+; CHECK-NEXT: (Low: ((2 * %offset) + %a)<nsw> High: (10000 + (2 * %offset) + %a))
; CHECK-NEXT: Member: {((2 * %offset) + %a)<nsw>,+,2}<nsw><%for.body>
; CHECK-NEXT: Group {{.*}}[[ONE]]:
-; CHECK-NEXT: (Low: %a High: (9998 + %a))
+; CHECK-NEXT: (Low: %a High: (10000 + %a))
; CHECK-NEXT: Member: {%a,+,2}<%for.body>
; CHECK-NEXT: Group {{.*}}[[TWO]]:
-; CHECK-NEXT: (Low: (20000 + %a) High: (29998 + %a))
+; CHECK-NEXT: (Low: (20000 + %a) High: (30000 + %a))
; CHECK-NEXT: Member: {(20000 + %a),+,2}<%for.body>
define void @testi(i16* %a,
Modified: llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll?rev=287779&r1=287778&r2=287779&view=diff
==============================================================================
--- llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll (original)
+++ llvm/branches/release_39/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll Wed Nov 23 11:26:28 2016
@@ -16,7 +16,7 @@ target datalayout = "e-m:e-i64:64-i128:1
target triple = "aarch64--linux-gnueabi"
; CHECK: function 'f':
-; CHECK: (Low: (20000 + %a) High: (60000 + %a)<nsw>)
+; CHECK: (Low: (20000 + %a) High: (60004 + %a))
@B = common global i32* null, align 8
@A = common global i32* null, align 8
@@ -59,7 +59,7 @@ for.end:
; Here it is not obvious what the limits are, since 'step' could be negative.
; CHECK: Low: (-1 + (-1 * ((-60001 + (-1 * %a)) umax (-60001 + (40000 * %step) + (-1 * %a)))))
-; CHECK: High: ((60000 + %a)<nsw> umax (60000 + (-40000 * %step) + %a))
+; CHECK: High: (4 + ((60000 + %a)<nsw> umax (60000 + (-40000 * %step) + %a)))
define void @g(i64 %step) {
entry:
Modified: llvm/branches/release_39/test/Transforms/LoopVectorize/runtime-check-readonly.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/test/Transforms/LoopVectorize/runtime-check-readonly.ll?rev=287779&r1=287778&r2=287779&view=diff
==============================================================================
--- llvm/branches/release_39/test/Transforms/LoopVectorize/runtime-check-readonly.ll (original)
+++ llvm/branches/release_39/test/Transforms/LoopVectorize/runtime-check-readonly.ll Wed Nov 23 11:26:28 2016
@@ -8,10 +8,10 @@ target triple = "x86_64-apple-macosx10.8
;CHECK: br
;CHECK: getelementptr
;CHECK-DAG: getelementptr
-;CHECK-DAG: icmp uge
-;CHECK-DAG: icmp uge
-;CHECK-DAG: icmp uge
-;CHECK-DAG: icmp uge
+;CHECK-DAG: icmp ugt
+;CHECK-DAG: icmp ugt
+;CHECK-DAG: icmp ugt
+;CHECK-DAG: icmp ugt
;CHECK-DAG: and
;CHECK-DAG: and
;CHECK: br
Modified: llvm/branches/release_39/test/Transforms/LoopVectorize/tbaa-nodep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/test/Transforms/LoopVectorize/tbaa-nodep.ll?rev=287779&r1=287778&r2=287779&view=diff
==============================================================================
--- llvm/branches/release_39/test/Transforms/LoopVectorize/tbaa-nodep.ll (original)
+++ llvm/branches/release_39/test/Transforms/LoopVectorize/tbaa-nodep.ll Wed Nov 23 11:26:28 2016
@@ -36,7 +36,7 @@ for.end:
; CHECK: ret i32 0
; CHECK-NOTBAA-LABEL: @test1
-; CHECK-NOTBAA: icmp uge i32*
+; CHECK-NOTBAA: icmp ugt i32*
; CHECK-NOTBAA: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
; CHECK-NOTBAA: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa
@@ -70,8 +70,8 @@ for.end:
; required. Without TBAA, however, two checks are required.
; CHECK-LABEL: @test2
-; CHECK: icmp uge float*
-; CHECK: icmp uge float*
+; CHECK: icmp ugt float*
+; CHECK: icmp ugt float*
; CHECK-NOT: icmp uge i32*
; CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
@@ -80,10 +80,10 @@ for.end:
; CHECK: ret i32 0
; CHECK-NOTBAA-LABEL: @test2
-; CHECK-NOTBAA: icmp uge float*
-; CHECK-NOTBAA: icmp uge float*
-; CHECK-NOTBAA-DAG: icmp uge float*
-; CHECK-NOTBAA-DAG: icmp uge i32*
+; CHECK-NOTBAA: icmp ugt float*
+; CHECK-NOTBAA: icmp ugt float*
+; CHECK-NOTBAA-DAG: icmp ugt float*
+; CHECK-NOTBAA-DAG: icmp ugt i32*
; CHECK-NOTBAA: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
; CHECK-NOTBAA: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 4, !tbaa
Modified: llvm/branches/release_39/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll?rev=287779&r1=287778&r2=287779&view=diff
==============================================================================
--- llvm/branches/release_39/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll (original)
+++ llvm/branches/release_39/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll Wed Nov 23 11:26:28 2016
@@ -8,15 +8,15 @@
; CHECK-NEXT: Loop Versioning found to be beneficial
;
; CHECK: for.body3:
-; CHECK-NEXT: %add86 = phi i32 [ %arrayidx7.promoted, %for.body3.ph ], [ %add8, %for.body3 ]
+; CHECK-NEXT: %[[induction:.*]] = phi i32 [ %arrayidx7.promoted, %for.body3.ph ], [ %add8, %for.body3 ]
; CHECK-NEXT: %j.113 = phi i32 [ %j.016, %for.body3.ph ], [ %inc, %for.body3 ]
; CHECK-NEXT: %idxprom = zext i32 %j.113 to i64
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom
; CHECK-NEXT: store i32 %add, i32* %arrayidx, align 4, !alias.scope !6, !noalias !6
-; CHECK-NEXT: %add8 = add nsw i32 %add86, %add
+; CHECK-NEXT: %add8 = add nsw i32 %[[induction]], %add
; CHECK-NEXT: %inc = add nuw i32 %j.113, 1
; CHECK-NEXT: %cmp2 = icmp ult i32 %inc, %itr
-; CHECK-NEXT: br i1 %cmp2, label %for.body3, label %for.inc11.loopexit.loopexit5, !llvm.loop !7
+; CHECK-NEXT: br i1 %cmp2, label %for.body3, label %for.inc11.loopexit.loopexit6, !llvm.loop !7
define i32 @foo(i32* nocapture %var1, i32* nocapture readnone %var2, i32* nocapture %var3, i32 %itr) #0 {
entry:
%cmp14 = icmp eq i32 %itr, 0
More information about the llvm-branch-commits
mailing list