[llvm] r274737 - [LoopAccessAnalysis] Fix an integer overflow

David Majnemer via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 6 23:24:36 PDT 2016


Author: majnemer
Date: Thu Jul  7 01:24:36 2016
New Revision: 274737

URL: http://llvm.org/viewvc/llvm-project?rev=274737&view=rev
Log:
[LoopAccessAnalysis] Fix an integer overflow

We were inappropriately using 32-bit types to account for quantities
that can be far larger.

This fixes PR28443.

Added:
    llvm/trunk/test/Transforms/LoopDistribute/pr28443.ll
Modified:
    llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h
    llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp

Modified: llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h?rev=274737&r1=274736&r2=274737&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h (original)
+++ llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h Thu Jul  7 01:24:36 2016
@@ -228,7 +228,7 @@ public:
 
   /// \brief The maximum number of bytes of a vector register we can vectorize
   /// the accesses safely with.
-  unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
+  uint64_t getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
 
   /// \brief In same cases when the dependency check fails we can still
   /// vectorize the loop with a dynamic array access check.
@@ -284,7 +284,7 @@ private:
   unsigned AccessIdx;
 
   // We can access this many bytes in parallel safely.
-  unsigned MaxSafeDepDistBytes;
+  uint64_t MaxSafeDepDistBytes;
 
   /// \brief If we see a non-constant dependence distance we can still try to
   /// vectorize this loop with runtime checks.
@@ -324,7 +324,7 @@ private:
   ///
   /// \return false if we shouldn't vectorize at all or avoid larger
   /// vectorization factors by limiting MaxSafeDepDistBytes.
-  bool couldPreventStoreLoadForward(unsigned Distance, unsigned TypeByteSize);
+  bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize);
 };
 
 /// \brief Holds information about the memory runtime legality checks to verify
@@ -575,7 +575,7 @@ public:
   /// Returns true if the value V is uniform within the loop.
   bool isUniform(Value *V) const;
 
-  unsigned getMaxSafeDepDistBytes() const { return MaxSafeDepDistBytes; }
+  uint64_t getMaxSafeDepDistBytes() const { return MaxSafeDepDistBytes; }
   unsigned getNumStores() const { return NumStores; }
   unsigned getNumLoads() const { return NumLoads;}
 
@@ -672,7 +672,7 @@ private:
   unsigned NumLoads;
   unsigned NumStores;
 
-  unsigned MaxSafeDepDistBytes;
+  uint64_t MaxSafeDepDistBytes;
 
   /// \brief Cache the result of analyzeLoop.
   bool CanVecMem;
@@ -719,9 +719,9 @@ const SCEV *replaceSymbolicStrideSCEV(Pr
 /// to \p PtrToStride and therefore add further predicates to \p PSE.
 /// The \p Assume parameter indicates if we are allowed to make additional
 /// run-time assumptions.
-int getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
-                 const ValueToValueMap &StridesMap = ValueToValueMap(),
-                 bool Assume = false);
+int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
+                     const ValueToValueMap &StridesMap = ValueToValueMap(),
+                     bool Assume = false);
 
 /// \brief Returns true if the memory operations \p A and \p B are consecutive.
 /// This is a simple API that does not depend on the analysis pass. 

Modified: llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp?rev=274737&r1=274736&r2=274737&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp (original)
+++ llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp Thu Jul  7 01:24:36 2016
@@ -575,7 +575,7 @@ static bool isNoWrap(PredicatedScalarEvo
   if (PSE.getSE()->isLoopInvariant(PtrScev, L))
     return true;
 
-  int Stride = getPtrStride(PSE, Ptr, L, Strides);
+  int64_t Stride = getPtrStride(PSE, Ptr, L, Strides);
   return Stride == 1;
 }
 
@@ -866,9 +866,9 @@ static bool isNoWrapAddRec(Value *Ptr, c
 }
 
 /// \brief Check whether the access through \p Ptr has a constant stride.
-int llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
-                       const Loop *Lp, const ValueToValueMap &StridesMap,
-                       bool Assume) {
+int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
+                           const Loop *Lp, const ValueToValueMap &StridesMap,
+                           bool Assume) {
   Type *Ty = Ptr->getType();
   assert(Ty->isPointerTy() && "Unexpected non-ptr");
 
@@ -1097,8 +1097,8 @@ bool MemoryDepChecker::Dependence::isFor
   llvm_unreachable("unexpected DepType!");
 }
 
-bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
-                                                    unsigned TypeByteSize) {
+bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
+                                                    uint64_t TypeByteSize) {
   // If loads occur at a distance that is not a multiple of a feasible vector
   // factor store-load forwarding does not take place.
   // Positive dependences might cause troubles because vectorizing them might
@@ -1111,13 +1111,13 @@ bool MemoryDepChecker::couldPreventStore
 
   // After this many iterations store-to-load forwarding conflicts should not
   // cause any slowdowns.
-  const unsigned NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
+  const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
   // Maximum vector factor.
-  unsigned MaxVFWithoutSLForwardIssues = std::min(
+  uint64_t MaxVFWithoutSLForwardIssues = std::min(
       VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);
 
   // Compute the smallest VF at which the store and load would be misaligned.
-  for (unsigned VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
+  for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
        VF *= 2) {
     // If the number of vector iteration between the store and the load are
     // small we could incur conflicts.
@@ -1145,8 +1145,8 @@ bool MemoryDepChecker::couldPreventStore
 /// bytes.
 ///
 /// \returns true if they are independent.
-static bool areStridedAccessesIndependent(unsigned Distance, unsigned Stride,
-                                          unsigned TypeByteSize) {
+static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
+                                          uint64_t TypeByteSize) {
   assert(Stride > 1 && "The stride must be greater than 1");
   assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
   assert(Distance > 0 && "The distance must be non-zero");
@@ -1155,7 +1155,7 @@ static bool areStridedAccessesIndependen
   if (Distance % TypeByteSize)
     return false;
 
-  unsigned ScaledDist = Distance / TypeByteSize;
+  uint64_t ScaledDist = Distance / TypeByteSize;
 
   // No dependence if the scaled distance is not multiple of the stride.
   // E.g.
@@ -1196,8 +1196,8 @@ MemoryDepChecker::isDependent(const MemA
       BPtr->getType()->getPointerAddressSpace())
     return Dependence::Unknown;
 
-  int StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true);
-  int StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true);
+  int64_t StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true);
+  int64_t StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true);
 
   const SCEV *Src = PSE.getSCEV(APtr);
   const SCEV *Sink = PSE.getSCEV(BPtr);
@@ -1237,11 +1237,11 @@ MemoryDepChecker::isDependent(const MemA
   Type *ATy = APtr->getType()->getPointerElementType();
   Type *BTy = BPtr->getType()->getPointerElementType();
   auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
-  unsigned TypeByteSize = DL.getTypeAllocSize(ATy);
+  uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
 
   const APInt &Val = C->getAPInt();
   int64_t Distance = Val.getSExtValue();
-  unsigned Stride = std::abs(StrideAPtr);
+  uint64_t Stride = std::abs(StrideAPtr);
 
   // Attempt to prove strided accesses independent.
   if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
@@ -1315,9 +1315,9 @@ MemoryDepChecker::isDependent(const MemA
   // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4),
   // the minimum distance needed is 28, which is greater than distance. It is
   // not safe to do vectorization.
-  unsigned MinDistanceNeeded =
+  uint64_t MinDistanceNeeded =
       TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
-  if (MinDistanceNeeded > Distance) {
+  if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
     DEBUG(dbgs() << "LAA: Failure because of positive distance " << Distance
                  << '\n');
     return Dependence::Backward;
@@ -1347,7 +1347,7 @@ MemoryDepChecker::isDependent(const MemA
   // is 8, which is less than 2 and forbidden vectorization, But actually
   // both A and B could be vectorized by 2 iterations.
   MaxSafeDepDistBytes =
-      Distance < MaxSafeDepDistBytes ? Distance : MaxSafeDepDistBytes;
+      std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
 
   bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
   if (IsTrueDataDependence && EnableForwardingConflictDetection &&
@@ -1365,7 +1365,7 @@ bool MemoryDepChecker::areDepsSafe(DepCa
                                    MemAccessInfoSet &CheckDeps,
                                    const ValueToValueMap &Strides) {
 
-  MaxSafeDepDistBytes = -1U;
+  MaxSafeDepDistBytes = -1;
   while (!CheckDeps.empty()) {
     MemAccessInfo CurAccess = *CheckDeps.begin();
 
@@ -1926,7 +1926,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L,
       PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)),
       DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
       DL(&DL), TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
-      MaxSafeDepDistBytes(-1U), CanVecMem(false),
+      MaxSafeDepDistBytes(-1), CanVecMem(false),
       StoreToLoopInvariantAddress(false) {
   if (canAnalyzeLoop())
     analyzeLoop();
@@ -1935,7 +1935,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L,
 void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
   if (CanVecMem) {
     OS.indent(Depth) << "Memory dependences are safe";
-    if (MaxSafeDepDistBytes != -1U)
+    if (MaxSafeDepDistBytes != -1ULL)
       OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
          << " bytes";
     if (PtrRtChecking->Need)

Added: llvm/trunk/test/Transforms/LoopDistribute/pr28443.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopDistribute/pr28443.ll?rev=274737&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopDistribute/pr28443.ll (added)
+++ llvm/trunk/test/Transforms/LoopDistribute/pr28443.ll Thu Jul  7 01:24:36 2016
@@ -0,0 +1,36 @@
+; RUN: opt -basicaa -loop-distribute -verify-loop-info -verify-dom-info -S \
+; RUN:   < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @fn1(i64 %a, i64* %b) {
+entry:
+  br label %for.body
+
+for.body:
+  %add75.epil = phi i64 [ %add7.epil, %for.body ], [ %a, %entry ]
+  %add1.epil = add nsw i64 %add75.epil, 268435457
+  %arrayidx.epil = getelementptr inbounds i64, i64* %b, i64 %add1.epil
+  %load = load i64, i64* %arrayidx.epil, align 8
+  %add5.epil = add nsw i64 %add75.epil, 805306369
+  %arrayidx6.epil = getelementptr inbounds i64, i64* %b, i64 %add5.epil
+  store i64 %load, i64* %arrayidx6.epil, align 8
+  %add7.epil = add nsw i64 %add75.epil, 2
+  %epil.iter.cmp = icmp eq i64 %add7.epil, 0
+  br i1 %epil.iter.cmp, label %for.end, label %for.body
+
+  ; CHECK: %[[phi:.*]]  = phi i64
+  ; CHECK: %[[add1:.*]] = add nsw i64 %[[phi]], 268435457
+  ; CHECK: %[[gep1:.*]] = getelementptr inbounds i64, i64* %b, i64 %[[add1]]
+  ; CHECK: %[[load:.*]] = load i64, i64* %[[gep1]], align 8
+  ; CHECK: %[[add2:.*]] = add nsw i64 %[[phi]], 805306369
+  ; CHECK: %[[gep2:.*]] = getelementptr inbounds i64, i64* %b, i64 %[[add2]]
+  ; CHECK: store i64 %[[load]], i64* %[[gep2]], align 8
+  ; CHECK: %[[incr:.*]] = add nsw i64 %[[phi]], 2
+  ; CHECK: %[[cmp:.*]]  = icmp eq i64 %[[incr]], 0
+  ; CHECK: br i1 %[[cmp]]
+
+for.end:
+  ret void
+}




More information about the llvm-commits mailing list