[llvm] ff31020 - [OpaquePtr][LoopAccessAnalysis] Support opaque pointers
Arthur Eubanks via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 9 09:15:33 PST 2022
Author: Arthur Eubanks
Date: 2022-02-09T09:11:27-08:00
New Revision: ff31020ee651d4867c5f1910fa0ded11cf8c2b13
URL: https://github.com/llvm/llvm-project/commit/ff31020ee651d4867c5f1910fa0ded11cf8c2b13
DIFF: https://github.com/llvm/llvm-project/commit/ff31020ee651d4867c5f1910fa0ded11cf8c2b13.diff
LOG: [OpaquePtr][LoopAccessAnalysis] Support opaque pointers
Previously we relied on the pointee type to determine what type we need
to do runtime pointer access checks.
With opaque pointers, we can access a pointer with more than one type,
so now we keep track of all the types we're accessing a pointer's
memory with.
Also some other minor getPointerElementType() removals.
Reviewed By: #opaque-pointers, nikic
Differential Revision: https://reviews.llvm.org/D119047
Added:
llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types_opaque_ptr.ll
Modified:
llvm/include/llvm/Analysis/LoopAccessAnalysis.h
llvm/lib/Analysis/LoopAccessAnalysis.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 29587df5de95..3724fd49ba92 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -406,8 +406,8 @@ class RuntimePointerChecking {
/// according to the assumptions that we've made during the analysis.
/// The method might also version the pointer stride according to \p Strides,
/// and add new predicates to \p PSE.
- void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
- unsigned ASId, const ValueToValueMap &Strides,
+ void insert(Loop *Lp, Value *Ptr, Type *AccessTy, bool WritePtr,
+ unsigned DepSetId, unsigned ASId, const ValueToValueMap &Strides,
PredicatedScalarEvolution &PSE);
/// No run-time memory checking is necessary.
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 88c437d9bcf7..828877926ea8 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -189,8 +189,9 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
///
/// There is no conflict when the intervals are disjoint:
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
-void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
- unsigned DepSetId, unsigned ASId,
+void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, Type *AccessTy,
+ bool WritePtr, unsigned DepSetId,
+ unsigned ASId,
const ValueToValueMap &Strides,
PredicatedScalarEvolution &PSE) {
// Get the stride replaced scev.
@@ -227,8 +228,7 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
// Add the size of the pointed element to ScEnd.
auto &DL = Lp->getHeader()->getModule()->getDataLayout();
Type *IdxTy = DL.getIndexType(Ptr->getType());
- const SCEV *EltSizeSCEV =
- SE->getStoreSizeOfExpr(IdxTy, Ptr->getType()->getPointerElementType());
+ const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
@@ -522,19 +522,19 @@ class AccessAnalysis {
: TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), PSE(PSE) {}
/// Register a load and whether it is only read from.
- void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
+ void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
Value *Ptr = const_cast<Value*>(Loc.Ptr);
AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
- Accesses.insert(MemAccessInfo(Ptr, false));
+ Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
if (IsReadOnly)
ReadOnlyPtr.insert(Ptr);
}
/// Register a store.
- void addStore(MemoryLocation &Loc) {
+ void addStore(MemoryLocation &Loc, Type *AccessTy) {
Value *Ptr = const_cast<Value*>(Loc.Ptr);
AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
- Accesses.insert(MemAccessInfo(Ptr, true));
+ Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
}
/// Check if we can emit a run-time no-alias check for \p Access.
@@ -545,12 +545,11 @@ class AccessAnalysis {
/// we will attempt to use additional run-time checks in order to get
/// the bounds of the pointer.
bool createCheckForAccess(RuntimePointerChecking &RtCheck,
- MemAccessInfo Access,
+ MemAccessInfo Access, Type *AccessTy,
const ValueToValueMap &Strides,
DenseMap<Value *, unsigned> &DepSetId,
Loop *TheLoop, unsigned &RunningDepId,
- unsigned ASId, bool ShouldCheckStride,
- bool Assume);
+ unsigned ASId, bool ShouldCheckStride, bool Assume);
/// Check whether we can check the pointers at runtime for
/// non-intersection.
@@ -583,14 +582,15 @@ class AccessAnalysis {
MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }
private:
- typedef SetVector<MemAccessInfo> PtrAccessSet;
+ typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;
/// Go over all memory access and check whether runtime pointer checks
/// are needed and build sets of dependency check candidates.
void processMemAccesses();
- /// Set of all accesses.
- PtrAccessSet Accesses;
+ /// Map of all accesses. Values are the types used to access memory pointed to
+ /// by the pointer.
+ PtrAccessMap Accesses;
/// The loop being checked.
const Loop *TheLoop;
@@ -652,12 +652,12 @@ static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
/// Check whether a pointer address cannot wrap.
static bool isNoWrap(PredicatedScalarEvolution &PSE,
- const ValueToValueMap &Strides, Value *Ptr, Loop *L) {
+ const ValueToValueMap &Strides, Value *Ptr, Type *AccessTy,
+ Loop *L) {
const SCEV *PtrScev = PSE.getSCEV(Ptr);
if (PSE.getSE()->isLoopInvariant(PtrScev, L))
return true;
- Type *AccessTy = Ptr->getType()->getPointerElementType();
int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, L, Strides);
if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
return true;
@@ -689,7 +689,7 @@ static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
}
bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
- MemAccessInfo Access,
+ MemAccessInfo Access, Type *AccessTy,
const ValueToValueMap &StridesMap,
DenseMap<Value *, unsigned> &DepSetId,
Loop *TheLoop, unsigned &RunningDepId,
@@ -702,7 +702,7 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
// When we run after a failing dependency check we have to make sure
// we don't have wrapping pointers.
- if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
+ if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, AccessTy, TheLoop)) {
auto *Expr = PSE.getSCEV(Ptr);
if (!Assume || !isa<SCEVAddRecExpr>(Expr))
return false;
@@ -723,11 +723,11 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
DepId = RunningDepId++;
bool IsWrite = Access.getInt();
- RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
+ RtCheck.insert(TheLoop, Ptr, AccessTy, IsWrite, DepId, ASId, StridesMap, PSE);
LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
return true;
- }
+}
bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
ScalarEvolution *SE, Loop *TheLoop,
@@ -788,12 +788,15 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
}
for (auto &Access : AccessInfos) {
- if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
- RunningDepId, ASId, ShouldCheckWrap, false)) {
- LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
- << *Access.getPointer() << '\n');
- Retries.push_back(Access);
- CanDoAliasSetRT = false;
+ for (auto &AccessTy : Accesses[Access]) {
+ if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
+ DepSetId, TheLoop, RunningDepId, ASId,
+ ShouldCheckWrap, false)) {
+ LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
+ << *Access.getPointer() << '\n');
+ Retries.push_back(Access);
+ CanDoAliasSetRT = false;
+ }
}
}
@@ -815,13 +818,16 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
// We know that we need these checks, so we can now be more aggressive
// and add further checks if required (overflow checks).
CanDoAliasSetRT = true;
- for (auto Access : Retries)
- if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId,
- TheLoop, RunningDepId, ASId,
- ShouldCheckWrap, /*Assume=*/true)) {
- CanDoAliasSetRT = false;
- break;
+ for (auto Access : Retries) {
+ for (auto &AccessTy : Accesses[Access]) {
+ if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
+ DepSetId, TheLoop, RunningDepId, ASId,
+ ShouldCheckWrap, /*Assume=*/true)) {
+ CanDoAliasSetRT = false;
+ break;
+ }
}
+ }
}
CanDoRT &= CanDoAliasSetRT;
@@ -886,9 +892,12 @@ void AccessAnalysis::processMemAccesses() {
LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
LLVM_DEBUG({
for (auto A : Accesses)
- dbgs() << "\t" << *A.getPointer() << " (" <<
- (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
- "read-only" : "read")) << ")\n";
+ dbgs() << "\t" << *A.first.getPointer() << " ("
+ << (A.first.getInt()
+ ? "write"
+ : (ReadOnlyPtr.count(A.first.getPointer()) ? "read-only"
+ : "read"))
+ << ")\n";
});
// The AliasSetTracker has nicely partitioned our pointers by metadata
@@ -907,13 +916,13 @@ void AccessAnalysis::processMemAccesses() {
UnderlyingObjToAccessMap ObjToLastAccess;
// Set of access to check after all writes have been processed.
- PtrAccessSet DeferredAccesses;
+ PtrAccessMap DeferredAccesses;
// Iterate over each alias set twice, once to process read/write pointers,
// and then to process read-only pointers.
for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
bool UseDeferred = SetIteration > 0;
- PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
+ PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses;
for (const auto &AV : AS) {
Value *Ptr = AV.getValue();
@@ -921,10 +930,10 @@ void AccessAnalysis::processMemAccesses() {
// For a single memory access in AliasSetTracker, Accesses may contain
// both read and write, and they both need to be handled for CheckDeps.
for (const auto &AC : S) {
- if (AC.getPointer() != Ptr)
+ if (AC.first.getPointer() != Ptr)
continue;
- bool IsWrite = AC.getInt();
+ bool IsWrite = AC.first.getInt();
// If we're using the deferred access set, then it contains only
// reads.
@@ -946,7 +955,9 @@ void AccessAnalysis::processMemAccesses() {
// consecutive as "read-only" pointers (so that we check
// "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
if (!UseDeferred && IsReadOnlyPtr) {
- DeferredAccesses.insert(Access);
+ // We only use the pointer keys, the types vector values don't
+ // matter.
+ DeferredAccesses.insert({Access, {}});
continue;
}
@@ -1518,8 +1529,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
Value *BPtr = B.getPointer();
bool AIsWrite = A.getInt();
bool BIsWrite = B.getInt();
- Type *ATy = APtr->getType()->getPointerElementType();
- Type *BTy = BPtr->getType()->getPointerElementType();
+ Type *ATy = getLoadStoreType(InstMap[AIdx]);
+ Type *BTy = getLoadStoreType(InstMap[BIdx]);
// Two reads are independent.
if (!AIsWrite && !BIsWrite)
@@ -1842,8 +1853,6 @@ bool LoopAccessInfo::canAnalyzeLoop() {
void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
const TargetLibraryInfo *TLI,
DominatorTree *DT) {
- typedef SmallPtrSet<Value*, 16> ValueSet;
-
// Holds the Load and Store instructions.
SmallVector<LoadInst *, 16> Loads;
SmallVector<StoreInst *, 16> Stores;
@@ -1975,11 +1984,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
// for read and once for write, it will only appear once (on the write
// list). This is okay, since we are going to check for conflicts between
// writes and between reads and writes, but not between reads and reads.
- ValueSet Seen;
+ SmallSet<std::pair<Value *, Type *>, 16> Seen;
// Record uniform store addresses to identify if we have multiple stores
// to the same address.
- ValueSet UniformStores;
+ SmallPtrSet<Value *, 16> UniformStores;
for (StoreInst *ST : Stores) {
Value *Ptr = ST->getPointerOperand();
@@ -1990,7 +1999,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
// If we did *not* see this pointer before, insert it to the read-write
// list. At this phase it is only a 'write' list.
- if (Seen.insert(Ptr).second) {
+ Type *AccessTy = getLoadStoreType(ST);
+ if (Seen.insert({Ptr, AccessTy}).second) {
++NumReadWrites;
MemoryLocation Loc = MemoryLocation::get(ST);
@@ -2001,9 +2011,9 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
Loc.AATags.TBAA = nullptr;
visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
- [&Accesses, Loc](Value *Ptr) {
+ [&Accesses, AccessTy, Loc](Value *Ptr) {
MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
- Accesses.addStore(NewLoc);
+ Accesses.addStore(NewLoc, AccessTy);
});
}
}
@@ -2027,7 +2037,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
// read a few words, modify, and write a few words, and some of the
// words may be written to the same address.
bool IsReadOnlyPtr = false;
- if (Seen.insert(Ptr).second ||
+ Type *AccessTy = getLoadStoreType(LD);
+ if (Seen.insert({Ptr, AccessTy}).second ||
!getPtrStride(*PSE, LD->getType(), Ptr, TheLoop, SymbolicStrides)) {
++NumReads;
IsReadOnlyPtr = true;
@@ -2049,9 +2060,9 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
Loc.AATags.TBAA = nullptr;
visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
- [&Accesses, Loc, IsReadOnlyPtr](Value *Ptr) {
+ [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {
MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
- Accesses.addLoad(NewLoc, IsReadOnlyPtr);
+ Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr);
});
}
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types_opaque_ptr.ll b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types_opaque_ptr.ll
new file mode 100644
index 000000000000..f09433069ad2
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types_opaque_ptr.ll
@@ -0,0 +1,179 @@
+; RUN: opt -S -disable-output --opaque-pointers -passes='print-access-info' < %s 2>&1 | FileCheck %s
+
+; In the function below some of the accesses are done as float types and some
+; are done as i32 types. When doing dependence analysis the type should not
+; matter if it can be determined that they are the same size.
+
+%int_pair = type { i32, i32 }
+
+; CHECK-LABEL: function 'backdep_type_size_equivalence':
+; CHECK-NEXT: loop:
+; CHECK-NEXT: Memory dependences are safe with a maximum dependence distance of 800 bytes
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Forward:
+; CHECK-NEXT: %ld.f32 = load float, ptr %gep.iv, align 8 ->
+; CHECK-NEXT: store i32 %indvars.iv.i32, ptr %gep.iv, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT: Forward:
+; CHECK-NEXT: %ld.f32 = load float, ptr %gep.iv, align 8 ->
+; CHECK-NEXT: store float %val, ptr %gep.iv.min.100, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT: BackwardVectorizable:
+; CHECK-NEXT: store float %val, ptr %gep.iv.min.100, align 8 ->
+; CHECK-NEXT: store i32 %indvars.iv.i32, ptr %gep.iv, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+
+define void @backdep_type_size_equivalence(ptr nocapture %vec, i64 %n) {
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ]
+
+ ;; Load from vec[indvars.iv].x as float
+ %gep.iv = getelementptr inbounds %int_pair, ptr %vec, i64 %indvars.iv, i32 0
+ %ld.f32 = load float, ptr %gep.iv, align 8
+ %val = fmul fast float %ld.f32, 5.0
+
+ ;; Store to vec[indvars.iv - 100].x as float
+ %indvars.iv.min.100 = add nsw i64 %indvars.iv, -100
+ %gep.iv.min.100 = getelementptr inbounds %int_pair, ptr %vec, i64 %indvars.iv.min.100, i32 0
+ store float %val, ptr %gep.iv.min.100, align 8
+
+ ;; Store to vec[indvars.iv].x as i32, creating a backward dependency between
+;; the two stores with different element types but the same element size.
+ %indvars.iv.i32 = trunc i64 %indvars.iv to i32
+ store i32 %indvars.iv.i32, ptr %gep.iv, align 8
+
+ ;; Store to vec[indvars.iv].y as i32, strided accesses should be independent
+;; between the two stores with different element types but the same element size.
+ %gep.iv.1 = getelementptr inbounds %int_pair, ptr %vec, i64 %indvars.iv, i32 1
+ store i32 %indvars.iv.i32, ptr %gep.iv.1, align 8
+
+ ;; Store to vec[indvars.iv + n].y as i32, to verify no dependence in the case
+ ;; of unknown dependence distance.
+ %indvars.iv.n = add nuw nsw i64 %indvars.iv, %n
+ %gep.iv.n = getelementptr inbounds %int_pair, ptr %vec, i64 %indvars.iv.n, i32 1
+ store i32 %indvars.iv.i32, ptr %gep.iv.n, align 8
+
+ ;; Loop condition.
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %cond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %cond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; In the function below one of the accesses is done as i19 type, which has a
+; different store size than the i32 type, even though their alloc sizes are
+; equivalent. This is a negative test to ensure that they are not analyzed as
+; in the tests above.
+;
+; CHECK-LABEL: function 'backdep_type_store_size_equivalence':
+; CHECK-NEXT: loop:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop.
+; CHECK-NEXT: Unknown data dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Unknown:
+; CHECK-NEXT: %ld.f32 = load float, ptr %gep.iv, align 8 ->
+; CHECK-NEXT: store i19 %indvars.iv.i19, ptr %gep.iv, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+
+define void @backdep_type_store_size_equivalence(ptr nocapture %vec, i64 %n) {
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ]
+
+ ;; Load from vec[indvars.iv].x as float
+ %gep.iv = getelementptr inbounds %int_pair, ptr %vec, i64 %indvars.iv, i32 0
+ %ld.f32 = load float, ptr %gep.iv, align 8
+ %val = fmul fast float %ld.f32, 5.0
+
+ ;; Store to vec[indvars.iv].x as i19.
+ %indvars.iv.i19 = trunc i64 %indvars.iv to i19
+ store i19 %indvars.iv.i19, ptr %gep.iv, align 8
+
+ ;; Loop condition.
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %cond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %cond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; In the function below some of the accesses are done as double types and some
+; are done as i64 and i32 types. This is a negative test to ensure that they
+; are not analyzed as in the tests above.
+
+; CHECK-LABEL: function 'neg_dist_dep_type_size_equivalence':
+; CHECK-NEXT: loop:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop.
+; CHECK-NEXT: Unknown data dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Unknown:
+; CHECK-NEXT: %ld.f64 = load double, ptr %gep.iv, align 8 ->
+; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT: Unknown:
+; CHECK-NEXT: %ld.i64 = load i64, ptr %gep.iv, align 8 ->
+; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT: BackwardVectorizableButPreventsForwarding:
+; CHECK-NEXT: %ld.f64 = load double, ptr %gep.iv, align 8 ->
+; CHECK-NEXT: store double %val, ptr %gep.iv.101, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT: ForwardButPreventsForwarding:
+; CHECK-NEXT: store double %val, ptr %gep.iv.101, align 8 ->
+; CHECK-NEXT: %ld.i64 = load i64, ptr %gep.iv, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT: Unknown:
+; CHECK-NEXT: store double %val, ptr %gep.iv.101, align 8 ->
+; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+
+define void @neg_dist_dep_type_size_equivalence(ptr nocapture %vec, i64 %n) {
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ]
+
+ ;; Load from vec[indvars.iv] as double
+ %gep.iv = getelementptr i64, ptr %vec, i64 %indvars.iv
+ %ld.f64 = load double, ptr %gep.iv, align 8
+ %val = fmul fast double %ld.f64, 5.0
+
+ ;; Store to vec[indvars.iv + 101] as double
+ %indvars.iv.101 = add nsw i64 %indvars.iv, 101
+ %gep.iv.101 = getelementptr i64, ptr %vec, i64 %indvars.iv.101
+ store double %val, ptr %gep.iv.101, align 8
+
+ ;; Read from vec[indvars.iv] as i64 creating
+ ;; a forward but prevents forwarding dependence
+;; with different types but same sizes.
+ %ld.i64 = load i64, ptr %gep.iv, align 8
+
+ ;; Different sizes
+ %indvars.iv.n = add nuw nsw i64 %indvars.iv, %n
+ %gep.iv.n = getelementptr inbounds i64, ptr %vec, i64 %indvars.iv.n
+ %ld.i64.i32 = trunc i64 %ld.i64 to i32
+ store i32 %ld.i64.i32, ptr %gep.iv.n, align 8
+
+ ;; Loop condition.
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %cond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %cond, label %exit, label %loop
+
+exit:
+ ret void
+}
More information about the llvm-commits
mailing list