[llvm] r225159 - Fixed a bug in memory dependence checking module of loop vectorization. The following loop should not be vectorized with current algorithm.

Jiangning Liu jiangning.liu at arm.com
Mon Jan 5 02:08:59 PST 2015


Author: jiangning
Date: Mon Jan  5 04:08:58 2015
New Revision: 225159

URL: http://llvm.org/viewvc/llvm-project?rev=225159&view=rev
Log:
Fixed a bug in memory dependence checking module of loop vectorization. The following loop should not be vectorized with current algorithm.

{code}
// loop body
   ... = a[i]          (1)
    ... = a[i+1]       (2)
 .......
a[i+1] = ....          (3)
   a[i] = ...          (4)
{code}

The algorithm tries to collect memory access candidates from AliasSetTracker, and then check memory dependences one another. The memory accesses are unique in AliasSetTracker, and a single memory access in AliasSetTracker may map to multiple entries in AccessAnalysis, which could cover both 'read' and 'write'. Originally the algorithm only checked 'write' entry in Accesses if only 'write' exists. This is incorrect and the consequence is it ignored all read access, and finally some RAW and WAR dependence are missed.

For the case given above, if we ignore two reads, the dependence between (1) and (3) would not be able to be captured, and finally this loop will be incorrectly vectorized.

The fix simply inserts a new loop to find all entries in Accesses. Since it will skip most of all other memory accesses by checking the Value pointer at the very beginning of the loop, it should not increase compile-time visibly.


Added:
    llvm/trunk/test/Transforms/LoopVectorize/loop-vect-memdep.ll
Modified:
    llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp

Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=225159&r1=225158&r2=225159&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Mon Jan  5 04:08:58 2015
@@ -4268,57 +4268,66 @@ void AccessAnalysis::processMemAccesses(
       bool UseDeferred = SetIteration > 0;
       PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
 
-      for (auto A : AS) {
-        Value *Ptr = A.getValue();
-        bool IsWrite = S.count(MemAccessInfo(Ptr, true));
-
-        // If we're using the deferred access set, then it contains only reads.
-        bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
-        if (UseDeferred && !IsReadOnlyPtr)
-          continue;
-        // Otherwise, the pointer must be in the PtrAccessSet, either as a read
-        // or a write.
-        assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
-                 S.count(MemAccessInfo(Ptr, false))) &&
-               "Alias-set pointer not in the access set?");
-
-        MemAccessInfo Access(Ptr, IsWrite);
-        DepCands.insert(Access);
-
-        // Memorize read-only pointers for later processing and skip them in the
-        // first round (they need to be checked after we have seen all write
-        // pointers). Note: we also mark pointer that are not consecutive as
-        // "read-only" pointers (so that we check "a[b[i]] +="). Hence, we need
-        // the second check for "!IsWrite".
-        if (!UseDeferred && IsReadOnlyPtr) {
-          DeferredAccesses.insert(Access);
-          continue;
-        }
-
-        // If this is a write - check other reads and writes for conflicts.  If
-        // this is a read only check other writes for conflicts (but only if
-        // there is no other write to the ptr - this is an optimization to
-        // catch "a[i] = a[i] + " without having to do a dependence check).
-        if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
-          CheckDeps.insert(Access);
-          IsRTCheckNeeded = true;
-        }
-
-        if (IsWrite)
-          SetHasWrite = true;
+      for (auto AV : AS) {
+        Value *Ptr = AV.getValue();
 
-        // Create sets of pointers connected by a shared alias set and
-        // underlying object.
-        typedef SmallVector<Value *, 16> ValueVector;
-        ValueVector TempObjects;
-        GetUnderlyingObjects(Ptr, TempObjects, DL);
-        for (Value *UnderlyingObj : TempObjects) {
-          UnderlyingObjToAccessMap::iterator Prev =
-            ObjToLastAccess.find(UnderlyingObj);
-          if (Prev != ObjToLastAccess.end())
-            DepCands.unionSets(Access, Prev->second);
+        // For a single memory access in AliasSetTracker, Accesses may contain
+        // both read and write, and they both need to be handled for CheckDeps.
+        for (auto AC : S) {
+          if (AC.getPointer() != Ptr)
+            continue;
+
+          bool IsWrite = AC.getInt();
+
+          // If we're using the deferred access set, then it contains only
+          // reads.
+          bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
+          if (UseDeferred && !IsReadOnlyPtr)
+            continue;
+          // Otherwise, the pointer must be in the PtrAccessSet, either as a
+          // read or a write.
+          assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
+                  S.count(MemAccessInfo(Ptr, false))) &&
+                 "Alias-set pointer not in the access set?");
+
+          MemAccessInfo Access(Ptr, IsWrite);
+          DepCands.insert(Access);
+
+          // Memorize read-only pointers for later processing and skip them in
+          // the first round (they need to be checked after we have seen all
+          // write pointers). Note: we also mark pointer that are not
+          // consecutive as "read-only" pointers (so that we check
+          // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
+          if (!UseDeferred && IsReadOnlyPtr) {
+            DeferredAccesses.insert(Access);
+            continue;
+          }
+
+          // If this is a write - check other reads and writes for conflicts. If
+          // this is a read only check other writes for conflicts (but only if
+          // there is no other write to the ptr - this is an optimization to
+          // catch "a[i] = a[i] + " without having to do a dependence check).
+          if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
+            CheckDeps.insert(Access);
+            IsRTCheckNeeded = true;
+          }
+
+          if (IsWrite)
+            SetHasWrite = true;
+
+          // Create sets of pointers connected by a shared alias set and
+          // underlying object.
+          typedef SmallVector<Value *, 16> ValueVector;
+          ValueVector TempObjects;
+          GetUnderlyingObjects(Ptr, TempObjects, DL);
+          for (Value *UnderlyingObj : TempObjects) {
+            UnderlyingObjToAccessMap::iterator Prev =
+                ObjToLastAccess.find(UnderlyingObj);
+            if (Prev != ObjToLastAccess.end())
+              DepCands.unionSets(Access, Prev->second);
 
-          ObjToLastAccess[UnderlyingObj] = Access;
+            ObjToLastAccess[UnderlyingObj] = Access;
+          }
         }
       }
     }

Added: llvm/trunk/test/Transforms/LoopVectorize/loop-vect-memdep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/loop-vect-memdep.ll?rev=225159&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/loop-vect-memdep.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/loop-vect-memdep.ll Mon Jan  5 04:08:58 2015
@@ -0,0 +1,26 @@
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; RUN: opt < %s -S -loop-vectorize -debug-only=loop-vectorize 2>&1 | FileCheck %s
+
+; CHECK: LV: Can't vectorize due to memory conflicts
+
+define void @test_loop_novect(double** %arr, i64 %n) {
+for.body.lr.ph:
+  %t = load double** %arr, align 8
+  br label %for.body
+
+for.body:                                      ; preds = %for.body, %for.body.lr.ph
+  %i = phi i64 [ 0, %for.body.lr.ph ], [ %i.next, %for.body ]
+  %a = getelementptr inbounds double* %t, i64 %i
+  %i.next = add nuw nsw i64 %i, 1
+  %a.next = getelementptr inbounds double* %t, i64 %i.next
+  %t1 = load double* %a, align 8
+  %t2 = load double* %a.next, align 8
+  store double %t1, double* %a.next, align 8
+  store double %t2, double* %a, align 8
+  %c = icmp eq i64 %i, %n
+  br i1 %c, label %final, label %for.body
+
+final:                                   ; preds = %for.body
+  ret void
+}





More information about the llvm-commits mailing list