[llvm] 77b2bb5 - [LAA] Use type sizes when determining dependence.

Sander de Smalen via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 8 07:01:10 PST 2021


Author: Jolanta Jensen
Date: 2021-12-08T15:00:58Z
New Revision: 77b2bb55671ab0f2daa84f48db7947dc58f065f2

URL: https://github.com/llvm/llvm-project/commit/77b2bb55671ab0f2daa84f48db7947dc58f065f2
DIFF: https://github.com/llvm/llvm-project/commit/77b2bb55671ab0f2daa84f48db7947dc58f065f2.diff

LOG: [LAA] Use type sizes when determining dependence.

In the isDependent function the code does not try hard enough
to determine the dependence between types. If the types are
different it simply gives up, whereas in fact what we really
care about are the type sizes. I've changed the code to compare
sizes instead of types.

Reviewed By: fhahn, sdesmalen

Differential Revision: https://reviews.llvm.org/D108763

Added: 
    llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll

Modified: 
    llvm/lib/Analysis/LoopAccessAnalysis.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 19a24ac6a484a..6444518dc70c8 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1568,11 +1568,12 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
 
   auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
   uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
+  bool HasSameSize =
+      DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
   uint64_t Stride = std::abs(StrideAPtr);
   const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
   if (!C) {
-    if (!isa<SCEVCouldNotCompute>(Dist) &&
-        TypeByteSize == DL.getTypeAllocSize(BTy) &&
+    if (!isa<SCEVCouldNotCompute>(Dist) && HasSameSize &&
         isSafeDependenceDistance(DL, *(PSE.getSE()),
                                  *(PSE.getBackedgeTakenCount()), *Dist, Stride,
                                  TypeByteSize))
@@ -1587,7 +1588,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
   int64_t Distance = Val.getSExtValue();
 
   // Attempt to prove strided accesses independent.
-  if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
+  if (std::abs(Distance) > 0 && Stride > 1 && HasSameSize &&
       areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
     LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
     return Dependence::NoDep;
@@ -1598,7 +1599,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
     bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
     if (IsTrueDataDependence && EnableForwardingConflictDetection &&
         (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
-         ATy != BTy)) {
+         !HasSameSize)) {
       LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
       return Dependence::ForwardButPreventsForwarding;
     }
@@ -1608,21 +1609,19 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
   }
 
   // Write to the same location with the same size.
-  // Could be improved to assert type sizes are the same (i32 == float, etc).
   if (Val == 0) {
-    if (ATy == BTy)
+    if (HasSameSize)
       return Dependence::Forward;
     LLVM_DEBUG(
-        dbgs() << "LAA: Zero dependence difference but different types\n");
+        dbgs() << "LAA: Zero dependence difference but different type sizes\n");
     return Dependence::Unknown;
   }
 
   assert(Val.isStrictlyPositive() && "Expect a positive value");
 
-  if (ATy != BTy) {
-    LLVM_DEBUG(
-        dbgs()
-        << "LAA: ReadWrite-Write positive dependency with different types\n");
+  if (!HasSameSize) {
+    LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
+                         "different type sizes\n");
     return Dependence::Unknown;
   }
 

diff --git a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
new file mode 100644
index 0000000000000..9cdc5a8e27de5
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
@@ -0,0 +1,185 @@
+; RUN: opt -S -disable-output -passes='require<scalar-evolution>,require<aa>,loop(print-access-info)' < %s 2>&1 | FileCheck %s
+
+
+; In the function below some of the accesses are done as float types and some
+; are done as i32 types. When doing dependence analysis the type should not
+; matter if it can be determined that they are the same size.
+
+%int_pair = type { i32, i32 }
+
+; CHECK-LABEL: function 'backdep_type_size_equivalence':
+; CHECK-NEXT:    loop:
+; CHECK-NEXT:      Memory dependences are safe with a maximum dependence distance of 800 bytes
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        Forward:
+; CHECK-NEXT:            %ld.f32 = load float, float* %gep.iv.f32, align 8 ->
+; CHECK-NEXT:            store i32 %indvars.iv.i32, i32* %gep.iv, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:        Forward:
+; CHECK-NEXT:            %ld.f32 = load float, float* %gep.iv.f32, align 8 ->
+; CHECK-NEXT:            store float %val, float* %gep.iv.min.100.f32, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:        BackwardVectorizable:
+; CHECK-NEXT:            store float %val, float* %gep.iv.min.100.f32, align 8 ->
+; CHECK-NEXT:            store i32 %indvars.iv.i32, i32* %gep.iv, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:        Run-time memory checks:
+; CHECK-NEXT:        Grouped accesses:
+
+define void @backdep_type_size_equivalence(%int_pair* nocapture %vec, i64 %n) {
+entry:
+  br label %loop
+
+loop:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ]
+
+  ;; Load from vec[indvars.iv].x as float
+  %gep.iv = getelementptr inbounds %int_pair, %int_pair* %vec, i64 %indvars.iv, i32 0
+  %gep.iv.f32 = bitcast i32* %gep.iv to float*
+  %ld.f32 = load float, float* %gep.iv.f32, align 8
+  %val = fmul fast float %ld.f32, 5.0
+
+  ;; Store to vec[indvars.iv - 100].x as float
+  %indvars.iv.min.100 = add nsw i64 %indvars.iv, -100
+  %gep.iv.min.100 = getelementptr inbounds %int_pair, %int_pair* %vec, i64 %indvars.iv.min.100, i32 0
+  %gep.iv.min.100.f32 = bitcast i32* %gep.iv.min.100 to float*
+  store float %val, float* %gep.iv.min.100.f32, align 8
+
+  ;; Store to vec[indvars.iv].x as i32, creating a backward dependency between
+;; the two stores with different element types but the same element size.
+  %indvars.iv.i32 = trunc i64 %indvars.iv to i32
+  store i32 %indvars.iv.i32, i32* %gep.iv, align 8
+
+  ;; Store to vec[indvars.iv].y as i32, strided accesses should be independent
+;; between the two stores with different element types but the same element size.
+  %gep.iv.1 = getelementptr inbounds %int_pair, %int_pair* %vec, i64 %indvars.iv, i32 1
+  store i32 %indvars.iv.i32, i32* %gep.iv.1, align 8
+
+  ;; Store to vec[indvars.iv + n].y as i32, to verify no dependence in the case
+  ;; of unknown dependence distance.
+  %indvars.iv.n = add nuw nsw i64 %indvars.iv, %n
+  %gep.iv.n = getelementptr inbounds %int_pair, %int_pair* %vec, i64 %indvars.iv.n, i32 1
+  store i32 %indvars.iv.i32, i32* %gep.iv.n, align 8
+
+  ;; Loop condition.
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %cond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; In the function below one of the accesses is done as i19 type, which has a
+; 
diff erent store size than the i32 type, even though their alloc sizes are
+; equivalent. This is a negative test to ensure that they are not analyzed as
+; in the tests above.
+;
+; CHECK-LABEL: function 'backdep_type_store_size_equivalence':
+; CHECK-NEXT:    loop:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        Unknown:
+; CHECK-NEXT:            %ld.f32 = load float, float* %gep.iv.f32, align 8 ->
+; CHECK-NEXT:            store i19 %indvars.iv.i19, i19* %gep.iv.i19, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:        Run-time memory checks:
+; CHECK-NEXT:        Grouped accesses:
+
+define void @backdep_type_store_size_equivalence(%int_pair* nocapture %vec, i64 %n) {
+entry:
+  br label %loop
+
+loop:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ]
+
+  ;; Load from vec[indvars.iv].x as float
+  %gep.iv = getelementptr inbounds %int_pair, %int_pair* %vec, i64 %indvars.iv, i32 0
+  %gep.iv.f32 = bitcast i32* %gep.iv to float*
+  %ld.f32 = load float, float* %gep.iv.f32, align 8
+  %val = fmul fast float %ld.f32, 5.0
+
+  ;; Store to vec[indvars.iv].x as i19.
+  %indvars.iv.i19 = trunc i64 %indvars.iv to i19
+  %gep.iv.i19 = bitcast i32* %gep.iv to i19*
+  store i19 %indvars.iv.i19, i19* %gep.iv.i19, align 8
+
+  ;; Loop condition.
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %cond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; In the function below some of the accesses are done as double types and some
+; are done as i64 and i32 types. This is a negative test to ensure that they
+; are not analyzed as in the tests above.
+
+; CHECK-LABEL: function 'neg_dist_dep_type_size_equivalence':
+; CHECK-NEXT:    loop:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        Unknown:
+; CHECK-NEXT:            %ld.i64 = load i64, i64* %gep.iv, align 8 ->
+; CHECK-NEXT:		         store i32 %ld.i64.i32, i32* %gep.iv.n.i32, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:        ForwardButPreventsForwarding:
+; CHECK-NEXT:            store double %val, double* %gep.iv.101.f64, align 8 ->
+; CHECK-NEXT:            %ld.i64 = load i64, i64* %gep.iv, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:        Unknown:
+; CHECK-NEXT:            %ld.f64 = load double, double* %gep.iv.f64, align 8 ->
+; CHECK-NEXT:		         store i32 %ld.i64.i32, i32* %gep.iv.n.i32, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:        BackwardVectorizableButPreventsForwarding:
+; CHECK-NEXT:            %ld.f64 = load double, double* %gep.iv.f64, align 8 ->
+; CHECK-NEXT:            store double %val, double* %gep.iv.101.f64, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:        Unknown:
+; CHECK-NEXT:            store double %val, double* %gep.iv.101.f64, align 8 ->
+; CHECK-NEXT:		         store i32 %ld.i64.i32, i32* %gep.iv.n.i32, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+
+define void @neg_dist_dep_type_size_equivalence(i64* nocapture %vec, i64 %n) {
+entry:
+  br label %loop
+
+loop:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ]
+
+  ;; Load from vec[indvars.iv] as double
+  %gep.iv = getelementptr i64, i64* %vec, i64 %indvars.iv
+  %gep.iv.f64 = bitcast i64* %gep.iv to double*
+  %ld.f64 = load double, double* %gep.iv.f64, align 8
+  %val = fmul fast double %ld.f64, 5.0
+
+  ;; Store to vec[indvars.iv + 101] as double
+  %indvars.iv.101 = add nsw i64 %indvars.iv, 101
+  %gep.iv.101.i64 = getelementptr i64, i64* %vec, i64 %indvars.iv.101
+  %gep.iv.101.f64 = bitcast i64* %gep.iv.101.i64 to double*
+  store double %val, double* %gep.iv.101.f64, align 8
+
+  ;; Read from vec[indvars.iv] as i64 creating
+  ;; a forward but prevents forwarding dependence
+;; with different types but same sizes.
+  %ld.i64 = load i64, i64* %gep.iv, align 8
+
+  ;; Different sizes
+  %indvars.iv.n = add nuw nsw i64 %indvars.iv, %n
+  %gep.iv.n.i64 = getelementptr inbounds i64, i64* %vec, i64 %indvars.iv.n
+  %gep.iv.n.i32 = bitcast i64* %gep.iv.n.i64 to i32*
+  %ld.i64.i32 = trunc i64 %ld.i64 to i32
+  store i32 %ld.i64.i32, i32* %gep.iv.n.i32, align 8
+
+  ;; Loop condition.
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %cond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  ret void
+}


        


More information about the llvm-commits mailing list