[llvm] 05ccde8 - [LoopCacheAnalysis] Fix a type mismatch problem in cost calculation
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 20 22:58:13 PDT 2022
Author: Congzhe Cao
Date: 2022-07-21T01:57:05-04:00
New Revision: 05ccde8023a6176927454f478730d29e53104c05
URL: https://github.com/llvm/llvm-project/commit/05ccde8023a6176927454f478730d29e53104c05
DIFF: https://github.com/llvm/llvm-project/commit/05ccde8023a6176927454f478730d29e53104c05.diff
LOG: [LoopCacheAnalysis] Fix a type mismatch problem in cost calculation
Loop cache analysis has a problem where the types of the SCEV variables
`Coeff` and `ElemSize` in `isConsecutive()` may not match. The mismatch
causes SCEV failures when `Coeff` is multiplied by `ElemSize`. The fix in
this patch is to extend both `Coeff` and `ElemSize` to whichever of the two
types is wider. As a clean-up, the now-duplicated calculation of `Stride`
in `computeRefCost()` is removed.
Reviewed By: Meinersbur, #loopoptwg
Differential Revision: https://reviews.llvm.org/D128877
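At its core the fix is the usual ScalarEvolution type-widening pattern: pick
the wider of the two operand types with getWiderType() and extend both
operands to it before calling getMulExpr(). The standalone sketch below is
illustrative only (the free function and its name are not part of the patch;
the real change lives inside IndexedReference::isConsecutive(), as shown in
the diff that follows).

#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

// Illustrative helper (not part of the patch): multiply two SCEVs whose
// integer types may differ, widening both to the larger type first so that
// ScalarEvolution::getMulExpr never sees mismatched operand types.
static const SCEV *multiplyWidened(ScalarEvolution &SE, const SCEV *Coeff,
                                   const SCEV *ElemSize) {
  // Pick whichever of the two integer types is wider.
  Type *WiderType = SE.getWiderType(Coeff->getType(), ElemSize->getType());
  // Sign-extend (or no-op) both operands to that type, then multiply.
  // As the patch's FIXME notes, sext here is a heuristic choice.
  return SE.getMulExpr(SE.getNoopOrSignExtend(Coeff, WiderType),
                       SE.getNoopOrSignExtend(ElemSize, WiderType));
}

isConsecutive() now computes Stride this way and hands it back to
computeRefCost() through the new out-parameter, which is what allows the
duplicated multiplication there to be dropped.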
Added:
Modified:
llvm/include/llvm/Analysis/LoopCacheAnalysis.h
llvm/lib/Analysis/LoopCacheAnalysis.cpp
llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll
llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
index 4c5083f3c9803..a323cacdbcdca 100644
--- a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
@@ -108,8 +108,9 @@ class IndexedReference {
/// Return true if the indexed reference is 'consecutive' in loop \p L.
/// An indexed reference is 'consecutive' if the only coefficient that uses
/// the loop induction variable is the rightmost one, and the access stride is
- /// smaller than the cache line size \p CLS.
- bool isConsecutive(const Loop &L, unsigned CLS) const;
+ /// smaller than the cache line size \p CLS. Provide a valid \p Stride value
+ /// if the indexed reference is 'consecutive'.
+ bool isConsecutive(const Loop &L, const SCEV *&Stride, unsigned CLS) const;
/// Retrieve the index of the subscript corresponding to the given loop \p
/// L. Return a zero-based positive index if the subscript index is
diff --git a/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/llvm/lib/Analysis/LoopCacheAnalysis.cpp
index 2cbf1f7f2d28e..85f2dad867115 100644
--- a/llvm/lib/Analysis/LoopCacheAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopCacheAnalysis.cpp
@@ -289,18 +289,14 @@ CacheCostTy IndexedReference::computeRefCost(const Loop &L,
LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n");
const SCEV *RefCost = nullptr;
- if (isConsecutive(L, CLS)) {
+ const SCEV *Stride = nullptr;
+ if (isConsecutive(L, Stride, CLS)) {
// If the indexed reference is 'consecutive' the cost is
// (TripCount*Stride)/CLS.
- const SCEV *Coeff = getLastCoefficient();
- const SCEV *ElemSize = Sizes.back();
- assert(Coeff->getType() == ElemSize->getType() &&
- "Expecting the same type");
- const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
+ assert(Stride != nullptr &&
+ "Stride should not be null for consecutive access!");
Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType());
const SCEV *CacheLineSize = SE.getConstant(WiderType, CLS);
- if (SE.isKnownNegative(Stride))
- Stride = SE.getNegativeSCEV(Stride);
Stride = SE.getNoopOrAnyExtend(Stride, WiderType);
TripCount = SE.getNoopOrAnyExtend(TripCount, WiderType);
const SCEV *Numerator = SE.getMulExpr(Stride, TripCount);
@@ -464,7 +460,8 @@ bool IndexedReference::isLoopInvariant(const Loop &L) const {
return allCoeffForLoopAreZero;
}
-bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const {
+bool IndexedReference::isConsecutive(const Loop &L, const SCEV *&Stride,
+ unsigned CLS) const {
// The indexed reference is 'consecutive' if the only coefficient that uses
// the loop induction variable is the last one...
const SCEV *LastSubscript = Subscripts.back();
@@ -478,7 +475,19 @@ bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const {
// ...and the access stride is less than the cache line size.
const SCEV *Coeff = getLastCoefficient();
const SCEV *ElemSize = Sizes.back();
- const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
+ Type *WiderType = SE.getWiderType(Coeff->getType(), ElemSize->getType());
+ // FIXME: This assumes that all values are signed integers which may
+ // be incorrect in unusual codes and incorrectly use sext instead of zext.
+ // for (uint32_t i = 0; i < 512; ++i) {
+ // uint8_t trunc = i;
+ // A[trunc] = 42;
+ // }
+ // This consecutively iterates twice over A. If `trunc` is sign-extended,
+ // we would conclude that this may iterate backwards over the array.
+ // However, LoopCacheAnalysis is heuristic anyway and transformations must
+ // not result in wrong optimizations if the heuristic was incorrect.
+ Stride = SE.getMulExpr(SE.getNoopOrSignExtend(Coeff, WiderType),
+ SE.getNoopOrSignExtend(ElemSize, WiderType));
const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS);
Stride = SE.isKnownNegative(Stride) ? SE.getNegativeSCEV(Stride) : Stride;
diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll
index 1cf42ac758797..96b4d723b83f3 100644
--- a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll
+++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -passes='print<loop-cache-cost>' -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -opaque-pointers -passes='print<loop-cache-cost>' -disable-output 2>&1 | FileCheck %s
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
@@ -33,7 +33,54 @@ for.end: ; preds = %for.cond
ret void
}
+; Check IndexedReference::computeRefCost can handle type differences between
+; Coeff and ElemSize.
+
+; CHECK: Loop 'for.cond' has cost = 100000000
+; CHECK: Loop 'for.cond1' has cost = 1000000
+; CHECK: Loop 'for.cond5' has cost = 30000
+
+ at data = external dso_local global [2 x [4 x [18 x i32]]], align 1
+
+define dso_local void @handle_to_ptr_2(i1 %b0, i1 %b1, i1 %b2) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %i.0 = phi i16 [ 0, %entry ], [ %inc18, %for.inc17 ]
+ %idxprom = zext i16 %i.0 to i32
+ br i1 %b2, label %for.end19, label %for.cond1
+
+for.cond1:
+ %j.0 = phi i16 [ %inc15, %for.inc14 ], [ 0, %for.cond ]
+ br i1 %b1, label %for.inc17, label %for.cond5.preheader
+for.cond5.preheader:
+ %idxprom10 = zext i16 %j.0 to i32
+ br label %for.cond5
+
+for.cond5:
+ %k.0 = phi i16 [ %inc, %for.inc ], [ 0, %for.cond5.preheader ]
+ br i1 %b0, label %for.inc14, label %for.inc
+
+for.inc:
+ %idxprom12 = zext i16 %k.0 to i32
+ %arrayidx13 = getelementptr inbounds [2 x [4 x [18 x i32]]], ptr @data, i32 0, i32 %idxprom, i32 %idxprom10, i32 %idxprom12
+ store i32 7, ptr %arrayidx13, align 1
+ %inc = add nuw nsw i16 %k.0, 1
+ br label %for.cond5
+
+for.inc14:
+ %inc15 = add nuw nsw i16 %j.0, 1
+ br label %for.cond1
+
+for.inc17:
+ %inc18 = add nuw nsw i16 %i.0, 1
+ br label %for.cond
+
+for.end19:
+ ret void
+}
; Check IndexedReference::computeRefCost can handle negative stride
diff --git a/llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll b/llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll
index 68ea016194395..d4584fd010dcf 100644
--- a/llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll
+++ b/llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll
@@ -35,7 +35,56 @@ for.end: ; preds = %for.cond
ret void
}
+; Check IndexedReference::computeRefCost can handle type differences between
+; Coeff and ElemSize.
+
+; SMALLER-CACHELINE: Loop 'for.cond' has cost = 100000000
+; SMALLER-CACHELINE: Loop 'for.cond1' has cost = 1000000
+; SMALLER-CACHELINE: Loop 'for.cond5' has cost = 120000
+; LARGER-CACHELINE: Loop 'for.cond' has cost = 100000000
+; LARGER-CACHELINE: Loop 'for.cond1' has cost = 1000000
+; LARGER-CACHELINE: Loop 'for.cond5' has cost = 10000
+ at data = external dso_local global [2 x [4 x [18 x i32]]], align 1
+
+define dso_local void @handle_to_ptr_2(i1 %b0, i1 %b1, i1 %b2) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %i.0 = phi i16 [ 0, %entry ], [ %inc18, %for.inc17 ]
+ %idxprom = zext i16 %i.0 to i32
+ br i1 %b2, label %for.end19, label %for.cond1
+
+for.cond1:
+ %j.0 = phi i16 [ %inc15, %for.inc14 ], [ 0, %for.cond ]
+ br i1 %b1, label %for.inc17, label %for.cond5.preheader
+
+for.cond5.preheader:
+ %idxprom10 = zext i16 %j.0 to i32
+ br label %for.cond5
+for.cond5:
+ %k.0 = phi i16 [ %inc, %for.inc ], [ 0, %for.cond5.preheader ]
+ br i1 %b0, label %for.inc14, label %for.inc
+
+for.inc:
+ %idxprom12 = zext i16 %k.0 to i32
+ %arrayidx13 = getelementptr inbounds [2 x [4 x [18 x i32]]], ptr @data, i32 0, i32 %idxprom, i32 %idxprom10, i32 %idxprom12
+ store i32 7, ptr %arrayidx13, align 1
+ %inc = add nuw nsw i16 %k.0, 1
+ br label %for.cond5
+
+for.inc14:
+ %inc15 = add nuw nsw i16 %j.0, 1
+ br label %for.cond1
+
+for.inc17:
+ %inc18 = add nuw nsw i16 %i.0, 1
+ br label %for.cond
+
+for.end19:
+ ret void
+}
; Check IndexedReference::computeRefCost can handle negative stride
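For completeness, here is a condensed, hedged sketch of how the Stride
returned by isConsecutive() is consumed on the computeRefCost() side after
this change. The widening of Stride and TripCount and the
(TripCount * Stride) numerator come straight from the hunk above; the final
division by the cache line size sits below the quoted context, so the
getUDivExpr call is an assumption.

#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

// Sketch of the consecutive-access cost, roughly (TripCount * Stride) / CLS.
// Stride is the value produced by isConsecutive() (already made non-negative
// there); TripCount is the trip count used by LoopCacheAnalysis; CLS is the
// cache line size in bytes.
static const SCEV *consecutiveRefCost(ScalarEvolution &SE, const SCEV *Stride,
                                      const SCEV *TripCount, unsigned CLS) {
  // Widen Stride and TripCount to a common type, as in the patched hunk.
  Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType());
  const SCEV *CacheLineSize = SE.getConstant(WiderType, CLS);
  const SCEV *WStride = SE.getNoopOrAnyExtend(Stride, WiderType);
  const SCEV *WTripCount = SE.getNoopOrAnyExtend(TripCount, WiderType);
  const SCEV *Numerator = SE.getMulExpr(WStride, WTripCount);
  // Assumed: the division itself is not visible in the quoted context.
  return SE.getUDivExpr(Numerator, CacheLineSize);
}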