[llvm] 05ccde8 - [LoopCacheAnalysis] Fix a type mismatch problem in cost calculation
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 20 22:58:13 PDT 2022
Author: Congzhe Cao
Date: 2022-07-21T01:57:05-04:00
New Revision: 05ccde8023a6176927454f478730d29e53104c05
URL: https://github.com/llvm/llvm-project/commit/05ccde8023a6176927454f478730d29e53104c05
DIFF: https://github.com/llvm/llvm-project/commit/05ccde8023a6176927454f478730d29e53104c05.diff
LOG: [LoopCacheAnalysis] Fix a type mismatch problem in cost calculation
Loop cache analysis has a problem where the types of the SCEV variables
`Coeff` and `ElemSize` in `isConsecutive()` may not match. The mismatch
causes SCEV failures when `Coeff` is multiplied by `ElemSize`. The fix in
this patch is to extend both `Coeff` and `ElemSize` to whichever of the two
types is wider. As a clean-up, the now-duplicated calculation of `Stride`
in `computeRefCost()` is removed.
Reviewed By: Meinersbur, #loopoptwg
Differential Revision: https://reviews.llvm.org/D128877
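At its core the fix is the usual ScalarEvolution type-widening pattern: pick
the wider of the two operand types with getWiderType() and extend both
operands to it before calling getMulExpr(). The standalone sketch below is
illustrative only (the free function and its name are not part of the patch;
the real change lives inside IndexedReference::isConsecutive(), as shown in
the diff that follows).

#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

// Illustrative helper (not part of the patch): multiply two SCEVs whose
// integer types may differ, widening both to the larger type first so that
// ScalarEvolution::getMulExpr never sees mismatched operand types.
static const SCEV *multiplyWidened(ScalarEvolution &SE, const SCEV *Coeff,
                                   const SCEV *ElemSize) {
  // Pick whichever of the two integer types is wider.
  Type *WiderType = SE.getWiderType(Coeff->getType(), ElemSize->getType());
  // Sign-extend (or no-op) both operands to that type, then multiply.
  // As the patch's FIXME notes, sext here is a heuristic choice.
  return SE.getMulExpr(SE.getNoopOrSignExtend(Coeff, WiderType),
                       SE.getNoopOrSignExtend(ElemSize, WiderType));
}

isConsecutive() now computes Stride this way and hands it back to
computeRefCost() through the new out-parameter, which is what allows the
duplicated multiplication there to be dropped.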
Added:
Modified:
llvm/include/llvm/Analysis/LoopCacheAnalysis.h
llvm/lib/Analysis/LoopCacheAnalysis.cpp
llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll
llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
index 4c5083f3c9803..a323cacdbcdca 100644
--- a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
@@ -108,8 +108,9 @@ class IndexedReference {
/// Return true if the indexed reference is 'consecutive' in loop \p L.
/// An indexed reference is 'consecutive' if the only coefficient that uses
/// the loop induction variable is the rightmost one, and the access stride is
- /// smaller than the cache line size \p CLS.
- bool isConsecutive(const Loop &L, unsigned CLS) const;
+ /// smaller than the cache line size \p CLS. Provide a valid \p Stride value
+ /// if the indexed reference is 'consecutive'.
+ bool isConsecutive(const Loop &L, const SCEV *&Stride, unsigned CLS) const;
/// Retrieve the index of the subscript corresponding to the given loop \p
/// L. Return a zero-based positive index if the subscript index is
diff --git a/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/llvm/lib/Analysis/LoopCacheAnalysis.cpp
index 2cbf1f7f2d28e..85f2dad867115 100644
--- a/llvm/lib/Analysis/LoopCacheAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopCacheAnalysis.cpp
@@ -289,18 +289,14 @@ CacheCostTy IndexedReference::computeRefCost(const Loop &L,
LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n");
const SCEV *RefCost = nullptr;
- if (isConsecutive(L, CLS)) {
+ const SCEV *Stride = nullptr;
+ if (isConsecutive(L, Stride, CLS)) {
// If the indexed reference is 'consecutive' the cost is
// (TripCount*Stride)/CLS.
- const SCEV *Coeff = getLastCoefficient();
- const SCEV *ElemSize = Sizes.back();
- assert(Coeff->getType() == ElemSize->getType() &&
- "Expecting the same type");
- const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
+ assert(Stride != nullptr &&
+ "Stride should not be null for consecutive access!");
Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType());
const SCEV *CacheLineSize = SE.getConstant(WiderType, CLS);
- if (SE.isKnownNegative(Stride))
- Stride = SE.getNegativeSCEV(Stride);
Stride = SE.getNoopOrAnyExtend(Stride, WiderType);
TripCount = SE.getNoopOrAnyExtend(TripCount, WiderType);
const SCEV *Numerator = SE.getMulExpr(Stride, TripCount);
@@ -464,7 +460,8 @@ bool IndexedReference::isLoopInvariant(const Loop &L) const {
return allCoeffForLoopAreZero;
}
-bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const {
+bool IndexedReference::isConsecutive(const Loop &L, const SCEV *&Stride,
+ unsigned CLS) const {
// The indexed reference is 'consecutive' if the only coefficient that uses
// the loop induction variable is the last one...
const SCEV *LastSubscript = Subscripts.back();
@@ -478,7 +475,19 @@ bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const {
// ...and the access stride is less than the cache line size.
const SCEV *Coeff = getLastCoefficient();
const SCEV *ElemSize = Sizes.back();
- const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
+ Type *WiderType = SE.getWiderType(Coeff->getType(), ElemSize->getType());
+ // FIXME: This assumes that all values are signed integers which may
+ // be incorrect in unusual codes and incorrectly use sext instead of zext.
+ // for (uint32_t i = 0; i < 512; ++i) {
+ // uint8_t trunc = i;
+ // A[trunc] = 42;
+ // }
+ // This consecutively iterates twice over A. If `trunc` is sign-extended,
+ // we would conclude that this may iterate backwards over the array.
+ // However, LoopCacheAnalysis is heuristic anyway and transformations must
+ // not result in wrong optimizations if the heuristic was incorrect.
+ Stride = SE.getMulExpr(SE.getNoopOrSignExtend(Coeff, WiderType),
+ SE.getNoopOrSignExtend(ElemSize, WiderType));
const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS);
Stride = SE.isKnownNegative(Stride) ? SE.getNegativeSCEV(Stride) : Stride;
diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll
index 1cf42ac758797..96b4d723b83f3 100644
--- a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll
+++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -passes='print<loop-cache-cost>' -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -opaque-pointers -passes='print<loop-cache-cost>' -disable-output 2>&1 | FileCheck %s
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
@@ -33,7 +33,54 @@ for.end: ; preds = %for.cond
ret void
}
+; Check IndexedReference::computeRefCost can handle type differences between
+; Coeff and ElemSize.
+
+; CHECK: Loop 'for.cond' has cost = 100000000
+; CHECK: Loop 'for.cond1' has cost = 1000000
+; CHECK: Loop 'for.cond5' has cost = 30000
+
+ at data = external dso_local global [2 x [4 x [18 x i32]]], align 1
+
+define dso_local void @handle_to_ptr_2(i1 %b0, i1 %b1, i1 %b2) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %i.0 = phi i16 [ 0, %entry ], [ %inc18, %for.inc17 ]
+ %idxprom = zext i16 %i.0 to i32
+ br i1 %b2, label %for.end19, label %for.cond1
+
+for.cond1:
+ %j.0 = phi i16 [ %inc15, %for.inc14 ], [ 0, %for.cond ]
+ br i1 %b1, label %for.inc17, label %for.cond5.preheader
+for.cond5.preheader:
+ %idxprom10 = zext i16 %j.0 to i32
+ br label %for.cond5
+
+for.cond5:
+ %k.0 = phi i16 [ %inc, %for.inc ], [ 0, %for.cond5.preheader ]
+ br i1 %b0, label %for.inc14, label %for.inc
+
+for.inc:
+ %idxprom12 = zext i16 %k.0 to i32
+ %arrayidx13 = getelementptr inbounds [2 x [4 x [18 x i32]]], ptr @data, i32 0, i32 %idxprom, i32 %idxprom10, i32 %idxprom12
+ store i32 7, ptr %arrayidx13, align 1
+ %inc = add nuw nsw i16 %k.0, 1
+ br label %for.cond5
+
+for.inc14:
+ %inc15 = add nuw nsw i16 %j.0, 1
+ br label %for.cond1
+
+for.inc17:
+ %inc18 = add nuw nsw i16 %i.0, 1
+ br label %for.cond
+
+for.end19:
+ ret void
+}
; Check IndexedReference::computeRefCost can handle negative stride
diff --git a/llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll b/llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll
index 68ea016194395..d4584fd010dcf 100644
--- a/llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll
+++ b/llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll
@@ -35,7 +35,56 @@ for.end: ; preds = %for.cond
ret void
}
+; Check IndexedReference::computeRefCost can handle type differences between
+; Coeff and ElemSize.
+
+; SMALLER-CACHELINE: Loop 'for.cond' has cost = 100000000
+; SMALLER-CACHELINE: Loop 'for.cond1' has cost = 1000000
+; SMALLER-CACHELINE: Loop 'for.cond5' has cost = 120000
+; LARGER-CACHELINE: Loop 'for.cond' has cost = 100000000
+; LARGER-CACHELINE: Loop 'for.cond1' has cost = 1000000
+; LARGER-CACHELINE: Loop 'for.cond5' has cost = 10000
+ at data = external dso_local global [2 x [4 x [18 x i32]]], align 1
+
+define dso_local void @handle_to_ptr_2(i1 %b0, i1 %b1, i1 %b2) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %i.0 = phi i16 [ 0, %entry ], [ %inc18, %for.inc17 ]
+ %idxprom = zext i16 %i.0 to i32
+ br i1 %b2, label %for.end19, label %for.cond1
+
+for.cond1:
+ %j.0 = phi i16 [ %inc15, %for.inc14 ], [ 0, %for.cond ]
+ br i1 %b1, label %for.inc17, label %for.cond5.preheader
+
+for.cond5.preheader:
+ %idxprom10 = zext i16 %j.0 to i32
+ br label %for.cond5
+for.cond5:
+ %k.0 = phi i16 [ %inc, %for.inc ], [ 0, %for.cond5.preheader ]
+ br i1 %b0, label %for.inc14, label %for.inc
+
+for.inc:
+ %idxprom12 = zext i16 %k.0 to i32
+ %arrayidx13 = getelementptr inbounds [2 x [4 x [18 x i32]]], ptr @data, i32 0, i32 %idxprom, i32 %idxprom10, i32 %idxprom12
+ store i32 7, ptr %arrayidx13, align 1
+ %inc = add nuw nsw i16 %k.0, 1
+ br label %for.cond5
+
+for.inc14:
+ %inc15 = add nuw nsw i16 %j.0, 1
+ br label %for.cond1
+
+for.inc17:
+ %inc18 = add nuw nsw i16 %i.0, 1
+ br label %for.cond
+
+for.end19:
+ ret void
+}
; Check IndexedReference::computeRefCost can handle negative stride
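For completeness, here is a condensed, hedged sketch of how the Stride
returned by isConsecutive() is consumed on the computeRefCost() side after
this change. The widening of Stride and TripCount and the
(TripCount * Stride) numerator come straight from the hunk above; the final
division by the cache line size sits below the quoted context, so the
getUDivExpr call is an assumption.

#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

// Sketch of the consecutive-access cost, roughly (TripCount * Stride) / CLS.
// Stride is the value produced by isConsecutive() (already made non-negative
// there); TripCount is the trip count used by LoopCacheAnalysis; CLS is the
// cache line size in bytes.
static const SCEV *consecutiveRefCost(ScalarEvolution &SE, const SCEV *Stride,
                                      const SCEV *TripCount, unsigned CLS) {
  // Widen Stride and TripCount to a common type, as in the patched hunk.
  Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType());
  const SCEV *CacheLineSize = SE.getConstant(WiderType, CLS);
  const SCEV *WStride = SE.getNoopOrAnyExtend(Stride, WiderType);
  const SCEV *WTripCount = SE.getNoopOrAnyExtend(TripCount, WiderType);
  const SCEV *Numerator = SE.getMulExpr(WStride, WTripCount);
  // Assumed: the division itself is not visible in the quoted context.
  return SE.getUDivExpr(Numerator, CacheLineSize);
}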