[llvm] [DA] Cache delinearization results. NFCI. (PR #164379)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 21 02:14:50 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-analysis
Author: Sjoerd Meijer (sjoerdmeijer)
<details>
<summary>Changes</summary>
An instruction can appear in multiple source-destination dependency pairs. When it does, delinearization is requested and recomputed for the same instruction again and again. Instead, cache the delinearization results and query the cache before recomputing them. I made this observation while going through debug logs for DA, and wanted to check whether you like this idea before I measure whether it has a compile-time benefit, which is of course the reason for doing this.
I was just looking at this example:
```
loop:
%i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
%subscript.0 = mul i64 %mk, %i
%subscript.1 = add i64 %subscript.0, %kk.inc
%idx.0 = getelementptr i8, ptr %a, i64 %subscript.0 ; a[-k * i]
%idx.1 = getelementptr i8, ptr %a, i64 %subscript.1 ; a[-k * i + (2 * k + 1)]
store i8 42, ptr %idx.0
store i8 42, ptr %idx.1
%i.next = add i64 %i, 1
%cond.exit = icmp eq i64 %i.next, 3
br i1 %cond.exit, label %exit, label %loop
```
and noticed that we first delinearize this:
```
Src: store i8 42, ptr %idx.0, align 1 --> Dst: store i8 42, ptr %idx.0, align 1
da analyze - SrcSCEV = {%a,+,(-1 * %k)}<%loop>
DstSCEV = {%a,+,(-1 * %k)}<%loop>
GEP to delinearize: %idx.0 = getelementptr i8, ptr %a, i64 %subscript.0
```
then this:
```
Src: store i8 42, ptr %idx.0, align 1 --> Dst: store i8 42, ptr %idx.1, align 1
da analyze - SrcSCEV = {%a,+,(-1 * %k)}<%loop>
DstSCEV = {(1 + (2 * %k) + %a),+,(-1 * %k)}<%loop>
GEP to delinearize: %idx.0 = getelementptr i8, ptr %a, i64 %subscript.0
```
and then this:
```
Src: store i8 42, ptr %idx.1, align 1 --> Dst: store i8 42, ptr %idx.1, align 1
da analyze - SrcSCEV = {(1 + (2 * %k) + %a),+,(-1 * %k)}<%loop>
DstSCEV = {(1 + (2 * %k) + %a),+,(-1 * %k)}<%loop>
GEP to delinearize: %idx.1 = getelementptr i8, ptr %a, i64 %subscript.1
```
With this change, we will cache the src and dst subscripts in the first call:
```
Src: store i8 42, ptr %idx.0, align 1 --> Dst: store i8 42, ptr %idx.0, align 1
da analyze - SrcSCEV = {%a,+,(-1 * %k)}<%loop>
DstSCEV = {%a,+,(-1 * %k)}<%loop>
Cached Src subscripts
Cached Dst subscripts
```
In the second call, only the dst is newly cached (the src was already cached by the first call):
```
Src: store i8 42, ptr %idx.0, align 1 --> Dst: store i8 42, ptr %idx.1, align 1
da analyze - SrcSCEV = {%a,+,(-1 * %k)}<%loop>
DstSCEV = {(1 + (2 * %k) + %a),+,(-1 * %k)}<%loop>
Cached Dst subscripts
```
and the third call has a cache hit for both the dst and src:
```
Src: store i8 42, ptr %idx.1, align 1 --> Dst: store i8 42, ptr %idx.1, align 1
da analyze - SrcSCEV = {(1 + (2 * %k) + %a),+,(-1 * %k)}<%loop>
DstSCEV = {(1 + (2 * %k) + %a),+,(-1 * %k)}<%loop>
Delinearization cache hit for both Src and Dst
```
---
Full diff: https://github.com/llvm/llvm-project/pull/164379.diff
2 Files Affected:
- (modified) llvm/include/llvm/Analysis/DependenceAnalysis.h (+6)
- (modified) llvm/lib/Analysis/DependenceAnalysis.cpp (+31-5)
``````````diff
diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h
index 18a8f8aabb44a..04fa9ad0774bd 100644
--- a/llvm/include/llvm/Analysis/DependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h
@@ -420,6 +420,12 @@ class DependenceInfo {
Function *F;
SmallVector<const SCEVPredicate *, 4> Assumptions;
+ /// Cache for delinearized subscripts to avoid recomputation.
+ /// Maps (Instruction, Loop, AccessFn) -> Subscripts
+ DenseMap<std::tuple<Instruction *, Loop *, const SCEV *>,
+ SmallVector<const SCEV *, 4>>
+ DelinearizationCache;
+
/// Subscript - This private struct represents a pair of subscripts from
/// a pair of potentially multi-dimensional array references. We use a
/// vector of them to guide subscript partitioning.
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index 805b6820e1e1c..7e413c65a71a6 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -3463,11 +3463,37 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
- if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn,
- SrcSubscripts, DstSubscripts) &&
- !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn,
- SrcSubscripts, DstSubscripts))
- return false;
+ // Check cache for both Src and Dst subscripts
+ auto SrcCacheKey = std::make_tuple(Src, SrcLoop, SrcAccessFn);
+ auto DstCacheKey = std::make_tuple(Dst, DstLoop, DstAccessFn);
+ auto SrcCacheIt = DelinearizationCache.find(SrcCacheKey);
+ auto DstCacheIt = DelinearizationCache.find(DstCacheKey);
+ bool SrcCached = (SrcCacheIt != DelinearizationCache.end());
+ bool DstCached = (DstCacheIt != DelinearizationCache.end());
+
+ if (SrcCached && DstCached) {
+ // Both are cached - use cached values and skip delinearization
+ SrcSubscripts = SrcCacheIt->second;
+ DstSubscripts = DstCacheIt->second;
+ LLVM_DEBUG(dbgs() << " Delinearization cache hit for both Src and Dst\n");
+ } else {
+ // At least one is not cached - need to compute both
+ if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn,
+ SrcSubscripts, DstSubscripts) &&
+ !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn,
+ SrcSubscripts, DstSubscripts))
+ return false;
+
+ // Cache the results
+ if (!SrcCached) {
+ DelinearizationCache[SrcCacheKey] = SrcSubscripts;
+ LLVM_DEBUG(dbgs() << " Cached Src subscripts\n");
+ }
+ if (!DstCached) {
+ DelinearizationCache[DstCacheKey] = DstSubscripts;
+ LLVM_DEBUG(dbgs() << " Cached Dst subscripts\n");
+ }
+ }
assert(isLoopInvariant(SrcBase, SrcLoop) &&
isLoopInvariant(DstBase, DstLoop) &&
``````````
</details>
https://github.com/llvm/llvm-project/pull/164379
More information about the llvm-commits
mailing list