[llvm] [DA] Add option to dump delinearization result in printing analysis (PR #157859)
Ryotaro Kasuga via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 10 06:42:18 PDT 2025
https://github.com/kasuga-fj created https://github.com/llvm/llvm-project/pull/157859
None
>From bbb7ddc1a8d1269c4e529019e7e134ea9e9442fb Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Wed, 10 Sep 2025 13:16:44 +0000
Subject: [PATCH] [DA] Add option to dump delinearization result in printing
analysis
---
.../llvm/Analysis/DependenceAnalysis.h | 48 +++--
llvm/lib/Analysis/DependenceAnalysis.cpp | 202 ++++++++++++------
.../Analysis/DependenceAnalysis/DADelin.ll | 152 ++++++++++++-
3 files changed, 319 insertions(+), 83 deletions(-)
diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h
index f66c79d915665..eec10d62f9b8a 100644
--- a/llvm/include/llvm/Analysis/DependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h
@@ -284,6 +284,27 @@ class LLVM_ABI FullDependence final : public Dependence {
/// DependenceInfo - This class is the main dependence-analysis driver.
class DependenceInfo {
public:
+ /// Represents the result of delinearization computed by a depends query.
+ /// Since a depends query analyzes the relationship between two memory
+ /// accesses, the delinearization result has two lists of subscript
+ /// expressions (one for each memory access). The sizes (dimensions) must be
+ /// shared between the two accesses.
+ /// TODO: It might be better to define this (or a variant of it) within
+ /// Delinearize, rather than inside DependenceAnalysis.
+ struct DelinearizedAccessesInfo {
+ /// Subscripts for the source memory access.
+ SmallVector<const SCEV *, 4> SrcSubscripts;
+
+ /// Subscripts for the destination memory access.
+ SmallVector<const SCEV *, 4> DstSubscripts;
+
+ /// Sizes (dimensions) shared between the two accesses.
+ SmallVector<const SCEV *, 4> Sizes;
+
+ /// Print the delinearization result.
+ void print(raw_ostream &OS, unsigned Depth) const;
+ };
+
DependenceInfo(Function *F, AAResults *AA, ScalarEvolution *SE, LoopInfo *LI)
: AA(AA), SE(SE), LI(LI), F(F) {}
@@ -298,9 +319,13 @@ class DependenceInfo {
/// solved at compilation time. By default UnderRuntimeAssumptions is false
/// for a safe approximation of the dependence relation that does not
/// require runtime checks.
+ /// If \p RecordDelinearization is provided and the delinearization process
+ /// is successful, the result is stored in \p RecordDelinearization. It's
+ /// primarily for testing purposes.
LLVM_ABI std::unique_ptr<Dependence>
depends(Instruction *Src, Instruction *Dst,
- bool UnderRuntimeAssumptions = false);
+ bool UnderRuntimeAssumptions = false,
+ DelinearizedAccessesInfo *RecordDelinearization = nullptr);
/// getSplitIteration - Give a dependence that's splittable at some
/// particular level, return the iteration that should be used to split
@@ -897,25 +922,24 @@ class DependenceInfo {
/// Given a linear access function, tries to recover subscripts
/// for each dimension of the array element access.
- bool tryDelinearize(Instruction *Src, Instruction *Dst,
- SmallVectorImpl<Subscript> &Pair);
+ std::optional<DelinearizedAccessesInfo>
+ tryDelinearize(Instruction *Src, Instruction *Dst,
+ SmallVectorImpl<Subscript> &Pair);
/// Tries to delinearize \p Src and \p Dst access functions for a fixed size
/// multi-dimensional array. Calls tryDelinearizeFixedSizeImpl() to
/// delinearize \p Src and \p Dst separately,
- bool tryDelinearizeFixedSize(Instruction *Src, Instruction *Dst,
- const SCEV *SrcAccessFn, const SCEV *DstAccessFn,
- SmallVectorImpl<const SCEV *> &SrcSubscripts,
- SmallVectorImpl<const SCEV *> &DstSubscripts);
+ std::optional<DelinearizedAccessesInfo>
+ tryDelinearizeFixedSize(Instruction *Src, Instruction *Dst,
+ const SCEV *SrcAccessFn, const SCEV *DstAccessFn);
/// Tries to delinearize access function for a multi-dimensional array with
/// symbolic runtime sizes.
- /// Returns true upon success and false otherwise.
- bool
+ /// Returns the delinearized result upon success and std::nullopt otherwise.
+ std::optional<DelinearizedAccessesInfo>
tryDelinearizeParametricSize(Instruction *Src, Instruction *Dst,
- const SCEV *SrcAccessFn, const SCEV *DstAccessFn,
- SmallVectorImpl<const SCEV *> &SrcSubscripts,
- SmallVectorImpl<const SCEV *> &DstSubscripts);
+ const SCEV *SrcAccessFn,
+ const SCEV *DstAccessFn);
/// checkSubscript - Helper function for checkSrcSubscript and
/// checkDstSubscript to avoid duplicate code
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index da86a8d2cc9c0..48885617c0849 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -121,6 +121,11 @@ static cl::opt<unsigned> MIVMaxLevelThreshold(
cl::desc("Maximum depth allowed for the recursive algorithm used to "
"explore MIV direction vectors."));
+static cl::opt<bool> DumpDelinearizationResult(
+ "da-dump-delinearization-result", cl::init(false), cl::Hidden,
+ cl::desc("When printing analysis, dump the result of delinearization along "
+ "with dependence."));
+
//===----------------------------------------------------------------------===//
// basics
@@ -183,11 +188,19 @@ static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA,
for (inst_iterator DstI = SrcI, DstE = inst_end(F); DstI != DstE;
++DstI) {
if (DstI->mayReadOrWriteMemory()) {
+ DependenceInfo::DelinearizedAccessesInfo DAI;
+ std::unique_ptr<Dependence> D = DA->depends(
+ &*SrcI, &*DstI, /*UnderRuntimeAssumptions=*/true, &DAI);
+
OS << "Src:" << *SrcI << " --> Dst:" << *DstI << "\n";
- OS << " da analyze - ";
- if (auto D = DA->depends(&*SrcI, &*DstI,
- /*UnderRuntimeAssumptions=*/true)) {
+ // Dump the delinearization result if available.
+ if (DumpDelinearizationResult && !DAI.Sizes.empty())
+ DAI.print(OS, 4);
+
+ // Dump the dependence result.
+ OS << " da analyze - ";
+ if (D) {
#ifndef NDEBUG
// Verify that the distance being zero is equivalent to the
// direction being EQ.
@@ -200,7 +213,6 @@ static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA,
"Inconsistent distance and direction.");
}
#endif
-
// Normalize negative direction vectors if required by clients.
if (NormalizeResults && D->normalize(&SE))
OS << "normalized - ";
@@ -489,6 +501,46 @@ LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const {
}
#endif
+void DependenceInfo::DelinearizedAccessesInfo::print(raw_ostream &OS,
+ unsigned Depth) const {
+ assert(SrcSubscripts.size() == DstSubscripts.size() &&
+ "Mismatched number of subscripts");
+
+ // Currently tryDelinearizeParametricSize appends the element size to Sizes,
+ // while tryDelinearizeFixedSize does not. Thus the length of Sizes differs
+ // depending on which function processed the delinearization.
+ // TODO: This inconsistency will be fixed after removing GEP-type driven
+ // delinearization.
+ assert((SrcSubscripts.size() == Sizes.size() ||
+ SrcSubscripts.size() == Sizes.size() + 1) &&
+ "Mismatched number of subscripts and sizes");
+
+ if (SrcSubscripts.empty()) {
+ OS.indent(Depth) << "Delinearization failed\n";
+ return;
+ }
+
+ OS.indent(Depth) << "Delinearized accesses:\n";
+
+ OS.indent(Depth + 2) << "Sizes[*]";
+ // If the length of Sizes is the same as that of SrcSubscripts, the last
+ // element of Sizes is the element size. It's not particularly useful to
+ // print it out here.
+ for (unsigned I = 0; I < SrcSubscripts.size() - 1; I++)
+ OS << "[" << *Sizes[I] << "]";
+ OS << "\n";
+
+ OS.indent(Depth + 2) << "Src";
+ for (const SCEV *S : SrcSubscripts)
+ OS << "[" << *S << "]";
+ OS << "\n";
+
+ OS.indent(Depth + 2) << "Dst";
+ for (const SCEV *S : DstSubscripts)
+ OS << "[" << *S << "]";
+ OS << "\n";
+}
+
// Updates X with the intersection
// of the Constraints X and Y. Returns true if X has changed.
// Corresponds to Figure 4 from the paper
@@ -3312,8 +3364,9 @@ void DependenceInfo::updateDirection(Dependence::DVEntry &Level,
/// source and destination array references are recurrences on a nested loop,
/// this function flattens the nested recurrences into separate recurrences
/// for each loop level.
-bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
- SmallVectorImpl<Subscript> &Pair) {
+std::optional<DependenceInfo::DelinearizedAccessesInfo>
+DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
+ SmallVectorImpl<Subscript> &Pair) {
assert(isLoadOrStore(Src) && "instruction is not load or store");
assert(isLoadOrStore(Dst) && "instruction is not load or store");
Value *SrcPtr = getLoadStorePointerOperand(Src);
@@ -3328,28 +3381,27 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
if (!SrcBase || !DstBase || SrcBase != DstBase)
- return false;
+ return std::nullopt;
- SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
-
- if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn,
- SrcSubscripts, DstSubscripts) &&
- !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn,
- SrcSubscripts, DstSubscripts))
- return false;
+ std::optional<DelinearizedAccessesInfo> DAI =
+ tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn);
+ if (!DAI)
+ DAI = tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn);
+ if (!DAI)
+ return std::nullopt;
assert(isLoopInvariant(SrcBase, SrcLoop) &&
isLoopInvariant(DstBase, DstLoop) &&
"Expected SrcBase and DstBase to be loop invariant");
- int Size = SrcSubscripts.size();
+ int Size = DAI->SrcSubscripts.size();
LLVM_DEBUG({
dbgs() << "\nSrcSubscripts: ";
for (int I = 0; I < Size; I++)
- dbgs() << *SrcSubscripts[I];
+ dbgs() << *DAI->SrcSubscripts[I];
dbgs() << "\nDstSubscripts: ";
for (int I = 0; I < Size; I++)
- dbgs() << *DstSubscripts[I];
+ dbgs() << *DAI->DstSubscripts[I];
});
// The delinearization transforms a single-subscript MIV dependence test into
@@ -3358,21 +3410,21 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
// has found, and then initialize the pairs following the delinearization.
Pair.resize(Size);
for (int I = 0; I < Size; ++I) {
- Pair[I].Src = SrcSubscripts[I];
- Pair[I].Dst = DstSubscripts[I];
+ Pair[I].Src = DAI->SrcSubscripts[I];
+ Pair[I].Dst = DAI->DstSubscripts[I];
unifySubscriptType(&Pair[I]);
}
- return true;
+ return DAI;
}
/// Try to delinearize \p SrcAccessFn and \p DstAccessFn if the underlying
-/// arrays accessed are fixed-size arrays. Return true if delinearization was
+/// arrays accessed are fixed-size arrays. Return delinearized result if
/// successful.
-bool DependenceInfo::tryDelinearizeFixedSize(
- Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
- const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
- SmallVectorImpl<const SCEV *> &DstSubscripts) {
+std::optional<DependenceInfo::DelinearizedAccessesInfo>
+DependenceInfo::tryDelinearizeFixedSize(Instruction *Src, Instruction *Dst,
+ const SCEV *SrcAccessFn,
+ const SCEV *DstAccessFn) {
LLVM_DEBUG({
const SCEVUnknown *SrcBase =
dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
@@ -3382,24 +3434,23 @@ bool DependenceInfo::tryDelinearizeFixedSize(
"expected src and dst scev unknowns to be equal");
});
+ DelinearizedAccessesInfo DAI;
SmallVector<int, 4> SrcSizes;
SmallVector<int, 4> DstSizes;
- if (!tryDelinearizeFixedSizeImpl(SE, Src, SrcAccessFn, SrcSubscripts,
+ if (!tryDelinearizeFixedSizeImpl(SE, Src, SrcAccessFn, DAI.SrcSubscripts,
SrcSizes) ||
- !tryDelinearizeFixedSizeImpl(SE, Dst, DstAccessFn, DstSubscripts,
- DstSizes))
- return false;
+ !tryDelinearizeFixedSizeImpl(SE, Dst, DstAccessFn, DAI.DstSubscripts,
+ DstSizes)) {
+ return std::nullopt;
+ }
// Check that the two size arrays are non-empty and equal in length and
// value.
if (SrcSizes.size() != DstSizes.size() ||
- !std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin())) {
- SrcSubscripts.clear();
- DstSubscripts.clear();
- return false;
- }
+ !std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin()))
+ return std::nullopt;
- assert(SrcSubscripts.size() == DstSubscripts.size() &&
+ assert(DAI.SrcSubscripts.size() == DAI.DstSubscripts.size() &&
"Expected equal number of entries in the list of SrcSubscripts and "
"DstSubscripts.");
@@ -3412,6 +3463,9 @@ bool DependenceInfo::tryDelinearizeFixedSize(
// impossible to verify this at compile-time. As such we can only delinearize
// iff the subscripts are positive and are less than the range of the
// dimension.
+ // TODO: It might be better to consolidate these checks with those in
+ // tryDelinearizeParametricSize and move them into a member function of
+ // DelinearizedAccessesInfo.
if (!DisableDelinearizationChecks) {
auto AllIndicesInRange = [&](SmallVector<int, 4> &DimensionSizes,
SmallVectorImpl<const SCEV *> &Subscripts,
@@ -3442,26 +3496,30 @@ bool DependenceInfo::tryDelinearizeFixedSize(
return true;
};
- if (!AllIndicesInRange(SrcSizes, SrcSubscripts, SrcPtr) ||
- !AllIndicesInRange(DstSizes, DstSubscripts, DstPtr)) {
+ if (!AllIndicesInRange(SrcSizes, DAI.SrcSubscripts, SrcPtr) ||
+ !AllIndicesInRange(DstSizes, DAI.DstSubscripts, DstPtr)) {
LLVM_DEBUG(dbgs() << "Check failed: AllIndicesInRange.\n");
- SrcSubscripts.clear();
- DstSubscripts.clear();
- return false;
+ return std::nullopt;
}
}
+
+ for (int Size : SrcSizes) {
+ assert(Size > 0 && "Expected array dimension size to be positive.");
+ DAI.Sizes.push_back(SE->getConstant(SrcAccessFn->getType(), Size));
+ }
+
LLVM_DEBUG({
dbgs() << "Delinearized subscripts of fixed-size array\n"
<< "SrcGEP:" << *SrcPtr << "\n"
<< "DstGEP:" << *DstPtr << "\n";
});
- return true;
+ return DAI;
}
-bool DependenceInfo::tryDelinearizeParametricSize(
- Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
- const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
- SmallVectorImpl<const SCEV *> &DstSubscripts) {
+std::optional<DependenceInfo::DelinearizedAccessesInfo>
+DependenceInfo::tryDelinearizeParametricSize(Instruction *Src, Instruction *Dst,
+ const SCEV *SrcAccessFn,
+ const SCEV *DstAccessFn) {
Value *SrcPtr = getLoadStorePointerOperand(Src);
Value *DstPtr = getLoadStorePointerOperand(Dst);
@@ -3474,7 +3532,7 @@ bool DependenceInfo::tryDelinearizeParametricSize(
const SCEV *ElementSize = SE->getElementSize(Src);
if (ElementSize != SE->getElementSize(Dst))
- return false;
+ return std::nullopt;
const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase);
const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase);
@@ -3482,7 +3540,9 @@ bool DependenceInfo::tryDelinearizeParametricSize(
const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV);
const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV);
if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine())
- return false;
+ return std::nullopt;
+
+ DelinearizedAccessesInfo DAI;
// First step: collect parametric terms in both array references.
SmallVector<const SCEV *, 4> Terms;
@@ -3490,19 +3550,18 @@ bool DependenceInfo::tryDelinearizeParametricSize(
collectParametricTerms(*SE, DstAR, Terms);
// Second step: find subscript sizes.
- SmallVector<const SCEV *, 4> Sizes;
- findArrayDimensions(*SE, Terms, Sizes, ElementSize);
+ findArrayDimensions(*SE, Terms, DAI.Sizes, ElementSize);
// Third step: compute the access functions for each subscript.
- computeAccessFunctions(*SE, SrcAR, SrcSubscripts, Sizes);
- computeAccessFunctions(*SE, DstAR, DstSubscripts, Sizes);
+ computeAccessFunctions(*SE, SrcAR, DAI.SrcSubscripts, DAI.Sizes);
+ computeAccessFunctions(*SE, DstAR, DAI.DstSubscripts, DAI.Sizes);
// Fail when there is only a subscript: that's a linearized access function.
- if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 ||
- SrcSubscripts.size() != DstSubscripts.size())
- return false;
+ if (DAI.SrcSubscripts.size() < 2 || DAI.DstSubscripts.size() < 2 ||
+ DAI.SrcSubscripts.size() != DAI.DstSubscripts.size())
+ return std::nullopt;
- size_t Size = SrcSubscripts.size();
+ size_t Size = DAI.SrcSubscripts.size();
// Statically check that the array bounds are in-range. The first subscript we
// don't have a size for and it cannot overflow into another subscript, so is
@@ -3512,30 +3571,30 @@ bool DependenceInfo::tryDelinearizeParametricSize(
// to the dependency checks.
if (!DisableDelinearizationChecks)
for (size_t I = 1; I < Size; ++I) {
- bool SNN = isKnownNonNegative(SrcSubscripts[I], SrcPtr);
- bool DNN = isKnownNonNegative(DstSubscripts[I], DstPtr);
- bool SLT = isKnownLessThan(SrcSubscripts[I], Sizes[I - 1]);
- bool DLT = isKnownLessThan(DstSubscripts[I], Sizes[I - 1]);
+ bool SNN = isKnownNonNegative(DAI.SrcSubscripts[I], SrcPtr);
+ bool DNN = isKnownNonNegative(DAI.DstSubscripts[I], DstPtr);
+ bool SLT = isKnownLessThan(DAI.SrcSubscripts[I], DAI.Sizes[I - 1]);
+ bool DLT = isKnownLessThan(DAI.DstSubscripts[I], DAI.Sizes[I - 1]);
if (SNN && DNN && SLT && DLT)
continue;
LLVM_DEBUG({
dbgs() << "Delinearization checks failed: can't prove the following\n";
if (!SNN)
- dbgs() << " isKnownNonNegative(" << *SrcSubscripts[I] << ")\n";
+ dbgs() << " isKnownNonNegative(" << *DAI.SrcSubscripts[I] << ")\n";
if (!DNN)
- dbgs() << " isKnownNonNegative(" << *DstSubscripts[I] << ")\n";
+ dbgs() << " isKnownNonNegative(" << *DAI.DstSubscripts[I] << ")\n";
if (!SLT)
- dbgs() << " isKnownLessThan(" << *SrcSubscripts[I] << ", "
- << *Sizes[I - 1] << ")\n";
+ dbgs() << " isKnownLessThan(" << *DAI.SrcSubscripts[I] << ", "
+ << *DAI.Sizes[I - 1] << ")\n";
if (!DLT)
- dbgs() << " isKnownLessThan(" << *DstSubscripts[I] << ", "
- << *Sizes[I - 1] << ")\n";
+ dbgs() << " isKnownLessThan(" << *DAI.DstSubscripts[I] << ", "
+ << *DAI.Sizes[I - 1] << ")\n";
});
- return false;
+ return std::nullopt;
}
- return true;
+ return DAI;
}
//===----------------------------------------------------------------------===//
@@ -3583,7 +3642,8 @@ SCEVUnionPredicate DependenceInfo::getRuntimeAssumptions() const {
// up to date with respect to this routine.
std::unique_ptr<Dependence>
DependenceInfo::depends(Instruction *Src, Instruction *Dst,
- bool UnderRuntimeAssumptions) {
+ bool UnderRuntimeAssumptions,
+ DelinearizedAccessesInfo *RecordDelinearization) {
SmallVector<const SCEVPredicate *, 4> Assume;
bool PossiblyLoopIndependent = true;
if (Src == Dst)
@@ -3702,9 +3762,13 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
Pair[0].Dst = DstSCEV;
if (Delinearize) {
- if (tryDelinearize(Src, Dst, Pair)) {
+ std::optional<DelinearizedAccessesInfo> DAI =
+ tryDelinearize(Src, Dst, Pair);
+ if (DAI) {
LLVM_DEBUG(dbgs() << " delinearized\n");
Pairs = Pair.size();
+ if (RecordDelinearization)
+ *RecordDelinearization = *DAI;
}
}
diff --git a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll
index 8f94a455d3724..e53977dd0b9e8 100644
--- a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll
@@ -1,10 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
-; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 \
-; RUN: | FileCheck %s
+; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa \
+; RUN: -da-dump-delinearization-result 2>&1 | FileCheck %s
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8m.main-arm-none-eabi"
+; FIXME: It seems that we cannot prove that %m * %o doesn't signed wrap in
+; almost all cases. If it wraps, the delinearization result should be
+; discarded; otherwise the result can be incorrect.
+
;; for (int i = 0; i < n; i++)
;; for (int j = 0; j < m; j++)
;; for (int k = 0; k < o; k++)
@@ -13,10 +17,22 @@ target triple = "thumbv8m.main-arm-none-eabi"
define void @t1(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
; CHECK-LABEL: 't1'
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4
+; CHECK-NEXT: Delinearized accesses:
+; CHECK-NEXT: Sizes[*][%m][%o]
+; CHECK-NEXT: Src[{0,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
+; CHECK-NEXT: Dst[{0,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx, align 4
+; CHECK-NEXT: Delinearized accesses:
+; CHECK-NEXT: Sizes[*][%m][%o]
+; CHECK-NEXT: Src[{0,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
+; CHECK-NEXT: Dst[{0,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
; CHECK-NEXT: da analyze - consistent anti [0 0 0|<]!
; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx, align 4
+; CHECK-NEXT: Delinearized accesses:
+; CHECK-NEXT: Sizes[*][%m][%o]
+; CHECK-NEXT: Src[{0,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
+; CHECK-NEXT: Dst[{0,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
; CHECK-NEXT: da analyze - none!
;
entry:
@@ -78,6 +94,10 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3,
define void @t2(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
; CHECK-LABEL: 't2'
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4
+; CHECK-NEXT: Delinearized accesses:
+; CHECK-NEXT: Sizes[*][%m][%o]
+; CHECK-NEXT: Src[{0,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
+; CHECK-NEXT: Dst[{0,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
; CHECK-NEXT: da analyze - anti [* * *|<]!
@@ -145,6 +165,10 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3,
define void @t3(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
; CHECK-LABEL: 't3'
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4
+; CHECK-NEXT: Delinearized accesses:
+; CHECK-NEXT: Sizes[*][%m][%o]
+; CHECK-NEXT: Src[{0,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
+; CHECK-NEXT: Dst[{0,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
; CHECK-NEXT: da analyze - anti [* * *|<]!
@@ -212,6 +236,10 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3,
define void @t4(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
; CHECK-LABEL: 't4'
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4
+; CHECK-NEXT: Delinearized accesses:
+; CHECK-NEXT: Sizes[*][%m][%o]
+; CHECK-NEXT: Src[{0,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
+; CHECK-NEXT: Dst[{0,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
; CHECK-NEXT: da analyze - anti [* * *|<]!
@@ -279,6 +307,10 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3,
define void @t5(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
; CHECK-LABEL: 't5'
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4
+; CHECK-NEXT: Delinearized accesses:
+; CHECK-NEXT: Sizes[*][%m][%o]
+; CHECK-NEXT: Src[{0,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
+; CHECK-NEXT: Dst[{0,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
; CHECK-NEXT: da analyze - anti [* * *|<]!
@@ -346,10 +378,22 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3,
define void @t6(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
; CHECK-LABEL: 't6'
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4
+; CHECK-NEXT: Delinearized accesses:
+; CHECK-NEXT: Sizes[*][%m][%o]
+; CHECK-NEXT: Src[{0,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
+; CHECK-NEXT: Dst[{0,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
+; CHECK-NEXT: Delinearized accesses:
+; CHECK-NEXT: Sizes[*][%m][%o]
+; CHECK-NEXT: Src[{0,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
+; CHECK-NEXT: Dst[{1,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
; CHECK-NEXT: da analyze - consistent anti [-1 0 0]!
; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
+; CHECK-NEXT: Delinearized accesses:
+; CHECK-NEXT: Sizes[*][%m][%o]
+; CHECK-NEXT: Src[{1,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
+; CHECK-NEXT: Dst[{1,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
; CHECK-NEXT: da analyze - none!
;
entry:
@@ -414,10 +458,22 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3,
define void @t7(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
; CHECK-LABEL: 't7'
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4
+; CHECK-NEXT: Delinearized accesses:
+; CHECK-NEXT: Sizes[*][%m][%o]
+; CHECK-NEXT: Src[{0,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
+; CHECK-NEXT: Dst[{0,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
+; CHECK-NEXT: Delinearized accesses:
+; CHECK-NEXT: Sizes[*][%m][%o]
+; CHECK-NEXT: Src[{0,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
+; CHECK-NEXT: Dst[{-1,+,1}<%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
; CHECK-NEXT: da analyze - consistent anti [1 0 0]!
; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
+; CHECK-NEXT: Delinearized accesses:
+; CHECK-NEXT: Sizes[*][%m][%o]
+; CHECK-NEXT: Src[{-1,+,1}<%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
+; CHECK-NEXT: Dst[{-1,+,1}<%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
; CHECK-NEXT: da analyze - none!
;
entry:
@@ -482,10 +538,22 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3,
define void @t8(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
; CHECK-LABEL: 't8'
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4
+; CHECK-NEXT: Delinearized accesses:
+; CHECK-NEXT: Sizes[*][%m][%o]
+; CHECK-NEXT: Src[{0,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{1,+,1}<nuw><nsw><%for.body8>]
+; CHECK-NEXT: Dst[{0,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{1,+,1}<nuw><nsw><%for.body8>]
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
+; CHECK-NEXT: Delinearized accesses:
+; CHECK-NEXT: Sizes[*][%m][%o]
+; CHECK-NEXT: Src[{0,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{1,+,1}<nuw><nsw><%for.body8>]
+; CHECK-NEXT: Dst[{0,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
; CHECK-NEXT: da analyze - consistent anti [0 0 1]!
; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
+; CHECK-NEXT: Delinearized accesses:
+; CHECK-NEXT: Sizes[*][%m][%o]
+; CHECK-NEXT: Src[{0,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
+; CHECK-NEXT: Dst[{0,+,1}<nuw><nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.cond5.preheader>][{0,+,1}<nuw><nsw><%for.body8>]
; CHECK-NEXT: da analyze - none!
;
entry:
@@ -709,3 +777,83 @@ loop:
exit:
ret void
}
+
+; for (int i = 0; i < 5; i++)
+; for (int j = 0; j < n; j++)
+; for (int k = 0; k < m; k++)
+; if (j < 5 && k < 5)
+; a[i*n*m + j*m + k] = 0;
+;
+; FIXME: The memory access would be delinearized as follows:
+;
+; - Sizes: [*][%n][%m]
+; - Subscripts: [{0,+,1}<%loop.i>][{0,+,1}<%loop.j>][{0,+,1}<%loop.k>]
+;
+; The subsequent analysis assumes that different subscript values map to
+; different memory locations (i.e., bijective). This is not true in this case,
+; which leads to incorrect analysis results. For example, there are cases like the
+; following:
+;
+; (n, m) | i*n*m + j*m + k
+; -----------------|---------------------------------------------
+; (2^16, 2^16) | i*n*m is always 0
+; (2^16 - 1, 2^16) | n*m = -m, hence i*n*m + j*m is 0 when i = j
+;
+define void @prod_of_sizes_can_wrap(ptr nocapture %a, i32 %n, i32 %m) {
+; CHECK-LABEL: 'prod_of_sizes_can_wrap'
+; CHECK-NEXT: Src: store i8 0, ptr %gep, align 1 --> Dst: store i8 0, ptr %gep, align 1
+; CHECK-NEXT: Delinearized accesses:
+; CHECK-NEXT: Sizes[*][%n][%m]
+; CHECK-NEXT: Src[{0,+,1}<nuw><nsw><%loop.i.header>][{0,+,1}<nuw><nsw><%loop.j.header>][{0,+,1}<nuw><nsw><%loop.k.header>]
+; CHECK-NEXT: Dst[{0,+,1}<nuw><nsw><%loop.i.header>][{0,+,1}<nuw><nsw><%loop.j.header>][{0,+,1}<nuw><nsw><%loop.k.header>]
+; CHECK-NEXT: da analyze - none!
+;
+entry:
+ %cmp.n = icmp sgt i32 %n, 0
+ %cmp.m = icmp sgt i32 %m, 0
+ %guard = and i1 %cmp.n, %cmp.m
+ %nm = mul i32 %n, %m
+ br i1 %guard, label %loop.i.header, label %exit
+
+loop.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop.i.latch ]
+ br label %loop.j.header
+
+loop.j.header:
+ %j = phi i32 [ 0, %loop.i.header ], [ %j.next, %loop.j.latch ]
+ br label %loop.k.header
+
+loop.k.header:
+ %k = phi i32 [ 0, %loop.j.header ], [ %k.next, %loop.k.latch ]
+ %small.j = icmp slt i32 %j, 5
+ %small.k = icmp slt i32 %k, 5
+ %small = and i1 %small.j, %small.k
+ br i1 %small, label %if.then, label %loop.k.latch
+
+if.then:
+ %offset.i = mul nsw i32 %i, %nm
+ %offset.j = mul nsw i32 %j, %m
+ %offset.0 = add nsw i32 %offset.i, %offset.j
+ %offset = add nsw i32 %offset.0, %k
+ %gep = getelementptr inbounds i8, ptr %a, i32 %offset
+ store i8 0, ptr %gep
+ br label %loop.k.latch
+
+loop.k.latch:
+ %k.next = add nsw i32 %k, 1
+ %ec.k = icmp eq i32 %k.next, %m
+ br i1 %ec.k, label %loop.j.latch, label %loop.k.header
+
+loop.j.latch:
+ %j.next = add nsw i32 %j, 1
+ %ec.j = icmp eq i32 %j.next, %n
+ br i1 %ec.j, label %loop.i.latch, label %loop.j.header
+
+loop.i.latch:
+ %i.next = add nsw i32 %i, 1
+ %ec.i = icmp eq i32 %i.next, 5
+ br i1 %ec.i, label %exit, label %loop.i.header
+
+exit:
+ ret void
+}
More information about the llvm-commits
mailing list