[llvm] r241673 - [LAA] Merge memchecks for accesses separated by a constant offset
Reid Kleckner
rnk at google.com
Wed Jul 8 14:14:50 PDT 2015
The number-of-memchecks.ll test fails for me on Windows because the output
ordering is different. The two check groups are printed in opposite order:
"""
Printing analysis 'Loop Access Analysis' for function 'testg':
for.body:
Report: unsafe dependent memory operations in loop
Interesting Dependences:
Unknown:
store i16 %mul1, i16* %arrayidxC, align 2 ->
store i16 %mul, i16* %arrayidxC1, align 2
Run-time memory checks:
Check 0:
Comparing group 0:
%arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
Against group 2:
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64
%store_ind_inc
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
Check 1:
Comparing group 1:
%arrayidxA1 = getelementptr inbounds i16, i16* %a, i64 %add
%arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind
Against group 2:
%arrayidxC1 = getelementptr inbounds i16, i16* %c, i64
%store_ind_inc
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
"""
Can you fix the code to be deterministic, i.e. not rely on hashtable
ordering?
On Wed, Jul 8, 2015 at 2:16 AM, Silviu Baranga <silviu.baranga at arm.com>
wrote:
> Author: sbaranga
> Date: Wed Jul 8 04:16:33 2015
> New Revision: 241673
>
> URL: http://llvm.org/viewvc/llvm-project?rev=241673&view=rev
> Log:
> [LAA] Merge memchecks for accesses separated by a constant offset
>
> Summary:
> Often filter-like loops will do memory accesses that are
> separated by constant offsets. In these cases it is
> common that we will exceed the threshold for the
> allowable number of checks.
>
> However, it should be possible to merge such checks,
> since a check of any interval against two other intervals separated
> by a constant offset (a,b), (a+c, b+c) will be equivalent with
> a check against (a, b+c), as long as (a,b) and (a+c, b+c) overlap.
> Assuming the loop will be executed for a sufficient number of
> iterations, this will be true. If not true, checking against
> (a, b+c) is still safe (although not equivalent).
>
> As long as there are no dependencies between two accesses,
> we can merge their checks into a single one. We use this
> technique to construct groups of accesses, and then check
> the intervals associated with the groups instead of
> checking the accesses directly.
>
> Reviewers: anemet
>
> Subscribers: llvm-commits
>
> Differential Revision: http://reviews.llvm.org/D10386
>
> Modified:
> llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h
> llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp
> llvm/trunk/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll
> llvm/trunk/test/Analysis/LoopAccessAnalysis/resort-to-memchecks-only.ll
> llvm/trunk/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll
> llvm/trunk/test/Transforms/LoopDistribute/basic-with-memchecks.ll
>
> Modified: llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h?rev=241673&r1=241672&r2=241673&view=diff
>
> ==============================================================================
> --- llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h (original)
> +++ llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h Wed Jul 8
> 04:16:33 2015
> @@ -311,7 +311,7 @@ public:
> /// This struct holds information about the memory runtime legality
> check that
> /// a group of pointers do not overlap.
> struct RuntimePointerCheck {
> - RuntimePointerCheck() : Need(false) {}
> + RuntimePointerCheck(ScalarEvolution *SE) : Need(false), SE(SE) {}
>
> /// Reset the state of the pointer runtime information.
> void reset() {
> @@ -322,16 +322,55 @@ public:
> IsWritePtr.clear();
> DependencySetId.clear();
> AliasSetId.clear();
> + Exprs.clear();
> }
>
> /// Insert a pointer and calculate the start and end SCEVs.
> - void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr,
> - unsigned DepSetId, unsigned ASId,
> - const ValueToValueMap &Strides);
> + void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
> + unsigned ASId, const ValueToValueMap &Strides);
>
> /// \brief No run-time memory checking is necessary.
> bool empty() const { return Pointers.empty(); }
>
> + /// A grouping of pointers. A single memcheck is required between
> + /// two groups.
> + struct CheckingPtrGroup {
> + /// \brief Create a new pointer checking group containing a single
> + /// pointer, with index \p Index in RtCheck.
> + CheckingPtrGroup(unsigned Index, RuntimePointerCheck &RtCheck)
> + : RtCheck(RtCheck), High(RtCheck.Ends[Index]),
> + Low(RtCheck.Starts[Index]) {
> + Members.push_back(Index);
> + }
> +
> + /// \brief Tries to add the pointer recorded in RtCheck at index
> + /// \p Index to this pointer checking group. We can only add a
> pointer
> + /// to a checking group if we will still be able to get
> + /// the upper and lower bounds of the check. Returns true in case
> + /// of success, false otherwise.
> + bool addPointer(unsigned Index);
> +
> + /// Constitutes the context of this pointer checking group. For each
> + /// pointer that is a member of this group we will retain the index
> + /// at which it appears in RtCheck.
> + RuntimePointerCheck &RtCheck;
> + /// The SCEV expression which represents the upper bound of all the
> + /// pointers in this group.
> + const SCEV *High;
> + /// The SCEV expression which represents the lower bound of all the
> + /// pointers in this group.
> + const SCEV *Low;
> + /// Indices of all the pointers that constitute this grouping.
> + SmallVector<unsigned, 2> Members;
> + };
> +
> + /// \brief Groups pointers such that a single memcheck is required
> + /// between two different groups. This will clear the CheckingGroups
> vector
> + /// and re-compute it. We will only group dependencies if \p
> UseDependencies
> + /// is true, otherwise we will create a separate group for each
> pointer.
> + void groupChecks(MemoryDepChecker::DepCandidates &DepCands,
> + bool UseDependencies);
> +
> /// \brief Decide whether we need to issue a run-time check for
> pointer at
> /// index \p I and \p J to prove their independence.
> ///
> @@ -341,6 +380,12 @@ public:
> bool needsChecking(unsigned I, unsigned J,
> const SmallVectorImpl<int> *PtrPartition) const;
>
> + /// \brief Decide if we need to add a check between two groups of
> pointers,
> + /// according to needsChecking.
> + bool needsChecking(const CheckingPtrGroup &M,
> + const CheckingPtrGroup &N,
> + const SmallVectorImpl<int> *PtrPartition) const;
> +
> /// \brief Return true if any pointer requires run-time checking
> according
> /// to needsChecking.
> bool needsAnyChecking(const SmallVectorImpl<int> *PtrPartition) const;
> @@ -372,6 +417,12 @@ public:
> SmallVector<unsigned, 2> DependencySetId;
> /// Holds the id of the disjoint alias set to which this pointer
> belongs.
> SmallVector<unsigned, 2> AliasSetId;
> + /// Holds at position i the SCEV for the access i
> + SmallVector<const SCEV *, 2> Exprs;
> + /// Holds a partitioning of pointers into "check groups".
> + SmallVector<CheckingPtrGroup, 2> CheckingGroups;
> + /// Holds a pointer to the ScalarEvolution analysis.
> + ScalarEvolution *SE;
> };
>
> LoopAccessInfo(Loop *L, ScalarEvolution *SE, const DataLayout &DL,
>
> Modified: llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp?rev=241673&r1=241672&r2=241673&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp (original)
> +++ llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp Wed Jul 8 04:16:33 2015
> @@ -48,6 +48,13 @@ static cl::opt<unsigned, true> RuntimeMe
> cl::location(VectorizerParams::RuntimeMemoryCheckThreshold),
> cl::init(8));
> unsigned VectorizerParams::RuntimeMemoryCheckThreshold;
>
> +/// \brief The maximum iterations used to merge memory checks
> +static cl::opt<unsigned> MemoryCheckMergeThreshold(
> + "memory-check-merge-threshold", cl::Hidden,
> + cl::desc("Maximum number of comparisons done when trying to merge "
> + "runtime memory checks. (default = 100)"),
> + cl::init(100));
> +
> /// Maximum SIMD width.
> const unsigned VectorizerParams::MaxVectorWidth = 64;
>
> @@ -113,8 +120,8 @@ const SCEV *llvm::replaceSymbolicStrideS
> }
>
> void LoopAccessInfo::RuntimePointerCheck::insert(
> - ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr, unsigned
> DepSetId,
> - unsigned ASId, const ValueToValueMap &Strides) {
> + Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId,
> + const ValueToValueMap &Strides) {
> // Get the stride replaced scev.
> const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
> const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
> @@ -127,6 +134,136 @@ void LoopAccessInfo::RuntimePointerCheck
> IsWritePtr.push_back(WritePtr);
> DependencySetId.push_back(DepSetId);
> AliasSetId.push_back(ASId);
> + Exprs.push_back(Sc);
> +}
> +
> +bool LoopAccessInfo::RuntimePointerCheck::needsChecking(
> + const CheckingPtrGroup &M, const CheckingPtrGroup &N,
> + const SmallVectorImpl<int> *PtrPartition) const {
> + for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
> + for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
> + if (needsChecking(M.Members[I], N.Members[J], PtrPartition))
> + return true;
> + return false;
> +}
> +
> +/// Compare \p I and \p J and return the minimum.
> +/// Return nullptr in case we couldn't find an answer.
> +static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
> + ScalarEvolution *SE) {
> + const SCEV *Diff = SE->getMinusSCEV(J, I);
> + const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);
> +
> + if (!C)
> + return nullptr;
> + if (C->getValue()->isNegative())
> + return J;
> + return I;
> +}
> +
> +bool LoopAccessInfo::RuntimePointerCheck::CheckingPtrGroup::addPointer(
> + unsigned Index) {
> + // Compare the starts and ends with the known minimum and maximum
> + // of this set. We need to know how we compare against the min/max
> + // of the set in order to be able to emit memchecks.
> + const SCEV *Min0 = getMinFromExprs(RtCheck.Starts[Index], Low,
> RtCheck.SE);
> + if (!Min0)
> + return false;
> +
> + const SCEV *Min1 = getMinFromExprs(RtCheck.Ends[Index], High,
> RtCheck.SE);
> + if (!Min1)
> + return false;
> +
> + // Update the low bound expression if we've found a new min value.
> + if (Min0 == RtCheck.Starts[Index])
> + Low = RtCheck.Starts[Index];
> +
> + // Update the high bound expression if we've found a new max value.
> + if (Min1 != RtCheck.Ends[Index])
> + High = RtCheck.Ends[Index];
> +
> + Members.push_back(Index);
> + return true;
> +}
> +
> +void LoopAccessInfo::RuntimePointerCheck::groupChecks(
> + MemoryDepChecker::DepCandidates &DepCands,
> + bool UseDependencies) {
> + // We build the groups from dependency candidates equivalence classes
> + // because:
> + // - We know that pointers in the same equivalence class share
> + // the same underlying object and therefore there is a chance
> + // that we can compare pointers
> + // - We wouldn't be able to merge two pointers for which we need
> + // to emit a memcheck. The classes in DepCands are already
> + // conveniently built such that no two pointers in the same
> + // class need checking against each other.
> +
> + // We use the following (greedy) algorithm to construct the groups
> + // For every pointer in the equivalence class:
> + // For each existing group:
> + // - if the difference between this pointer and the min/max bounds
> + // of the group is a constant, then make the pointer part of the
> + // group and update the min/max bounds of that group as required.
> +
> + CheckingGroups.clear();
> +
> + // If we don't have the dependency partitions, construct a new
> + // checking pointer group for each pointer.
> + if (!UseDependencies) {
> + for (unsigned I = 0; I < Pointers.size(); ++I)
> + CheckingGroups.push_back(CheckingPtrGroup(I, *this));
> + return;
> + }
> +
> + unsigned TotalComparisons = 0;
> +
> + DenseMap<Value *, unsigned> PositionMap;
> + for (unsigned Pointer = 0; Pointer < Pointers.size(); ++Pointer)
> + PositionMap[Pointers[Pointer]] = Pointer;
> +
> + // Go through all equivalence classes, get the "pointer check
> groups"
> + // and add them to the overall solution.
> + for (auto DI = DepCands.begin(), DE = DepCands.end(); DI != DE; ++DI) {
> + if (!DI->isLeader())
> + continue;
> +
> + SmallVector<CheckingPtrGroup, 2> Groups;
> +
> + for (auto MI = DepCands.member_begin(DI), ME = DepCands.member_end();
> + MI != ME; ++MI) {
> + unsigned Pointer = PositionMap[MI->getPointer()];
> + bool Merged = false;
> +
> + // Go through all the existing sets and see if we can find one
> + // which can include this pointer.
> + for (CheckingPtrGroup &Group : Groups) {
> + // Don't perform more than a certain amount of comparisons.
> + // This should limit the cost of grouping the pointers to
> something
> + // reasonable. If we do end up hitting this threshold, the
> algorithm
> + // will create separate groups for all remaining pointers.
> + if (TotalComparisons > MemoryCheckMergeThreshold)
> + break;
> +
> + TotalComparisons++;
> +
> + if (Group.addPointer(Pointer)) {
> + Merged = true;
> + break;
> + }
> + }
> +
> + if (!Merged)
> + // We couldn't add this pointer to any existing set or the
> threshold
> + // for the number of comparisons has been reached. Create a new
> group
> + // to hold the current pointer.
> + Groups.push_back(CheckingPtrGroup(Pointer, *this));
> + }
> +
> + // We've computed the grouped checks for this partition.
> + // Save the results and continue with the next one.
> + std::copy(Groups.begin(), Groups.end(),
> std::back_inserter(CheckingGroups));
> + }
> }
>
> bool LoopAccessInfo::RuntimePointerCheck::needsChecking(
> @@ -156,42 +293,71 @@ bool LoopAccessInfo::RuntimePointerCheck
> void LoopAccessInfo::RuntimePointerCheck::print(
> raw_ostream &OS, unsigned Depth,
> const SmallVectorImpl<int> *PtrPartition) const {
> - unsigned NumPointers = Pointers.size();
> - if (NumPointers == 0)
> - return;
>
> OS.indent(Depth) << "Run-time memory checks:\n";
> +
> unsigned N = 0;
> - for (unsigned I = 0; I < NumPointers; ++I)
> - for (unsigned J = I + 1; J < NumPointers; ++J)
> - if (needsChecking(I, J, PtrPartition)) {
> - OS.indent(Depth) << N++ << ":\n";
> - OS.indent(Depth + 2) << *Pointers[I];
> - if (PtrPartition)
> - OS << " (Partition: " << (*PtrPartition)[I] << ")";
> - OS << "\n";
> - OS.indent(Depth + 2) << *Pointers[J];
> - if (PtrPartition)
> - OS << " (Partition: " << (*PtrPartition)[J] << ")";
> - OS << "\n";
> + for (unsigned I = 0; I < CheckingGroups.size(); ++I)
> + for (unsigned J = I + 1; J < CheckingGroups.size(); ++J)
> + if (needsChecking(CheckingGroups[I], CheckingGroups[J],
> PtrPartition)) {
> + OS.indent(Depth) << "Check " << N++ << ":\n";
> + OS.indent(Depth + 2) << "Comparing group " << I << ":\n";
> +
> + for (unsigned K = 0; K < CheckingGroups[I].Members.size(); ++K) {
> + OS.indent(Depth + 2) << *Pointers[CheckingGroups[I].Members[K]]
> + << "\n";
> + if (PtrPartition)
> + OS << " (Partition: "
> + << (*PtrPartition)[CheckingGroups[I].Members[K]] << ")"
> + << "\n";
> + }
> +
> + OS.indent(Depth + 2) << "Against group " << J << ":\n";
> +
> + for (unsigned K = 0; K < CheckingGroups[J].Members.size(); ++K) {
> + OS.indent(Depth + 2) << *Pointers[CheckingGroups[J].Members[K]]
> + << "\n";
> + if (PtrPartition)
> + OS << " (Partition: "
> + << (*PtrPartition)[CheckingGroups[J].Members[K]] << ")"
> + << "\n";
> + }
> }
> +
> + OS.indent(Depth) << "Grouped accesses:\n";
> + for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
> + OS.indent(Depth + 2) << "Group " << I << ":\n";
> + OS.indent(Depth + 4) << "(Low: " << *CheckingGroups[I].Low
> + << " High: " << *CheckingGroups[I].High << ")\n";
> + for (unsigned J = 0; J < CheckingGroups[I].Members.size(); ++J) {
> + OS.indent(Depth + 6) << "Member: " <<
> *Exprs[CheckingGroups[I].Members[J]]
> + << "\n";
> + }
> + }
> }
>
> unsigned LoopAccessInfo::RuntimePointerCheck::getNumberOfChecks(
> const SmallVectorImpl<int> *PtrPartition) const {
> - unsigned NumPointers = Pointers.size();
> +
> + unsigned NumPartitions = CheckingGroups.size();
> unsigned CheckCount = 0;
>
> - for (unsigned I = 0; I < NumPointers; ++I)
> - for (unsigned J = I + 1; J < NumPointers; ++J)
> - if (needsChecking(I, J, PtrPartition))
> + for (unsigned I = 0; I < NumPartitions; ++I)
> + for (unsigned J = I + 1; J < NumPartitions; ++J)
> + if (needsChecking(CheckingGroups[I], CheckingGroups[J],
> PtrPartition))
> CheckCount++;
> return CheckCount;
> }
>
> bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking(
> const SmallVectorImpl<int> *PtrPartition) const {
> - return getNumberOfChecks(PtrPartition) != 0;
> + unsigned NumPointers = Pointers.size();
> +
> + for (unsigned I = 0; I < NumPointers; ++I)
> + for (unsigned J = I + 1; J < NumPointers; ++J)
> + if (needsChecking(I, J, PtrPartition))
> + return true;
> + return false;
> }
>
> namespace {
> @@ -341,7 +507,7 @@ bool AccessAnalysis::canCheckPtrAtRT(
> // Each access has its own dependence set.
> DepId = RunningDepId++;
>
> - RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId,
> StridesMap);
> + RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
>
> DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr <<
> '\n');
> } else {
> @@ -387,6 +553,9 @@ bool AccessAnalysis::canCheckPtrAtRT(
> }
> }
>
> + if (NeedRTCheck && CanDoRT)
> + RtCheck.groupChecks(DepCands, IsDepCheckNeeded);
> +
> return CanDoRT;
> }
>
> @@ -1360,32 +1529,35 @@ std::pair<Instruction *, Instruction *>
> if (!PtrRtCheck.Need)
> return std::make_pair(nullptr, nullptr);
>
> - unsigned NumPointers = PtrRtCheck.Pointers.size();
> - SmallVector<TrackingVH<Value> , 2> Starts;
> - SmallVector<TrackingVH<Value> , 2> Ends;
> + SmallVector<TrackingVH<Value>, 2> Starts;
> + SmallVector<TrackingVH<Value>, 2> Ends;
>
> LLVMContext &Ctx = Loc->getContext();
> SCEVExpander Exp(*SE, DL, "induction");
> Instruction *FirstInst = nullptr;
>
> - for (unsigned i = 0; i < NumPointers; ++i) {
> - Value *Ptr = PtrRtCheck.Pointers[i];
> + for (unsigned i = 0; i < PtrRtCheck.CheckingGroups.size(); ++i) {
> + const RuntimePointerCheck::CheckingPtrGroup &CG =
> + PtrRtCheck.CheckingGroups[i];
> + Value *Ptr = PtrRtCheck.Pointers[CG.Members[0]];
> const SCEV *Sc = SE->getSCEV(Ptr);
>
> if (SE->isLoopInvariant(Sc, TheLoop)) {
> - DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" <<
> - *Ptr <<"\n");
> + DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" <<
> *Ptr
> + << "\n");
> Starts.push_back(Ptr);
> Ends.push_back(Ptr);
> } else {
> - DEBUG(dbgs() << "LAA: Adding RT check for range:" << *Ptr << '\n');
> unsigned AS = Ptr->getType()->getPointerAddressSpace();
>
> // Use this type for pointer arithmetic.
> Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
> + Value *Start = nullptr, *End = nullptr;
>
> - Value *Start = Exp.expandCodeFor(PtrRtCheck.Starts[i], PtrArithTy,
> Loc);
> - Value *End = Exp.expandCodeFor(PtrRtCheck.Ends[i], PtrArithTy, Loc);
> + DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
> + Start = Exp.expandCodeFor(CG.Low, PtrArithTy, Loc);
> + End = Exp.expandCodeFor(CG.High, PtrArithTy, Loc);
> + DEBUG(dbgs() << "Start: " << *CG.Low << " End: " << *CG.High <<
> "\n");
> Starts.push_back(Start);
> Ends.push_back(End);
> }
> @@ -1394,9 +1566,14 @@ std::pair<Instruction *, Instruction *>
> IRBuilder<> ChkBuilder(Loc);
> // Our instructions might fold to a constant.
> Value *MemoryRuntimeCheck = nullptr;
> - for (unsigned i = 0; i < NumPointers; ++i) {
> - for (unsigned j = i+1; j < NumPointers; ++j) {
> - if (!PtrRtCheck.needsChecking(i, j, PtrPartition))
> + for (unsigned i = 0; i < PtrRtCheck.CheckingGroups.size(); ++i) {
> + for (unsigned j = i + 1; j < PtrRtCheck.CheckingGroups.size(); ++j) {
> + const RuntimePointerCheck::CheckingPtrGroup &CGI =
> + PtrRtCheck.CheckingGroups[i];
> + const RuntimePointerCheck::CheckingPtrGroup &CGJ =
> + PtrRtCheck.CheckingGroups[j];
> +
> + if (!PtrRtCheck.needsChecking(CGI, CGJ, PtrPartition))
> continue;
>
> unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
> @@ -1447,8 +1624,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L,
> const TargetLibraryInfo *TLI,
> AliasAnalysis *AA,
> DominatorTree *DT, LoopInfo *LI,
> const ValueToValueMap &Strides)
> - : DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL),
> - TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
> + : PtrRtCheck(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL),
> TLI(TLI),
> + AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
> MaxSafeDepDistBytes(-1U), CanVecMem(false),
> StoreToLoopInvariantAddress(false) {
> if (canAnalyzeLoop())
>
> Modified:
> llvm/trunk/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll?rev=241673&r1=241672&r2=241673&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll
> (original)
> +++ llvm/trunk/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll Wed
> Jul 8 04:16:33 2015
> @@ -1,19 +1,20 @@
> ; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
>
> -; 3 reads and 3 writes should need 12 memchecks
> -
> target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
> target triple = "aarch64--linux-gnueabi"
>
> +; 3 reads and 3 writes should need 12 memchecks
> +; CHECK: function 'testf':
> ; CHECK: Memory dependences are safe with run-time checks
> -; Memory dependecies have labels starting from 0, so in
> +
> +; Memory dependencies have labels starting from 0, so in
> ; order to verify that we have n checks, we look for
> ; (n-1): and not n:.
>
> ; CHECK: Run-time memory checks:
> -; CHECK-NEXT: 0:
> -; CHECK: 11:
> -; CHECK-NOT: 12:
> +; CHECK-NEXT: Check 0:
> +; CHECK: Check 11:
> +; CHECK-NOT: Check 12:
>
> define void @testf(i16* %a,
> i16* %b,
> @@ -52,6 +53,165 @@ for.body:
>
> %exitcond = icmp eq i64 %add, 20
> br i1 %exitcond, label %for.end, label %for.body
> +
> +for.end: ; preds = %for.body
> + ret void
> +}
> +
> +; The following (testg and testh) check that we can group
> +; memory checks of accesses which differ by a constant value.
> +; Both tests are based on the following C code:
> +;
> +; void testh(short *a, short *b, short *c) {
> +; unsigned long ind = 0;
> +; for (unsigned long ind = 0; ind < 20; ++ind) {
> +; c[2 * ind] = a[ind] * a[ind + 1];
> +; c[2 * ind + 1] = a[ind] * a[ind + 1] * b[ind];
> +; }
> +; }
> +;
> +; It is sufficient to check the intervals
> +; [a, a + 21], [b, b + 20] against [c, c + 41].
> +
> +; 3 reads and 2 writes - two of the reads can be merged,
> +; and the writes can be merged as well. This gives us a
> +; total of 2 memory checks.
> +
> +; CHECK: function 'testg':
> +
> +; CHECK: Run-time memory checks:
> +; CHECK-NEXT: Check 0:
> +; CHECK-NEXT: Comparing group 0:
> +; CHECK-NEXT: %arrayidxA1 = getelementptr inbounds i16, i16* %a,
> i64 %add
> +; CHECK-NEXT: %arrayidxA = getelementptr inbounds i16, i16* %a, i64
> %ind
> +; CHECK-NEXT: Against group 2:
> +; CHECK-NEXT: %arrayidxC1 = getelementptr inbounds i16, i16* %c,
> i64 %store_ind_inc
> +; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c, i64
> %store_ind
> +; CHECK-NEXT: Check 1:
> +; CHECK-NEXT: Comparing group 1:
> +; CHECK-NEXT: %arrayidxB = getelementptr inbounds i16, i16* %b, i64
> %ind
> +; CHECK-NEXT: Against group 2:
> +; CHECK-NEXT: %arrayidxC1 = getelementptr inbounds i16, i16* %c,
> i64 %store_ind_inc
> +; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c, i64
> %store_ind
> +; CHECK-NEXT: Grouped accesses:
> +; CHECK-NEXT: Group 0:
> +; CHECK-NEXT: (Low: %a High: (40 + %a))
> +; CHECK-NEXT: Member: {(2 + %a),+,2}
> +; CHECK-NEXT: Member: {%a,+,2}
> +; CHECK-NEXT: Group 1:
> +; CHECK-NEXT: (Low: %b High: (38 + %b))
> +; CHECK-NEXT: Member: {%b,+,2}
> +; CHECK-NEXT: Group 2:
> +; CHECK-NEXT: (Low: %c High: (78 + %c))
> +; CHECK-NEXT: Member: {(2 + %c),+,4}
> +; CHECK-NEXT: Member: {%c,+,4}
> +
> +define void @testg(i16* %a,
> + i16* %b,
> + i16* %c) {
> +entry:
> + br label %for.body
> +
> +for.body: ; preds = %for.body,
> %entry
> + %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
> + %store_ind = phi i64 [ 0, %entry ], [ %store_ind_next, %for.body ]
> +
> + %add = add nuw nsw i64 %ind, 1
> + %store_ind_inc = add nuw nsw i64 %store_ind, 1
> + %store_ind_next = add nuw nsw i64 %store_ind_inc, 1
> +
> + %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind
> + %loadA = load i16, i16* %arrayidxA, align 2
> +
> + %arrayidxA1 = getelementptr inbounds i16, i16* %a, i64 %add
> + %loadA1 = load i16, i16* %arrayidxA1, align 2
> +
> + %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
> + %loadB = load i16, i16* %arrayidxB, align 2
> +
> + %mul = mul i16 %loadA, %loadA1
> + %mul1 = mul i16 %mul, %loadB
> +
> + %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
> + store i16 %mul1, i16* %arrayidxC, align 2
> +
> + %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
> + store i16 %mul, i16* %arrayidxC1, align 2
> +
> + %exitcond = icmp eq i64 %add, 20
> + br i1 %exitcond, label %for.end, label %for.body
> +
> +for.end: ; preds = %for.body
> + ret void
> +}
> +
> +; 3 reads and 2 writes - the writes can be merged into a single
> +; group, but the GEPs used for the reads are not marked as inbounds.
> +; We can still merge them because we are using a unit stride for
> +; accesses, so we cannot overflow the GEPs.
> +
> +; CHECK: function 'testh':
> +; CHECK: Run-time memory checks:
> +; CHECK-NEXT: Check 0:
> +; CHECK-NEXT: Comparing group 0:
> +; CHECK-NEXT: %arrayidxA1 = getelementptr i16, i16* %a, i64 %add
> +; CHECK-NEXT: %arrayidxA = getelementptr i16, i16* %a, i64 %ind
> +; CHECK-NEXT: Against group 2:
> +; CHECK-NEXT: %arrayidxC1 = getelementptr inbounds i16, i16* %c,
> i64 %store_ind_inc
> +; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c,
> i64 %store_ind
> +; CHECK-NEXT: Check 1:
> +; CHECK-NEXT: Comparing group 1:
> +; CHECK-NEXT: %arrayidxB = getelementptr i16, i16* %b, i64 %ind
> +; CHECK-NEXT: Against group 2:
> +; CHECK-NEXT: %arrayidxC1 = getelementptr inbounds i16, i16* %c,
> i64 %store_ind_inc
> +; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c,
> i64 %store_ind
> +; CHECK-NEXT: Grouped accesses:
> +; CHECK-NEXT: Group 0:
> +; CHECK-NEXT: (Low: %a High: (40 + %a))
> +; CHECK-NEXT: Member: {(2 + %a),+,2}
> +; CHECK-NEXT: Member: {%a,+,2}
> +; CHECK-NEXT: Group 1:
> +; CHECK-NEXT: (Low: %b High: (38 + %b))
> +; CHECK-NEXT: Member: {%b,+,2}
> +; CHECK-NEXT: Group 2:
> +; CHECK-NEXT: (Low: %c High: (78 + %c))
> +; CHECK-NEXT: Member: {(2 + %c),+,4}
> +; CHECK-NEXT: Member: {%c,+,4}
> +
> +define void @testh(i16* %a,
> + i16* %b,
> + i16* %c) {
> +entry:
> + br label %for.body
> +
> +for.body: ; preds = %for.body,
> %entry
> + %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
> + %store_ind = phi i64 [ 0, %entry ], [ %store_ind_next, %for.body ]
> +
> + %add = add nuw nsw i64 %ind, 1
> + %store_ind_inc = add nuw nsw i64 %store_ind, 1
> + %store_ind_next = add nuw nsw i64 %store_ind_inc, 1
> +
> + %arrayidxA = getelementptr i16, i16* %a, i64 %ind
> + %loadA = load i16, i16* %arrayidxA, align 2
> +
> + %arrayidxA1 = getelementptr i16, i16* %a, i64 %add
> + %loadA1 = load i16, i16* %arrayidxA1, align 2
> +
> + %arrayidxB = getelementptr i16, i16* %b, i64 %ind
> + %loadB = load i16, i16* %arrayidxB, align 2
> +
> + %mul = mul i16 %loadA, %loadA1
> + %mul1 = mul i16 %mul, %loadB
> +
> + %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
> + store i16 %mul1, i16* %arrayidxC, align 2
> +
> + %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
> + store i16 %mul, i16* %arrayidxC1, align 2
> +
> + %exitcond = icmp eq i64 %add, 20
> + br i1 %exitcond, label %for.end, label %for.body
>
> for.end: ; preds = %for.body
> ret void
>
> Modified:
> llvm/trunk/test/Analysis/LoopAccessAnalysis/resort-to-memchecks-only.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/LoopAccessAnalysis/resort-to-memchecks-only.ll?rev=241673&r1=241672&r2=241673&view=diff
>
> ==============================================================================
> ---
> llvm/trunk/test/Analysis/LoopAccessAnalysis/resort-to-memchecks-only.ll
> (original)
> +++
> llvm/trunk/test/Analysis/LoopAccessAnalysis/resort-to-memchecks-only.ll Wed
> Jul 8 04:16:33 2015
> @@ -15,7 +15,9 @@ target triple = "x86_64-apple-macosx10.1
> ; CHECK-NEXT: Interesting Dependences:
> ; CHECK-NEXT: Run-time memory checks:
> ; CHECK-NEXT: 0:
> +; CHECK-NEXT: Comparing group
> ; CHECK-NEXT: %arrayidxA2 = getelementptr inbounds i16, i16* %a, i64
> %idx
> +; CHECK-NEXT: Against group
> ; CHECK-NEXT: %arrayidxA = getelementptr inbounds i16, i16* %a, i64
> %indvar
>
> @B = common global i16* null, align 8
>
> Modified:
> llvm/trunk/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll?rev=241673&r1=241672&r2=241673&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll
> (original)
> +++ llvm/trunk/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll
> Wed Jul 8 04:16:33 2015
> @@ -14,10 +14,16 @@ target triple = "x86_64-apple-macosx10.1
> ; CHECK-NEXT: store i16 %mul1, i16* %arrayidxA_plus_2, align 2
> ; CHECK: Run-time memory checks:
> ; CHECK-NEXT: 0:
> +; CHECK-NEXT: Comparing group
> +; CHECK-NEXT: %arrayidxA = getelementptr inbounds i16, i16* %a, i64
> %storemerge3
> ; CHECK-NEXT: %arrayidxA_plus_2 = getelementptr inbounds i16, i16* %a,
> i64 %add
> +; CHECK-NEXT: Against group
> ; CHECK-NEXT: %arrayidxB = getelementptr inbounds i16, i16* %b, i64
> %storemerge3
> ; CHECK-NEXT: 1:
> +; CHECK-NEXT: Comparing group
> +; CHECK-NEXT: %arrayidxA = getelementptr inbounds i16, i16* %a, i64
> %storemerge3
> ; CHECK-NEXT: %arrayidxA_plus_2 = getelementptr inbounds i16, i16* %a,
> i64 %add
> +; CHECK-NEXT: Against group
> ; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c, i64
> %storemerge3
>
> @B = common global i16* null, align 8
>
> Modified: llvm/trunk/test/Transforms/LoopDistribute/basic-with-memchecks.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopDistribute/basic-with-memchecks.ll?rev=241673&r1=241672&r2=241673&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopDistribute/basic-with-memchecks.ll
> (original)
> +++ llvm/trunk/test/Transforms/LoopDistribute/basic-with-memchecks.ll Wed
> Jul 8 04:16:33 2015
> @@ -32,16 +32,14 @@ entry:
> %e = load i32*, i32** @E, align 8
> br label %for.body
>
> -; We have two compares for each array overlap check which is a total of 10
> -; compares.
> +; We have two compares for each array overlap check.
> +; Since the checks to A and A + 4 get merged, this will give us a
> +; total of 8 compares.
> ;
> ; CHECK: for.body.lver.memcheck:
> ; CHECK: = icmp
> ; CHECK: = icmp
>
> -; CHECK: = icmp
> -; CHECK: = icmp
> -
> ; CHECK: = icmp
> ; CHECK: = icmp
>
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150708/3e1e10a3/attachment.html>
More information about the llvm-commits
mailing list