[llvm] r342027 - [LV] Move InterleaveGroup and InterleavedAccessInfo to VectorUtils.h (NFC)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 12 01:01:57 PDT 2018
Author: fhahn
Date: Wed Sep 12 01:01:57 2018
New Revision: 342027
URL: http://llvm.org/viewvc/llvm-project?rev=342027&view=rev
Log:
[LV] Move InterleaveGroup and InterleavedAccessInfo to VectorUtils.h (NFC)
Move the two classes out of LoopVectorize.cpp to make it easier to reuse
them for VPlan outside of LoopVectorize.cpp.
Reviewers: Ayal, mssimpso, rengolin, dcaballe, mkuper, hsaito, hfinkel, xbolva00
Reviewed By: rengolin, xbolva00
Differential Revision: https://reviews.llvm.org/D49488
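For context, a minimal sketch of what the move enables (illustrative only, not
part of the patch): code outside LoopVectorize.cpp, e.g. a future VPlan-based
transform, can now name the two classes simply by including the Analysis
header, since they are declared in namespace llvm there. The wrapper function
below is hypothetical; the constructor and member functions it calls are the
ones declared in VectorUtils.h by this patch.

// Illustrative sketch only.
#include "llvm/Analysis/VectorUtils.h"

using namespace llvm;

// Hypothetical wrapper: run the analysis on a loop already known to be
// vectorizable and report whether a scalar epilogue iteration is needed.
static bool needsScalarEpilogueForInterleaving(PredicatedScalarEvolution &PSE,
                                               Loop *L, DominatorTree *DT,
                                               LoopInfo *LI,
                                               const LoopAccessInfo *LAI) {
  InterleavedAccessInfo IAI(PSE, L, DT, LI, LAI);
  IAI.analyzeInterleaving();
  return IAI.requiresScalarEpilogue();
}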
Modified:
llvm/trunk/include/llvm/Analysis/VectorUtils.h
llvm/trunk/include/llvm/IR/Instructions.h
llvm/trunk/lib/Analysis/VectorUtils.cpp
llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/trunk/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs.ll
Modified: llvm/trunk/include/llvm/Analysis/VectorUtils.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/VectorUtils.h?rev=342027&r1=342026&r2=342027&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/VectorUtils.h (original)
+++ llvm/trunk/include/llvm/Analysis/VectorUtils.h Wed Sep 12 01:01:57 2018
@@ -15,6 +15,7 @@
#define LLVM_ANALYSIS_VECTORUTILS_H
#include "llvm/ADT/MapVector.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/IRBuilder.h"
@@ -176,6 +177,338 @@ Constant *createSequentialMask(IRBuilder
/// elements, it will be padded with undefs.
Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs);
+/// The group of interleaved loads/stores sharing the same stride and
+/// close to each other.
+///
+/// Each member in this group has an index starting from 0, and the largest
+/// index should be less than the interleave factor, which is equal to the
+/// absolute value of the access's stride.
+///
+/// E.g. An interleaved load group of factor 4:
+/// for (unsigned i = 0; i < 1024; i+=4) {
+/// a = A[i]; // Member of index 0
+/// b = A[i+1]; // Member of index 1
+/// d = A[i+3]; // Member of index 3
+/// ...
+/// }
+///
+/// An interleaved store group of factor 4:
+/// for (unsigned i = 0; i < 1024; i+=4) {
+/// ...
+/// A[i] = a; // Member of index 0
+/// A[i+1] = b; // Member of index 1
+/// A[i+2] = c; // Member of index 2
+/// A[i+3] = d; // Member of index 3
+/// }
+///
+/// Note: the interleaved load group could have gaps (missing members), but
+/// the interleaved store group doesn't allow gaps.
+class InterleaveGroup {
+public:
+ InterleaveGroup(Instruction *Instr, int Stride, unsigned Align)
+ : Align(Align), InsertPos(Instr) {
+ assert(Align && "The alignment should be non-zero");
+
+ Factor = std::abs(Stride);
+ assert(Factor > 1 && "Invalid interleave factor");
+
+ Reverse = Stride < 0;
+ Members[0] = Instr;
+ }
+
+ bool isReverse() const { return Reverse; }
+ unsigned getFactor() const { return Factor; }
+ unsigned getAlignment() const { return Align; }
+ unsigned getNumMembers() const { return Members.size(); }
+
+ /// Try to insert a new member \p Instr with index \p Index and
+ /// alignment \p NewAlign. The index is relative to the leader; it can be
+ /// negative if the inserted member becomes the new leader.
+ ///
+ /// \returns false if the instruction doesn't belong to the group.
+ bool insertMember(Instruction *Instr, int Index, unsigned NewAlign) {
+ assert(NewAlign && "The new member's alignment should be non-zero");
+
+ int Key = Index + SmallestKey;
+
+ // Skip if there is already a member with the same index.
+ if (Members.find(Key) != Members.end())
+ return false;
+
+ if (Key > LargestKey) {
+ // The largest index is always less than the interleave factor.
+ if (Index >= static_cast<int>(Factor))
+ return false;
+
+ LargestKey = Key;
+ } else if (Key < SmallestKey) {
+ // The largest index is always less than the interleave factor.
+ if (LargestKey - Key >= static_cast<int>(Factor))
+ return false;
+
+ SmallestKey = Key;
+ }
+
+ // It's always safe to select the minimum alignment.
+ Align = std::min(Align, NewAlign);
+ Members[Key] = Instr;
+ return true;
+ }
+
+ /// Get the member with the given index \p Index
+ ///
+ /// \returns nullptr if the group contains no such member.
+ Instruction *getMember(unsigned Index) const {
+ int Key = SmallestKey + Index;
+ auto Member = Members.find(Key);
+ if (Member == Members.end())
+ return nullptr;
+
+ return Member->second;
+ }
+
+ /// Get the index for the given member. Unlike the key in the member
+ /// map, the index starts from 0.
+ unsigned getIndex(Instruction *Instr) const {
+ for (auto I : Members)
+ if (I.second == Instr)
+ return I.first - SmallestKey;
+
+ llvm_unreachable("InterleaveGroup contains no such member");
+ }
+
+ Instruction *getInsertPos() const { return InsertPos; }
+ void setInsertPos(Instruction *Inst) { InsertPos = Inst; }
+
+ /// Add metadata (e.g. alias info) from the instructions in this group to \p
+ /// NewInst.
+ ///
+ /// FIXME: this function currently does not add noalias metadata the way
+ /// addNewMetadata does. To do that we need to compute the intersection of
+ /// the noalias info from all members.
+ void addMetadata(Instruction *NewInst) const {
+ SmallVector<Value *, 4> VL;
+ std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
+ [](std::pair<int, Instruction *> p) { return p.second; });
+ propagateMetadata(NewInst, VL);
+ }
+
+private:
+ unsigned Factor; // Interleave Factor.
+ bool Reverse;
+ unsigned Align;
+ DenseMap<int, Instruction *> Members;
+ int SmallestKey = 0;
+ int LargestKey = 0;
+
+ // To avoid breaking dependences, vectorized instructions of an interleave
+ // group should be inserted at either the first load or the last store in
+ // program order.
+ //
+ // E.g. %even = load i32 // Insert Position
+ // %add = add i32 %even // Use of %even
+ // %odd = load i32
+ //
+ // store i32 %even
+ // %odd = add i32 // Def of %odd
+ // store i32 %odd // Insert Position
+ Instruction *InsertPos;
+};
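For illustration (not part of the patch), a small sketch of how the class
above behaves for the factor-4 load group from its comment. The three
Instruction pointers are assumed to be the IR loads of A[i], A[i+1] and
A[i+3], and the stride/alignment values are made up for the example:

#include "llvm/Analysis/VectorUtils.h"
#include <cassert>

using namespace llvm;

void exampleFactor4LoadGroup(Instruction *Load0, Instruction *Load1,
                             Instruction *Load3) {
  // The leader passed to the constructor gets index 0.
  InterleaveGroup Group(Load0, /*Stride=*/4, /*Align=*/4);
  Group.insertMember(Load1, /*Index=*/1, /*NewAlign=*/4);
  Group.insertMember(Load3, /*Index=*/3, /*NewAlign=*/4);

  assert(Group.getFactor() == 4);      // factor == |Stride|
  assert(!Group.isReverse());          // positive stride
  assert(Group.getNumMembers() == 3);  // gap at index 2 is allowed for loads
  assert(Group.getMember(2) == nullptr);
  assert(Group.getIndex(Load3) == 3);  // indices are 0-based from the leader
  // insertMember(SomeLoad, /*Index=*/4, 4) would return false: the largest
  // index must stay below the factor.
}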
+
+/// Drive the analysis of interleaved memory accesses in the loop.
+///
+/// Use this class to analyze interleaved accesses only when we can vectorize
+/// a loop. Otherwise it's meaningless to do analysis as the vectorization
+/// on interleaved accesses is unsafe.
+///
+/// The analysis collects interleave groups and records the relationships
+/// between the member and the group in a map.
+class InterleavedAccessInfo {
+public:
+ InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
+ DominatorTree *DT, LoopInfo *LI,
+ const LoopAccessInfo *LAI)
+ : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}
+
+ ~InterleavedAccessInfo() {
+ SmallPtrSet<InterleaveGroup *, 4> DelSet;
+ // Avoid releasing a pointer twice.
+ for (auto &I : InterleaveGroupMap)
+ DelSet.insert(I.second);
+ for (auto *Ptr : DelSet)
+ delete Ptr;
+ }
+
+ /// Analyze the interleaved accesses and collect them in interleave
+ /// groups. Substitute symbolic strides using \p Strides.
+ void analyzeInterleaving();
+
+ /// Check if \p Instr belongs to any interleave group.
+ bool isInterleaved(Instruction *Instr) const {
+ return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
+ }
+
+ /// Get the interleave group that \p Instr belongs to.
+ ///
+ /// \returns nullptr if \p Instr does not belong to any group.
+ InterleaveGroup *getInterleaveGroup(Instruction *Instr) const {
+ auto Group = InterleaveGroupMap.find(Instr);
+ if (Group == InterleaveGroupMap.end())
+ return nullptr;
+ return Group->second;
+ }
+
+ /// Returns true if an interleaved group that may access memory
+ /// out-of-bounds requires a scalar epilogue iteration for correctness.
+ bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }
+
+private:
+ /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
+ /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
+ /// The interleaved access analysis can also add new predicates (for example
+ /// by versioning strides of pointers).
+ PredicatedScalarEvolution &PSE;
+
+ Loop *TheLoop;
+ DominatorTree *DT;
+ LoopInfo *LI;
+ const LoopAccessInfo *LAI;
+
+ /// True if the loop may contain non-reversed interleaved groups with
+ /// out-of-bounds accesses. We ensure we don't speculatively access memory
+ /// out-of-bounds by executing at least one scalar epilogue iteration.
+ bool RequiresScalarEpilogue = false;
+
+ /// Holds the relationships between the members and the interleave group.
+ DenseMap<Instruction *, InterleaveGroup *> InterleaveGroupMap;
+
+ /// Holds dependences among the memory accesses in the loop. It maps a source
+ /// access to a set of dependent sink accesses.
+ DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;
+
+ /// The descriptor for a strided memory access.
+ struct StrideDescriptor {
+ StrideDescriptor() = default;
+ StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
+ unsigned Align)
+ : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {}
+
+ // The access's stride. It is negative for a reverse access.
+ int64_t Stride = 0;
+
+ // The scalar expression of this access.
+ const SCEV *Scev = nullptr;
+
+ // The size of the memory object.
+ uint64_t Size = 0;
+
+ // The alignment of this access.
+ unsigned Align = 0;
+ };
+
+ /// A type for holding instructions and their stride descriptors.
+ using StrideEntry = std::pair<Instruction *, StrideDescriptor>;
+
+ /// Create a new interleave group with the given instruction \p Instr,
+ /// stride \p Stride and alignment \p Align.
+ ///
+ /// \returns the newly created interleave group.
+ InterleaveGroup *createInterleaveGroup(Instruction *Instr, int Stride,
+ unsigned Align) {
+ assert(!isInterleaved(Instr) && "Already in an interleaved access group");
+ InterleaveGroupMap[Instr] = new InterleaveGroup(Instr, Stride, Align);
+ return InterleaveGroupMap[Instr];
+ }
+
+ /// Release the group and remove all the relationships.
+ void releaseGroup(InterleaveGroup *Group) {
+ for (unsigned i = 0; i < Group->getFactor(); i++)
+ if (Instruction *Member = Group->getMember(i))
+ InterleaveGroupMap.erase(Member);
+
+ delete Group;
+ }
+
+ /// Collect all the accesses with a constant stride in program order.
+ void collectConstStrideAccesses(
+ MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
+ const ValueToValueMap &Strides);
+
+ /// Returns true if \p Stride is allowed in an interleaved group.
+ static bool isStrided(int Stride);
+
+ /// Returns true if \p BB is a predicated block.
+ bool isPredicated(BasicBlock *BB) const {
+ return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
+ }
+
+ /// Returns true if LoopAccessInfo can be used for dependence queries.
+ bool areDependencesValid() const {
+ return LAI && LAI->getDepChecker().getDependences();
+ }
+
+ /// Returns true if memory accesses \p A and \p B can be reordered, if
+ /// necessary, when constructing interleaved groups.
+ ///
+ /// \p A must precede \p B in program order. We return false if reordering is
+ /// not necessary or is prevented because \p A and \p B may be dependent.
+ bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
+ StrideEntry *B) const {
+ // Code motion for interleaved accesses can potentially hoist strided loads
+ // and sink strided stores. The code below checks the legality of the
+ // following two conditions:
+ //
+ // 1. Potentially moving a strided load (B) before any store (A) that
+ // precedes B, or
+ //
+ // 2. Potentially moving a strided store (A) after any load or store (B)
+ // that A precedes.
+ //
+ // It's legal to reorder A and B if we know there isn't a dependence from A
+ // to B. Note that this determination is conservative since some
+ // dependences could potentially be reordered safely.
+
+ // A is potentially the source of a dependence.
+ auto *Src = A->first;
+ auto SrcDes = A->second;
+
+ // B is potentially the sink of a dependence.
+ auto *Sink = B->first;
+ auto SinkDes = B->second;
+
+ // Code motion for interleaved accesses can't violate WAR dependences.
+ // Thus, reordering is legal if the source isn't a write.
+ if (!Src->mayWriteToMemory())
+ return true;
+
+ // At least one of the accesses must be strided.
+ if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
+ return true;
+
+ // If dependence information is not available from LoopAccessInfo,
+ // conservatively assume the instructions can't be reordered.
+ if (!areDependencesValid())
+ return false;
+
+ // If we know there is a dependence from source to sink, assume the
+ // instructions can't be reordered. Otherwise, reordering is legal.
+ return Dependences.find(Src) == Dependences.end() ||
+ !Dependences.lookup(Src).count(Sink);
+ }
+
+ /// Collect the dependences from LoopAccessInfo.
+ ///
+ /// We process the dependences once during the interleaved access analysis to
+ /// enable constant-time dependence queries.
+ void collectDependences() {
+ if (!areDependencesValid())
+ return;
+ auto *Deps = LAI->getDepChecker().getDependences();
+ for (auto Dep : *Deps)
+ Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
+ }
+};
+
} // llvm namespace
#endif
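A hedged sketch of driving the analysis declared above from outside
LoopVectorize.cpp. Everything called here is declared in this header; the
helper name and the counting are illustrative only:

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/VectorUtils.h"

using namespace llvm;

// Hypothetical helper: run the interleaving analysis on a loop that has
// already been found vectorizable and count the accesses placed in groups.
static unsigned countInterleavedAccesses(PredicatedScalarEvolution &PSE,
                                         Loop *L, DominatorTree *DT,
                                         LoopInfo *LI,
                                         const LoopAccessInfo *LAI) {
  InterleavedAccessInfo IAI(PSE, L, DT, LI, LAI);
  IAI.analyzeInterleaving();

  // Query the results while IAI is alive: the groups are owned by IAI and
  // freed in its destructor.
  unsigned NumInterleaved = 0;
  for (BasicBlock *BB : L->blocks())
    for (Instruction &I : *BB)
      if (IAI.isInterleaved(&I))
        ++NumInterleaved;
  return NumInterleaved;
}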
Modified: llvm/trunk/include/llvm/IR/Instructions.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/Instructions.h?rev=342027&r1=342026&r2=342027&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/Instructions.h (original)
+++ llvm/trunk/include/llvm/IR/Instructions.h Wed Sep 12 01:01:57 2018
@@ -5331,6 +5331,25 @@ inline Value *getPointerOperand(Value *V
return nullptr;
}
+/// A helper function that returns the alignment of load or store instruction.
+inline unsigned getLoadStoreAlignment(Value *I) {
+ assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
+ "Expected Load or Store instruction");
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ return LI->getAlignment();
+ return cast<StoreInst>(I)->getAlignment();
+}
+
+/// A helper function that returns the address space of the pointer operand of
+/// load or store instruction.
+inline unsigned getLoadStoreAddressSpace(Value *I) {
+ assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
+ "Expected Load or Store instruction");
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ return LI->getPointerAddressSpace();
+ return cast<StoreInst>(I)->getPointerAddressSpace();
+}
+
} // end namespace llvm
#endif // LLVM_IR_INSTRUCTIONS_H
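The two helpers above replace static duplicates that this patch removes from
LoopVectorize.cpp. As a hedged sketch (the wrapper name is hypothetical), they
can be combined with getLoadStorePointerOperand, the existing sibling helper
used elsewhere in this patch, to resolve an alignment of 0 to the target ABI
alignment the same way collectConstStrideAccesses does:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Hypothetical wrapper: return the alignment the access will actually get,
// treating 0 as "use the target ABI alignment of the accessed type".
static unsigned getEffectiveLoadStoreAlignment(Instruction *I,
                                               const DataLayout &DL) {
  unsigned Align = getLoadStoreAlignment(I); // asserts I is a load or store
  if (!Align) {
    auto *PtrTy = cast<PointerType>(getLoadStorePointerOperand(I)->getType());
    Align = DL.getABITypeAlignment(PtrTy->getElementType());
  }
  return Align;
}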
Modified: llvm/trunk/lib/Analysis/VectorUtils.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/VectorUtils.cpp?rev=342027&r1=342026&r2=342027&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/VectorUtils.cpp (original)
+++ llvm/trunk/lib/Analysis/VectorUtils.cpp Wed Sep 12 01:01:57 2018
@@ -15,6 +15,7 @@
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -25,9 +26,17 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
+#define DEBUG_TYPE "vectorutils"
+
using namespace llvm;
using namespace llvm::PatternMatch;
+/// Maximum factor for an interleaved memory access.
+static cl::opt<unsigned> MaxInterleaveGroupFactor(
+ "max-interleave-group-factor", cl::Hidden,
+ cl::desc("Maximum factor for an interleaved access group (default = 8)"),
+ cl::init(8));
+
/// Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all
/// scalars for the scalar form of the intrinsic and all vectors for
@@ -575,3 +584,321 @@ Value *llvm::concatenateVectors(IRBuilde
return ResList[0];
}
+
+bool InterleavedAccessInfo::isStrided(int Stride) {
+ unsigned Factor = std::abs(Stride);
+ return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
+}
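As a quick illustration of the filter above (not part of the patch), with the
default cap of 8 set by the -max-interleave-group-factor option; the values
simply follow from the definition:

#include <cassert>
#include <cstdlib>

static void strideFilterExamples() {
  const unsigned MaxFactor = 8; // default of -max-interleave-group-factor
  auto IsStrided = [&](int Stride) {
    unsigned Factor = std::abs(Stride);
    return Factor >= 2 && Factor <= MaxFactor;
  };
  assert(!IsStrided(1));  // factor 1: a plain consecutive access
  assert(IsStrided(-3));  // reverse access, factor 3
  assert(IsStrided(8));   // exactly at the cap
  assert(!IsStrided(16)); // above the cap: not treated as interleaved
}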
+
+void InterleavedAccessInfo::collectConstStrideAccesses(
+ MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
+ const ValueToValueMap &Strides) {
+ auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
+
+ // Since it's desired that the load/store instructions be maintained in
+ // "program order" for the interleaved access analysis, we have to visit the
+ // blocks in the loop in reverse postorder (i.e., in a topological order).
+ // Such an ordering will ensure that any load/store that may be executed
+ // before a second load/store will precede the second load/store in
+ // AccessStrideInfo.
+ LoopBlocksDFS DFS(TheLoop);
+ DFS.perform(LI);
+ for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
+ for (auto &I : *BB) {
+ auto *LI = dyn_cast<LoadInst>(&I);
+ auto *SI = dyn_cast<StoreInst>(&I);
+ if (!LI && !SI)
+ continue;
+
+ Value *Ptr = getLoadStorePointerOperand(&I);
+ // We don't check wrapping here because we don't know yet if Ptr will be
+ // part of a full group or a group with gaps. Checking wrapping for all
+ // pointers (even those that end up in groups with no gaps) will be overly
+ // conservative. For full groups, wrapping should be ok since if we would
+ // wrap around the address space we would do a memory access at nullptr
+ // even without the transformation. The wrapping checks are therefore
+ // deferred until after we've formed the interleaved groups.
+ int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
+ /*Assume=*/true, /*ShouldCheckWrap=*/false);
+
+ const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
+ PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
+ uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
+
+ // An alignment of 0 means target ABI alignment.
+ unsigned Align = getLoadStoreAlignment(&I);
+ if (!Align)
+ Align = DL.getABITypeAlignment(PtrTy->getElementType());
+
+ AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align);
+ }
+}
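For illustration (assumed source loop and values, not from the patch): for a
simple stride-2 loop over 32-bit ints with natural alignment, the
reverse-postorder walk above records one StrideDescriptor per access, in
program order:

// Assumed example: 32-bit ints with natural alignment.
void sumPairs(int *A, int *B, int n) {
  for (int i = 0; i < n; i += 2) {
    int a = A[i];     // Stride = 2, Size = 4, Align = 4
    int b = A[i + 1]; // Stride = 2, Size = 4, Align = 4
    B[i] = a + b;     // Stride = 2, Size = 4, Align = 4
  }
}
// AccessStrideInfo then holds the two loads and the store in exactly this
// order, which is the program order analyzeInterleaving below relies on.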
+
+// Analyze interleaved accesses and collect them into interleaved load and
+// store groups.
+//
+// When generating code for an interleaved load group, we effectively hoist all
+// loads in the group to the location of the first load in program order. When
+// generating code for an interleaved store group, we sink all stores to the
+// location of the last store. This code motion can change the order of load
+// and store instructions and may break dependences.
+//
+// The code generation strategy mentioned above ensures that we won't violate
+// any write-after-read (WAR) dependences.
+//
+// E.g., for the WAR dependence: a = A[i]; // (1)
+// A[i] = b; // (2)
+//
+// The store group of (2) is always inserted at or below (2), and the load
+// group of (1) is always inserted at or above (1). Thus, the instructions will
+// never be reordered. All other dependences are checked to ensure the
+// correctness of the instruction reordering.
+//
+// The algorithm visits all memory accesses in the loop in bottom-up program
+// order. Program order is established by traversing the blocks in the loop in
+// reverse postorder when collecting the accesses.
+//
+// We visit the memory accesses in bottom-up order because it can simplify the
+// construction of store groups in the presence of write-after-write (WAW)
+// dependences.
+//
+// E.g., for the WAW dependence: A[i] = a; // (1)
+// A[i] = b; // (2)
+// A[i + 1] = c; // (3)
+//
+// We will first create a store group with (3) and (2). (1) can't be added to
+// this group because it and (2) are dependent. However, (1) can be grouped
+// with other accesses that may precede it in program order. Note that a
+// bottom-up order does not imply that WAW dependences should not be checked.
+void InterleavedAccessInfo::analyzeInterleaving() {
+ LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
+ const ValueToValueMap &Strides = LAI->getSymbolicStrides();
+
+ // Holds all accesses with a constant stride.
+ MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
+ collectConstStrideAccesses(AccessStrideInfo, Strides);
+
+ if (AccessStrideInfo.empty())
+ return;
+
+ // Collect the dependences in the loop.
+ collectDependences();
+
+ // Holds all interleaved store groups temporarily.
+ SmallSetVector<InterleaveGroup *, 4> StoreGroups;
+ // Holds all interleaved load groups temporarily.
+ SmallSetVector<InterleaveGroup *, 4> LoadGroups;
+
+ // Search in bottom-up program order for pairs of accesses (A and B) that can
+ // form interleaved load or store groups. In the algorithm below, access A
+ // precedes access B in program order. We initialize a group for B in the
+ // outer loop of the algorithm, and then in the inner loop, we attempt to
+ // insert each A into B's group if:
+ //
+ // 1. A and B have the same stride,
+ // 2. A and B have the same memory object size, and
+ // 3. A belongs in B's group according to its distance from B.
+ //
+ // Special care is taken to ensure group formation will not break any
+ // dependences.
+ for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
+ BI != E; ++BI) {
+ Instruction *B = BI->first;
+ StrideDescriptor DesB = BI->second;
+
+ // Initialize a group for B if it has an allowable stride. Even if we don't
+ // create a group for B, we continue with the bottom-up algorithm to ensure
+ // we don't break any of B's dependences.
+ InterleaveGroup *Group = nullptr;
+ if (isStrided(DesB.Stride)) {
+ Group = getInterleaveGroup(B);
+ if (!Group) {
+ LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
+ << '\n');
+ Group = createInterleaveGroup(B, DesB.Stride, DesB.Align);
+ }
+ if (B->mayWriteToMemory())
+ StoreGroups.insert(Group);
+ else
+ LoadGroups.insert(Group);
+ }
+
+ for (auto AI = std::next(BI); AI != E; ++AI) {
+ Instruction *A = AI->first;
+ StrideDescriptor DesA = AI->second;
+
+ // Our code motion strategy implies that we can't have dependences
+ // between accesses in an interleaved group and other accesses located
+ // between the first and last member of the group. Note that this also
+ // means that a group can't have more than one member at a given offset.
+ // The accesses in a group can have dependences with other accesses, but
+ // we must ensure we don't extend the boundaries of the group such that
+ // we encompass those dependent accesses.
+ //
+ // For example, assume we have the sequence of accesses shown below in a
+ // stride-2 loop:
+ //
+ // (1, 2) is a group | A[i] = a; // (1)
+ // | A[i-1] = b; // (2) |
+ // A[i-3] = c; // (3)
+ // A[i] = d; // (4) | (2, 4) is not a group
+ //
+ // Because accesses (2) and (3) are dependent, we can group (2) with (1)
+ // but not with (4). If we did, the dependent access (3) would be within
+ // the boundaries of the (2, 4) group.
+ if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
+ // If a dependence exists and A is already in a group, we know that A
+ // must be a store since A precedes B and WAR dependences are allowed.
+ // Thus, A would be sunk below B. We release A's group to prevent this
+ // illegal code motion. A will then be free to form another group with
+ // instructions that precede it.
+ if (isInterleaved(A)) {
+ InterleaveGroup *StoreGroup = getInterleaveGroup(A);
+ StoreGroups.remove(StoreGroup);
+ releaseGroup(StoreGroup);
+ }
+
+ // If a dependence exists and A is not already in a group (or it was
+ // and we just released it), B might be hoisted above A (if B is a
+ // load) or another store might be sunk below A (if B is a store). In
+ // either case, we can't add additional instructions to B's group. B
+ // will only form a group with instructions that it precedes.
+ break;
+ }
+
+ // At this point, we've checked for illegal code motion. If either A or B
+ // isn't strided, there's nothing left to do.
+ if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
+ continue;
+
+ // Ignore A if it's already in a group or isn't the same kind of memory
+ // operation as B.
+ // Note that mayReadFromMemory() isn't mutually exclusive with
+ // mayWriteToMemory() in the case of atomic loads. We shouldn't see those
+ // here; canVectorizeMemory() should have returned false - except when we
+ // only asked for optimization remarks.
+ if (isInterleaved(A) ||
+ (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
+ (A->mayWriteToMemory() != B->mayWriteToMemory()))
+ continue;
+
+ // Check rules 1 and 2. Ignore A if its stride or size is different from
+ // that of B.
+ if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
+ continue;
+
+ // Ignore A if the memory objects of A and B don't belong to the same
+ // address space.
+ if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
+ continue;
+
+ // Calculate the distance from A to B.
+ const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
+ PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
+ if (!DistToB)
+ continue;
+ int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
+
+ // Check rule 3. Ignore A if its distance to B is not a multiple of the
+ // size.
+ if (DistanceToB % static_cast<int64_t>(DesB.Size))
+ continue;
+
+ // Ignore A if either A or B is in a predicated block. Although we
+ // currently prevent group formation for predicated accesses, we may be
+ // able to relax this limitation in the future once we handle more
+ // complicated blocks.
+ if (isPredicated(A->getParent()) || isPredicated(B->getParent()))
+ continue;
+
+ // The index of A is the index of B plus A's distance to B in multiples
+ // of the size.
+ int IndexA =
+ Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
+
+ // Try to insert A into B's group.
+ if (Group->insertMember(A, IndexA, DesA.Align)) {
+ LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
+ << " into the interleave group with" << *B
+ << '\n');
+ InterleaveGroupMap[A] = Group;
+
+ // Set the first load in program order as the insert position.
+ if (A->mayReadFromMemory())
+ Group->setInsertPos(A);
+ }
+ } // Iteration over A accesses.
+ } // Iteration over B accesses.
+
+ // Remove interleaved store groups with gaps.
+ for (InterleaveGroup *Group : StoreGroups)
+ if (Group->getNumMembers() != Group->getFactor()) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Invalidate candidate interleaved store group due "
+ "to gaps.\n");
+ releaseGroup(Group);
+ }
+ // Remove interleaved groups with gaps (currently only loads) whose memory
+ // accesses may wrap around. We have to revisit the getPtrStride analysis,
+ // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
+ // not check wrapping (see documentation there).
+ // FORNOW we use Assume=false;
+ // TODO: Change to Assume=true but making sure we don't exceed the threshold
+ // of runtime SCEV assumptions checks (thereby potentially failing to
+ // vectorize altogether).
+ // Additional optional optimizations:
+ // TODO: If we are peeling the loop and we know that the first pointer doesn't
+ // wrap then we can deduce that all pointers in the group don't wrap.
+ // This means that we can forcefully peel the loop in order to only have to
+ // check the first pointer for no-wrap. When we change to use Assume=true
+ // we'll only need at most one runtime check per interleaved group.
+ for (InterleaveGroup *Group : LoadGroups) {
+ // Case 1: A full group. We can skip the checks; for full groups, if the
+ // wide load would wrap around the address space we would do a memory
+ // access at nullptr even without the transformation.
+ if (Group->getNumMembers() == Group->getFactor())
+ continue;
+
+ // Case 2: If first and last members of the group don't wrap this implies
+ // that all the pointers in the group don't wrap.
+ // So we check only group member 0 (which is always guaranteed to exist),
+ // and group member Factor - 1; if the latter doesn't exist we rely on
+ // peeling (if it is a non-reversed access -- see Case 3).
+ Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
+ if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
+ /*ShouldCheckWrap=*/true)) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Invalidate candidate interleaved group due to "
+ "first group member potentially pointer-wrapping.\n");
+ releaseGroup(Group);
+ continue;
+ }
+ Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
+ if (LastMember) {
+ Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
+ if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
+ /*ShouldCheckWrap=*/true)) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Invalidate candidate interleaved group due to "
+ "last group member potentially pointer-wrapping.\n");
+ releaseGroup(Group);
+ }
+ } else {
+ // Case 3: A non-reversed interleaved load group with gaps: We need
+ // to execute at least one scalar epilogue iteration. This will ensure
+ // we don't speculatively access memory out-of-bounds. We only need
+ // to look for a member at index factor - 1, since every group must have
+ // a member at index zero.
+ if (Group->isReverse()) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Invalidate candidate interleaved group due to "
+ "a reverse access with gaps.\n");
+ releaseGroup(Group);
+ continue;
+ }
+ LLVM_DEBUG(
+ dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
+ RequiresScalarEpilogue = true;
+ }
+ }
+}
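One possible trace of the bottom-up pass above on the WAW example from the
function comment (illustration only; it assumes LoopAccessInfo records the
write-after-write dependence from (1) to (2)):

// Assumed stride-2 store loop:
void wawExample(int *A, int n, int a, int b, int c) {
  for (int i = 0; i < n; i += 2) {
    A[i]     = a; // (1)
    A[i]     = b; // (2)
    A[i + 1] = c; // (3)
  }
}
// Walking bottom-up, B = (3) seeds a store group with (3) at index 0.
// A = (2) has the same stride and size and sits one element before (3), so
// insertMember adds it at index -1 and (2) becomes the new leader; the group
// is now {(2), (3)}.
// (1) never joins: paired with (3) it maps onto the slot (2) already occupies
// (insertMember returns false), and paired with (2) the WAW dependence makes
// canReorderMemAccessesForInterleavedGroups return false, ending the inner
// scan. The resulting {(2), (3)} group has no gaps, so it survives the
// store-group cleanup at the end of analyzeInterleaving.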
Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=342027&r1=342026&r2=342027&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Wed Sep 12 01:01:57 2018
@@ -171,12 +171,6 @@ static cl::opt<bool> EnableInterleavedMe
"enable-interleaved-mem-accesses", cl::init(false), cl::Hidden,
cl::desc("Enable vectorization on interleaved memory accesses in a loop"));
-/// Maximum factor for an interleaved memory access.
-static cl::opt<unsigned> MaxInterleaveGroupFactor(
- "max-interleave-group-factor", cl::Hidden,
- cl::desc("Maximum factor for an interleaved access group (default = 8)"),
- cl::init(8));
-
/// We don't interleave loops with a known constant trip count below this
/// number.
static const unsigned TinyTripCountInterleaveThreshold = 128;
@@ -265,10 +259,6 @@ static Type *ToVectorTy(Type *Scalar, un
return VectorType::get(Scalar, VF);
}
-// FIXME: The following helper functions have multiple implementations
-// in the project. They can be effectively organized in a common Load/Store
-// utilities unit.
-
/// A helper function that returns the type of loaded or stored value.
static Type *getMemInstValueType(Value *I) {
assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
@@ -278,25 +268,6 @@ static Type *getMemInstValueType(Value *
return cast<StoreInst>(I)->getValueOperand()->getType();
}
-/// A helper function that returns the alignment of load or store instruction.
-static unsigned getMemInstAlignment(Value *I) {
- assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
- "Expected Load or Store instruction");
- if (auto *LI = dyn_cast<LoadInst>(I))
- return LI->getAlignment();
- return cast<StoreInst>(I)->getAlignment();
-}
-
-/// A helper function that returns the address space of the pointer operand of
-/// load or store instruction.
-static unsigned getMemInstAddressSpace(Value *I) {
- assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
- "Expected Load or Store instruction");
- if (auto *LI = dyn_cast<LoadInst>(I))
- return LI->getPointerAddressSpace();
- return cast<StoreInst>(I)->getPointerAddressSpace();
-}
-
/// A helper function that returns true if the given type is irregular. The
/// type is irregular if its allocated size doesn't equal the store size of an
/// element of the corresponding vector type at the given vectorization factor.
@@ -809,348 +780,6 @@ void InnerLoopVectorizer::addMetadata(Ar
}
}
-namespace llvm {
-
-/// The group of interleaved loads/stores sharing the same stride and
-/// close to each other.
-///
-/// Each member in this group has an index starting from 0, and the largest
-/// index should be less than interleaved factor, which is equal to the absolute
-/// value of the access's stride.
-///
-/// E.g. An interleaved load group of factor 4:
-/// for (unsigned i = 0; i < 1024; i+=4) {
-/// a = A[i]; // Member of index 0
-/// b = A[i+1]; // Member of index 1
-/// d = A[i+3]; // Member of index 3
-/// ...
-/// }
-///
-/// An interleaved store group of factor 4:
-/// for (unsigned i = 0; i < 1024; i+=4) {
-/// ...
-/// A[i] = a; // Member of index 0
-/// A[i+1] = b; // Member of index 1
-/// A[i+2] = c; // Member of index 2
-/// A[i+3] = d; // Member of index 3
-/// }
-///
-/// Note: the interleaved load group could have gaps (missing members), but
-/// the interleaved store group doesn't allow gaps.
-class InterleaveGroup {
-public:
- InterleaveGroup(Instruction *Instr, int Stride, unsigned Align)
- : Align(Align), InsertPos(Instr) {
- assert(Align && "The alignment should be non-zero");
-
- Factor = std::abs(Stride);
- assert(Factor > 1 && "Invalid interleave factor");
-
- Reverse = Stride < 0;
- Members[0] = Instr;
- }
-
- bool isReverse() const { return Reverse; }
- unsigned getFactor() const { return Factor; }
- unsigned getAlignment() const { return Align; }
- unsigned getNumMembers() const { return Members.size(); }
-
- /// Try to insert a new member \p Instr with index \p Index and
- /// alignment \p NewAlign. The index is related to the leader and it could be
- /// negative if it is the new leader.
- ///
- /// \returns false if the instruction doesn't belong to the group.
- bool insertMember(Instruction *Instr, int Index, unsigned NewAlign) {
- assert(NewAlign && "The new member's alignment should be non-zero");
-
- int Key = Index + SmallestKey;
-
- // Skip if there is already a member with the same index.
- if (Members.find(Key) != Members.end())
- return false;
-
- if (Key > LargestKey) {
- // The largest index is always less than the interleave factor.
- if (Index >= static_cast<int>(Factor))
- return false;
-
- LargestKey = Key;
- } else if (Key < SmallestKey) {
- // The largest index is always less than the interleave factor.
- if (LargestKey - Key >= static_cast<int>(Factor))
- return false;
-
- SmallestKey = Key;
- }
-
- // It's always safe to select the minimum alignment.
- Align = std::min(Align, NewAlign);
- Members[Key] = Instr;
- return true;
- }
-
- /// Get the member with the given index \p Index
- ///
- /// \returns nullptr if contains no such member.
- Instruction *getMember(unsigned Index) const {
- int Key = SmallestKey + Index;
- auto Member = Members.find(Key);
- if (Member == Members.end())
- return nullptr;
-
- return Member->second;
- }
-
- /// Get the index for the given member. Unlike the key in the member
- /// map, the index starts from 0.
- unsigned getIndex(Instruction *Instr) const {
- for (auto I : Members)
- if (I.second == Instr)
- return I.first - SmallestKey;
-
- llvm_unreachable("InterleaveGroup contains no such member");
- }
-
- Instruction *getInsertPos() const { return InsertPos; }
- void setInsertPos(Instruction *Inst) { InsertPos = Inst; }
-
- /// Add metadata (e.g. alias info) from the instructions in this group to \p
- /// NewInst.
- ///
- /// FIXME: this function currently does not add noalias metadata a'la
- /// addNewMedata. To do that we need to compute the intersection of the
- /// noalias info from all members.
- void addMetadata(Instruction *NewInst) const {
- SmallVector<Value *, 4> VL;
- std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
- [](std::pair<int, Instruction *> p) { return p.second; });
- propagateMetadata(NewInst, VL);
- }
-
-private:
- unsigned Factor; // Interleave Factor.
- bool Reverse;
- unsigned Align;
- DenseMap<int, Instruction *> Members;
- int SmallestKey = 0;
- int LargestKey = 0;
-
- // To avoid breaking dependences, vectorized instructions of an interleave
- // group should be inserted at either the first load or the last store in
- // program order.
- //
- // E.g. %even = load i32 // Insert Position
- // %add = add i32 %even // Use of %even
- // %odd = load i32
- //
- // store i32 %even
- // %odd = add i32 // Def of %odd
- // store i32 %odd // Insert Position
- Instruction *InsertPos;
-};
-} // end namespace llvm
-
-namespace {
-
-/// Drive the analysis of interleaved memory accesses in the loop.
-///
-/// Use this class to analyze interleaved accesses only when we can vectorize
-/// a loop. Otherwise it's meaningless to do analysis as the vectorization
-/// on interleaved accesses is unsafe.
-///
-/// The analysis collects interleave groups and records the relationships
-/// between the member and the group in a map.
-class InterleavedAccessInfo {
-public:
- InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
- DominatorTree *DT, LoopInfo *LI,
- const LoopAccessInfo *LAI)
- : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}
-
- ~InterleavedAccessInfo() {
- SmallPtrSet<InterleaveGroup *, 4> DelSet;
- // Avoid releasing a pointer twice.
- for (auto &I : InterleaveGroupMap)
- DelSet.insert(I.second);
- for (auto *Ptr : DelSet)
- delete Ptr;
- }
-
- /// Analyze the interleaved accesses and collect them in interleave
- /// groups. Substitute symbolic strides using \p Strides.
- void analyzeInterleaving();
-
- /// Check if \p Instr belongs to any interleave group.
- bool isInterleaved(Instruction *Instr) const {
- return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
- }
-
- /// Get the interleave group that \p Instr belongs to.
- ///
- /// \returns nullptr if doesn't have such group.
- InterleaveGroup *getInterleaveGroup(Instruction *Instr) const {
- auto Group = InterleaveGroupMap.find(Instr);
- if (Group == InterleaveGroupMap.end())
- return nullptr;
- return Group->second;
- }
-
- /// Returns true if an interleaved group that may access memory
- /// out-of-bounds requires a scalar epilogue iteration for correctness.
- bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }
-
-private:
- /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
- /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
- /// The interleaved access analysis can also add new predicates (for example
- /// by versioning strides of pointers).
- PredicatedScalarEvolution &PSE;
-
- Loop *TheLoop;
- DominatorTree *DT;
- LoopInfo *LI;
- const LoopAccessInfo *LAI;
-
- /// True if the loop may contain non-reversed interleaved groups with
- /// out-of-bounds accesses. We ensure we don't speculatively access memory
- /// out-of-bounds by executing at least one scalar epilogue iteration.
- bool RequiresScalarEpilogue = false;
-
- /// Holds the relationships between the members and the interleave group.
- DenseMap<Instruction *, InterleaveGroup *> InterleaveGroupMap;
-
- /// Holds dependences among the memory accesses in the loop. It maps a source
- /// access to a set of dependent sink accesses.
- DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;
-
- /// The descriptor for a strided memory access.
- struct StrideDescriptor {
- StrideDescriptor() = default;
- StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
- unsigned Align)
- : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {}
-
- // The access's stride. It is negative for a reverse access.
- int64_t Stride = 0;
-
- // The scalar expression of this access.
- const SCEV *Scev = nullptr;
-
- // The size of the memory object.
- uint64_t Size = 0;
-
- // The alignment of this access.
- unsigned Align = 0;
- };
-
- /// A type for holding instructions and their stride descriptors.
- using StrideEntry = std::pair<Instruction *, StrideDescriptor>;
-
- /// Create a new interleave group with the given instruction \p Instr,
- /// stride \p Stride and alignment \p Align.
- ///
- /// \returns the newly created interleave group.
- InterleaveGroup *createInterleaveGroup(Instruction *Instr, int Stride,
- unsigned Align) {
- assert(!isInterleaved(Instr) && "Already in an interleaved access group");
- InterleaveGroupMap[Instr] = new InterleaveGroup(Instr, Stride, Align);
- return InterleaveGroupMap[Instr];
- }
-
- /// Release the group and remove all the relationships.
- void releaseGroup(InterleaveGroup *Group) {
- for (unsigned i = 0; i < Group->getFactor(); i++)
- if (Instruction *Member = Group->getMember(i))
- InterleaveGroupMap.erase(Member);
-
- delete Group;
- }
-
- /// Collect all the accesses with a constant stride in program order.
- void collectConstStrideAccesses(
- MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
- const ValueToValueMap &Strides);
-
- /// Returns true if \p Stride is allowed in an interleaved group.
- static bool isStrided(int Stride) {
- unsigned Factor = std::abs(Stride);
- return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
- }
-
- /// Returns true if \p BB is a predicated block.
- bool isPredicated(BasicBlock *BB) const {
- return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
- }
-
- /// Returns true if LoopAccessInfo can be used for dependence queries.
- bool areDependencesValid() const {
- return LAI && LAI->getDepChecker().getDependences();
- }
-
- /// Returns true if memory accesses \p A and \p B can be reordered, if
- /// necessary, when constructing interleaved groups.
- ///
- /// \p A must precede \p B in program order. We return false if reordering is
- /// not necessary or is prevented because \p A and \p B may be dependent.
- bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
- StrideEntry *B) const {
- // Code motion for interleaved accesses can potentially hoist strided loads
- // and sink strided stores. The code below checks the legality of the
- // following two conditions:
- //
- // 1. Potentially moving a strided load (B) before any store (A) that
- // precedes B, or
- //
- // 2. Potentially moving a strided store (A) after any load or store (B)
- // that A precedes.
- //
- // It's legal to reorder A and B if we know there isn't a dependence from A
- // to B. Note that this determination is conservative since some
- // dependences could potentially be reordered safely.
-
- // A is potentially the source of a dependence.
- auto *Src = A->first;
- auto SrcDes = A->second;
-
- // B is potentially the sink of a dependence.
- auto *Sink = B->first;
- auto SinkDes = B->second;
-
- // Code motion for interleaved accesses can't violate WAR dependences.
- // Thus, reordering is legal if the source isn't a write.
- if (!Src->mayWriteToMemory())
- return true;
-
- // At least one of the accesses must be strided.
- if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
- return true;
-
- // If dependence information is not available from LoopAccessInfo,
- // conservatively assume the instructions can't be reordered.
- if (!areDependencesValid())
- return false;
-
- // If we know there is a dependence from source to sink, assume the
- // instructions can't be reordered. Otherwise, reordering is legal.
- return Dependences.find(Src) == Dependences.end() ||
- !Dependences.lookup(Src).count(Sink);
- }
-
- /// Collect the dependences from LoopAccessInfo.
- ///
- /// We process the dependences once during the interleaved access analysis to
- /// enable constant-time dependence queries.
- void collectDependences() {
- if (!areDependencesValid())
- return;
- auto *Deps = LAI->getDepChecker().getDependences();
- for (auto Dep : *Deps)
- Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
- }
-};
-
-} // end anonymous namespace
-
static void emitMissedWarning(Function *F, Loop *L,
const LoopVectorizeHints &LH,
OptimizationRemarkEmitter *ORE) {
@@ -2288,7 +1917,7 @@ void InnerLoopVectorizer::vectorizeInter
Type *ScalarTy = getMemInstValueType(Instr);
unsigned InterleaveFactor = Group->getFactor();
Type *VecTy = VectorType::get(ScalarTy, InterleaveFactor * VF);
- Type *PtrTy = VecTy->getPointerTo(getMemInstAddressSpace(Instr));
+ Type *PtrTy = VecTy->getPointerTo(getLoadStoreAddressSpace(Instr));
// Prepare for the new pointers.
setDebugLocFromInst(Builder, Ptr);
@@ -2431,13 +2060,13 @@ void InnerLoopVectorizer::vectorizeMemor
Type *ScalarDataTy = getMemInstValueType(Instr);
Type *DataTy = VectorType::get(ScalarDataTy, VF);
Value *Ptr = getLoadStorePointerOperand(Instr);
- unsigned Alignment = getMemInstAlignment(Instr);
+ unsigned Alignment = getLoadStoreAlignment(Instr);
// An alignment of 0 means target abi alignment. We need to use the scalar's
// target abi alignment in such a case.
const DataLayout &DL = Instr->getModule()->getDataLayout();
if (!Alignment)
Alignment = DL.getABITypeAlignment(ScalarDataTy);
- unsigned AddressSpace = getMemInstAddressSpace(Instr);
+ unsigned AddressSpace = getLoadStoreAddressSpace(Instr);
// Determine if the pointer operand of the access is either consecutive or
// reverse consecutive.
@@ -4700,318 +4329,6 @@ void LoopVectorizationCostModel::collect
Uniforms[VF].insert(Worklist.begin(), Worklist.end());
}
-void InterleavedAccessInfo::collectConstStrideAccesses(
- MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
- const ValueToValueMap &Strides) {
- auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
-
- // Since it's desired that the load/store instructions be maintained in
- // "program order" for the interleaved access analysis, we have to visit the
- // blocks in the loop in reverse postorder (i.e., in a topological order).
- // Such an ordering will ensure that any load/store that may be executed
- // before a second load/store will precede the second load/store in
- // AccessStrideInfo.
- LoopBlocksDFS DFS(TheLoop);
- DFS.perform(LI);
- for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
- for (auto &I : *BB) {
- auto *LI = dyn_cast<LoadInst>(&I);
- auto *SI = dyn_cast<StoreInst>(&I);
- if (!LI && !SI)
- continue;
-
- Value *Ptr = getLoadStorePointerOperand(&I);
- // We don't check wrapping here because we don't know yet if Ptr will be
- // part of a full group or a group with gaps. Checking wrapping for all
- // pointers (even those that end up in groups with no gaps) will be overly
- // conservative. For full groups, wrapping should be ok since if we would
- // wrap around the address space we would do a memory access at nullptr
- // even without the transformation. The wrapping checks are therefore
- // deferred until after we've formed the interleaved groups.
- int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
- /*Assume=*/true, /*ShouldCheckWrap=*/false);
-
- const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
- PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
- uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
-
- // An alignment of 0 means target ABI alignment.
- unsigned Align = getMemInstAlignment(&I);
- if (!Align)
- Align = DL.getABITypeAlignment(PtrTy->getElementType());
-
- AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align);
- }
-}
-
-// Analyze interleaved accesses and collect them into interleaved load and
-// store groups.
-//
-// When generating code for an interleaved load group, we effectively hoist all
-// loads in the group to the location of the first load in program order. When
-// generating code for an interleaved store group, we sink all stores to the
-// location of the last store. This code motion can change the order of load
-// and store instructions and may break dependences.
-//
-// The code generation strategy mentioned above ensures that we won't violate
-// any write-after-read (WAR) dependences.
-//
-// E.g., for the WAR dependence: a = A[i]; // (1)
-// A[i] = b; // (2)
-//
-// The store group of (2) is always inserted at or below (2), and the load
-// group of (1) is always inserted at or above (1). Thus, the instructions will
-// never be reordered. All other dependences are checked to ensure the
-// correctness of the instruction reordering.
-//
-// The algorithm visits all memory accesses in the loop in bottom-up program
-// order. Program order is established by traversing the blocks in the loop in
-// reverse postorder when collecting the accesses.
-//
-// We visit the memory accesses in bottom-up order because it can simplify the
-// construction of store groups in the presence of write-after-write (WAW)
-// dependences.
-//
-// E.g., for the WAW dependence: A[i] = a; // (1)
-// A[i] = b; // (2)
-// A[i + 1] = c; // (3)
-//
-// We will first create a store group with (3) and (2). (1) can't be added to
-// this group because it and (2) are dependent. However, (1) can be grouped
-// with other accesses that may precede it in program order. Note that a
-// bottom-up order does not imply that WAW dependences should not be checked.
-void InterleavedAccessInfo::analyzeInterleaving() {
- LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
- const ValueToValueMap &Strides = LAI->getSymbolicStrides();
-
- // Holds all accesses with a constant stride.
- MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
- collectConstStrideAccesses(AccessStrideInfo, Strides);
-
- if (AccessStrideInfo.empty())
- return;
-
- // Collect the dependences in the loop.
- collectDependences();
-
- // Holds all interleaved store groups temporarily.
- SmallSetVector<InterleaveGroup *, 4> StoreGroups;
- // Holds all interleaved load groups temporarily.
- SmallSetVector<InterleaveGroup *, 4> LoadGroups;
-
- // Search in bottom-up program order for pairs of accesses (A and B) that can
- // form interleaved load or store groups. In the algorithm below, access A
- // precedes access B in program order. We initialize a group for B in the
- // outer loop of the algorithm, and then in the inner loop, we attempt to
- // insert each A into B's group if:
- //
- // 1. A and B have the same stride,
- // 2. A and B have the same memory object size, and
- // 3. A belongs in B's group according to its distance from B.
- //
- // Special care is taken to ensure group formation will not break any
- // dependences.
- for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
- BI != E; ++BI) {
- Instruction *B = BI->first;
- StrideDescriptor DesB = BI->second;
-
- // Initialize a group for B if it has an allowable stride. Even if we don't
- // create a group for B, we continue with the bottom-up algorithm to ensure
- // we don't break any of B's dependences.
- InterleaveGroup *Group = nullptr;
- if (isStrided(DesB.Stride)) {
- Group = getInterleaveGroup(B);
- if (!Group) {
- LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
- << '\n');
- Group = createInterleaveGroup(B, DesB.Stride, DesB.Align);
- }
- if (B->mayWriteToMemory())
- StoreGroups.insert(Group);
- else
- LoadGroups.insert(Group);
- }
-
- for (auto AI = std::next(BI); AI != E; ++AI) {
- Instruction *A = AI->first;
- StrideDescriptor DesA = AI->second;
-
- // Our code motion strategy implies that we can't have dependences
- // between accesses in an interleaved group and other accesses located
- // between the first and last member of the group. Note that this also
- // means that a group can't have more than one member at a given offset.
- // The accesses in a group can have dependences with other accesses, but
- // we must ensure we don't extend the boundaries of the group such that
- // we encompass those dependent accesses.
- //
- // For example, assume we have the sequence of accesses shown below in a
- // stride-2 loop:
- //
- // (1, 2) is a group | A[i] = a; // (1)
- // | A[i-1] = b; // (2) |
- // A[i-3] = c; // (3)
- // A[i] = d; // (4) | (2, 4) is not a group
- //
- // Because accesses (2) and (3) are dependent, we can group (2) with (1)
- // but not with (4). If we did, the dependent access (3) would be within
- // the boundaries of the (2, 4) group.
- if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
- // If a dependence exists and A is already in a group, we know that A
- // must be a store since A precedes B and WAR dependences are allowed.
- // Thus, A would be sunk below B. We release A's group to prevent this
- // illegal code motion. A will then be free to form another group with
- // instructions that precede it.
- if (isInterleaved(A)) {
- InterleaveGroup *StoreGroup = getInterleaveGroup(A);
- StoreGroups.remove(StoreGroup);
- releaseGroup(StoreGroup);
- }
-
- // If a dependence exists and A is not already in a group (or it was
- // and we just released it), B might be hoisted above A (if B is a
- // load) or another store might be sunk below A (if B is a store). In
- // either case, we can't add additional instructions to B's group. B
- // will only form a group with instructions that it precedes.
- break;
- }
-
- // At this point, we've checked for illegal code motion. If either A or B
- // isn't strided, there's nothing left to do.
- if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
- continue;
-
- // Ignore A if it's already in a group or isn't the same kind of memory
- // operation as B.
- // Note that mayReadFromMemory() isn't mutually exclusive to mayWriteToMemory
- // in the case of atomic loads. We shouldn't see those here, canVectorizeMemory()
- // should have returned false - except for the case we asked for optimization
- // remarks.
- if (isInterleaved(A) || (A->mayReadFromMemory() != B->mayReadFromMemory())
- || (A->mayWriteToMemory() != B->mayWriteToMemory()))
- continue;
-
- // Check rules 1 and 2. Ignore A if its stride or size is different from
- // that of B.
- if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
- continue;
-
- // Ignore A if the memory object of A and B don't belong to the same
- // address space
- if (getMemInstAddressSpace(A) != getMemInstAddressSpace(B))
- continue;
-
- // Calculate the distance from A to B.
- const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
- PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
- if (!DistToB)
- continue;
- int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
-
- // Check rule 3. Ignore A if its distance to B is not a multiple of the
- // size.
- if (DistanceToB % static_cast<int64_t>(DesB.Size))
- continue;
-
- // Ignore A if either A or B is in a predicated block. Although we
- // currently prevent group formation for predicated accesses, we may be
- // able to relax this limitation in the future once we handle more
- // complicated blocks.
- if (isPredicated(A->getParent()) || isPredicated(B->getParent()))
- continue;
-
- // The index of A is the index of B plus A's distance to B in multiples
- // of the size.
- int IndexA =
- Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
-
- // Try to insert A into B's group.
- if (Group->insertMember(A, IndexA, DesA.Align)) {
- LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
- << " into the interleave group with" << *B
- << '\n');
- InterleaveGroupMap[A] = Group;
-
- // Set the first load in program order as the insert position.
- if (A->mayReadFromMemory())
- Group->setInsertPos(A);
- }
- } // Iteration over A accesses.
- } // Iteration over B accesses.
-
- // Remove interleaved store groups with gaps.
- for (InterleaveGroup *Group : StoreGroups)
- if (Group->getNumMembers() != Group->getFactor()) {
- LLVM_DEBUG(
- dbgs() << "LV: Invalidate candidate interleaved store group due "
- "to gaps.\n");
- releaseGroup(Group);
- }
- // Remove interleaved groups with gaps (currently only loads) whose memory
- // accesses may wrap around. We have to revisit the getPtrStride analysis,
- // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
- // not check wrapping (see documentation there).
- // FORNOW we use Assume=false;
- // TODO: Change to Assume=true but making sure we don't exceed the threshold
- // of runtime SCEV assumptions checks (thereby potentially failing to
- // vectorize altogether).
- // Additional optional optimizations:
- // TODO: If we are peeling the loop and we know that the first pointer doesn't
- // wrap then we can deduce that all pointers in the group don't wrap.
- // This means that we can forcefully peel the loop in order to only have to
- // check the first pointer for no-wrap. When we'll change to use Assume=true
- // we'll only need at most one runtime check per interleaved group.
- for (InterleaveGroup *Group : LoadGroups) {
- // Case 1: A full group. Can Skip the checks; For full groups, if the wide
- // load would wrap around the address space we would do a memory access at
- // nullptr even without the transformation.
- if (Group->getNumMembers() == Group->getFactor())
- continue;
-
- // Case 2: If first and last members of the group don't wrap this implies
- // that all the pointers in the group don't wrap.
- // So we check only group member 0 (which is always guaranteed to exist),
- // and group member Factor - 1; If the latter doesn't exist we rely on
- // peeling (if it is a non-reveresed accsess -- see Case 3).
- Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
- if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
- /*ShouldCheckWrap=*/true)) {
- LLVM_DEBUG(
- dbgs() << "LV: Invalidate candidate interleaved group due to "
- "first group member potentially pointer-wrapping.\n");
- releaseGroup(Group);
- continue;
- }
- Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
- if (LastMember) {
- Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
- if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
- /*ShouldCheckWrap=*/true)) {
- LLVM_DEBUG(
- dbgs() << "LV: Invalidate candidate interleaved group due to "
- "last group member potentially pointer-wrapping.\n");
- releaseGroup(Group);
- }
- } else {
- // Case 3: A non-reversed interleaved load group with gaps: We need
- // to execute at least one scalar epilogue iteration. This will ensure
- // we don't speculatively access memory out-of-bounds. We only need
- // to look for a member at index factor - 1, since every group must have
- // a member at index zero.
- if (Group->isReverse()) {
- LLVM_DEBUG(
- dbgs() << "LV: Invalidate candidate interleaved group due to "
- "a reverse access with gaps.\n");
- releaseGroup(Group);
- continue;
- }
- LLVM_DEBUG(
- dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
- RequiresScalarEpilogue = true;
- }
- }
-}
-
Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(bool OptForSize) {
if (Legal->getRuntimePointerChecking()->Need && TTI.hasBranchDivergence()) {
// TODO: It may by useful to do since it's still likely to be dynamically
@@ -5813,8 +5130,8 @@ unsigned LoopVectorizationCostModel::get
Type *ValTy = getMemInstValueType(I);
auto SE = PSE.getSE();
- unsigned Alignment = getMemInstAlignment(I);
- unsigned AS = getMemInstAddressSpace(I);
+ unsigned Alignment = getLoadStoreAlignment(I);
+ unsigned AS = getLoadStoreAddressSpace(I);
Value *Ptr = getLoadStorePointerOperand(I);
Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
@@ -5852,9 +5169,9 @@ unsigned LoopVectorizationCostModel::get
unsigned VF) {
Type *ValTy = getMemInstValueType(I);
Type *VectorTy = ToVectorTy(ValTy, VF);
- unsigned Alignment = getMemInstAlignment(I);
+ unsigned Alignment = getLoadStoreAlignment(I);
Value *Ptr = getLoadStorePointerOperand(I);
- unsigned AS = getMemInstAddressSpace(I);
+ unsigned AS = getLoadStoreAddressSpace(I);
int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
@@ -5888,7 +5205,7 @@ unsigned LoopVectorizationCostModel::get
unsigned VF) {
Type *ValTy = getMemInstValueType(I);
Type *VectorTy = ToVectorTy(ValTy, VF);
- unsigned Alignment = getMemInstAlignment(I);
+ unsigned Alignment = getLoadStoreAlignment(I);
Value *Ptr = getLoadStorePointerOperand(I);
return TTI.getAddressComputationCost(VectorTy) +
@@ -5900,7 +5217,7 @@ unsigned LoopVectorizationCostModel::get
unsigned VF) {
Type *ValTy = getMemInstValueType(I);
Type *VectorTy = ToVectorTy(ValTy, VF);
- unsigned AS = getMemInstAddressSpace(I);
+ unsigned AS = getLoadStoreAddressSpace(I);
auto Group = getInterleavedAccessGroup(I);
assert(Group && "Fail to get an interleaved access group.");
@@ -5934,8 +5251,8 @@ unsigned LoopVectorizationCostModel::get
// moment.
if (VF == 1) {
Type *ValTy = getMemInstValueType(I);
- unsigned Alignment = getMemInstAlignment(I);
- unsigned AS = getMemInstAddressSpace(I);
+ unsigned Alignment = getLoadStoreAlignment(I);
+ unsigned AS = getLoadStoreAddressSpace(I);
return TTI.getAddressComputationCost(ValTy) +
TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, I);
Modified: llvm/trunk/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs.ll?rev=342027&r1=342026&r2=342027&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs.ll Wed Sep 12 01:01:57 2018
@@ -1,6 +1,6 @@
; REQUIRES: asserts
; RUN: opt -mtriple=s390x-unknown-linux -mcpu=z13 -loop-vectorize \
-; RUN: -force-vector-width=4 -debug-only=loop-vectorize \
+; RUN: -force-vector-width=4 -debug-only=loop-vectorize,vectorutils \
; RUN: -disable-output < %s 2>&1 | FileCheck %s
;
; Check that the loop vectorizer performs memory interleaving with accurate