[llvm] r214859 - Teach the SLP Vectorizer that keeping some values live over a callsite can have a cost.
James Molloy
james.molloy at arm.com
Tue Aug 5 05:30:34 PDT 2014
Author: jamesm
Date: Tue Aug 5 07:30:34 2014
New Revision: 214859
URL: http://llvm.org/viewvc/llvm-project?rev=214859&view=rev
Log:
Teach the SLP Vectorizer that keeping some values live over a callsite can have a cost.
Some types, such as 128-bit vector types on AArch64, don't have any callee-saved registers. So if a value needs to stay live over a callsite, it must be spilled and refilled. This cost is now taken into account.
Added:
llvm/trunk/test/Transforms/SLPVectorizer/AArch64/load-store-q.ll
Modified:
llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h?rev=214859&r1=214858&r2=214859&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h Tue Aug 5 07:30:34 2014
@@ -416,6 +416,13 @@ public:
virtual unsigned getAddressComputationCost(Type *Ty,
bool IsComplex = false) const;
+ /// \returns The cost, if any, of keeping values of the given types alive
+ /// over a callsite.
+ ///
+ /// Some types may require the use of register classes that do not have
+ /// any callee-saved registers, so would require a spill and fill.
+ virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const;
+
/// @}
/// Analysis group identification.
Modified: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/TargetTransformInfo.cpp?rev=214859&r1=214858&r2=214859&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp Tue Aug 5 07:30:34 2014
@@ -230,6 +230,11 @@ unsigned TargetTransformInfo::getReducti
return PrevTTI->getReductionCost(Opcode, Ty, IsPairwise);
}
+unsigned TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys)
+ const {
+ return PrevTTI->getCostOfKeepingLiveOverCall(Tys); // Default behaviour: forward the query, with Tys unchanged, to PrevTTI.
+}
+
namespace {
struct NoTTI final : ImmutablePass, TargetTransformInfo {
@@ -613,6 +618,11 @@ struct NoTTI final : ImmutablePass, Targ
unsigned getReductionCost(unsigned, Type *, bool) const override {
return 1;
}
+
+ unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const override {
+ return 0; // NoTTI charges nothing for keeping values live over a call; Tys is ignored.
+ }
+
};
} // end anonymous namespace
Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp?rev=214859&r1=214858&r2=214859&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp Tue Aug 5 07:30:34 2014
@@ -124,6 +124,9 @@ public:
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace) const override;
+
+ unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const override;
+
/// @}
};
@@ -498,3 +501,15 @@ unsigned AArch64TTI::getMemoryOpCost(uns
return LT.first;
}
+
+unsigned AArch64TTI::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const {
+ unsigned Cost = 0; // Running total over every type in Tys.
+ for (auto *I : Tys) {
+ if (!I->isVectorTy()) // Non-vector types contribute no cost.
+ continue;
+ if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128) // Only vectors that are exactly 128 bits wide in total are charged.
+ Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) + // Cost of one store of the vector (the spill) ... NOTE(review): 128 sits in the Alignment argument position; confirm the intended unit.
+ getMemoryOpCost(Instruction::Load, I, 128, 0); // ... plus one load of it (the refill).
+ }
+ return Cost;
+}
Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=214859&r1=214858&r2=214859&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Tue Aug 5 07:30:34 2014
@@ -361,6 +361,10 @@ public:
/// Returns the vectorized root.
Value *vectorizeTree();
+ /// \returns the cost incurred by unwanted spills and fills, caused by
+ /// holding live values over call sites.
+ int getSpillCost();
+
/// \returns the vectorization cost of the subtree that starts at \p VL.
/// A negative number means that this is profitable.
int getTreeCost();
@@ -1543,6 +1547,68 @@ bool BoUpSLP::isFullyVectorizableTinyTre
return true;
}
+int BoUpSLP::getSpillCost() {
+ // Walk from the bottom of the tree to the top, tracking which values are
+ // live. When we see a call instruction that is not part of our tree,
+ // query TTI to see if there is a cost to keeping values live over it
+ // (for example, if spills and fills are required).
+ unsigned BundleWidth = VectorizableTree.front().Scalars.size(); // Scalar count of the first tree entry; used as the vector width below.
+ int Cost = 0; // Sum of the TTI costs over every call encountered during the walk.
+
+ SmallPtrSet<Instruction*, 4> LiveValues; // Instructions currently treated as live.
+ Instruction *PrevInst = nullptr; // Previously visited instruction; null until the first one is seen.
+
+ for (unsigned N = 0; N < VectorizableTree.size(); ++N) {
+ Instruction *Inst = dyn_cast<Instruction>(VectorizableTree[N].Scalars[0]); // Only the first scalar of each entry is examined.
+ if (!Inst) // Entries whose first scalar is not an instruction are skipped.
+ continue;
+
+ if (!PrevInst) { // The first instruction found only seeds PrevInst; nothing is costed for it.
+ PrevInst = Inst;
+ continue;
+ }
+
+ DEBUG(
+ dbgs() << "SLP: #LV: " << LiveValues.size();
+ for (auto *X : LiveValues)
+ dbgs() << " " << X->getName();
+ dbgs() << ", Looking at ";
+ Inst->dump();
+ );
+
+ // Update LiveValues.
+ LiveValues.erase(PrevInst); // PrevInst is removed from the live set before its operands are added.
+ for (auto &J : PrevInst->operands()) {
+ if (isa<Instruction>(&*J) && ScalarToTreeEntry.count(&*J)) // Only operands that are instructions and are present in ScalarToTreeEntry become live.
+ LiveValues.insert(cast<Instruction>(&*J));
+ }
+
+ // Now find the sequence of instructions between PrevInst and Inst.
+ BasicBlock::reverse_iterator InstIt(Inst), PrevInstIt(PrevInst);
+ --PrevInstIt;
+ while (InstIt != PrevInstIt) {
+ if (PrevInstIt == PrevInst->getParent()->rend()) { // The reverse walk ran off the end of PrevInst's block:
+ PrevInstIt = Inst->getParent()->rbegin(); // continue from the reverse-begin of Inst's block instead.
+ continue;
+ }
+
+ if (isa<CallInst>(&*PrevInstIt) && &*PrevInstIt != PrevInst) { // NOTE(review): only PrevInst itself is excluded here, not other calls that may belong to the tree; confirm intent.
+ SmallVector<Type*, 4> V;
+ for (auto *II : LiveValues)
+ V.push_back(VectorType::get(II->getType(), BundleWidth)); // Each live value is costed as a BundleWidth-element vector of its own type.
+ Cost += TTI->getCostOfKeepingLiveOverCall(V); // Charged once for every call found in the walk.
+ }
+
+ ++PrevInstIt;
+ }
+
+ PrevInst = Inst;
+ }
+
+ DEBUG(dbgs() << "SLP: SpillCost=" << Cost << "\n");
+ return Cost;
+}
+
int BoUpSLP::getTreeCost() {
int Cost = 0;
DEBUG(dbgs() << "SLP: Calculating cost for tree of size " <<
@@ -1578,6 +1644,8 @@ int BoUpSLP::getTreeCost() {
I->Lane);
}
+ Cost += getSpillCost();
+
DEBUG(dbgs() << "SLP: Total Cost " << Cost + ExtractCost<< ".\n");
return Cost + ExtractCost;
}
Added: llvm/trunk/test/Transforms/SLPVectorizer/AArch64/load-store-q.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/AArch64/load-store-q.ll?rev=214859&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/AArch64/load-store-q.ll (added)
+++ llvm/trunk/test/Transforms/SLPVectorizer/AArch64/load-store-q.ll Tue Aug 5 07:30:34 2014
@@ -0,0 +1,46 @@
+; RUN: opt -S -basicaa -slp-vectorizer < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios5.0.0"
+
+; Holding a value live over a call boundary may require
+; spills and fills. This is the case for <2 x double>,
+; as it occupies a Q register of which there are no
+; callee-saves.
+
+; CHECK: load double
+; CHECK: load double
+; CHECK: call void @g
+; CHECK: store double
+; CHECK: store double
+define void @f(double* %p, double* %q) {
+ %addr2 = getelementptr double* %q, i32 1 ; address of q[1]
+ %addr = getelementptr double* %p, i32 1 ; address of p[1]
+ %x = load double* %p
+ %y = load double* %addr
+ call void @g() ; %x and %y are defined above and used below, so both are live across this call
+ store double %x, double* %q
+ store double %y, double* %addr2
+ ret void
+}
+declare void @g()
+
+; Check we deal with loops correctly.
+;
+; CHECK: store <2 x double>
+; CHECK: load <2 x double>
+define void @f2(double* %p, double* %q) {
+entry:
+ br label %loop
+
+loop:
+ %p1 = phi double [0.0, %entry], [%x, %loop] ; 0.0 on entry, otherwise %x loaded by the previous iteration
+ %p2 = phi double [0.0, %entry], [%y, %loop] ; 0.0 on entry, otherwise %y loaded by the previous iteration
+ %addr2 = getelementptr double* %q, i32 1 ; address of q[1]
+ %addr = getelementptr double* %p, i32 1 ; address of p[1]
+ store double %p1, double* %q
+ store double %p2, double* %addr2
+
+ %x = load double* %p
+ %y = load double* %addr
+ br label %loop ; unconditional back-edge: the loop has no exit and contains no call
+}
More information about the llvm-commits
mailing list