[llvm] r174422 - Loop Vectorizer: Refactor code to compute vectorized memory instruction cost
Arnold Schwaighofer
aschwaighofer at apple.com
Tue Feb 5 10:46:42 PST 2013
Author: arnolds
Date: Tue Feb 5 12:46:41 2013
New Revision: 174422
URL: http://llvm.org/viewvc/llvm-project?rev=174422&view=rev
Log:
Loop Vectorizer: Refactor code to compute vectorized memory instruction cost
Introduce a helper class that computes the cost of memory access instructions.
No functionality change intended.
Modified:
llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=174422&r1=174421&r2=174422&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Tue Feb 5 12:46:41 2013
@@ -560,6 +560,11 @@ public:
/// \return information about the register usage of the loop.
RegisterUsage calculateRegisterUsage();
+ /// A helper function for converting Scalar types to vector types.
+ /// If the incoming type is void, we return void. If the VF is 1, we return
+ /// the scalar type.
+ static Type* ToVectorTy(Type *Scalar, unsigned VF);
+
private:
/// Returns the expected execution cost. The unit of the cost does
/// not matter because we use the 'cost' units to compare different
@@ -571,11 +576,6 @@ private:
/// width. Vector width of one means scalar.
unsigned getInstructionCost(Instruction *I, unsigned VF);
- /// A helper function for converting Scalar types to vector types.
- /// If the incoming type is void, we return void. If the VF is 1, we return
- /// the scalar type.
- static Type* ToVectorTy(Type *Scalar, unsigned VF);
-
/// Returns whether the instruction is a load or store and will be a emitted
/// as a vector operation.
bool isConsecutiveLoadOrStore(Instruction *I);
@@ -594,6 +594,177 @@ private:
DataLayout *DL;
};
+/// A helper class to compute the cost of a memory operation (load or store).
+class MemoryCostComputation {
+public:
+ /// \brief This function computes the cost of a memory instruction, either of
+ /// a load or of a store.
+ /// \param Inst a pointer to a LoadInst or a StoreInst.
+ /// \param VF the vector factor to use.
+ /// \param TTI the target transform information used to obtain costs.
+ /// \param Legality the legality class used by this function to obtain the
+ /// access stride of the memory operation.
+ /// \returns the estimated cost of the memory instruction.
+ static unsigned computeCost(Value *Inst, unsigned VF,
+ const TargetTransformInfo &TTI,
+ LoopVectorizationLegality *Legality) {
+ if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
+ return StoreCost(Store, VF, TTI, Legality).cost();
+
+ return LoadCost(cast<LoadInst>(Inst), VF, TTI, Legality).cost();
+ }
+
+private:
+ /// A helper class to compute the cost of a vectorized memory instruction.
+ /// It is subclassed by the load and store cost computation classes, which fill
+ /// the fields with values that require knowing the concrete Load/StoreInst.
+ class MemoryOpCost {
+ public:
+ /// \return the cost of vectorizing the memory access instruction.
+ unsigned cost() {
+ if (VectorFactor == 1)
+ return TTI.getMemoryOpCost(Opcode, VectorTy, Alignment, AddressSpace);
+
+ if ((Stride = Legality->isConsecutivePtr(PointerOperand)))
+ return costOfWideMemInst();
+
+ return costOfScalarizedMemInst();
+ }
+
+ protected:
+ /// The pointer operand of the memory instruction.
+ Value *PointerOperand;
+ /// The scalar type of the memory access.
+ Type *ScalarTy;
+ /// The vector type of the memory access.
+ Type *VectorTy;
+ /// The vector factor by which we vectorize.
+ unsigned VectorFactor;
+ /// The stride of the memory access (assigned by cost(); negative is reverse).
+ int Stride;
+ /// The alignment of the memory operation.
+ unsigned Alignment;
+ /// The address space of the memory operation.
+ unsigned AddressSpace;
+ /// The opcode of the memory instruction.
+ unsigned Opcode;
+ /// True if the memory instruction is a load, false if it is a store.
+ bool IsLoadInst;
+ const TargetTransformInfo &TTI; ///< Target information used to obtain costs.
+ LoopVectorizationLegality *Legality; ///< Used to obtain the access stride.
+
+ /// Constructs a helper class to compute the cost of a memory instruction.
+ /// \param VF the vector factor (the length of the vector).
+ /// \param TI the target transform information used by this class to obtain
+ /// costs.
+ /// \param L the legality class used by this class to obtain the access
+ /// stride of the memory operation.
+ MemoryOpCost(unsigned VF, const TargetTransformInfo &TI,
+ LoopVectorizationLegality *L) :
+ VectorFactor(VF), TTI(TI), Legality(L) {
+ }
+
+ private:
+ /// \return the cost if the memory instruction is scalarized.
+ unsigned costOfScalarizedMemInst() {
+ unsigned Cost = 0;
+ Cost += costOfExtractFromPointerVector();
+ Cost += costOfExtractFromValueVector();
+ Cost += VectorFactor * TTI.getMemoryOpCost(Opcode, ScalarTy, Alignment,
+ AddressSpace);
+ Cost += costOfInsertIntoValueVector();
+ return Cost;
+ }
+
+ /// \return the cost of extracting the pointers out of the pointer vector.
+ unsigned costOfExtractFromPointerVector() {
+ Type *PtrTy = getVectorizedPointerOperandType();
+ return costOfVectorInstForAllElems(Instruction::ExtractElement, PtrTy);
+ }
+
+ /// \return the cost for extracting values out of the value vector if the
+ /// memory instruction is a store and zero otherwise.
+ unsigned costOfExtractFromValueVector() {
+ if (IsLoadInst)
+ return 0;
+
+ return costOfVectorInstForAllElems(Instruction::ExtractElement, VectorTy);
+ }
+
+ /// \return the cost of inserting values into the value vector if the memory
+ /// instruction is a load and zero otherwise.
+ unsigned costOfInsertIntoValueVector() {
+ if (!IsLoadInst)
+ return 0;
+
+ return costOfVectorInstForAllElems(Instruction::InsertElement, VectorTy);
+ }
+
+ /// \return the cost of a vector memory instruction.
+ unsigned costOfWideMemInst() {
+ unsigned Cost = TTI.getMemoryOpCost(Opcode, VectorTy, Alignment,
+ AddressSpace);
+ // A reverse (negative) stride adds the cost of a reverse shuffle.
+ if (Stride < 0)
+ Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy,
+ 0);
+ return Cost;
+ }
+
+ /// Helper function to compute the cost of one insert- or extractelement
+ /// instruction per vector element.
+ /// \param VecOpcode the vector instruction opcode (can be either an
+ /// InsertElement or an ExtractElement).
+ /// \param Ty the vector type the vector instruction operates on.
+ /// \return the cost of a vector instruction applied to each vector
+ /// element.
+ unsigned costOfVectorInstForAllElems(unsigned VecOpcode, Type *Ty) {
+ unsigned Cost = 0;
+ for (unsigned i = 0; i < VectorFactor; ++i)
+ Cost += TTI.getVectorInstrCost(VecOpcode, Ty, i);
+ return Cost;
+ }
+
+ /// \return a vectorized type for the pointer operand.
+ Type * getVectorizedPointerOperandType() {
+ Type *PointerOpTy = PointerOperand->getType();
+ return LoopVectorizationCostModel::ToVectorTy(PointerOpTy, VectorFactor);
+ }
+ };
+
+ /// Implementation of the memory cost base class for loads. Sets the fields of
+ /// the base class whose values depend on the LoadInst.
+ class LoadCost : public MemoryOpCost {
+ public:
+ LoadCost(LoadInst *Load, unsigned VF, const TargetTransformInfo &TI,
+ LoopVectorizationLegality *L) : MemoryOpCost(VF, TI, L) {
+ PointerOperand = Load->getPointerOperand();
+ ScalarTy = Load->getType();
+ VectorTy = LoopVectorizationCostModel::ToVectorTy(ScalarTy, VF);
+ Alignment = Load->getAlignment();
+ AddressSpace = Load->getPointerAddressSpace();
+ Opcode = Load->getOpcode();
+ IsLoadInst = true;
+ }
+ };
+
+ /// Implementation of the memory cost base class for stores. Sets the fields of
+ /// the base class whose values depend on the StoreInst.
+ class StoreCost : public MemoryOpCost {
+ public:
+ StoreCost(StoreInst *Store, unsigned VF, const TargetTransformInfo &TI,
+ LoopVectorizationLegality *L) : MemoryOpCost(VF, TI, L) {
+ PointerOperand = Store->getPointerOperand();
+ ScalarTy = Store->getValueOperand()->getType();
+ VectorTy = LoopVectorizationCostModel::ToVectorTy(ScalarTy, VF);
+ Alignment = Store->getAlignment();
+ AddressSpace = Store->getPointerAddressSpace();
+ Opcode = Store->getOpcode();
+ IsLoadInst = false;
+ }
+ };
+};
+
/// The LoopVectorize Pass.
struct LoopVectorize : public LoopPass {
/// Pass identification, replacement for typeid
@@ -3097,83 +3268,11 @@ LoopVectorizationCostModel::getInstructi
VectorTy = ToVectorTy(ValTy, VF);
return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy);
}
+ case Instruction::Load:
case Instruction::Store: {
- StoreInst *SI = cast<StoreInst>(I);
- Type *ValTy = SI->getValueOperand()->getType();
- VectorTy = ToVectorTy(ValTy, VF);
-
- if (VF == 1)
- return TTI.getMemoryOpCost(I->getOpcode(), VectorTy,
- SI->getAlignment(),
- SI->getPointerAddressSpace());
-
- // Scalarized stores.
- int Stride = Legal->isConsecutivePtr(SI->getPointerOperand());
- bool Reverse = Stride < 0;
- if (0 == Stride) {
- unsigned Cost = 0;
-
- // The cost of extracting from the value vector and pointer vector.
- Type *PtrTy = ToVectorTy(I->getOperand(0)->getType(), VF);
- for (unsigned i = 0; i < VF; ++i) {
- Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy,
- i);
- Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, PtrTy, i);
- }
-
- // The cost of the scalar stores.
- Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
- SI->getAlignment(),
- SI->getPointerAddressSpace());
- return Cost;
- }
-
- // Wide stores.
- unsigned Cost = TTI.getMemoryOpCost(I->getOpcode(), VectorTy,
- SI->getAlignment(),
- SI->getPointerAddressSpace());
- if (Reverse)
- Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
- VectorTy, 0);
- return Cost;
- }
- case Instruction::Load: {
- LoadInst *LI = cast<LoadInst>(I);
-
- if (VF == 1)
- return TTI.getMemoryOpCost(I->getOpcode(), VectorTy, LI->getAlignment(),
- LI->getPointerAddressSpace());
-
- // Scalarized loads.
- int Stride = Legal->isConsecutivePtr(LI->getPointerOperand());
- bool Reverse = Stride < 0;
- if (0 == Stride) {
- unsigned Cost = 0;
- Type *PtrTy = ToVectorTy(I->getOperand(0)->getType(), VF);
-
- // The cost of extracting from the pointer vector.
- for (unsigned i = 0; i < VF; ++i)
- Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, PtrTy, i);
-
- // The cost of inserting data to the result vector.
- for (unsigned i = 0; i < VF; ++i)
- Cost += TTI.getVectorInstrCost(Instruction::InsertElement, VectorTy, i);
-
- // The cost of the scalar stores.
- Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), RetTy->getScalarType(),
- LI->getAlignment(),
- LI->getPointerAddressSpace());
- return Cost;
- }
-
- // Wide loads.
- unsigned Cost = TTI.getMemoryOpCost(I->getOpcode(), VectorTy,
- LI->getAlignment(),
- LI->getPointerAddressSpace());
- if (Reverse)
- Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0);
- return Cost;
+ return MemoryCostComputation::computeCost(I, VF, TTI, Legal);
}
+
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
More information about the llvm-commits
mailing list