[llvm-commits] [llvm] r166642 - in /llvm/trunk: include/llvm/Target/TargetLowering.h include/llvm/Target/TargetTransformImpl.h lib/Target/TargetTransformImpl.cpp lib/Transforms/IPO/PassManagerBuilder.cpp lib/Transforms/Vectorize/LoopVectorize.cpp test/Transforms/LoopVectorize/cost-model.ll
Hal Finkel
hfinkel at anl.gov
Wed Oct 24 20:27:59 PDT 2012
----- Original Message -----
> From: "Nadav Rotem" <nrotem at apple.com>
> To: llvm-commits at cs.uiuc.edu
> Sent: Wednesday, October 24, 2012 6:47:38 PM
> Subject: [llvm-commits] [llvm] r166642 - in /llvm/trunk: include/llvm/Target/TargetLowering.h
> include/llvm/Target/TargetTransformImpl.h lib/Target/TargetTransformImpl.cpp
> lib/Transforms/IPO/PassManagerBuilder.cpp lib/Transforms/Vectorize/LoopVectorize.cpp
> test/Transforms/LoopVectorize/cost-model.ll
>
> Author: nadav
> Date: Wed Oct 24 18:47:38 2012
> New Revision: 166642
>
> URL: http://llvm.org/viewvc/llvm-project?rev=166642&view=rev
> Log:
> Implement a basic cost model for vector and scalar instructions.
>
> Modified:
> llvm/trunk/include/llvm/Target/TargetLowering.h
> llvm/trunk/include/llvm/Target/TargetTransformImpl.h
> llvm/trunk/lib/Target/TargetTransformImpl.cpp
> llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp
> llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
> llvm/trunk/test/Transforms/LoopVectorize/cost-model.ll
>
> Modified: llvm/trunk/include/llvm/Target/TargetLowering.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=166642&r1=166641&r2=166642&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Target/TargetLowering.h (original)
> +++ llvm/trunk/include/llvm/Target/TargetLowering.h Wed Oct 24
> 18:47:38 2012
> @@ -1958,6 +1958,7 @@
>
> ValueTypeActionImpl ValueTypeActions;
>
> +public:
> LegalizeKind
> getTypeConversion(LLVMContext &Context, EVT VT) const {
> // If this is a simple type, use the ComputeRegisterProp
> mechanism.
> @@ -2074,6 +2075,7 @@
> return LegalizeKind(TypeSplitVector, NVT);
> }
>
> +private:
> std::vector<std::pair<EVT, const TargetRegisterClass*> >
> AvailableRegClasses;
>
> /// TargetDAGCombineArray - Targets can specify ISD nodes that
> they would
>
> Modified: llvm/trunk/include/llvm/Target/TargetTransformImpl.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetTransformImpl.h?rev=166642&r1=166641&r2=166642&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Target/TargetTransformImpl.h (original)
> +++ llvm/trunk/include/llvm/Target/TargetTransformImpl.h Wed Oct 24
> 18:47:38 2012
> @@ -16,6 +16,7 @@
> #define LLVM_TARGET_TARGET_TRANSFORMATION_IMPL_H
>
> #include "llvm/TargetTransformInfo.h"
> +#include "llvm/CodeGen/ValueTypes.h"
>
> namespace llvm {
>
> @@ -51,6 +52,10 @@
> private:
> const TargetLowering *TLI;
>
> + /// Estimate the cost of type-legalization and the legalized type.
> + std::pair<unsigned, EVT>
> + getTypeLegalizationCost(LLVMContext &C, EVT Ty) const;
> +
> public:
> explicit VectorTargetTransformImpl(const TargetLowering *TL) :
> TLI(TL) {}
>
>
> Modified: llvm/trunk/lib/Target/TargetTransformImpl.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/TargetTransformImpl.cpp?rev=166642&r1=166641&r2=166642&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/TargetTransformImpl.cpp (original)
> +++ llvm/trunk/lib/Target/TargetTransformImpl.cpp Wed Oct 24 18:47:38
> 2012
> @@ -9,6 +9,7 @@
>
> #include "llvm/Target/TargetTransformImpl.h"
> #include "llvm/Target/TargetLowering.h"
> +#include <utility>
>
> using namespace llvm;
>
> @@ -53,11 +54,131 @@
> // Calls used by the vectorizers.
> //
> //===----------------------------------------------------------------------===//
> +int InstructionOpcodeToISD(unsigned Opcode) {
> + static const int OpToISDTbl[] = {
> + /*Instruction::Ret */ 0, // Opcode numbering start at
> #1.
> + /*Instruction::Br */ 0,
> + /*Instruction::Switch */ 0,
> + /*Instruction::IndirectBr */ 0,
> + /*Instruction::Invoke */ 0,
> + /*Instruction::Resume */ 0,
> + /*Instruction::Unreachable */ 0,
> + /*Instruction::Add */ ISD::ADD,
> + /*Instruction::FAdd */ ISD::FADD,
> + /*Instruction::Sub */ ISD::SUB,
> + /*Instruction::FSub */ ISD::FSUB,
> + /*Instruction::Mul */ ISD::MUL,
> + /*Instruction::FMul */ ISD::FMUL,
> + /*Instruction::UDiv */ ISD::UDIV,
> + /*Instruction::SDiv */ ISD::UDIV,
> + /*Instruction::FDiv */ ISD::FDIV,
> + /*Instruction::URem */ ISD::UREM,
> + /*Instruction::SRem */ ISD::SREM,
> + /*Instruction::FRem */ ISD::FREM,
> + /*Instruction::Shl */ ISD::SHL,
> + /*Instruction::LShr */ ISD::SRL,
> + /*Instruction::AShr */ ISD::SRA,
> + /*Instruction::And */ ISD::AND,
> + /*Instruction::Or */ ISD::OR,
> + /*Instruction::Xor */ ISD::XOR,
> + /*Instruction::Alloca */ 0,
> + /*Instruction::Load */ ISD::LOAD,
> + /*Instruction::Store */ ISD::STORE,
> + /*Instruction::GetElementPtr */ 0,
Shouldn't this correspond to an ADD + MUL? Given that we have a vectorized GEP, I'd think that we certainly don't want this to be zero.
> + /*Instruction::Fence */ 0,
> + /*Instruction::AtomicCmpXchg */ 0,
> + /*Instruction::AtomicRMW */ 0,
> + /*Instruction::Trunc */ ISD::TRUNCATE,
> + /*Instruction::ZExt */ ISD::ZERO_EXTEND,
> + /*Instruction::SExt */ ISD::SEXTLOAD,
> + /*Instruction::FPToUI */ ISD::FP_TO_UINT,
> + /*Instruction::FPToSI */ ISD::FP_TO_SINT,
> + /*Instruction::UIToFP */ ISD::UINT_TO_FP,
> + /*Instruction::SIToFP */ ISD::SINT_TO_FP,
> + /*Instruction::FPTrunc */ ISD::FP_ROUND,
> + /*Instruction::FPExt */ ISD::FP_EXTEND,
> + /*Instruction::PtrToInt */ ISD::BITCAST,
> + /*Instruction::IntToPtr */ ISD::BITCAST,
> + /*Instruction::BitCast */ ISD::BITCAST,
> + /*Instruction::ICmp */ ISD::SETCC,
> + /*Instruction::FCmp */ ISD::SETCC,
> + /*Instruction::PHI */ 0,
> + /*Instruction::Call */ 0,
> + /*Instruction::Select */ ISD::SELECT,
> + /*Instruction::UserOp1 */ 0,
> + /*Instruction::UserOp2 */ 0,
> + /*Instruction::VAArg */ 0,
> + /*Instruction::ExtractElement*/ ISD::EXTRACT_VECTOR_ELT,
> + /*Instruction::InsertElement */ ISD::INSERT_VECTOR_ELT,
> + /*Instruction::ShuffleVector */ ISD::VECTOR_SHUFFLE,
> + /*Instruction::ExtractValue */ ISD::MERGE_VALUES,
> + /*Instruction::InsertValue */ ISD::MERGE_VALUES,
> + /*Instruction::LandingPad */ 0};
> +
> + assert((Instruction::Ret == 1) && (Instruction::LandingPad == 58)
> &&
> + "Instruction order had changed");
If only we had a static_assert ;)
-Hal
> +
> + // Opcode numbering starts at #1 but the table starts at #0, so we
> subtract
> + // one from the opcode number.
> + return OpToISDTbl[Opcode - 1];
> +}
> +
> +std::pair<unsigned, EVT>
> +VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C,
> + EVT Ty)
> const {
> + unsigned Cost = 1;
> + // We keep legalizing the type until we find a legal kind. We
> assume that
> + // the only operation that costs anything is the split. After
> splitting
> + // we need to handle two types.
> + while (true) {
> + TargetLowering::LegalizeKind LK = TLI->getTypeConversion(C, Ty);
> +
> + if (LK.first == TargetLowering::TypeLegal)
> + return std::make_pair(Cost, LK.second);
> +
> + if (LK.first == TargetLowering::TypeSplitVector)
> + Cost *= 2;
> +
> + // Keep legalizing the type.
> + Ty = LK.second;
> + }
> +}
>
> unsigned
> VectorTargetTransformImpl::getInstrCost(unsigned Opcode, Type *Ty1,
> Type *Ty2) const {
> - return 1;
> + // Check if any of the operands are vector operands.
> + int ISD = InstructionOpcodeToISD(Opcode);
> +
> + // Selects on vectors are actually vector selects.
> + if (ISD == ISD::SELECT) {
> + assert(Ty2 && "Ty2 must hold the select type");
> + if (Ty2->isVectorTy())
> + ISD = ISD::VSELECT;
> + }
> +
> + // If we don't have any information about this instruction assume
> it costs 1.
> + if (ISD == 0)
> + return 1;
> +
> + assert(Ty1 && "We need to have at least one type");
> +
> + // From this stage we look at the legalized type.
> + std::pair<unsigned, EVT> LT =
> + getTypeLegalizationCost(Ty1->getContext(),
> TLI->getValueType(Ty1));
> +
> + if (TLI->isOperationLegalOrCustom(ISD, LT.second)) {
> + // The operation is legal. Assume it costs 1. Multiply
> + // by the type-legalization overhead.
> + return LT.first * 1;
> + }
> +
> + unsigned NumElem =
> + (LT.second.isVector() ? LT.second.getVectorNumElements() : 1);
> +
> + // We will probably scalarize this instruction. Assume that the
> cost is the
> + // number of the vector elements.
> + return LT.first * NumElem * 1;
> }
>
> unsigned
> @@ -69,5 +190,9 @@
> VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type
> *Src,
> unsigned Alignment,
> unsigned AddressSpace)
> const {
> - return 1;
> + // From this stage we look at the legalized type.
> + std::pair<unsigned, EVT> LT =
> + getTypeLegalizationCost(Src->getContext(),
> TLI->getValueType(Src));
> + // Assume that all loads of legal types cost 1.
> + return LT.first;
> }
>
> Modified: llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp?rev=166642&r1=166641&r2=166642&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp (original)
> +++ llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp Wed Oct 24
> 18:47:38 2012
> @@ -185,7 +185,7 @@
> MPM.add(createLoopIdiomPass()); // Recognize idioms
> like memset.
> MPM.add(createLoopDeletionPass()); // Delete dead loops
>
> - if (Vectorize) {
> + if (Vectorize || true) {
> MPM.add(createLoopVectorizePass());
> MPM.add(createLICMPass());
> }
>
> Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=166642&r1=166641&r2=166642&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Wed Oct 24
> 18:47:38 2012
> @@ -300,10 +300,10 @@
> class LoopVectorizationCostModel {
> public:
> /// C'tor.
> - LoopVectorizationCostModel(Loop *Lp, ScalarEvolution *Se,
> DataLayout *Dl,
> + LoopVectorizationCostModel(Loop *Lp, ScalarEvolution *Se,
> LoopVectorizationLegality *Leg,
> const VectorTargetTransformInfo *Vtti):
> - TheLoop(Lp), SE(Se), DL(Dl), Legal(Leg), VTTI(Vtti) { }
> + TheLoop(Lp), SE(Se), Legal(Leg), VTTI(Vtti) { }
>
> /// Returns the most profitable vectorization factor for the loop
> that is
> /// smaller or equal to the VF argument. This method checks every
> power
> @@ -325,8 +325,7 @@
> Loop *TheLoop;
> /// Scev analysis.
> ScalarEvolution *SE;
> - /// DataLayout analysis.
> - DataLayout *DL;
> +
> /// Vectorization legality.
> LoopVectorizationLegality *Legal;
> /// Vector target information.
> @@ -372,7 +371,7 @@
> if (TTI)
> VTTI = TTI->getVectorTargetTransformInfo();
> // Use the cost model.
> - LoopVectorizationCostModel CM(L, SE, DL, &LVL, VTTI);
> + LoopVectorizationCostModel CM(L, SE, &LVL, VTTI);
> VF = CM.findBestVectorizationFactor();
>
> if (VF == 1) {
> @@ -1432,11 +1431,12 @@
> // For each instruction in the old loop.
> for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it !=
> e; ++it) {
> Instruction *Inst = it;
> - Cost += getInstructionCost(Inst, VF);
> + unsigned C = getInstructionCost(Inst, VF);
> + Cost += C;
> + DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF
> "<< VF <<
> + " For instruction: "<< *Inst << "\n");
> }
>
> - // Return the cost divided by VF, because we will be executing
> - // less iterations of the vector form.
> return Cost;
> }
>
> @@ -1444,11 +1444,13 @@
> LoopVectorizationCostModel::getInstructionCost(Instruction *I,
> unsigned VF) {
> assert(VTTI && "Invalid vector target transformation info");
> switch (I->getOpcode()) {
> + case Instruction::GetElementPtr:
> + return 0;
> case Instruction::Br: {
> return VTTI->getInstrCost(I->getOpcode());
> }
> case Instruction::PHI:
> - // PHIs are handled the same as the binary instructions below.
> + return 0;
> case Instruction::Add:
> case Instruction::FAdd:
> case Instruction::Sub:
> @@ -1493,11 +1495,17 @@
> // Scalarized stores.
> if (!Legal->isConsecutiveGep(SI->getPointerOperand())) {
> unsigned Cost = 0;
> - unsigned ExtCost =
> VTTI->getInstrCost(Instruction::ExtractElement, VTy);
> - // The cost of extracting from the vector value.
> - Cost += VF * ExtCost;
> + if (VF != 1) {
> + unsigned ExtCost =
> VTTI->getInstrCost(Instruction::ExtractElement,
> + VTy);
> + // The cost of extracting from the value vector and
> pointer vector.
> + Cost += VF * (ExtCost * 2);
> + }
> // The cost of the scalar stores.
> - Cost += VF * VTTI->getInstrCost(I->getOpcode(),
> VTy->getScalarType());
> + Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
> + VTy->getScalarType(),
> + SI->getAlignment(),
> +
> SI->getPointerAddressSpace());
> return Cost;
> }
>
> @@ -1512,11 +1520,18 @@
> // Scalarized loads.
> if (!Legal->isConsecutiveGep(LI->getPointerOperand())) {
> unsigned Cost = 0;
> - unsigned InCost =
> VTTI->getInstrCost(Instruction::InsertElement, VTy);
> - // The cost of inserting the loaded value into the result
> vector.
> - Cost += VF * InCost;
> + if (VF != 1) {
> + unsigned InCost =
> VTTI->getInstrCost(Instruction::InsertElement, VTy);
> + unsigned ExCost =
> VTTI->getInstrCost(Instruction::ExtractValue, VTy);
> +
> + // The cost of inserting the loaded value into the result
> vector, and
> + // extracting from a vector of pointers.
> + Cost += VF * (InCost + ExCost);
> + }
> // The cost of the scalar stores.
> - Cost += VF * VTTI->getInstrCost(I->getOpcode(),
> VTy->getScalarType());
> + Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
> VTy->getScalarType(),
> + LI->getAlignment(),
> +
> LI->getPointerAddressSpace());
> return Cost;
> }
>
>
> Modified: llvm/trunk/test/Transforms/LoopVectorize/cost-model.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/cost-model.ll?rev=166642&r1=166641&r2=166642&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopVectorize/cost-model.ll (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/cost-model.ll Wed Oct 24
> 18:47:38 2012
> @@ -8,10 +8,8 @@
> @d = common global [2048 x i32] zeroinitializer, align 16
> @a = common global [2048 x i32] zeroinitializer, align 16
>
> -; At this point the cost model is pretty bad and we are vectorizing
> the code below.
> -; TODO: This code should not be vectorized on x86.
> ;CHECK: cost_model_1
> -;CHECK: <4 x i32>
> +;CHECK-NOT: <4 x i32>
> ;CHECK: ret void
> define void @cost_model_1() nounwind uwtable noinline ssp {
> entry:
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
--
Hal Finkel
Postdoctoral Appointee
Leadership Computing Facility
Argonne National Laboratory
More information about the llvm-commits mailing list