[llvm-commits] [llvm] r166642 - in /llvm/trunk: include/llvm/Target/TargetLowering.h include/llvm/Target/TargetTransformImpl.h lib/Target/TargetTransformImpl.cpp lib/Transforms/IPO/PassManagerBuilder.cpp lib/Transforms/Vectorize/LoopVectorize.cpp test/Transforms/LoopVectorize/cost-model.ll

Hal Finkel hfinkel at anl.gov
Wed Oct 24 20:27:59 PDT 2012


----- Original Message -----
> From: "Nadav Rotem" <nrotem at apple.com>
> To: llvm-commits at cs.uiuc.edu
> Sent: Wednesday, October 24, 2012 6:47:38 PM
> Subject: [llvm-commits] [llvm] r166642 - in /llvm/trunk: include/llvm/Target/TargetLowering.h
> include/llvm/Target/TargetTransformImpl.h lib/Target/TargetTransformImpl.cpp
> lib/Transforms/IPO/PassManagerBuilder.cpp lib/Transforms/Vectorize/LoopVectorize.cpp
> test/Transforms/LoopVectorize/cost-model.ll
> 
> Author: nadav
> Date: Wed Oct 24 18:47:38 2012
> New Revision: 166642
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=166642&view=rev
> Log:
> Implement a basic cost model for vector and scalar instructions.
> 
> Modified:
>     llvm/trunk/include/llvm/Target/TargetLowering.h
>     llvm/trunk/include/llvm/Target/TargetTransformImpl.h
>     llvm/trunk/lib/Target/TargetTransformImpl.cpp
>     llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp
>     llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
>     llvm/trunk/test/Transforms/LoopVectorize/cost-model.ll
> 
> Modified: llvm/trunk/include/llvm/Target/TargetLowering.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=166642&r1=166641&r2=166642&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Target/TargetLowering.h (original)
> +++ llvm/trunk/include/llvm/Target/TargetLowering.h Wed Oct 24
> 18:47:38 2012
> @@ -1958,6 +1958,7 @@
>  
>    ValueTypeActionImpl ValueTypeActions;
>  
> +public:
>    LegalizeKind
>    getTypeConversion(LLVMContext &Context, EVT VT) const {
>      // If this is a simple type, use the ComputeRegisterProp
>      mechanism.
> @@ -2074,6 +2075,7 @@
>      return LegalizeKind(TypeSplitVector, NVT);
>    }
>  
> +private:
>    std::vector<std::pair<EVT, const TargetRegisterClass*> >
>    AvailableRegClasses;
>  
>    /// TargetDAGCombineArray - Targets can specify ISD nodes that
>    they would
> 
> Modified: llvm/trunk/include/llvm/Target/TargetTransformImpl.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetTransformImpl.h?rev=166642&r1=166641&r2=166642&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Target/TargetTransformImpl.h (original)
> +++ llvm/trunk/include/llvm/Target/TargetTransformImpl.h Wed Oct 24
> 18:47:38 2012
> @@ -16,6 +16,7 @@
>  #define LLVM_TARGET_TARGET_TRANSFORMATION_IMPL_H
>  
>  #include "llvm/TargetTransformInfo.h"
> +#include "llvm/CodeGen/ValueTypes.h"
>  
>  namespace llvm {
>  
> @@ -51,6 +52,10 @@
>  private:
>    const TargetLowering *TLI;
>  
> +  /// Estimate the cost of type-legalization and the legalized type.
> +  std::pair<unsigned, EVT>
> +  getTypeLegalizationCost(LLVMContext &C, EVT Ty) const;
> +
>  public:
>    explicit VectorTargetTransformImpl(const TargetLowering *TL) :
>    TLI(TL) {}
>    
> 
> Modified: llvm/trunk/lib/Target/TargetTransformImpl.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/TargetTransformImpl.cpp?rev=166642&r1=166641&r2=166642&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/TargetTransformImpl.cpp (original)
> +++ llvm/trunk/lib/Target/TargetTransformImpl.cpp Wed Oct 24 18:47:38
> 2012
> @@ -9,6 +9,7 @@
>  
>  #include "llvm/Target/TargetTransformImpl.h"
>  #include "llvm/Target/TargetLowering.h"
> +#include <utility>
>  
>  using namespace llvm;
>  
> @@ -53,11 +54,131 @@
>  // Calls used by the vectorizers.
>  //
>  //===----------------------------------------------------------------------===//
> +int InstructionOpcodeToISD(unsigned Opcode) {
> +  static const int OpToISDTbl[] = {
> +    /*Instruction::Ret           */ 0, // Opcode numbering start at
> #1.
> +    /*Instruction::Br            */ 0,
> +    /*Instruction::Switch        */ 0,
> +    /*Instruction::IndirectBr    */ 0,
> +    /*Instruction::Invoke        */ 0,
> +    /*Instruction::Resume        */ 0,
> +    /*Instruction::Unreachable   */ 0,
> +    /*Instruction::Add           */ ISD::ADD,
> +    /*Instruction::FAdd          */ ISD::FADD,
> +    /*Instruction::Sub           */ ISD::SUB,
> +    /*Instruction::FSub          */ ISD::FSUB,
> +    /*Instruction::Mul           */ ISD::MUL,
> +    /*Instruction::FMul          */ ISD::FMUL,
> +    /*Instruction::UDiv          */ ISD::UDIV,
> +    /*Instruction::SDiv          */ ISD::UDIV,
> +    /*Instruction::FDiv          */ ISD::FDIV,
> +    /*Instruction::URem          */ ISD::UREM,
> +    /*Instruction::SRem          */ ISD::SREM,
> +    /*Instruction::FRem          */ ISD::FREM,
> +    /*Instruction::Shl           */ ISD::SHL,
> +    /*Instruction::LShr          */ ISD::SRL,
> +    /*Instruction::AShr          */ ISD::SRA,
> +    /*Instruction::And           */ ISD::AND,
> +    /*Instruction::Or            */ ISD::OR,
> +    /*Instruction::Xor           */ ISD::XOR,
> +    /*Instruction::Alloca        */ 0,
> +    /*Instruction::Load          */ ISD::LOAD,
> +    /*Instruction::Store         */ ISD::STORE,
> +    /*Instruction::GetElementPtr */ 0,

This should correspond to an ADD + MUL? Given that we have a vectorized GEP, I'd think that we certainly don't want this to be zero.

> +    /*Instruction::Fence         */ 0,
> +    /*Instruction::AtomicCmpXchg */ 0,
> +    /*Instruction::AtomicRMW     */ 0,
> +    /*Instruction::Trunc         */ ISD::TRUNCATE,
> +    /*Instruction::ZExt          */ ISD::ZERO_EXTEND,
> +    /*Instruction::SExt          */ ISD::SEXTLOAD,
> +    /*Instruction::FPToUI        */ ISD::FP_TO_UINT,
> +    /*Instruction::FPToSI        */ ISD::FP_TO_SINT,
> +    /*Instruction::UIToFP        */ ISD::UINT_TO_FP,
> +    /*Instruction::SIToFP        */ ISD::SINT_TO_FP,
> +    /*Instruction::FPTrunc       */ ISD::FP_ROUND,
> +    /*Instruction::FPExt         */ ISD::FP_EXTEND,
> +    /*Instruction::PtrToInt      */ ISD::BITCAST,
> +    /*Instruction::IntToPtr      */ ISD::BITCAST,
> +    /*Instruction::BitCast       */ ISD::BITCAST,
> +    /*Instruction::ICmp          */ ISD::SETCC,
> +    /*Instruction::FCmp          */ ISD::SETCC,
> +    /*Instruction::PHI           */ 0,
> +    /*Instruction::Call          */ 0,
> +    /*Instruction::Select        */ ISD::SELECT,
> +    /*Instruction::UserOp1       */ 0,
> +    /*Instruction::UserOp2       */ 0,
> +    /*Instruction::VAArg         */ 0,
> +    /*Instruction::ExtractElement*/ ISD::EXTRACT_VECTOR_ELT,
> +    /*Instruction::InsertElement */ ISD::INSERT_VECTOR_ELT,
> +    /*Instruction::ShuffleVector */ ISD::VECTOR_SHUFFLE,
> +    /*Instruction::ExtractValue  */ ISD::MERGE_VALUES,
> +    /*Instruction::InsertValue   */ ISD::MERGE_VALUES,
> +    /*Instruction::LandingPad    */ 0};
> +
> +  assert((Instruction::Ret == 1) && (Instruction::LandingPad == 58)
> &&
> +         "Instruction order had changed");

If only we had a static_assert ;)

 -Hal

> +
> +  // Opcode numbering starts at #1 but the table starts at #0, so we
> subtract
> +  // one from the opcode number.
> +  return OpToISDTbl[Opcode - 1];
> +}
> +
> +std::pair<unsigned, EVT>
> +VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C,
> +                                                         EVT Ty)
> const {
> +  unsigned Cost = 1;
> +  // We keep legalizing the type until we find a legal kind. We
> assume that
> +  // the only operation that costs anything is the split. After
> splitting
> +  // we need to handle two types.
> +  while (true) {
> +    TargetLowering::LegalizeKind LK = TLI->getTypeConversion(C, Ty);
> +
> +    if (LK.first == TargetLowering::TypeLegal)
> +      return std::make_pair(Cost, LK.second);
> +
> +    if (LK.first == TargetLowering::TypeSplitVector)
> +      Cost *= 2;
> +
> +    // Keep legalizing the type.
> +    Ty = LK.second;
> +  }
> +}
>  
>  unsigned
>  VectorTargetTransformImpl::getInstrCost(unsigned Opcode, Type *Ty1,
>                                          Type *Ty2) const {
> -  return 1;
> +  // Check if any of the operands are vector operands.
> +  int ISD = InstructionOpcodeToISD(Opcode);
> +
> +  // Selects on vectors are actually vector selects.
> +  if (ISD == ISD::SELECT) {
> +    assert(Ty2 && "Ty2 must hold the select type");
> +    if (Ty2->isVectorTy())
> +    ISD = ISD::VSELECT;
> +  }
> +
> +  // If we don't have any information about this instruction assume
> it costs 1.
> +  if (ISD == 0)
> +    return 1;
> +
> +  assert(Ty1 && "We need to have at least one type");
> +
> +  // From this stage we look at the legalized type.
> +  std::pair<unsigned, EVT>  LT =
> +  getTypeLegalizationCost(Ty1->getContext(),
> TLI->getValueType(Ty1));
> +
> +  if (TLI->isOperationLegalOrCustom(ISD, LT.second)) {
> +    // The operation is legal. Assume it costs 1. Multiply
> +    // by the type-legalization overhead.
> +    return LT.first * 1;
> +  }
> +
> +  unsigned NumElem =
> +  (LT.second.isVector() ? LT.second.getVectorNumElements() : 1);
> +
> +  // We will probably scalarize this instruction. Assume that the
> cost is the
> +  // number of the vector elements.
> +  return LT.first * NumElem * 1;
>  }
>  
>  unsigned
> @@ -69,5 +190,9 @@
>  VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type
>  *Src,
>                                             unsigned Alignment,
>                                             unsigned AddressSpace)
>                                             const {
> -  return 1;
> +  // From this stage we look at the legalized type.
> +  std::pair<unsigned, EVT>  LT =
> +  getTypeLegalizationCost(Src->getContext(),
> TLI->getValueType(Src));
> +  // Assume that all loads of legal types cost 1.
> +  return LT.first;
>  }
> 
> Modified: llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp?rev=166642&r1=166641&r2=166642&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp (original)
> +++ llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp Wed Oct 24
> 18:47:38 2012
> @@ -185,7 +185,7 @@
>    MPM.add(createLoopIdiomPass());             // Recognize idioms
>    like memset.
>    MPM.add(createLoopDeletionPass());          // Delete dead loops
>  
> -  if (Vectorize) {
> +  if (Vectorize || true) {
>      MPM.add(createLoopVectorizePass());
>      MPM.add(createLICMPass());
>    }
> 
> Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=166642&r1=166641&r2=166642&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Wed Oct 24
> 18:47:38 2012
> @@ -300,10 +300,10 @@
>  class LoopVectorizationCostModel {
>  public:
>    /// C'tor.
> -  LoopVectorizationCostModel(Loop *Lp, ScalarEvolution *Se,
> DataLayout *Dl,
> +  LoopVectorizationCostModel(Loop *Lp, ScalarEvolution *Se,
>                               LoopVectorizationLegality *Leg,
>                               const VectorTargetTransformInfo *Vtti):
> -  TheLoop(Lp), SE(Se), DL(Dl), Legal(Leg), VTTI(Vtti) { }
> +  TheLoop(Lp), SE(Se), Legal(Leg), VTTI(Vtti) { }
>  
>    /// Returns the most profitable vectorization factor for the loop
>    that is
>    /// smaller or equal to the VF argument. This method checks every
>    power
> @@ -325,8 +325,7 @@
>    Loop *TheLoop;
>    /// Scev analysis.
>    ScalarEvolution *SE;
> -  /// DataLayout analysis.
> -  DataLayout *DL;
> +
>    /// Vectorization legality.
>    LoopVectorizationLegality *Legal;
>    /// Vector target information.
> @@ -372,7 +371,7 @@
>        if (TTI)
>          VTTI = TTI->getVectorTargetTransformInfo();
>        // Use the cost model.
> -      LoopVectorizationCostModel CM(L, SE, DL, &LVL, VTTI);
> +      LoopVectorizationCostModel CM(L, SE, &LVL, VTTI);
>        VF = CM.findBestVectorizationFactor();
>  
>        if (VF == 1) {
> @@ -1432,11 +1431,12 @@
>    // For each instruction in the old loop.
>    for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it !=
>    e; ++it) {
>      Instruction *Inst = it;
> -    Cost += getInstructionCost(Inst, VF);
> +    unsigned C = getInstructionCost(Inst, VF);
> +    Cost += C;
> +    DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF
> "<< VF <<
> +          " For instruction: "<< *Inst << "\n");
>    }
>  
> -  // Return the cost divided by VF, because we will be executing
> -  // less iterations of the vector form.
>    return Cost;
>  }
>  
> @@ -1444,11 +1444,13 @@
>  LoopVectorizationCostModel::getInstructionCost(Instruction *I,
>  unsigned VF) {
>    assert(VTTI && "Invalid vector target transformation info");
>    switch (I->getOpcode()) {
> +    case Instruction::GetElementPtr:
> +      return 0;
>      case Instruction::Br: {
>        return VTTI->getInstrCost(I->getOpcode());
>      }
>      case Instruction::PHI:
> -      // PHIs are handled the same as the binary instructions below.
> +      return 0;
>      case Instruction::Add:
>      case Instruction::FAdd:
>      case Instruction::Sub:
> @@ -1493,11 +1495,17 @@
>        // Scalarized stores.
>        if (!Legal->isConsecutiveGep(SI->getPointerOperand())) {
>          unsigned Cost = 0;
> -        unsigned ExtCost =
> VTTI->getInstrCost(Instruction::ExtractElement, VTy);
> -        // The cost of extracting from the vector value.
> -        Cost += VF * ExtCost;
> +        if (VF != 1) {
> +          unsigned ExtCost =
> VTTI->getInstrCost(Instruction::ExtractElement,
> +                                                VTy);
> +          // The cost of extracting from the value vector and
> pointer vector.
> +          Cost += VF * (ExtCost * 2);
> +        }
>          // The cost of the scalar stores.
> -        Cost += VF * VTTI->getInstrCost(I->getOpcode(),
> VTy->getScalarType());
> +        Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
> +                                           VTy->getScalarType(),
> +                                           SI->getAlignment(),
> +
>                                           SI->getPointerAddressSpace());
>          return Cost;
>        }
>  
> @@ -1512,11 +1520,18 @@
>        // Scalarized loads.
>        if (!Legal->isConsecutiveGep(LI->getPointerOperand())) {
>          unsigned Cost = 0;
> -        unsigned InCost =
> VTTI->getInstrCost(Instruction::InsertElement, VTy);
> -        // The cost of inserting the loaded value into the result
> vector.
> -        Cost += VF * InCost;
> +        if (VF != 1) {
> +          unsigned InCost =
> VTTI->getInstrCost(Instruction::InsertElement, VTy);
> +          unsigned ExCost =
> VTTI->getInstrCost(Instruction::ExtractValue, VTy);
> +
> +          // The cost of inserting the loaded value into the result
> vector, and
> +          // extracting from a vector of pointers.
> +          Cost += VF * (InCost + ExCost);
> +        }
>          // The cost of the scalar stores.
> -        Cost += VF * VTTI->getInstrCost(I->getOpcode(),
> VTy->getScalarType());
> +        Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
> VTy->getScalarType(),
> +                                           LI->getAlignment(),
> +
>                                           LI->getPointerAddressSpace());
>          return Cost;
>        }
>  
> 
> Modified: llvm/trunk/test/Transforms/LoopVectorize/cost-model.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/cost-model.ll?rev=166642&r1=166641&r2=166642&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopVectorize/cost-model.ll (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/cost-model.ll Wed Oct 24
> 18:47:38 2012
> @@ -8,10 +8,8 @@
>  @d = common global [2048 x i32] zeroinitializer, align 16
>  @a = common global [2048 x i32] zeroinitializer, align 16
>  
> -; At this point the cost model is pretty bad and we are vectorizing
> the code below.
> -; TODO: This code should not be vectorized on x86.
>  ;CHECK: cost_model_1
> -;CHECK: <4 x i32>
> +;CHECK-NOT: <4 x i32>
>  ;CHECK: ret void
>  define void @cost_model_1() nounwind uwtable noinline ssp {
>  entry:
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> 

-- 
Hal Finkel
Postdoctoral Appointee
Leadership Computing Facility
Argonne National Laboratory



More information about the llvm-commits mailing list