[llvm-commits] [llvm] r166836 - in /llvm/trunk: include/llvm/Target/TargetLowering.h include/llvm/Target/TargetTransformImpl.h include/llvm/TargetTransformInfo.h lib/Target/TargetTransformImpl.cpp lib/Transforms/Vectorize/LoopVectorize.cpp test/T

Sun Oct 28 10:09:00 PDT 2012

On Fri, Oct 26, 2012 at 4:49 PM, Nadav Rotem <nrotem at apple.com> wrote:
> Author: nadav
> Date: Fri Oct 26 18:49:28 2012
> New Revision: 166836
>
> URL: http://llvm.org/viewvc/llvm-project?rev=166836&view=rev
> Log:
>
> Refactor the VectorTargetTransformInfo interface.
>
> Add getCostXXX calls for different families of opcodes, such as casts, arithmetic, cmp, etc.
>
> Port the LoopVectorizer to the new API.
>
> The LoopVectorizer now finds instructions which will remain uniform after vectorization. It uses this information when calculating the cost of these instructions.

PR14199 claims this introduced a circular library dependency. Perhaps
you could check that out.

> Modified:
>     llvm/trunk/include/llvm/Target/TargetLowering.h
>     llvm/trunk/include/llvm/Target/TargetTransformImpl.h
>     llvm/trunk/include/llvm/TargetTransformInfo.h
>     llvm/trunk/lib/Target/TargetTransformImpl.cpp
>     llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
>     llvm/trunk/test/Transforms/LoopVectorize/X86/cost-model.ll
>
> Modified: llvm/trunk/include/llvm/Target/TargetLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=166836&r1=166835&r2=166836&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Target/TargetLowering.h (original)
> +++ llvm/trunk/include/llvm/Target/TargetLowering.h Fri Oct 26 18:49:28 2012
> @@ -411,6 +411,13 @@
>         getOperationAction(Op, VT) == Custom);
>    }
>
> +  /// isOperationExpand - Return true if the specified operation is illegal on
> +  /// this target or unlikely to be made legal with custom lowering. This is
> +  /// used to help guide high-level lowering decisions.
> +  bool isOperationExpand(unsigned Op, EVT VT) const {
> +    return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand);
> +  }
> +
>    /// isOperationLegal - Return true if the specified operation is legal on this
>    /// target.
>    bool isOperationLegal(unsigned Op, EVT VT) const {
>
> Modified: llvm/trunk/include/llvm/Target/TargetTransformImpl.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetTransformImpl.h?rev=166836&r1=166835&r2=166836&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Target/TargetTransformImpl.h (original)
> +++ llvm/trunk/include/llvm/Target/TargetTransformImpl.h Fri Oct 26 18:49:28 2012
> @@ -56,15 +56,32 @@
>    std::pair<unsigned, EVT>
>    getTypeLegalizationCost(LLVMContext &C, EVT Ty) const;
>
> +  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
> +  /// are set if the result needs to be inserted and/or extracted from vectors.
> +  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
> +
>  public:
>    explicit VectorTargetTransformImpl(const TargetLowering *TL) : TLI(TL) {}
> -
> +
>    virtual ~VectorTargetTransformImpl() {}
>
>    virtual unsigned getInstrCost(unsigned Opcode, Type *Ty1, Type *Ty2) const;
>
> +  virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
> +
>    virtual unsigned getBroadcastCost(Type *Tp) const;
>
> +  virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
> +                                    Type *Src) const;
> +
> +  virtual unsigned getCFInstrCost(unsigned Opcode) const;
> +
> +  virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
> +                                      Type *CondTy) const;
> +
> +  virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
> +                                      unsigned Index) const;
> +
>    virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
>                                     unsigned Alignment,
>                                     unsigned AddressSpace) const;
>
> Modified: llvm/trunk/include/llvm/TargetTransformInfo.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/TargetTransformInfo.h?rev=166836&r1=166835&r2=166836&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/TargetTransformInfo.h (original)
> +++ llvm/trunk/include/llvm/TargetTransformInfo.h Fri Oct 26 18:49:28 2012
> @@ -143,13 +143,43 @@
>      return 1;
>    }
>
> +  /// Returns the expected cost of arithmetic ops, such as mul, xor, fsub, etc.
> +  virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
> +    return 1;
> +  }
> +
>    /// Returns the cost of a vector broadcast of a scalar at place zero to a
>    /// vector of type 'Tp'.
>    virtual unsigned getBroadcastCost(Type *Tp) const {
>      return 1;
>    }
>
> -  /// Returns the cost of Load and Store instructions.
> +  /// Returns the expected cost of cast instructions, such as bitcast, trunc,
> +  /// zext, etc.
> +  virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
> +                                    Type *Src) const {
> +    return 1;
> +  }
> +
> +  /// Returns the expected cost of control-flow related instructions such as
> +  /// Phi, Ret, Br.
> +  virtual unsigned getCFInstrCost(unsigned Opcode) const {
> +    return 1;
> +  }
> +
> +  /// Returns the expected cost of compare and select instructions.
> +  virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
> +                                      Type *CondTy = 0) const {
> +    return 1;
> +  }
> +
> +  /// Returns the expected cost of vector Insert and Extract.
> +  virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
> +                                      unsigned Index = 0) const {
> +    return 1;
> +  }
> +
> +  /// Returns the cost of Load and Store instructions.
>    virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
>                                     unsigned Alignment,
>                                     unsigned AddressSpace) const {
>
> Modified: llvm/trunk/lib/Target/TargetTransformImpl.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/TargetTransformImpl.cpp?rev=166836&r1=166835&r2=166836&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/TargetTransformImpl.cpp (original)
> +++ llvm/trunk/lib/Target/TargetTransformImpl.cpp Fri Oct 26 18:49:28 2012
> @@ -126,7 +126,7 @@
>
>  std::pair<unsigned, EVT>
>  VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C,
> -                                                         EVT Ty) const {
> +                                                   EVT Ty) const {
>    unsigned Cost = 1;
>    // We keep legalizing the type until we find a legal kind. We assume that
>    // the only operation that costs anything is the split. After splitting
> @@ -135,7 +135,7 @@
>      TargetLowering::LegalizeKind LK = TLI->getTypeConversion(C, Ty);
>
>      if (LK.first == TargetLowering::TypeLegal)
> -      return std::make_pair(Cost, LK.second);
> +      return std::make_pair(Cost, Ty);
>
>      if (LK.first == TargetLowering::TypeSplitVector)
>        Cost *= 2;
> @@ -146,44 +146,144 @@
>  }
>
>  unsigned
> -VectorTargetTransformImpl::getInstrCost(unsigned Opcode, Type *Ty1,
> -                                        Type *Ty2) const {
> +VectorTargetTransformImpl::getScalarizationOverhead(Type *Ty,
> +                                                    bool Insert,
> +                                                    bool Extract) const {
> +  assert (Ty->isVectorTy() && "Can only scalarize vectors");
> +   unsigned Cost = 0;
> +
> +  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
> +    if (Insert)
> +      Cost += getVectorInstrCost(Instruction::InsertElement, Ty, i);
> +    if (Extract)
> +      Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, i);
> +  }
> +
> +  return Cost;
> +}
> +
> +unsigned VectorTargetTransformImpl::getArithmeticInstrCost(unsigned Opcode,
> +                                                           Type *Ty) const {
>    // Check if any of the operands are vector operands.
>    int ISD = InstructionOpcodeToISD(Opcode);
> +  assert(ISD && "Invalid opcode");
> +
> +  std::pair<unsigned, EVT> LT =
> +  getTypeLegalizationCost(Ty->getContext(), TLI->getValueType(Ty));
> +
> +  if (!TLI->isOperationExpand(ISD, LT.second)) {
> +    // The operation is legal. Assume it costs 1. Multiply
> +    // by the type-legalization overhead.
> +    return LT.first * 1;
> +  }
>
> -  // If we don't have any information about this instruction assume it costs 1.
> -  if (ISD == 0)
> -    return 1;
> +  // Else, assume that we need to scalarize this op.
> +  if (Ty->isVectorTy()) {
> +    unsigned Num = Ty->getVectorNumElements();
> +    unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType());
> +    // return the cost of multiple scalar invocation plus the cost of inserting
> +    // and extracting the values.
> +    return getScalarizationOverhead(Ty, true, true) + Num * Cost;
> +  }
> +
> +  // We don't know anything about this scalar instruction.
> +  return 1;
> +}
> +
> +unsigned VectorTargetTransformImpl::getBroadcastCost(Type *Tp) const {
> +  return 1;
> +}
> +
> +unsigned VectorTargetTransformImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
> +                                  Type *Src) const {
> +  assert(Src->isVectorTy() == Dst->isVectorTy() && "Invalid input types");
> +  int ISD = InstructionOpcodeToISD(Opcode);
> +  assert(ISD && "Invalid opcode");
> +
> +  std::pair<unsigned, EVT> SrcLT =
> +  getTypeLegalizationCost(Src->getContext(), TLI->getValueType(Src));
>
> +  std::pair<unsigned, EVT> DstLT =
> +  getTypeLegalizationCost(Dst->getContext(), TLI->getValueType(Dst));
> +
> +  // If the cast is between same-sized registers, then the check is simple.
> +  if (SrcLT.first == DstLT.first &&
> +      SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
> +    // Just check the op cost:
> +    if (!TLI->isOperationExpand(ISD, DstLT.second)) {
> +      // The operation is legal. Assume it costs 1. Multiply
> +      // by the type-legalization overhead.
> +      return SrcLT.first * 1;
> +    }
> +  }
> +
> +  // Otherwise, assume that the cast is scalarized.
> +  if (Dst->isVectorTy()) {
> +    unsigned Num = Dst->getVectorNumElements();
> +    unsigned Cost = getCastInstrCost(Opcode, Src->getScalarType(),
> +                                     Dst->getScalarType());
> +    // return the cost of multiple scalar invocation plus the cost of inserting
> +    // and extracting the values.
> +    return getScalarizationOverhead(Dst, true, true) + Num * Cost;
> +  }
> +
> +  // Unknown scalar opcode.
> +  return 1;
> +}
> +
> +unsigned VectorTargetTransformImpl::getCFInstrCost(unsigned Opcode) const {
> +  return 1;
> +}
> +
> +unsigned VectorTargetTransformImpl::getCmpSelInstrCost(unsigned Opcode,
> +                                                       Type *ValTy,
> +                                                       Type *CondTy) const {
> +  int ISD = InstructionOpcodeToISD(Opcode);
> +  assert(ISD && "Invalid opcode");
> +
>    // Selects on vectors are actually vector selects.
>    if (ISD == ISD::SELECT) {
> -    assert(Ty2 && "Ty2 must hold the condition type");
> -    if (Ty2->isVectorTy())
> -    ISD = ISD::VSELECT;
> +    assert(CondTy && "CondTy must exist");
> +    if (CondTy->isVectorTy())
> +      ISD = ISD::VSELECT;
>    }
>
> -  assert(Ty1 && "We need to have at least one type");
> -
> -  // From this stage we look at the legalized type.
> -  std::pair<unsigned, EVT>  LT =
> -  getTypeLegalizationCost(Ty1->getContext(), TLI->getValueType(Ty1));
> +  std::pair<unsigned, EVT> LT =
> +  getTypeLegalizationCost(ValTy->getContext(), TLI->getValueType(ValTy));
>
> -  if (TLI->isOperationLegalOrCustom(ISD, LT.second)) {
> +  if (!TLI->isOperationExpand(ISD, LT.second)) {
>      // The operation is legal. Assume it costs 1. Multiply
>      // by the type-legalization overhead.
>      return LT.first * 1;
>    }
>
> -  unsigned NumElem =
> -    (LT.second.isVector() ? LT.second.getVectorNumElements() : 1);
> +  // Otherwise, assume that the compare/select is scalarized.
> +  if (ValTy->isVectorTy()) {
> +    unsigned Num = ValTy->getVectorNumElements();
> +    if (CondTy)
> +      CondTy = CondTy->getScalarType();
> +    unsigned Cost = getCmpSelInstrCost(Opcode, ValTy->getScalarType(),
> +                                       CondTy);
> +
> +    // return the cost of multiple scalar invocation plus the cost of inserting
> +    // and extracting the values.
> +    return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
> +  }
>
> -  // We will probably scalarize this instruction. Assume that the cost is the
> -  // number of the vector elements.
> -  return LT.first * NumElem * 1;
> +  // Unknown scalar opcode.
> +  return 1;
> +}
> +
> +/// Returns the expected cost of Vector Insert and Extract.
> +unsigned VectorTargetTransformImpl::getVectorInstrCost(unsigned Opcode,
> +                                                       Type *Val,
> +                                                       unsigned Index) const {
> +  return 1;
>  }
>
>  unsigned
> -VectorTargetTransformImpl::getBroadcastCost(Type *Tp) const {
> +VectorTargetTransformImpl::getInstrCost(unsigned Opcode, Type *Ty1,
> +                                        Type *Ty2) const {
>    return 1;
>  }
>
> @@ -191,17 +291,15 @@
>  VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
>                                             unsigned Alignment,
>                                             unsigned AddressSpace) const {
> -  // From this stage we look at the legalized type.
> -  std::pair<unsigned, EVT>  LT =
> +  std::pair<unsigned, EVT> LT =
>    getTypeLegalizationCost(Src->getContext(), TLI->getValueType(Src));
> +
>    // Assume that all loads of legal types cost 1.
>    return LT.first;
>  }
>
>  unsigned
>  VectorTargetTransformImpl::getNumberOfParts(Type *Tp) const {
> -  std::pair<unsigned, EVT>  LT =
> -  getTypeLegalizationCost(Tp->getContext(), TLI->getValueType(Tp));
> -  return LT.first;
> +  return TLI->getNumRegisters(Tp->getContext(), TLI->getValueType(Tp));
>  }
>
>
> Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=166836&r1=166835&r2=166836&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Fri Oct 26 18:49:28 2012
> @@ -108,7 +108,7 @@
>      createEmptyLoop(Legal);
>      /// Widen each instruction in the old loop to a new one in the new loop.
>      /// Use the Legality module to find the induction and reduction variables.
> -   vectorizeLoop(Legal);
> +    vectorizeLoop(Legal);
>      // register the new loop.
>      cleanup();
>   }
> @@ -254,6 +254,9 @@
>    /// This check allows us to vectorize A[idx] into a wide load/store.
>    bool isConsecutiveGep(Value *Ptr);
>
> +  /// Returns true if this instruction will remain scalar after vectorization.
> +  bool isUniformAfterVectorization(Instruction* I) {return Uniforms.count(I);}
> +
>  private:
>    /// Check if a single basic block loop is vectorizable.
>    /// At this point we know that this is a loop with a constant trip count
> @@ -291,6 +294,9 @@
>    /// Allowed outside users. This holds the reduction
>    /// vars which can be accessed from outside the loop.
>    SmallPtrSet<Value*, 4> AllowedExit;
> +  /// This set holds the variables which are known to be uniform after
> +  /// vectorization.
> +  SmallPtrSet<Instruction*, 4> Uniforms;
>  };
>
>  /// LoopVectorizationCostModel - estimates the expected speedups due to
> @@ -1177,9 +1183,40 @@
>        return false;
>    }
>
> -  // If the memory dependencies do not prevent us from
> -  // vectorizing, then vectorize.
> -  return canVectorizeMemory(BB);
> +  // Don't vectorize if the memory dependencies do not allow vectorization.
> +  if (!canVectorizeMemory(BB))
> +    return false;
> +
> +  // We now know that the loop is vectorizable!
> +  // Collect variables that will remain uniform after vectorization.
> +  std::vector<Value*> Worklist;
> +
> +  // Start with the conditional branch and walk up the block.
> +  Worklist.push_back(BB.getTerminator()->getOperand(0));
> +
> +  while (Worklist.size()) {
> +    Instruction *I = dyn_cast<Instruction>(Worklist.back());
> +    Worklist.pop_back();
> +    // Look at instructions inside this block.
> +    if (!I) continue;
> +    if (I->getParent() != &BB) continue;
> +
> +    // Stop when reaching PHI nodes.
> +    if (isa<PHINode>(I)) {
> +      assert(I == Induction && "Found a uniform PHI that is not the induction");
> +      break;
> +    }
> +
> +    // This is a known uniform.
> +    Uniforms.insert(I);
> +
> +    // Insert all operands.
> +    for (int i=0, Op = I->getNumOperands(); i < Op; ++i) {
> +      Worklist.push_back(I->getOperand(i));
> +    }
> +  }
> +
> +  return true;
>  }
>
>  bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
> @@ -1484,9 +1521,15 @@
>  LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
>    assert(VTTI && "Invalid vector target transformation info");
>
> +  // If we know that this instruction will remain uniform, check the cost of
> +  // the scalar version.
> +  if (Legal->isUniformAfterVectorization(I))
> +    VF = 1;
> +
>    Type *RetTy = I->getType();
>    Type *VectorTy = ToVectorTy(RetTy, VF);
>
> +
>    // TODO: We need to estimate the cost of intrinsic calls.
>    switch (I->getOpcode()) {
>      case Instruction::GetElementPtr:
> @@ -1495,7 +1538,7 @@
>        // generate vector geps.
>        return 0;
>      case Instruction::Br: {
> -      return VTTI->getInstrCost(I->getOpcode());
> +      return VTTI->getCFInstrCost(I->getOpcode());
>      }
>      case Instruction::PHI:
>        return 0;
> @@ -1517,7 +1560,7 @@
>      case Instruction::And:
>      case Instruction::Or:
>      case Instruction::Xor: {
> -      return VTTI->getInstrCost(I->getOpcode(), VectorTy);
> +      return VTTI->getArithmeticInstrCost(I->getOpcode(), VectorTy);
>      }
>      case Instruction::Select: {
>        SelectInst *SI = cast<SelectInst>(I);
> @@ -1527,13 +1570,13 @@
>        if (ScalarCond)
>          CondTy = VectorType::get(CondTy, VF);
>
> -      return VTTI->getInstrCost(I->getOpcode(), VectorTy, CondTy);
> +      return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy);
>      }
>      case Instruction::ICmp:
>      case Instruction::FCmp: {
>        Type *ValTy = I->getOperand(0)->getType();
>        VectorTy = ToVectorTy(ValTy, VF);
> -      return VTTI->getInstrCost(I->getOpcode(), VectorTy);
> +      return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy);
>      }
>      case Instruction::Store: {
>        StoreInst *SI = cast<StoreInst>(I);
> @@ -1602,7 +1645,7 @@
>      case Instruction::FPTrunc:
>      case Instruction::BitCast: {
>        Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF);
> -      return VTTI->getInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
> +      return VTTI->getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
>      }
>      default: {
>        // We are scalarizing the instruction. Return the cost of the scalar
>
> Modified: llvm/trunk/test/Transforms/LoopVectorize/X86/cost-model.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/X86/cost-model.ll?rev=166836&r1=166835&r2=166836&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopVectorize/X86/cost-model.ll (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/X86/cost-model.ll Fri Oct 26 18:49:28 2012
> @@ -9,7 +9,7 @@
>  @a = common global [2048 x i32] zeroinitializer, align 16
>
>  ;CHECK: cost_model_1
> -;CHECK: <4 x i32>
> +;CHECK-NOT: <4 x i32>
>  ;CHECK: ret void
>  define void @cost_model_1() nounwind uwtable noinline ssp {
>  entry:
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits