[llvm-commits] [llvm] r166865 - /llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp

Fri Oct 26 21:42:55 PDT 2012

Hi Hal, 

I am running some benchmarks and I see strange results. The x86 backend is weird. Expensive integer arithmetic ops on <8 x i32>, which are unsupported on AVX, are marked as 'custom' because they have custom lowering. We don't have a good way of knowing that 'ADD <8 x i32>' is actually slow on AVX. On the other hand, easy operations such as FCMP <4 x float>, which should be straightforward, are marked as 'Expand', and they are lowered in a DAGCombine op.

Nadav

On Oct 26, 2012, at 9:33 PM, Hal Finkel <hfinkel at anl.gov> wrote:

> Author: hfinkel
> Date: Fri Oct 26 23:33:48 2012
> New Revision: 166865
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=166865&view=rev
> Log:
> Update BBVectorize to use the new VTTI instr. cost interfaces.
> 
> The monolithic interface for instruction costs has been split into
> several functions. This is the corresponding change. No functionality
> change is intended.
> 
> Modified:
>    llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp
> 
> Modified: llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp?rev=166865&r1=166864&r2=166865&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp Fri Oct 26 23:33:48 2012
> @@ -484,6 +484,61 @@
>       return 1;
>     }
> 
> +    // Returns the cost of the provided instruction using VTTI.
> +    // This does not handle loads and stores.
> +    unsigned getInstrCost(unsigned Opcode, Type *T1, Type *T2) {
> +      switch (Opcode) {
> +      default: break;
> +      case Instruction::GetElementPtr:
> +        // We mark this instruction as zero-cost because scalar GEPs are usually
> +        // lowered to the instruction addressing mode. At the moment we don't
> +        // generate vector GEPs.
> +        return 0;
> +      case Instruction::Br:
> +        return VTTI->getCFInstrCost(Opcode);
> +      case Instruction::PHI:
> +        return 0;
> +      case Instruction::Add:
> +      case Instruction::FAdd:
> +      case Instruction::Sub:
> +      case Instruction::FSub:
> +      case Instruction::Mul:
> +      case Instruction::FMul:
> +      case Instruction::UDiv:
> +      case Instruction::SDiv:
> +      case Instruction::FDiv:
> +      case Instruction::URem:
> +      case Instruction::SRem:
> +      case Instruction::FRem:
> +      case Instruction::Shl:
> +      case Instruction::LShr:
> +      case Instruction::AShr:
> +      case Instruction::And:
> +      case Instruction::Or:
> +      case Instruction::Xor:
> +        return VTTI->getArithmeticInstrCost(Opcode, T1);
> +      case Instruction::Select:
> +      case Instruction::ICmp:
> +      case Instruction::FCmp:
> +        return VTTI->getCmpSelInstrCost(Opcode, T1, T2);
> +      case Instruction::ZExt:
> +      case Instruction::SExt:
> +      case Instruction::FPToUI:
> +      case Instruction::FPToSI:
> +      case Instruction::FPExt:
> +      case Instruction::PtrToInt:
> +      case Instruction::IntToPtr:
> +      case Instruction::SIToFP:
> +      case Instruction::UIToFP:
> +      case Instruction::Trunc:
> +      case Instruction::FPTrunc:
> +      case Instruction::BitCast:
> +        return VTTI->getCastInstrCost(Opcode, T1, T2);
> +      }
> +
> +      return 1;
> +    }
> +
>     // This determines the relative offset of two loads or stores, returning
>     // true if the offset could be determined to be some constant value.
>     // For example, if OffsetInElmts == 1, then J accesses the memory directly
> @@ -834,11 +889,11 @@
>         return false;
>       }
>     } else if (VTTI) {
> -      unsigned ICost = VTTI->getInstrCost(I->getOpcode(), IT1, IT2);
> -      unsigned JCost = VTTI->getInstrCost(J->getOpcode(), JT1, JT2);
> +      unsigned ICost = getInstrCost(I->getOpcode(), IT1, IT2);
> +      unsigned JCost = getInstrCost(J->getOpcode(), JT1, JT2);
>       Type *VT1 = getVecTypeForPair(IT1, JT1),
>            *VT2 = getVecTypeForPair(IT2, JT2);
> -      unsigned VCost = VTTI->getInstrCost(I->getOpcode(), VT1, VT2);
> +      unsigned VCost = getInstrCost(I->getOpcode(), VT1, VT2);
> 
>       if (VCost > ICost + JCost)
>         return false;
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits