[llvm-commits] [llvm] r167347 - in /llvm/trunk: include/llvm/Target/TargetTransformImpl.h lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h test/Analysis/CostModel/X86/arith.ll test/Analysis/CostModel/X86/vectorized-loop.ll

Sat Nov 3 11:52:49 PDT 2012

On Nov 2, 2012, at 5:39 PM, Nadav Rotem <nrotem at apple.com> wrote:

> Author: nadav
> Date: Fri Nov  2 19:39:56 2012
> New Revision: 167347
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=167347&view=rev
> Log:
> X86 CostModel: Add support for some of the common arithmetic instructions for SSE4, AVX and AVX2.

Hi Nadav,

Is there some way to have tblgen generate this information from the .td files?

-Chris

> 
> 
> Added:
>    llvm/trunk/test/Analysis/CostModel/X86/arith.ll
> Modified:
>    llvm/trunk/include/llvm/Target/TargetTransformImpl.h
>    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>    llvm/trunk/lib/Target/X86/X86ISelLowering.h
>    llvm/trunk/test/Analysis/CostModel/X86/vectorized-loop.ll
> 
> Modified: llvm/trunk/include/llvm/Target/TargetTransformImpl.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetTransformImpl.h?rev=167347&r1=167346&r2=167347&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Target/TargetTransformImpl.h (original)
> +++ llvm/trunk/include/llvm/Target/TargetTransformImpl.h Fri Nov  2 19:39:56 2012
> @@ -51,7 +51,7 @@
> };
> 
> class VectorTargetTransformImpl : public VectorTargetTransformInfo {
> -private:
> +protected:
>   const TargetLowering *TLI;
> 
>   /// Estimate the cost of type-legalization and the legalized type.
> 
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=167347&r1=167346&r2=167347&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Nov  2 19:39:56 2012
> @@ -17504,3 +17504,73 @@
> 
>   return Res;
> }
> +
> +unsigned
> +X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
> +                                                     Type *Ty) const {
> +  const X86Subtarget &ST =
> +  TLI->getTargetMachine().getSubtarget<X86Subtarget>();
> +
> +  // Fix some of the inaccuracies of the target independent estimation.
> +  if (Ty->isVectorTy() && ST.hasSSE41()) {
> +    unsigned NumElem = Ty->getVectorNumElements();
> +    unsigned SizeInBits = Ty->getScalarType()->getScalarSizeInBits();
> +
> +    bool Is2 = (NumElem == 2);
> +    bool Is4 = (NumElem == 4);
> +    bool Is8 = (NumElem == 8);
> +    bool Is32bits = (SizeInBits == 32);
> +    bool Is64bits = (SizeInBits == 64);
> +    bool HasAvx = ST.hasAVX();
> +    bool HasAvx2 = ST.hasAVX2();
> +
> +    switch (Opcode) {
> +      case Instruction::Add:
> +      case Instruction::Sub:
> +      case Instruction::Mul: {
> +        // Only AVX2 has support for 8-wide integer operations.
> +        if (Is32bits && (Is4 || (Is8 && HasAvx2))) return 1;
> +        if (Is64bits && (Is2 || (Is4 && HasAvx2))) return 1;
> +
> +        // We don't have to completely scalarize unsupported ops. We can
> +        // issue two half-sized operations (with some overhead).
> +        // We don't need to extract the lower part of the YMM to the XMM.
> +        // Extract the upper, two ops, insert the upper = 4.
> +        if (Is32bits && Is8 && HasAvx) return 4;
> +        if (Is64bits && Is4 && HasAvx) return 4;
> +        break;
> +      }
> +      case Instruction::FAdd:
> +      case Instruction::FSub:
> +      case Instruction::FMul: {
> +        // AVX has support for 8-wide float operations.
> +        if (Is32bits && (Is4 || (Is8 && HasAvx))) return 1;
> +        if (Is64bits && (Is2 || (Is4 && HasAvx))) return 1;
> +        break;
> +      }
> +      case Instruction::Shl:
> +      case Instruction::LShr:
> +      case Instruction::AShr:
> +      case Instruction::And:
> +      case Instruction::Or:
> +      case Instruction::Xor: {
> +        // AVX has support for 8-wide integer bitwise operations.
> +        if (Is32bits && (Is4 || (Is8 && HasAvx))) return 1;
> +        if (Is64bits && (Is2 || (Is4 && HasAvx))) return 1;
> +        break;
> +      }
> +    }
> +  }
> +
> +  return VectorTargetTransformImpl::getArithmeticInstrCost(Opcode, Ty);
> +}
> +
> +unsigned
> +X86VectorTargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
> +                                    unsigned Index) const {
> +  // Floating point scalars are already located in index #0.
> +  if (Val->getScalarType()->isFloatingPointTy() && Index == 0)
> +    return 0;
> +  return VectorTargetTransformImpl::getVectorInstrCost(Opcode, Val, Index);
> +}
> +
> 
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=167347&r1=167346&r2=167347&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Fri Nov  2 19:39:56 2012
> @@ -953,13 +953,10 @@
>     explicit X86VectorTargetTransformInfo(const TargetLowering *TL) :
>     VectorTargetTransformImpl(TL) {}
> 
> +    virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
> +
>     virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
> -                                        unsigned Index) const {
> -      // Floating point scalars are already located in index #0.
> -      if (Val->getScalarType()->isFloatingPointTy() && Index == 0)
> -        return 0;
> -      return VectorTargetTransformImpl::getVectorInstrCost(Opcode, Val, Index);
> -    }
> +                                        unsigned Index) const;
>   };
> 
> }
> 
> Added: llvm/trunk/test/Analysis/CostModel/X86/arith.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/arith.ll?rev=167347&view=auto
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/arith.ll (added)
> +++ llvm/trunk/test/Analysis/CostModel/X86/arith.ll Fri Nov  2 19:39:56 2012
> @@ -0,0 +1,40 @@
> +; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
> +
> +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
> +target triple = "x86_64-apple-macosx10.8.0"
> +
> +define i32 @add(i32 %arg) {
> +  ;CHECK: cost of 1 {{.*}} add
> +  %A = add <4 x i32> undef, undef
> +  ;CHECK: cost of 4 {{.*}} add
> +  %B = add <8 x i32> undef, undef
> +  ;CHECK: cost of 1 {{.*}} add
> +  %C = add <2 x i64> undef, undef
> +  ;CHECK: cost of 4 {{.*}} add
> +  %D = add <4 x i64> undef, undef
> +  ;CHECK: cost of 1 {{.*}} ret
> +  ret i32 undef
> +}
> +
> +
> +define i32 @xor(i32 %arg) {
> +  ;CHECK: cost of 1 {{.*}} xor
> +  %A = xor <4 x i32> undef, undef
> +  ;CHECK: cost of 1 {{.*}} xor
> +  %B = xor <8 x i32> undef, undef
> +  ;CHECK: cost of 1 {{.*}} xor
> +  %C = xor <2 x i64> undef, undef
> +  ;CHECK: cost of 1 {{.*}} xor
> +  %D = xor <4 x i64> undef, undef
> +  ;CHECK: cost of 1 {{.*}} ret
> +  ret i32 undef
> +}
> +
> +
> +define i32 @fmul(i32 %arg) {
> +  ;CHECK: cost of 1 {{.*}} fmul
> +  %A = fmul <4 x float> undef, undef
> +  ;CHECK: cost of 1 {{.*}} fmul
> +  %B = fmul <8 x float> undef, undef
> +  ret i32 undef
> +}
> 
> Modified: llvm/trunk/test/Analysis/CostModel/X86/vectorized-loop.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/vectorized-loop.ll?rev=167347&r1=167346&r2=167347&view=diff
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/vectorized-loop.ll (original)
> +++ llvm/trunk/test/Analysis/CostModel/X86/vectorized-loop.ll Fri Nov  2 19:39:56 2012
> @@ -30,10 +30,12 @@
>   %5 = bitcast i32* %4 to <8 x i32>*
>   ;CHECK: cost of 1 {{.*}} load
>   %6 = load <8 x i32>* %5, align 4
> +  ;CHECK: cost of 4 {{.*}} mul
>   %7 = mul nsw <8 x i32> %6, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
>   %8 = getelementptr inbounds i32* %A, i64 %index
>   %9 = bitcast i32* %8 to <8 x i32>*
>   %10 = load <8 x i32>* %9, align 4
> +  ;CHECK: cost of 4 {{.*}} add
>   %11 = add nsw <8 x i32> %10, %7
>   ;CHECK: cost of 1 {{.*}} store
>   store <8 x i32> %11, <8 x i32>* %9, align 4
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits