[llvm-commits] [llvm] r170830 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h test/Analysis/CostModel/X86/load_store.ll test/Analysis/CostModel/X86/vectorized-loop.ll
Nadav Rotem
nrotem at apple.com
Thu Dec 20 17:33:59 PST 2012
Author: nadav
Date: Thu Dec 20 19:33:59 2012
New Revision: 170830
URL: http://llvm.org/viewvc/llvm-project?rev=170830&view=rev
Log:
Improve the X86 cost model for loads and stores.
Added:
llvm/trunk/test/Analysis/CostModel/X86/load_store.ll
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/test/Analysis/CostModel/X86/vectorized-loop.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=170830&r1=170829&r2=170830&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Dec 20 19:33:59 2012
@@ -17818,6 +17818,30 @@
return VectorTargetTransformImpl::getArithmeticInstrCost(Opcode, Ty);
}
+
+/// Estimate the cost of a load or store of type \p Src.
+///
+/// The result is the first element of the pair returned by
+/// getTypeLegalizationCost(Src), doubled when the legalized type is wider
+/// than 128 bits and the subtarget does not report AVX2.
+///
+/// \param Opcode        Must be Instruction::Load or Instruction::Store
+///                      (checked by an assert).
+/// \param Src           The type being loaded or stored.
+/// \param Alignment     Not consulted by this implementation.
+/// \param AddressSpace  Not consulted by this implementation.
+/// \returns the estimated cost.
+unsigned
+X86VectorTargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
+                                              unsigned Alignment,
+                                              unsigned AddressSpace) const {
+  // Legalize the type.
+  std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Src);
+  // Keep the opcode test parenthesized: '&&' binds tighter than '||', so
+  // without the parentheses the message would attach only to the Store
+  // comparison and compilers warn about it (-Wparentheses).
+  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
+         "Invalid Opcode");
+
+  const X86Subtarget &ST =
+      TLI->getTargetMachine().getSubtarget<X86Subtarget>();
+
+  // Each load/store unit costs 1.
+  unsigned Cost = LT.first * 1;
+
+  // On Sandy Bridge, 256-bit loads/stores are double pumped
+  // (but not on Haswell), so charge them twice when AVX2 is unavailable.
+  if (LT.second.getSizeInBits() > 128 && !ST.hasAVX2())
+    Cost *= 2;
+
+  return Cost;
+}
+
unsigned
X86VectorTargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) const {
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=170830&r1=170829&r2=170830&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Thu Dec 20 19:33:59 2012
@@ -953,6 +953,10 @@
virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
+ virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const;
+
virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) const;
Added: llvm/trunk/test/Analysis/CostModel/X86/load_store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/load_store.ll?rev=170830&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/load_store.ll (added)
+++ llvm/trunk/test/Analysis/CostModel/X86/load_store.ll Thu Dec 20 19:33:59 2012
@@ -0,0 +1,64 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define i32 @stores(i32 %arg) {
+
+ ;CHECK: cost of 1 {{.*}} store
+ store i8 undef, i8* undef, align 4
+ ;CHECK: cost of 1 {{.*}} store
+ store i16 undef, i16* undef, align 4
+ ;CHECK: cost of 1 {{.*}} store
+ store i32 undef, i32* undef, align 4
+ ;CHECK: cost of 1 {{.*}} store
+ store i64 undef, i64* undef, align 4
+ ;CHECK: cost of 2 {{.*}} store
+ store i128 undef, i128* undef, align 4
+
+ ;CHECK: cost of 1 {{.*}} store
+ store <4 x i16> undef, <4 x i16>* undef, align 4
+ ;CHECK: cost of 1 {{.*}} store
+ store <4 x i32> undef, <4 x i32>* undef, align 4
+ ;CHECK: cost of 2 {{.*}} store
+ store <4 x i64> undef, <4 x i64>* undef, align 4
+
+ ;CHECK: cost of 1 {{.*}} store
+ store <8 x i16> undef, <8 x i16>* undef, align 4
+ ;CHECK: cost of 2 {{.*}} store
+ store <8 x i32> undef, <8 x i32>* undef, align 4
+ ;CHECK: cost of 4 {{.*}} store
+ store <8 x i64> undef, <8 x i64>* undef, align 4
+
+ ret i32 undef
+}
+define i32 @loads(i32 %arg) {
+ ;CHECK: cost of 1 {{.*}} load
+ load i8* undef, align 4
+ ;CHECK: cost of 1 {{.*}} load
+ load i16* undef, align 4
+ ;CHECK: cost of 1 {{.*}} load
+ load i32* undef, align 4
+ ;CHECK: cost of 1 {{.*}} load
+ load i64* undef, align 4
+ ;CHECK: cost of 2 {{.*}} load
+ load i128* undef, align 4
+
+ ;CHECK: cost of 1 {{.*}} load
+ load <2 x i32>* undef, align 4
+ ;CHECK: cost of 1 {{.*}} load
+ load <4 x i32>* undef, align 4
+ ;CHECK: cost of 2 {{.*}} load
+ load <8 x i32>* undef, align 4
+
+
+ ;CHECK: cost of 1 {{.*}} load
+ load <2 x i64>* undef, align 4
+ ;CHECK: cost of 2 {{.*}} load
+ load <4 x i64>* undef, align 4
+ ;CHECK: cost of 4 {{.*}} load
+ load <8 x i64>* undef, align 4
+
+ ret i32 undef
+}
+
Modified: llvm/trunk/test/Analysis/CostModel/X86/vectorized-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/vectorized-loop.ll?rev=170830&r1=170829&r2=170830&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/vectorized-loop.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/vectorized-loop.ll Thu Dec 20 19:33:59 2012
@@ -28,16 +28,17 @@
%4 = getelementptr inbounds i32* %B, i64 %3
;CHECK: cost of 0 {{.*}} bitcast
%5 = bitcast i32* %4 to <8 x i32>*
- ;CHECK: cost of 1 {{.*}} load
+ ;CHECK: cost of 2 {{.*}} load
%6 = load <8 x i32>* %5, align 4
;CHECK: cost of 4 {{.*}} mul
%7 = mul nsw <8 x i32> %6, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
%8 = getelementptr inbounds i32* %A, i64 %index
%9 = bitcast i32* %8 to <8 x i32>*
+ ;CHECK: cost of 2 {{.*}} load
%10 = load <8 x i32>* %9, align 4
;CHECK: cost of 4 {{.*}} add
%11 = add nsw <8 x i32> %10, %7
- ;CHECK: cost of 1 {{.*}} store
+ ;CHECK: cost of 2 {{.*}} store
store <8 x i32> %11, <8 x i32>* %9, align 4
%index.next = add i64 %index, 8
%12 = icmp eq i64 %index.next, %end.idx.rnd.down
More information about the llvm-commits
mailing list