[llvm] r217674 - [ARM] Teach the cost model that cross-class copies are costly.

James Molloy james.molloy at arm.com
Fri Sep 12 06:29:40 PDT 2014


Author: jamesm
Date: Fri Sep 12 08:29:40 2014
New Revision: 217674

URL: http://llvm.org/viewvc/llvm-project?rev=217674&view=rev
Log:
[ARM] Teach the cost model that cross-class copies are costly.

Whether cross-class copies are expensive is really a trait of the microarchitecture, but as I haven't yet seen an example of a microarchitecture where they're cheap, it seems best to just enable this by default, which covers the case of a build where no -mcpu is given.


Modified:
    llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp
    llvm/trunk/test/Analysis/CostModel/ARM/cast.ll

Modified: llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp?rev=217674&r1=217673&r2=217674&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp Fri Sep 12 08:29:40 2014
@@ -390,6 +390,13 @@ unsigned ARMTTI::getVectorInstrCost(unsi
       ValTy->getScalarSizeInBits() <= 32)
     return 3;
 
+  // Cross-class copies are expensive on many microarchitectures,
+  // so assume they are expensive by default.
+  if ((Opcode == Instruction::InsertElement ||
+       Opcode == Instruction::ExtractElement) &&
+      ValTy->getVectorElementType()->isIntegerTy())
+    return 3;
+
   return TargetTransformInfo::getVectorInstrCost(Opcode, ValTy, Index);
 }
 

Modified: llvm/trunk/test/Analysis/CostModel/ARM/cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/ARM/cast.ll?rev=217674&r1=217673&r2=217674&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/ARM/cast.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/ARM/cast.ll Fri Sep 12 08:29:40 2014
@@ -221,35 +221,35 @@ define i32 @casts() {
   %r96 = fptoui <2 x float> undef to <2 x i32>
   ; CHECK: cost of 1 {{.*}} fptosi
   %r97 = fptosi <2 x float> undef to <2 x i32>
-  ; CHECK: cost of 28 {{.*}} fptoui
+  ; CHECK: cost of 32 {{.*}} fptoui
   %r98 = fptoui <2 x float> undef to <2 x i64>
-  ; CHECK: cost of 28 {{.*}} fptosi
+  ; CHECK: cost of 32 {{.*}} fptosi
   %r99 = fptosi <2 x float> undef to <2 x i64>
 
-  ; CHECK: cost of 8 {{.*}} fptoui
+  ; CHECK: cost of 16 {{.*}} fptoui
   %r100 = fptoui <2 x double> undef to <2 x i1>
-  ; CHECK: cost of 8 {{.*}} fptosi
+  ; CHECK: cost of 16 {{.*}} fptosi
   %r101 = fptosi <2 x double> undef to <2 x i1>
-  ; CHECK: cost of 8 {{.*}} fptoui
+  ; CHECK: cost of 16 {{.*}} fptoui
   %r102 = fptoui <2 x double> undef to <2 x i8>
-  ; CHECK: cost of 8 {{.*}} fptosi
+  ; CHECK: cost of 16 {{.*}} fptosi
   %r103 = fptosi <2 x double> undef to <2 x i8>
-  ; CHECK: cost of 8 {{.*}} fptoui
+  ; CHECK: cost of 16 {{.*}} fptoui
   %r104 = fptoui <2 x double> undef to <2 x i16>
-  ; CHECK: cost of 8 {{.*}} fptosi
+  ; CHECK: cost of 16 {{.*}} fptosi
   %r105 = fptosi <2 x double> undef to <2 x i16>
   ; CHECK: cost of 2 {{.*}} fptoui
   %r106 = fptoui <2 x double> undef to <2 x i32>
   ; CHECK: cost of 2 {{.*}} fptosi
   %r107 = fptosi <2 x double> undef to <2 x i32>
-  ; CHECK: cost of 28 {{.*}} fptoui
+  ; CHECK: cost of 32 {{.*}} fptoui
   %r108 = fptoui <2 x double> undef to <2 x i64>
-  ; CHECK: cost of 28 {{.*}} fptosi
+  ; CHECK: cost of 32 {{.*}} fptosi
   %r109 = fptosi <2 x double> undef to <2 x i64>
 
-  ; CHECK: cost of 16 {{.*}} fptoui
+  ; CHECK: cost of 32 {{.*}} fptoui
   %r110 = fptoui <4 x float> undef to <4 x i1>
-  ; CHECK: cost of 16 {{.*}} fptosi
+  ; CHECK: cost of 32 {{.*}} fptosi
   %r111 = fptosi <4 x float> undef to <4 x i1>
   ; CHECK: cost of 3 {{.*}} fptoui
   %r112 = fptoui <4 x float> undef to <4 x i8>
@@ -263,39 +263,39 @@ define i32 @casts() {
   %r116 = fptoui <4 x float> undef to <4 x i32>
   ; CHECK: cost of 1 {{.*}} fptosi
   %r117 = fptosi <4 x float> undef to <4 x i32>
-  ; CHECK: cost of 56 {{.*}} fptoui
+  ; CHECK: cost of 64 {{.*}} fptoui
   %r118 = fptoui <4 x float> undef to <4 x i64>
-  ; CHECK: cost of 56 {{.*}} fptosi
+  ; CHECK: cost of 64 {{.*}} fptosi
   %r119 = fptosi <4 x float> undef to <4 x i64>
 
-  ; CHECK: cost of 16 {{.*}} fptoui
+  ; CHECK: cost of 32 {{.*}} fptoui
   %r120 = fptoui <4 x double> undef to <4 x i1>
-  ; CHECK: cost of 16 {{.*}} fptosi
+  ; CHECK: cost of 32 {{.*}} fptosi
   %r121 = fptosi <4 x double> undef to <4 x i1>
-  ; CHECK: cost of 16 {{.*}} fptoui
+  ; CHECK: cost of 32 {{.*}} fptoui
   %r122 = fptoui <4 x double> undef to <4 x i8>
-  ; CHECK: cost of 16 {{.*}} fptosi
+  ; CHECK: cost of 32 {{.*}} fptosi
   %r123 = fptosi <4 x double> undef to <4 x i8>
-  ; CHECK: cost of 16 {{.*}} fptoui
+  ; CHECK: cost of 32 {{.*}} fptoui
   %r124 = fptoui <4 x double> undef to <4 x i16>
-  ; CHECK: cost of 16 {{.*}} fptosi
+  ; CHECK: cost of 32 {{.*}} fptosi
   %r125 = fptosi <4 x double> undef to <4 x i16>
-  ; CHECK: cost of 16 {{.*}} fptoui
+  ; CHECK: cost of 32 {{.*}} fptoui
   %r126 = fptoui <4 x double> undef to <4 x i32>
-  ; CHECK: cost of 16 {{.*}} fptosi
+  ; CHECK: cost of 32 {{.*}} fptosi
   %r127 = fptosi <4 x double> undef to <4 x i32>
-  ; CHECK: cost of 56 {{.*}} fptoui
+  ; CHECK: cost of 64 {{.*}} fptoui
   %r128 = fptoui <4 x double> undef to <4 x i64>
-  ; CHECK: cost of 56 {{.*}} fptosi
+  ; CHECK: cost of 64 {{.*}} fptosi
   %r129 = fptosi <4 x double> undef to <4 x i64>
 
-  ; CHECK: cost of 32 {{.*}} fptoui
+  ; CHECK: cost of 64 {{.*}} fptoui
   %r130 = fptoui <8 x float> undef to <8 x i1>
-  ; CHECK: cost of 32 {{.*}} fptosi
+  ; CHECK: cost of 64 {{.*}} fptosi
   %r131 = fptosi <8 x float> undef to <8 x i1>
-  ; CHECK: cost of 32 {{.*}} fptoui
+  ; CHECK: cost of 64 {{.*}} fptoui
   %r132 = fptoui <8 x float> undef to <8 x i8>
-  ; CHECK: cost of 32 {{.*}} fptosi
+  ; CHECK: cost of 64 {{.*}} fptosi
   %r133 = fptosi <8 x float> undef to <8 x i8>
   ; CHECK: cost of 4 {{.*}} fptoui
   %r134 = fptoui <8 x float> undef to <8 x i16>
@@ -305,39 +305,39 @@ define i32 @casts() {
   %r136 = fptoui <8 x float> undef to <8 x i32>
   ; CHECK: cost of 2 {{.*}} fptosi
   %r137 = fptosi <8 x float> undef to <8 x i32>
-  ; CHECK: cost of 112 {{.*}} fptoui
+  ; CHECK: cost of 128 {{.*}} fptoui
   %r138 = fptoui <8 x float> undef to <8 x i64>
-  ; CHECK: cost of 112 {{.*}} fptosi
+  ; CHECK: cost of 128 {{.*}} fptosi
   %r139 = fptosi <8 x float> undef to <8 x i64>
 
-  ; CHECK: cost of 32 {{.*}} fptoui
+  ; CHECK: cost of 64 {{.*}} fptoui
   %r140 = fptoui <8 x double> undef to <8 x i1>
-  ; CHECK: cost of 32 {{.*}} fptosi
+  ; CHECK: cost of 64 {{.*}} fptosi
   %r141 = fptosi <8 x double> undef to <8 x i1>
-  ; CHECK: cost of 32 {{.*}} fptoui
+  ; CHECK: cost of 64 {{.*}} fptoui
   %r142 = fptoui <8 x double> undef to <8 x i8>
-  ; CHECK: cost of 32 {{.*}} fptosi
+  ; CHECK: cost of 64 {{.*}} fptosi
   %r143 = fptosi <8 x double> undef to <8 x i8>
-  ; CHECK: cost of 32 {{.*}} fptoui
+  ; CHECK: cost of 64 {{.*}} fptoui
   %r144 = fptoui <8 x double> undef to <8 x i16>
-  ; CHECK: cost of 32 {{.*}} fptosi
+  ; CHECK: cost of 64 {{.*}} fptosi
   %r145 = fptosi <8 x double> undef to <8 x i16>
-  ; CHECK: cost of 32 {{.*}} fptoui
+  ; CHECK: cost of 64 {{.*}} fptoui
   %r146 = fptoui <8 x double> undef to <8 x i32>
-  ; CHECK: cost of 32 {{.*}} fptosi
+  ; CHECK: cost of 64 {{.*}} fptosi
   %r147 = fptosi <8 x double> undef to <8 x i32>
-  ; CHECK: cost of 112 {{.*}} fptoui
+  ; CHECK: cost of 128 {{.*}} fptoui
   %r148 = fptoui <8 x double> undef to <8 x i64>
-  ; CHECK: cost of 112 {{.*}} fptosi
+  ; CHECK: cost of 128 {{.*}} fptosi
   %r149 = fptosi <8 x double> undef to <8 x i64>
 
-  ; CHECK: cost of 64 {{.*}} fptoui
+  ; CHECK: cost of 128 {{.*}} fptoui
   %r150 = fptoui <16 x float> undef to <16 x i1>
-  ; CHECK: cost of 64 {{.*}} fptosi
+  ; CHECK: cost of 128 {{.*}} fptosi
   %r151 = fptosi <16 x float> undef to <16 x i1>
-  ; CHECK: cost of 64 {{.*}} fptoui
+  ; CHECK: cost of 128 {{.*}} fptoui
   %r152 = fptoui <16 x float> undef to <16 x i8>
-  ; CHECK: cost of 64 {{.*}} fptosi
+  ; CHECK: cost of 128 {{.*}} fptosi
   %r153 = fptosi <16 x float> undef to <16 x i8>
   ; CHECK: cost of 8 {{.*}} fptoui
   %r154 = fptoui <16 x float> undef to <16 x i16>
@@ -347,30 +347,30 @@ define i32 @casts() {
   %r156 = fptoui <16 x float> undef to <16 x i32>
   ; CHECK: cost of 4 {{.*}} fptosi
   %r157 = fptosi <16 x float> undef to <16 x i32>
-  ; CHECK: cost of 224 {{.*}} fptoui
+  ; CHECK: cost of 256 {{.*}} fptoui
   %r158 = fptoui <16 x float> undef to <16 x i64>
-  ; CHECK: cost of 224 {{.*}} fptosi
+  ; CHECK: cost of 256 {{.*}} fptosi
   %r159 = fptosi <16 x float> undef to <16 x i64>
 
-  ; CHECK: cost of 64 {{.*}} fptoui
+  ; CHECK: cost of 128 {{.*}} fptoui
   %r160 = fptoui <16 x double> undef to <16 x i1>
-  ; CHECK: cost of 64 {{.*}} fptosi
+  ; CHECK: cost of 128 {{.*}} fptosi
   %r161 = fptosi <16 x double> undef to <16 x i1>
-  ; CHECK: cost of 64 {{.*}} fptoui
+  ; CHECK: cost of 128 {{.*}} fptoui
   %r162 = fptoui <16 x double> undef to <16 x i8>
-  ; CHECK: cost of 64 {{.*}} fptosi
+  ; CHECK: cost of 128 {{.*}} fptosi
   %r163 = fptosi <16 x double> undef to <16 x i8>
-  ; CHECK: cost of 64 {{.*}} fptoui
+  ; CHECK: cost of 128 {{.*}} fptoui
   %r164 = fptoui <16 x double> undef to <16 x i16>
-  ; CHECK: cost of 64 {{.*}} fptosi
+  ; CHECK: cost of 128 {{.*}} fptosi
   %r165 = fptosi <16 x double> undef to <16 x i16>
-  ; CHECK: cost of 64 {{.*}} fptoui
+  ; CHECK: cost of 128 {{.*}} fptoui
   %r166 = fptoui <16 x double> undef to <16 x i32>
-  ; CHECK: cost of 64 {{.*}} fptosi
+  ; CHECK: cost of 128 {{.*}} fptosi
   %r167 = fptosi <16 x double> undef to <16 x i32>
-  ; CHECK: cost of 224 {{.*}} fptoui
+  ; CHECK: cost of 256 {{.*}} fptoui
   %r168 = fptoui <16 x double> undef to <16 x i64>
-  ; CHECK: cost of 224 {{.*}} fptosi
+  ; CHECK: cost of 256 {{.*}} fptosi
   %r169 = fptosi <16 x double> undef to <16 x i64>
 
   ; CHECK: cost of 8 {{.*}} uitofp





More information about the llvm-commits mailing list