[llvm] r274642 - [TTI] The cost model should not assume vector casts get completely scalarized

Michael Kuperstein via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 6 10:30:56 PDT 2016


Author: mkuper
Date: Wed Jul  6 12:30:56 2016
New Revision: 274642

URL: http://llvm.org/viewvc/llvm-project?rev=274642&view=rev
Log:
[TTI] The cost model should not assume vector casts get completely scalarized

The cost model should not assume vector casts get completely scalarized, since
on targets that have vector support, the common case is a partial split up to
the legal vector size. So, when a vector cast gets split, the resulting casts
end up legal and cheap.

Instead of pessimistically assuming scalarization, base TTI can use the costs
the concrete TTI provides for the split vector, plus a fudge factor to account
for the cost of the split itself. This fudge factor is currently 1 by default,
except on AMDGPU where inserts and extracts are considered free.

Differential Revision: http://reviews.llvm.org/D21251

Modified:
    llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h
    llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
    llvm/trunk/test/Analysis/CostModel/ARM/cast.ll
    llvm/trunk/test/Analysis/CostModel/PowerPC/ext.ll
    llvm/trunk/test/Analysis/CostModel/X86/sitofp.ll
    llvm/trunk/test/Analysis/CostModel/X86/uitofp.ll
    llvm/trunk/test/Transforms/LoopVectorize/X86/gather_scatter.ll

Modified: llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h?rev=274642&r1=274641&r2=274642&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h (original)
+++ llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h Wed Jul  6 12:30:56 2016
@@ -315,6 +315,8 @@ public:
     }
 
     // Else, assume that we need to scalarize this op.
+    // TODO: If one of the types gets legalized by splitting, handle this
+    // similarly to what getCastInstrCost() does.
     if (Ty->isVectorTy()) {
       unsigned Num = Ty->getVectorNumElements();
       unsigned Cost = static_cast<T *>(this)
@@ -409,12 +411,25 @@ public:
           return SrcLT.first * 1;
       }
 
-      // If we are converting vectors and the operation is illegal, or
-      // if the vectors are legalized to different types, estimate the
-      // scalarization costs.
-      // TODO: This is probably a big overestimate. For splits, we should have
-      // something like getTypeLegalizationCost() + 2 * getCastInstrCost().
-      // The same applies to getCmpSelInstrCost() and getArithmeticInstrCost()
+      // If we are legalizing by splitting, query the concrete TTI for the cost
+      // of casting the original vector twice. We also need to factor in the
+      // cost of the split itself. Count that as 1, to be consistent with
+      // TLI->getTypeLegalizationCost().
+      if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
+           TargetLowering::TypeSplitVector) ||
+          (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
+           TargetLowering::TypeSplitVector)) {
+        Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
+                                         Dst->getVectorNumElements() / 2);
+        Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
+                                         Src->getVectorNumElements() / 2);
+        T *TTI = static_cast<T *>(this);
+        return TTI->getVectorSplitCost() +
+               (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc));
+      }
+
+      // In other cases where the source or destination are illegal, assume
+      // the operation will get scalarized.
       unsigned Num = Dst->getVectorNumElements();
       unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
           Opcode, Dst->getScalarType(), Src->getScalarType());
@@ -472,6 +487,8 @@ public:
     }
 
     // Otherwise, assume that the cast is scalarized.
+    // TODO: If one of the types gets legalized by splitting, handle this
+    // similarly to what getCastInstrCost() does.
     if (ValTy->isVectorTy()) {
       unsigned Num = ValTy->getVectorNumElements();
       if (CondTy)
@@ -480,8 +497,7 @@ public:
           Opcode, ValTy->getScalarType(), CondTy);
 
       // Return the cost of multiple scalar invocation plus the cost of
-      // inserting
-      // and extracting the values.
+      // inserting and extracting the values.
       return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
     }
 
@@ -906,6 +922,8 @@ public:
     return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true);
   }
 
+  unsigned getVectorSplitCost() { return 1; }
+
   /// @}
 };
 

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h?rev=274642&r1=274641&r2=274642&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h Wed Jul  6 12:30:56 2016
@@ -96,6 +96,8 @@ public:
 
   int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
   bool isSourceOfDivergence(const Value *V) const;
+
+  unsigned getVectorSplitCost() { return 0; }
 };
 
 } // end namespace llvm

Modified: llvm/trunk/test/Analysis/CostModel/ARM/cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/ARM/cast.ll?rev=274642&r1=274641&r2=274642&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/ARM/cast.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/ARM/cast.ll Wed Jul  6 12:30:56 2016
@@ -264,39 +264,39 @@ define i32 @casts() {
   %r116 = fptoui <4 x float> undef to <4 x i32>
   ; CHECK:  Found an estimated cost of 1 for instruction:   %r117 = fptosi <4 x float> undef to <4 x i32>
   %r117 = fptosi <4 x float> undef to <4 x i32>
-  ; CHECK:  Found an estimated cost of 64 for instruction:   %r118 = fptoui <4 x float> undef to <4 x i64>
+  ; CHECK:  Found an estimated cost of 65 for instruction:   %r118 = fptoui <4 x float> undef to <4 x i64>
   %r118 = fptoui <4 x float> undef to <4 x i64>
-  ; CHECK:  Found an estimated cost of 64 for instruction:   %r119 = fptosi <4 x float> undef to <4 x i64>
+  ; CHECK:  Found an estimated cost of 65 for instruction:   %r119 = fptosi <4 x float> undef to <4 x i64>
   %r119 = fptosi <4 x float> undef to <4 x i64>
 
-  ; CHECK:  Found an estimated cost of 32 for instruction:   %r120 = fptoui <4 x double> undef to <4 x i1>
+  ; CHECK:  Found an estimated cost of 33 for instruction:   %r120 = fptoui <4 x double> undef to <4 x i1>
   %r120 = fptoui <4 x double> undef to <4 x i1>
-  ; CHECK:  Found an estimated cost of 32 for instruction:   %r121 = fptosi <4 x double> undef to <4 x i1>
+  ; CHECK:  Found an estimated cost of 33 for instruction:   %r121 = fptosi <4 x double> undef to <4 x i1>
   %r121 = fptosi <4 x double> undef to <4 x i1>
-  ; CHECK:  Found an estimated cost of 32 for instruction:   %r122 = fptoui <4 x double> undef to <4 x i8>
+  ; CHECK:  Found an estimated cost of 33 for instruction:   %r122 = fptoui <4 x double> undef to <4 x i8>
   %r122 = fptoui <4 x double> undef to <4 x i8>
-  ; CHECK:  Found an estimated cost of 32 for instruction:   %r123 = fptosi <4 x double> undef to <4 x i8>
+  ; CHECK:  Found an estimated cost of 33 for instruction:   %r123 = fptosi <4 x double> undef to <4 x i8>
   %r123 = fptosi <4 x double> undef to <4 x i8>
-  ; CHECK:  Found an estimated cost of 32 for instruction:   %r124 = fptoui <4 x double> undef to <4 x i16>
+  ; CHECK:  Found an estimated cost of 33 for instruction:   %r124 = fptoui <4 x double> undef to <4 x i16>
   %r124 = fptoui <4 x double> undef to <4 x i16>
-  ; CHECK:  Found an estimated cost of 32 for instruction:   %r125 = fptosi <4 x double> undef to <4 x i16>
+  ; CHECK:  Found an estimated cost of 33 for instruction:   %r125 = fptosi <4 x double> undef to <4 x i16>
   %r125 = fptosi <4 x double> undef to <4 x i16>
-  ; CHECK:  Found an estimated cost of 32 for instruction:   %r126 = fptoui <4 x double> undef to <4 x i32>
+  ; CHECK:  Found an estimated cost of 5 for instruction:   %r126 = fptoui <4 x double> undef to <4 x i32>
   %r126 = fptoui <4 x double> undef to <4 x i32>
-  ; CHECK:  Found an estimated cost of 32 for instruction:   %r127 = fptosi <4 x double> undef to <4 x i32>
+  ; CHECK:  Found an estimated cost of 5 for instruction:   %r127 = fptosi <4 x double> undef to <4 x i32>
   %r127 = fptosi <4 x double> undef to <4 x i32>
-  ; CHECK:  Found an estimated cost of 64 for instruction:   %r128 = fptoui <4 x double> undef to <4 x i64>
+  ; CHECK:  Found an estimated cost of 65 for instruction:   %r128 = fptoui <4 x double> undef to <4 x i64>
   %r128 = fptoui <4 x double> undef to <4 x i64>
-  ; CHECK:  Found an estimated cost of 64 for instruction:   %r129 = fptosi <4 x double> undef to <4 x i64>
+  ; CHECK:  Found an estimated cost of 65 for instruction:   %r129 = fptosi <4 x double> undef to <4 x i64>
   %r129 = fptosi <4 x double> undef to <4 x i64>
 
-  ; CHECK:  Found an estimated cost of 64 for instruction:   %r130 = fptoui <8 x float> undef to <8 x i1>
+  ; CHECK:  Found an estimated cost of 65 for instruction:   %r130 = fptoui <8 x float> undef to <8 x i1>
   %r130 = fptoui <8 x float> undef to <8 x i1>
-  ; CHECK:  Found an estimated cost of 64 for instruction:   %r131 = fptosi <8 x float> undef to <8 x i1>
+  ; CHECK:  Found an estimated cost of 65 for instruction:   %r131 = fptosi <8 x float> undef to <8 x i1>
   %r131 = fptosi <8 x float> undef to <8 x i1>
-  ; CHECK:  Found an estimated cost of 64 for instruction:   %r132 = fptoui <8 x float> undef to <8 x i8>
+  ; CHECK:  Found an estimated cost of 7 for instruction:   %r132 = fptoui <8 x float> undef to <8 x i8>
   %r132 = fptoui <8 x float> undef to <8 x i8>
-  ; CHECK:  Found an estimated cost of 64 for instruction:   %r133 = fptosi <8 x float> undef to <8 x i8>
+  ; CHECK:  Found an estimated cost of 7 for instruction:   %r133 = fptosi <8 x float> undef to <8 x i8>
   %r133 = fptosi <8 x float> undef to <8 x i8>
   ; CHECK:  Found an estimated cost of 4 for instruction:   %r134 = fptoui <8 x float> undef to <8 x i16>
   %r134 = fptoui <8 x float> undef to <8 x i16>
@@ -306,39 +306,39 @@ define i32 @casts() {
   %r136 = fptoui <8 x float> undef to <8 x i32>
   ; CHECK:  Found an estimated cost of 2 for instruction:   %r137 = fptosi <8 x float> undef to <8 x i32>
   %r137 = fptosi <8 x float> undef to <8 x i32>
-  ; CHECK:  Found an estimated cost of 128 for instruction:   %r138 = fptoui <8 x float> undef to <8 x i64>
+  ; CHECK:  Found an estimated cost of 131 for instruction:   %r138 = fptoui <8 x float> undef to <8 x i64>
   %r138 = fptoui <8 x float> undef to <8 x i64>
-  ; CHECK:  Found an estimated cost of 128 for instruction:   %r139 = fptosi <8 x float> undef to <8 x i64>
+  ; CHECK:  Found an estimated cost of 131 for instruction:   %r139 = fptosi <8 x float> undef to <8 x i64>
   %r139 = fptosi <8 x float> undef to <8 x i64>
 
-  ; CHECK:  Found an estimated cost of 64 for instruction:   %r140 = fptoui <8 x double> undef to <8 x i1>
+  ; CHECK:  Found an estimated cost of 67 for instruction:   %r140 = fptoui <8 x double> undef to <8 x i1>
   %r140 = fptoui <8 x double> undef to <8 x i1>
-  ; CHECK:  Found an estimated cost of 64 for instruction:   %r141 = fptosi <8 x double> undef to <8 x i1>
+  ; CHECK:  Found an estimated cost of 67 for instruction:   %r141 = fptosi <8 x double> undef to <8 x i1>
   %r141 = fptosi <8 x double> undef to <8 x i1>
-  ; CHECK:  Found an estimated cost of 64 for instruction:   %r142 = fptoui <8 x double> undef to <8 x i8>
+  ; CHECK:  Found an estimated cost of 67 for instruction:   %r142 = fptoui <8 x double> undef to <8 x i8>
   %r142 = fptoui <8 x double> undef to <8 x i8>
-  ; CHECK:  Found an estimated cost of 64 for instruction:   %r143 = fptosi <8 x double> undef to <8 x i8>
+  ; CHECK:  Found an estimated cost of 67 for instruction:   %r143 = fptosi <8 x double> undef to <8 x i8>
   %r143 = fptosi <8 x double> undef to <8 x i8>
-  ; CHECK:  Found an estimated cost of 64 for instruction:   %r144 = fptoui <8 x double> undef to <8 x i16>
+  ; CHECK:  Found an estimated cost of 67 for instruction:   %r144 = fptoui <8 x double> undef to <8 x i16>
   %r144 = fptoui <8 x double> undef to <8 x i16>
-  ; CHECK:  Found an estimated cost of 64 for instruction:   %r145 = fptosi <8 x double> undef to <8 x i16>
+  ; CHECK:  Found an estimated cost of 67 for instruction:   %r145 = fptosi <8 x double> undef to <8 x i16>
   %r145 = fptosi <8 x double> undef to <8 x i16>
-  ; CHECK:  Found an estimated cost of 64 for instruction:   %r146 = fptoui <8 x double> undef to <8 x i32>
+  ; CHECK:  Found an estimated cost of 11 for instruction:   %r146 = fptoui <8 x double> undef to <8 x i32>
   %r146 = fptoui <8 x double> undef to <8 x i32>
-  ; CHECK:  Found an estimated cost of 64 for instruction:   %r147 = fptosi <8 x double> undef to <8 x i32>
+  ; CHECK:  Found an estimated cost of 11 for instruction:   %r147 = fptosi <8 x double> undef to <8 x i32>
   %r147 = fptosi <8 x double> undef to <8 x i32>
-  ; CHECK:  Found an estimated cost of 128 for instruction:   %r148 = fptoui <8 x double> undef to <8 x i64>
+  ; CHECK:  Found an estimated cost of 131 for instruction:   %r148 = fptoui <8 x double> undef to <8 x i64>
   %r148 = fptoui <8 x double> undef to <8 x i64>
-  ; CHECK:  Found an estimated cost of 128 for instruction:   %r149 = fptosi <8 x double> undef to <8 x i64>
+  ; CHECK:  Found an estimated cost of 131 for instruction:   %r149 = fptosi <8 x double> undef to <8 x i64>
   %r149 = fptosi <8 x double> undef to <8 x i64>
 
-  ; CHECK:  Found an estimated cost of 128 for instruction:   %r150 = fptoui <16 x float> undef to <16 x i1>
+  ; CHECK:  Found an estimated cost of 131 for instruction:   %r150 = fptoui <16 x float> undef to <16 x i1>
   %r150 = fptoui <16 x float> undef to <16 x i1>
-  ; CHECK:  Found an estimated cost of 128 for instruction:   %r151 = fptosi <16 x float> undef to <16 x i1>
+  ; CHECK:  Found an estimated cost of 131 for instruction:   %r151 = fptosi <16 x float> undef to <16 x i1>
   %r151 = fptosi <16 x float> undef to <16 x i1>
- ; CHECK:  Found an estimated cost of 128 for instruction:   %r152 = fptoui <16 x float> undef to <16 x i8>
+ ; CHECK:  Found an estimated cost of 15 for instruction:   %r152 = fptoui <16 x float> undef to <16 x i8>
   %r152 = fptoui <16 x float> undef to <16 x i8>
-  ; CHECK:  Found an estimated cost of 128 for instruction:   %r153 = fptosi <16 x float> undef to <16 x i8>
+  ; CHECK:  Found an estimated cost of 15 for instruction:   %r153 = fptosi <16 x float> undef to <16 x i8>
   %r153 = fptosi <16 x float> undef to <16 x i8>
   ; CHECK:  Found an estimated cost of 8 for instruction:   %r154 = fptoui <16 x float> undef to <16 x i16>
   %r154 = fptoui <16 x float> undef to <16 x i16>
@@ -348,30 +348,30 @@ define i32 @casts() {
   %r156 = fptoui <16 x float> undef to <16 x i32>
   ; CHECK:  Found an estimated cost of 4 for instruction:   %r157 = fptosi <16 x float> undef to <16 x i32>
   %r157 = fptosi <16 x float> undef to <16 x i32>
-  ; CHECK:  Found an estimated cost of 256 for instruction:   %r158 = fptoui <16 x float> undef to <16 x i64>
+  ; CHECK:  Found an estimated cost of 263 for instruction:   %r158 = fptoui <16 x float> undef to <16 x i64>
   %r158 = fptoui <16 x float> undef to <16 x i64>
-  ; CHECK:  Found an estimated cost of 256 for instruction:   %r159 = fptosi <16 x float> undef to <16 x i64>
+  ; CHECK:  Found an estimated cost of 263 for instruction:   %r159 = fptosi <16 x float> undef to <16 x i64>
   %r159 = fptosi <16 x float> undef to <16 x i64>
 
-  ; CHECK:  Found an estimated cost of 128 for instruction:   %r160 = fptoui <16 x double> undef to <16 x i1>
+  ; CHECK:  Found an estimated cost of 135 for instruction:   %r160 = fptoui <16 x double> undef to <16 x i1>
   %r160 = fptoui <16 x double> undef to <16 x i1>
-  ; CHECK:  Found an estimated cost of 128 for instruction:   %r161 = fptosi <16 x double> undef to <16 x i1>
+  ; CHECK:  Found an estimated cost of 135 for instruction:   %r161 = fptosi <16 x double> undef to <16 x i1>
   %r161 = fptosi <16 x double> undef to <16 x i1>
-  ; CHECK:  Found an estimated cost of 128 for instruction:   %r162 = fptoui <16 x double> undef to <16 x i8>
+  ; CHECK:  Found an estimated cost of 135 for instruction:   %r162 = fptoui <16 x double> undef to <16 x i8>
   %r162 = fptoui <16 x double> undef to <16 x i8>
-  ; CHECK:  Found an estimated cost of 128 for instruction:   %r163 = fptosi <16 x double> undef to <16 x i8>
+  ; CHECK:  Found an estimated cost of 135 for instruction:   %r163 = fptosi <16 x double> undef to <16 x i8>
   %r163 = fptosi <16 x double> undef to <16 x i8>
-  ; CHECK:  Found an estimated cost of 128 for instruction:   %r164 = fptoui <16 x double> undef to <16 x i16>
+  ; CHECK:  Found an estimated cost of 135 for instruction:   %r164 = fptoui <16 x double> undef to <16 x i16>
   %r164 = fptoui <16 x double> undef to <16 x i16>
-  ; CHECK:  Found an estimated cost of 128 for instruction:   %r165 = fptosi <16 x double> undef to <16 x i16>
+  ; CHECK:  Found an estimated cost of 135 for instruction:   %r165 = fptosi <16 x double> undef to <16 x i16>
   %r165 = fptosi <16 x double> undef to <16 x i16>
-  ; CHECK:  Found an estimated cost of 128 for instruction:   %r166 = fptoui <16 x double> undef to <16 x i32>
+  ; CHECK:  Found an estimated cost of 23 for instruction:   %r166 = fptoui <16 x double> undef to <16 x i32>
   %r166 = fptoui <16 x double> undef to <16 x i32>
-  ; CHECK:  Found an estimated cost of 128 for instruction:   %r167 = fptosi <16 x double> undef to <16 x i32>
+  ; CHECK:  Found an estimated cost of 23 for instruction:   %r167 = fptosi <16 x double> undef to <16 x i32>
   %r167 = fptosi <16 x double> undef to <16 x i32>
-  ; CHECK:  Found an estimated cost of 256 for instruction:   %r168 = fptoui <16 x double> undef to <16 x i64>
+  ; CHECK:  Found an estimated cost of 263 for instruction:   %r168 = fptoui <16 x double> undef to <16 x i64>
   %r168 = fptoui <16 x double> undef to <16 x i64>
-  ; CHECK:  Found an estimated cost of 256 for instruction:   %r169 = fptosi <16 x double> undef to <16 x i64>
+  ; CHECK:  Found an estimated cost of 263 for instruction:   %r169 = fptosi <16 x double> undef to <16 x i64>
   %r169 = fptosi <16 x double> undef to <16 x i64>
 
   ; CHECK:  Found an estimated cost of 12 for instruction:   %r170 = uitofp <2 x i1> undef to <2 x float>
@@ -432,39 +432,39 @@ define i32 @casts() {
   %r196 = uitofp <4 x i32> undef to <4 x float>
   ; CHECK:  Found an estimated cost of 1 for instruction:   %r197 = sitofp <4 x i32> undef to <4 x float>
   %r197 = sitofp <4 x i32> undef to <4 x float>
-  ; CHECK:  Found an estimated cost of 56 for instruction:   %r198 = uitofp <4 x i64> undef to <4 x float>
+  ; CHECK:  Found an estimated cost of 57 for instruction:   %r198 = uitofp <4 x i64> undef to <4 x float>
   %r198 = uitofp <4 x i64> undef to <4 x float>
-  ; CHECK:  Found an estimated cost of 56 for instruction:   %r199 = sitofp <4 x i64> undef to <4 x float>
+  ; CHECK:  Found an estimated cost of 57 for instruction:   %r199 = sitofp <4 x i64> undef to <4 x float>
   %r199 = sitofp <4 x i64> undef to <4 x float>
 
-  ; CHECK:  Found an estimated cost of 16 for instruction:   %r200 = uitofp <4 x i1> undef to <4 x double>
+  ; CHECK:  Found an estimated cost of 17 for instruction:   %r200 = uitofp <4 x i1> undef to <4 x double>
   %r200 = uitofp <4 x i1> undef to <4 x double>
-  ; CHECK:  Found an estimated cost of 16 for instruction:   %r201 = sitofp <4 x i1> undef to <4 x double>
+  ; CHECK:  Found an estimated cost of 17 for instruction:   %r201 = sitofp <4 x i1> undef to <4 x double>
   %r201 = sitofp <4 x i1> undef to <4 x double>
-  ; CHECK:  Found an estimated cost of 16 for instruction:   %r202 = uitofp <4 x i8> undef to <4 x double>
+  ; CHECK:  Found an estimated cost of 9 for instruction:   %r202 = uitofp <4 x i8> undef to <4 x double>
   %r202 = uitofp <4 x i8> undef to <4 x double>
-  ; CHECK:  Found an estimated cost of 16 for instruction:   %r203 = sitofp <4 x i8> undef to <4 x double>
+  ; CHECK:  Found an estimated cost of 9 for instruction:   %r203 = sitofp <4 x i8> undef to <4 x double>
   %r203 = sitofp <4 x i8> undef to <4 x double>
-  ; CHECK:  Found an estimated cost of 16 for instruction:   %r204 = uitofp <4 x i16> undef to <4 x double>
+  ; CHECK:  Found an estimated cost of 7 for instruction:   %r204 = uitofp <4 x i16> undef to <4 x double>
   %r204 = uitofp <4 x i16> undef to <4 x double>
-  ; CHECK:  Found an estimated cost of 16 for instruction:   %r205 = sitofp <4 x i16> undef to <4 x double>
+  ; CHECK:  Found an estimated cost of 7 for instruction:   %r205 = sitofp <4 x i16> undef to <4 x double>
   %r205 = sitofp <4 x i16> undef to <4 x double>
-  ; CHECK:  Found an estimated cost of 16 for instruction:   %r206 = uitofp <4 x i32> undef to <4 x double>
+  ; CHECK:  Found an estimated cost of 5 for instruction:   %r206 = uitofp <4 x i32> undef to <4 x double>
   %r206 = uitofp <4 x i32> undef to <4 x double>
-  ; CHECK:  Found an estimated cost of 16 for instruction:   %r207 = sitofp <4 x i32> undef to <4 x double>
+  ; CHECK:  Found an estimated cost of 5 for instruction:   %r207 = sitofp <4 x i32> undef to <4 x double>
   %r207 = sitofp <4 x i32> undef to <4 x double>
-  ; CHECK:  Found an estimated cost of 48 for instruction:   %r208 = uitofp <4 x i64> undef to <4 x double>
+  ; CHECK:  Found an estimated cost of 49 for instruction:   %r208 = uitofp <4 x i64> undef to <4 x double>
   %r208 = uitofp <4 x i64> undef to <4 x double>
-  ; CHECK:  Found an estimated cost of 48 for instruction:   %r209 = sitofp <4 x i64> undef to <4 x double>
+  ; CHECK:  Found an estimated cost of 49 for instruction:   %r209 = sitofp <4 x i64> undef to <4 x double>
   %r209 = sitofp <4 x i64> undef to <4 x double>
 
-  ; CHECK:  Found an estimated cost of 48 for instruction:   %r210 = uitofp <8 x i1> undef to <8 x float>
+  ; CHECK:  Found an estimated cost of 7 for instruction:   %r210 = uitofp <8 x i1> undef to <8 x float>
   %r210 = uitofp <8 x i1> undef to <8 x float>
-  ; CHECK:  Found an estimated cost of 48 for instruction:   %r211 = sitofp <8 x i1> undef to <8 x float>
+  ; CHECK:  Found an estimated cost of 7 for instruction:   %r211 = sitofp <8 x i1> undef to <8 x float>
   %r211 = sitofp <8 x i1> undef to <8 x float>
-  ; CHECK:  Found an estimated cost of 48 for instruction:   %r212 = uitofp <8 x i8> undef to <8 x float>
+  ; CHECK:  Found an estimated cost of 7 for instruction:   %r212 = uitofp <8 x i8> undef to <8 x float>
   %r212 = uitofp <8 x i8> undef to <8 x float>
-  ; CHECK:  Found an estimated cost of 48 for instruction:   %r213 = sitofp <8 x i8> undef to <8 x float>
+  ; CHECK:  Found an estimated cost of 7 for instruction:   %r213 = sitofp <8 x i8> undef to <8 x float>
   %r213 = sitofp <8 x i8> undef to <8 x float>
   ; CHECK:  Found an estimated cost of 4 for instruction:   %r214 = uitofp <8 x i16> undef to <8 x float>
   %r214 = uitofp <8 x i16> undef to <8 x float>
@@ -474,39 +474,39 @@ define i32 @casts() {
   %r216 = uitofp <8 x i32> undef to <8 x float>
   ; CHECK:  Found an estimated cost of 2 for instruction:   %r217 = sitofp <8 x i32> undef to <8 x float>
   %r217 = sitofp <8 x i32> undef to <8 x float>
-  ; CHECK:  Found an estimated cost of 112 for instruction:   %r218 = uitofp <8 x i64> undef to <8 x float>
+  ; CHECK:  Found an estimated cost of 115 for instruction:   %r218 = uitofp <8 x i64> undef to <8 x float>
   %r218 = uitofp <8 x i64> undef to <8 x float>
-  ; CHECK:  Found an estimated cost of 112 for instruction:   %r219 = sitofp <8 x i64> undef to <8 x float>
+  ; CHECK:  Found an estimated cost of 115 for instruction:   %r219 = sitofp <8 x i64> undef to <8 x float>
   %r219 = sitofp <8 x i64> undef to <8 x float>
 
-  ; CHECK:  Found an estimated cost of 32 for instruction:   %r220 = uitofp <8 x i1> undef to <8 x double>
+  ; CHECK:  Found an estimated cost of 35 for instruction:   %r220 = uitofp <8 x i1> undef to <8 x double>
   %r220 = uitofp <8 x i1> undef to <8 x double>
-  ; CHECK:  Found an estimated cost of 32 for instruction:   %r221 = sitofp <8 x i1> undef to <8 x double>
+  ; CHECK:  Found an estimated cost of 35 for instruction:   %r221 = sitofp <8 x i1> undef to <8 x double>
   %r221 = sitofp <8 x i1> undef to <8 x double>
-  ; CHECK:  Found an estimated cost of 32 for instruction:   %r222 = uitofp <8 x i8> undef to <8 x double>
+  ; CHECK:  Found an estimated cost of 19 for instruction:   %r222 = uitofp <8 x i8> undef to <8 x double>
   %r222 = uitofp <8 x i8> undef to <8 x double>
-  ; CHECK:  Found an estimated cost of 32 for instruction:   %r223 = sitofp <8 x i8> undef to <8 x double>
+  ; CHECK:  Found an estimated cost of 19 for instruction:   %r223 = sitofp <8 x i8> undef to <8 x double>
   %r223 = sitofp <8 x i8> undef to <8 x double>
-  ; CHECK:  Found an estimated cost of 32 for instruction:   %r224 = uitofp <8 x i16> undef to <8 x double>
+  ; CHECK:  Found an estimated cost of 15 for instruction:   %r224 = uitofp <8 x i16> undef to <8 x double>
   %r224 = uitofp <8 x i16> undef to <8 x double>
-  ; CHECK:  Found an estimated cost of 32 for instruction:   %r225 = sitofp <8 x i16> undef to <8 x double>
+  ; CHECK:  Found an estimated cost of 15 for instruction:   %r225 = sitofp <8 x i16> undef to <8 x double>
   %r225 = sitofp <8 x i16> undef to <8 x double>
-  ; CHECK:  Found an estimated cost of 32 for instruction:   %r226 = uitofp <8 x i16> undef to <8 x double>
+  ; CHECK:  Found an estimated cost of 15 for instruction:   %r226 = uitofp <8 x i16> undef to <8 x double>
   %r226 = uitofp <8 x i16> undef to <8 x double>
-  ; CHECK:  Found an estimated cost of 32 for instruction:   %r227 = sitofp <8 x i16> undef to <8 x double>
+  ; CHECK:  Found an estimated cost of 15 for instruction:   %r227 = sitofp <8 x i16> undef to <8 x double>
   %r227 = sitofp <8 x i16> undef to <8 x double>
-  ; CHECK:  Found an estimated cost of 96 for instruction:   %r228 = uitofp <8 x i64> undef to <8 x double>
+  ; CHECK:  Found an estimated cost of 99 for instruction:   %r228 = uitofp <8 x i64> undef to <8 x double>
   %r228 = uitofp <8 x i64> undef to <8 x double>
-  ; CHECK:  Found an estimated cost of 96 for instruction:   %r229 = sitofp <8 x i64> undef to <8 x double>
+  ; CHECK:  Found an estimated cost of 99 for instruction:   %r229 = sitofp <8 x i64> undef to <8 x double>
   %r229 = sitofp <8 x i64> undef to <8 x double>
 
-  ; CHECK:  Found an estimated cost of 96 for instruction:   %r230 = uitofp <16 x i1> undef to <16 x float>
+  ; CHECK:  Found an estimated cost of 15 for instruction:   %r230 = uitofp <16 x i1> undef to <16 x float>
   %r230 = uitofp <16 x i1> undef to <16 x float>
-  ; CHECK:  Found an estimated cost of 96 for instruction:   %r231 = sitofp <16 x i1> undef to <16 x float>
+  ; CHECK:  Found an estimated cost of 15 for instruction:   %r231 = sitofp <16 x i1> undef to <16 x float>
   %r231 = sitofp <16 x i1> undef to <16 x float>
-  ; CHECK:  Found an estimated cost of 96 for instruction:   %r232 = uitofp <16 x i8> undef to <16 x float>
+  ; CHECK:  Found an estimated cost of 15 for instruction:   %r232 = uitofp <16 x i8> undef to <16 x float>
   %r232 = uitofp <16 x i8> undef to <16 x float>
-  ; CHECK:  Found an estimated cost of 96 for instruction:   %r233 = sitofp <16 x i8> undef to <16 x float>
+  ; CHECK:  Found an estimated cost of 15 for instruction:   %r233 = sitofp <16 x i8> undef to <16 x float>
   %r233 = sitofp <16 x i8> undef to <16 x float>
   ; CHECK:  Found an estimated cost of 8 for instruction:   %r234 = uitofp <16 x i16> undef to <16 x float>
   %r234 = uitofp <16 x i16> undef to <16 x float>
@@ -516,30 +516,30 @@ define i32 @casts() {
   %r236 = uitofp <16 x i32> undef to <16 x float>
   ; CHECK:  Found an estimated cost of 4 for instruction:   %r237 = sitofp <16 x i32> undef to <16 x float>
   %r237 = sitofp <16 x i32> undef to <16 x float>
-  ; CHECK:  Found an estimated cost of 224 for instruction:   %r238 = uitofp <16 x i64> undef to <16 x float>
+  ; CHECK:  Found an estimated cost of 231 for instruction:   %r238 = uitofp <16 x i64> undef to <16 x float>
   %r238 = uitofp <16 x i64> undef to <16 x float>
-  ; CHECK:  Found an estimated cost of 224 for instruction:   %r239 = sitofp <16 x i64> undef to <16 x float>
+  ; CHECK:  Found an estimated cost of 231 for instruction:   %r239 = sitofp <16 x i64> undef to <16 x float>
   %r239 = sitofp <16 x i64> undef to <16 x float>
 
-  ; CHECK:  Found an estimated cost of 64 for instruction:   %r240 = uitofp <16 x i1> undef to <16 x double>
+  ; CHECK:  Found an estimated cost of 71 for instruction:   %r240 = uitofp <16 x i1> undef to <16 x double>
   %r240 = uitofp <16 x i1> undef to <16 x double>
-  ; CHECK:  Found an estimated cost of 64 for instruction:   %r241 = sitofp <16 x i1> undef to <16 x double>
+  ; CHECK:  Found an estimated cost of 71 for instruction:   %r241 = sitofp <16 x i1> undef to <16 x double>
   %r241 = sitofp <16 x i1> undef to <16 x double>
-  ; CHECK:  Found an estimated cost of 64 for instruction:   %r242 = uitofp <16 x i8> undef to <16 x double>
+  ; CHECK:  Found an estimated cost of 39 for instruction:   %r242 = uitofp <16 x i8> undef to <16 x double>
   %r242 = uitofp <16 x i8> undef to <16 x double>
-  ; CHECK:  Found an estimated cost of 64 for instruction:   %r243 = sitofp <16 x i8> undef to <16 x double>
+  ; CHECK:  Found an estimated cost of 39 for instruction:   %r243 = sitofp <16 x i8> undef to <16 x double>
   %r243 = sitofp <16 x i8> undef to <16 x double>
-  ; CHECK:  Found an estimated cost of 64 for instruction:   %r244 = uitofp <16 x i16> undef to <16 x double>
+  ; CHECK:  Found an estimated cost of 31 for instruction:   %r244 = uitofp <16 x i16> undef to <16 x double>
   %r244 = uitofp <16 x i16> undef to <16 x double>
-  ; CHECK:  Found an estimated cost of 64 for instruction:   %r245 = sitofp <16 x i16> undef to <16 x double>
+  ; CHECK:  Found an estimated cost of 31 for instruction:   %r245 = sitofp <16 x i16> undef to <16 x double>
   %r245 = sitofp <16 x i16> undef to <16 x double>
-  ; CHECK:  Found an estimated cost of 64 for instruction:   %r246 = uitofp <16 x i16> undef to <16 x double>
+  ; CHECK:  Found an estimated cost of 31 for instruction:   %r246 = uitofp <16 x i16> undef to <16 x double>
   %r246 = uitofp <16 x i16> undef to <16 x double>
-  ;  CHECK:  Found an estimated cost of 64 for instruction:   %r247 = sitofp <16 x i16> undef to <16 x double>
+  ; CHECK:  Found an estimated cost of 31 for instruction:   %r247 = sitofp <16 x i16> undef to <16 x double>
   %r247 = sitofp <16 x i16> undef to <16 x double>
-  ; CHECK:   Found an estimated cost of 192 for instruction:   %r248 = uitofp <16 x i64> undef to <16 x double>
+  ; CHECK:   Found an estimated cost of 199 for instruction:   %r248 = uitofp <16 x i64> undef to <16 x double>
   %r248 = uitofp <16 x i64> undef to <16 x double>
-  ; CHECK:   Found an estimated cost of 192 for instruction:   %r249 = sitofp <16 x i64> undef to <16 x double>
+  ; CHECK:   Found an estimated cost of 199 for instruction:   %r249 = sitofp <16 x i64> undef to <16 x double>
   %r249 = sitofp <16 x i64> undef to <16 x double>
 
   ; CHECK:   Found an estimated cost of 0 for instruction:   ret i32 undef

Modified: llvm/trunk/test/Analysis/CostModel/PowerPC/ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/PowerPC/ext.ll?rev=274642&r1=274641&r2=274642&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/PowerPC/ext.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/PowerPC/ext.ll Wed Jul  6 12:30:56 2016
@@ -13,7 +13,7 @@ define void @exts() {
   ; CHECK: cost of 1 {{.*}} sext
   %v3 = sext <4 x i16> undef to <4 x i32>
 
-  ; CHECK: cost of 112 {{.*}} sext
+  ; CHECK: cost of 3 {{.*}} sext
   %v4 = sext <8 x i16> undef to <8 x i32>
 
   ret void

Modified: llvm/trunk/test/Analysis/CostModel/X86/sitofp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/sitofp.ll?rev=274642&r1=274641&r2=274642&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/sitofp.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/sitofp.ll Wed Jul  6 12:30:56 2016
@@ -40,10 +40,10 @@ define <8 x double> @sitofpv8i8v8double(
   ; SSE2: cost of 80 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv8i8v8double
-  ; AVX1: cost of 20 {{.*}} sitofp
+  ; AVX1: cost of 7 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv8i8v8double
-  ; AVX2: cost of 20 {{.*}} sitofp
+  ; AVX2: cost of 7 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv8i8v8double
   ; AVX512F: cost of 2 {{.*}} sitofp
@@ -56,13 +56,13 @@ define <16 x double> @sitofpv16i8v16doub
   ; SSE2: cost of 160 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv16i8v16double
-  ; AVX1: cost of 40 {{.*}} sitofp
+  ; AVX1: cost of 15 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv16i8v16double
-  ; AVX2: cost of 40 {{.*}} sitofp
+  ; AVX2: cost of 15 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv16i8v16double
-  ; AVX512F: cost of 44 {{.*}} sitofp
+  ; AVX512F: cost of 5 {{.*}} sitofp
   %1 = sitofp <16 x i8> %a to <16 x double>
   ret <16 x double> %1
 }
@@ -72,13 +72,13 @@ define <32 x double> @sitofpv32i8v32doub
   ; SSE2: cost of 320 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv32i8v32double
-  ; AVX1: cost of 80 {{.*}} sitofp
+  ; AVX1: cost of 31 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv32i8v32double
-  ; AVX2: cost of 80 {{.*}} sitofp
+  ; AVX2: cost of 31 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv32i8v32double
-  ; AVX512F: cost of 88 {{.*}} sitofp
+  ; AVX512F: cost of 11 {{.*}} sitofp
   %1 = sitofp <32 x i8> %a to <32 x double>
   ret <32 x double> %1
 }
@@ -120,10 +120,10 @@ define <8 x double> @sitofpv8i16v8double
   ; SSE2: cost of 80 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv8i16v8double
-  ; AVX1: cost of 20 {{.*}} sitofp
+  ; AVX1: cost of 7 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv8i16v8double
-  ; AVX2: cost of 20 {{.*}} sitofp
+  ; AVX2: cost of 7 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv8i16v8double
   ; AVX512F: cost of 2 {{.*}} sitofp
@@ -136,13 +136,13 @@ define <16 x double> @sitofpv16i16v16dou
   ; SSE2: cost of 160 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv16i16v16double
-  ; AVX1: cost of 40 {{.*}} sitofp
+  ; AVX1: cost of 15 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv16i16v16double
-  ; AVX2: cost of 40 {{.*}} sitofp
+  ; AVX2: cost of 15 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv16i16v16double
-  ; AVX512F: cost of 44 {{.*}} sitofp
+  ; AVX512F: cost of 5 {{.*}} sitofp
   %1 = sitofp <16 x i16> %a to <16 x double>
   ret <16 x double> %1
 }
@@ -152,13 +152,13 @@ define <32 x double> @sitofpv32i16v32dou
   ; SSE2: cost of 320 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv32i16v32double
-  ; AVX1: cost of 80 {{.*}} sitofp
+  ; AVX1: cost of 31 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv32i16v32double
-  ; AVX2: cost of 80 {{.*}} sitofp
+  ; AVX2: cost of 31 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv32i16v32double
-  ; AVX512F: cost of 88 {{.*}} sitofp
+  ; AVX512F: cost of 11 {{.*}} sitofp
   %1 = sitofp <32 x i16> %a to <32 x double>
   ret <32 x double> %1
 }
@@ -200,10 +200,10 @@ define <8 x double> @sitofpv8i32v8double
   ; SSE2: cost of 80 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv8i32v8double
-  ; AVX1: cost of 20 {{.*}} sitofp
+  ; AVX1: cost of 3 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv8i32v8double
-  ; AVX2: cost of 20 {{.*}} sitofp
+  ; AVX2: cost of 3 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv8i32v8double
   ; AVX512F: cost of 1 {{.*}} sitofp
@@ -216,13 +216,13 @@ define <16 x double> @sitofpv16i32v16dou
   ; SSE2: cost of 160 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv16i32v16double
-  ; AVX1: cost of 40 {{.*}} sitofp
+  ; AVX1: cost of 7 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv16i32v16double
-  ; AVX2: cost of 40 {{.*}} sitofp
+  ; AVX2: cost of 7 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv16i32v16double
-  ; AVX512F: cost of 44 {{.*}} sitofp
+  ; AVX512F: cost of 3 {{.*}} sitofp
   %1 = sitofp <16 x i32> %a to <16 x double>
   ret <16 x double> %1
 }
@@ -232,13 +232,13 @@ define <32 x double> @sitofpv32i32v32dou
   ; SSE2: cost of 320 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv32i32v32double
-  ; AVX1: cost of 80 {{.*}} sitofp
+  ; AVX1: cost of 15 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv32i32v32double
-  ; AVX2: cost of 80 {{.*}} sitofp
+  ; AVX2: cost of 15 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv32i32v32double
-  ; AVX512F: cost of 88 {{.*}} sitofp
+  ; AVX512F: cost of 7 {{.*}} sitofp
   %1 = sitofp <32 x i32> %a to <32 x double>
   ret <32 x double> %1
 }
@@ -280,10 +280,10 @@ define <8 x double> @sitofpv8i64v8double
   ; SSE2: cost of 80 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv8i64v8double
-  ; AVX1: cost of 20 {{.*}} sitofp
+  ; AVX1: cost of 21 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv8i64v8double
-  ; AVX2: cost of 20 {{.*}} sitofp
+  ; AVX2: cost of 21 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv8i64v8double
   ; AVX512F: cost of 22 {{.*}} sitofp
@@ -296,13 +296,13 @@ define <16 x double> @sitofpv16i64v16dou
   ; SSE2: cost of 160 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv16i64v16double
-  ; AVX1: cost of 40 {{.*}} sitofp
+  ; AVX1: cost of 43 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv16i64v16double
-  ; AVX2: cost of 40 {{.*}} sitofp
+  ; AVX2: cost of 43 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv16i64v16double
-  ; AVX512F: cost of 44 {{.*}} sitofp
+  ; AVX512F: cost of 45 {{.*}} sitofp
   %1 = sitofp <16 x i64> %a to <16 x double>
   ret <16 x double> %1
 }
@@ -312,13 +312,13 @@ define <32 x double> @sitofpv32i64v32dou
   ; SSE2: cost of 320 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv32i64v32double
-  ; AVX1: cost of 80 {{.*}} sitofp
+  ; AVX1: cost of 87 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv32i64v32double
-  ; AVX2: cost of 80 {{.*}} sitofp
+  ; AVX2: cost of 87 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv32i64v32double
-  ; AVX512F: cost of 88 {{.*}} sitofp
+  ; AVX512F: cost of 91 {{.*}} sitofp
   %1 = sitofp <32 x i64> %a to <32 x double>
   ret <32 x double> %1
 }
@@ -376,10 +376,10 @@ define <16 x float> @sitofpv16i8v16float
   ; SSE2: cost of 8 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv16i8v16float
-  ; AVX1: cost of 44 {{.*}} sitofp
+  ; AVX1: cost of 17 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv16i8v16float
-  ; AVX2: cost of 44 {{.*}} sitofp
+  ; AVX2: cost of 17 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv16i8v16float
   ; AVX512F: cost of 2 {{.*}} sitofp
@@ -392,13 +392,13 @@ define <32 x float> @sitofpv32i8v32float
   ; SSE2: cost of 16 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv32i8v32float
-  ; AVX1: cost of 88 {{.*}} sitofp
+  ; AVX1: cost of 35 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv32i8v32float
-  ; AVX2: cost of 88 {{.*}} sitofp
+  ; AVX2: cost of 35 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv32i8v32float
-  ; AVX512F: cost of 92 {{.*}} sitofp
+  ; AVX512F: cost of 5 {{.*}} sitofp
   %1 = sitofp <32 x i8> %a to <32 x float>
   ret <32 x float> %1
 }
@@ -456,10 +456,10 @@ define <16 x float> @sitofpv16i16v16floa
   ; SSE2: cost of 30 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv16i16v16float
-  ; AVX1: cost of 44 {{.*}} sitofp
+  ; AVX1: cost of 11 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv16i16v16float
-  ; AVX2: cost of 44 {{.*}} sitofp
+  ; AVX2: cost of 11 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv16i16v16float
   ; AVX512F: cost of 2 {{.*}} sitofp
@@ -472,13 +472,13 @@ define <32 x float> @sitofpv32i16v32floa
   ; SSE2: cost of 60 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv32i16v32float
-  ; AVX1: cost of 88 {{.*}} sitofp
+  ; AVX1: cost of 23 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv32i16v32float
-  ; AVX2: cost of 88 {{.*}} sitofp
+  ; AVX2: cost of 23 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv32i16v32float
-  ; AVX512F: cost of 92 {{.*}} sitofp
+  ; AVX512F: cost of 5 {{.*}} sitofp
   %1 = sitofp <32 x i16> %a to <32 x float>
   ret <32 x float> %1
 }
@@ -536,10 +536,10 @@ define <16 x float> @sitofpv16i32v16floa
   ; SSE2: cost of 60 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv16i32v16float
-  ; AVX1: cost of 44 {{.*}} sitofp
+  ; AVX1: cost of 3 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv16i32v16float
-  ; AVX2: cost of 44 {{.*}} sitofp
+  ; AVX2: cost of 3 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv16i32v16float
   ; AVX512F: cost of 1 {{.*}} sitofp
@@ -552,13 +552,13 @@ define <32 x float> @sitofpv32i32v32floa
   ; SSE2: cost of 120 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv32i32v32float
-  ; AVX1: cost of 88 {{.*}} sitofp
+  ; AVX1: cost of 7 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv32i32v32float
-  ; AVX2: cost of 88 {{.*}} sitofp
+  ; AVX2: cost of 7 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv32i32v32float
-  ; AVX512F: cost of 92 {{.*}} sitofp
+  ; AVX512F: cost of 3 {{.*}} sitofp
   %1 = sitofp <32 x i32> %a to <32 x float>
   ret <32 x float> %1
 }
@@ -600,10 +600,10 @@ define <8 x float> @sitofpv8i64v8float(<
   ; SSE2: cost of 60 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv8i64v8float
-  ; AVX1: cost of 22 {{.*}} sitofp
+  ; AVX1: cost of 21 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv8i64v8float
-  ; AVX2: cost of 22 {{.*}} sitofp
+  ; AVX2: cost of 21 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv8i64v8float
   ; AVX512F: cost of 22 {{.*}} sitofp
@@ -616,13 +616,13 @@ define <16 x float> @sitofpv16i64v16floa
   ; SSE2: cost of 120 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv16i64v16float
-  ; AVX1: cost of 44 {{.*}} sitofp
+  ; AVX1: cost of 43 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv16i64v16float
-  ; AVX2: cost of 44 {{.*}} sitofp
+  ; AVX2: cost of 43 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv16i64v16float
-  ; AVX512F: cost of 46 {{.*}} sitofp
+  ; AVX512F: cost of 45 {{.*}} sitofp
   %1 = sitofp <16 x i64> %a to <16 x float>
   ret <16 x float> %1
 }
@@ -632,13 +632,13 @@ define <32 x float> @sitofpv32i64v32floa
   ; SSE2: cost of 240 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv32i64v32float
-  ; AVX1: cost of 88 {{.*}} sitofp
+  ; AVX1: cost of 87 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv32i64v32float
-  ; AVX2: cost of 88 {{.*}} sitofp
+  ; AVX2: cost of 87 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv32i64v32float
-  ; AVX512F: cost of 92 {{.*}} sitofp
+  ; AVX512F: cost of 91 {{.*}} sitofp
   %1 = sitofp <32 x i64> %a to <32 x float>
   ret <32 x float> %1
 }
@@ -648,10 +648,10 @@ define <8 x double> @sitofpv8i1v8double(
   ; SSE2: cost of 80 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv8i1v8double
-  ; AVX1: cost of 20 {{.*}} sitofp
+  ; AVX1: cost of 7 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv8i1v8double
-  ; AVX2: cost of 20 {{.*}} sitofp
+  ; AVX2: cost of 7 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv8i1v8double
   ; AVX512F: cost of 4 {{.*}} sitofp
@@ -665,10 +665,10 @@ define <16 x float> @sitofpv16i1v16float
   ; SSE2: cost of 8 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv16i1v16float
-  ; AVX1: cost of 44 {{.*}} sitofp
+  ; AVX1: cost of 17 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv16i1v16float
-  ; AVX2: cost of 44 {{.*}} sitofp
+  ; AVX2: cost of 17 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv16i1v16float
   ; AVX512F: cost of 3 {{.*}} sitofp

Modified: llvm/trunk/test/Analysis/CostModel/X86/uitofp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/uitofp.ll?rev=274642&r1=274641&r2=274642&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/uitofp.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/uitofp.ll Wed Jul  6 12:30:56 2016
@@ -41,10 +41,10 @@ define <8 x double> @uitofpv8i8v8double(
   ; SSE2: cost of 80 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv8i8v8double
-  ; AVX1: cost of 20 {{.*}} uitofp
+  ; AVX1: cost of 5 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv8i8v8double
-  ; AVX2: cost of 20 {{.*}} uitofp
+  ; AVX2: cost of 5 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv8i8v8double
   ; AVX512F: cost of 2 {{.*}} uitofp
@@ -57,13 +57,13 @@ define <16 x double> @uitofpv16i8v16doub
   ; SSE2: cost of 160 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv16i8v16double
-  ; AVX1: cost of 40 {{.*}} uitofp
+  ; AVX1: cost of 11 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv16i8v16double
-  ; AVX2: cost of 40 {{.*}} uitofp
+  ; AVX2: cost of 11 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv16i8v16double
-  ; AVX512F: cost of 44 {{.*}} uitofp
+  ; AVX512F: cost of 5 {{.*}} uitofp
   %1 = uitofp <16 x i8> %a to <16 x double>
   ret <16 x double> %1
 }
@@ -73,13 +73,13 @@ define <32 x double> @uitofpv32i8v32doub
   ; SSE2: cost of 320 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv32i8v32double
-  ; AVX1: cost of 80 {{.*}} uitofp
+  ; AVX1: cost of 23 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv32i8v32double
-  ; AVX2: cost of 80 {{.*}} uitofp
+  ; AVX2: cost of 23 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv32i8v32double
-  ; AVX512F: cost of 88 {{.*}} uitofp
+  ; AVX512F: cost of 11 {{.*}} uitofp
   %1 = uitofp <32 x i8> %a to <32 x double>
   ret <32 x double> %1
 }
@@ -121,10 +121,10 @@ define <8 x double> @uitofpv8i16v8double
   ; SSE2: cost of 80 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv8i16v8double
-  ; AVX1: cost of 20 {{.*}} uitofp
+  ; AVX1: cost of 5 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv8i16v8double
-  ; AVX2: cost of 20 {{.*}} uitofp
+  ; AVX2: cost of 5 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv8i16v8double
   ; AVX512F: cost of 2 {{.*}} uitofp
@@ -137,13 +137,13 @@ define <16 x double> @uitofpv16i16v16dou
   ; SSE2: cost of 160 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv16i16v16double
-  ; AVX1: cost of 40 {{.*}} uitofp
+  ; AVX1: cost of 11 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv16i16v16double
-  ; AVX2: cost of 40 {{.*}} uitofp
+  ; AVX2: cost of 11 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv16i16v16double
-  ; AVX512F: cost of 44 {{.*}} uitofp
+  ; AVX512F: cost of 5 {{.*}} uitofp
   %1 = uitofp <16 x i16> %a to <16 x double>
   ret <16 x double> %1
 }
@@ -153,13 +153,13 @@ define <32 x double> @uitofpv32i16v32dou
   ; SSE2: cost of 320 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv32i16v32double
-  ; AVX1: cost of 80 {{.*}} uitofp
+  ; AVX1: cost of 23 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv32i16v32double
-  ; AVX2: cost of 80 {{.*}} uitofp
+  ; AVX2: cost of 23 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv32i16v32double
-  ; AVX512F: cost of 88 {{.*}} uitofp
+  ; AVX512F: cost of 11 {{.*}} uitofp
   %1 = uitofp <32 x i16> %a to <32 x double>
   ret <32 x double> %1
 }
@@ -201,10 +201,10 @@ define <8 x double> @uitofpv8i32v8double
   ; SSE2: cost of 80 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv8i32v8double
-  ; AVX1: cost of 20 {{.*}} uitofp
+  ; AVX1: cost of 13 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv8i32v8double
-  ; AVX2: cost of 20 {{.*}} uitofp
+  ; AVX2: cost of 13 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv8i32v8double
   ; AVX512F: cost of 1 {{.*}} uitofp
@@ -217,13 +217,13 @@ define <16 x double> @uitofpv16i32v16dou
   ; SSE2: cost of 160 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv16i32v16double
-  ; AVX1: cost of 40 {{.*}} uitofp
+  ; AVX1: cost of 27 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv16i32v16double
-  ; AVX2: cost of 40 {{.*}} uitofp
+  ; AVX2: cost of 27 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv16i32v16double
-  ; AVX512F: cost of 44 {{.*}} uitofp
+  ; AVX512F: cost of 3 {{.*}} uitofp
   %1 = uitofp <16 x i32> %a to <16 x double>
   ret <16 x double> %1
 }
@@ -233,13 +233,13 @@ define <32 x double> @uitofpv32i32v32dou
   ; SSE2: cost of 320 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv32i32v32double
-  ; AVX1: cost of 80 {{.*}} uitofp
+  ; AVX1: cost of 55 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv32i32v32double
-  ; AVX2: cost of 80 {{.*}} uitofp
+  ; AVX2: cost of 55 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv32i32v32double
-  ; AVX512F: cost of 88 {{.*}} uitofp
+  ; AVX512F: cost of 7 {{.*}} uitofp
   %1 = uitofp <32 x i32> %a to <32 x double>
   ret <32 x double> %1
 }
@@ -257,7 +257,7 @@ define <2 x double> @uitofpv2i64v2double
   ; AVX512F-LABEL: uitofpv2i64v2double
   ; AVX512F: cost of 5 {{.*}} uitofp
   ;
-  ; AVX512DQ: uitofpv2i64v2double
+  ; AVX512DQ-LABEL: uitofpv2i64v2double
   ; AVX512DQ: cost of 1 {{.*}} uitofp
   %1 = uitofp <2 x i64> %a to <2 x double>
   ret <2 x double> %1
@@ -276,7 +276,7 @@ define <4 x double> @uitofpv4i64v4double
   ; AVX512F-LABEL: uitofpv4i64v4double
   ; AVX512F: cost of 12 {{.*}} uitofp
   ;
-  ; AVX512DQ: uitofpv4i64v4double
+  ; AVX512DQ-LABEL: uitofpv4i64v4double
   ; AVX512DQ: cost of 1 {{.*}} uitofp
   %1 = uitofp <4 x i64> %a to <4 x double>
   ret <4 x double> %1
@@ -287,15 +287,15 @@ define <8 x double> @uitofpv8i64v8double
   ; SSE2: cost of 80 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv8i64v8double
-  ; AVX1: cost of 20 {{.*}} uitofp
+  ; AVX1: cost of 81 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv8i64v8double
-  ; AVX2: cost of 20 {{.*}} uitofp
+  ; AVX2: cost of 81 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv8i64v8double
   ; AVX512F: cost of 26 {{.*}} uitofp
   ;
-  ; AVX512DQ: uitofpv8i64v8double
+  ; AVX512DQ-LABEL: uitofpv8i64v8double
   ; AVX512DQ: cost of 1 {{.*}} uitofp
   %1 = uitofp <8 x i64> %a to <8 x double>
   ret <8 x double> %1
@@ -306,16 +306,16 @@ define <16 x double> @uitofpv16i64v16dou
   ; SSE2: cost of 160 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv16i64v16double
-  ; AVX1: cost of 40 {{.*}} uitofp
+  ; AVX1: cost of 163 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv16i64v16double
-  ; AVX2: cost of 40 {{.*}} uitofp
+  ; AVX2: cost of 163 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv16i64v16double
-  ; AVX512F: cost of 44 {{.*}} uitofp
+  ; AVX512F: cost of 53 {{.*}} uitofp
   ;
-  ; AVX512DQ: uitofpv16i64v16double
-  ; AVX512DQ: cost of 44 {{.*}} uitofp
+  ; AVX512DQ-LABEL: uitofpv16i64v16double
+  ; AVX512DQ: cost of 3 {{.*}} uitofp
   %1 = uitofp <16 x i64> %a to <16 x double>
   ret <16 x double> %1
 }
@@ -325,16 +325,16 @@ define <32 x double> @uitofpv32i64v32dou
   ; SSE2: cost of 320 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv32i64v32double
-  ; AVX1: cost of 80 {{.*}} uitofp
+  ; AVX1: cost of 327 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv32i64v32double
-  ; AVX2: cost of 80 {{.*}} uitofp
+  ; AVX2: cost of 327 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv32i64v32double
-  ; AVX512F: cost of 88 {{.*}} uitofp
+  ; AVX512F: cost of 107 {{.*}} uitofp
   ;
-  ; AVX512DQ: uitofpv32i64v32double
-  ; AVX512DQ: cost of 88 {{.*}} uitofp
+  ; AVX512DQ-LABEL: uitofpv32i64v32double
+  ; AVX512DQ: cost of 7 {{.*}} uitofp
   %1 = uitofp <32 x i64> %a to <32 x double>
   ret <32 x double> %1
 }
@@ -392,10 +392,10 @@ define <16 x float> @uitofpv16i8v16float
   ; SSE2: cost of 8 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv16i8v16float
-  ; AVX1: cost of 44 {{.*}} uitofp
+  ; AVX1: cost of 11 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv16i8v16float
-  ; AVX2: cost of 44 {{.*}} uitofp
+  ; AVX2: cost of 11 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv16i8v16float
   ; AVX512F: cost of 2 {{.*}} uitofp
@@ -408,13 +408,13 @@ define <32 x float> @uitofpv32i8v32float
   ; SSE2: cost of 16 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv32i8v32float
-  ; AVX1: cost of 88 {{.*}} uitofp
+  ; AVX1: cost of 23 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv32i8v32float
-  ; AVX2: cost of 88 {{.*}} uitofp
+  ; AVX2: cost of 23 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv32i8v32float
-  ; AVX512F: cost of 92 {{.*}} uitofp
+  ; AVX512F: cost of 5 {{.*}} uitofp
   %1 = uitofp <32 x i8> %a to <32 x float>
   ret <32 x float> %1
 }
@@ -472,10 +472,10 @@ define <16 x float> @uitofpv16i16v16floa
   ; SSE2: cost of 30 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv16i16v16float
-  ; AVX1: cost of 44 {{.*}} uitofp
+  ; AVX1: cost of 11 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv16i16v16float
-  ; AVX2: cost of 44 {{.*}} uitofp
+  ; AVX2: cost of 11 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv16i16v16float
   ; AVX512F: cost of 2 {{.*}} uitofp
@@ -488,13 +488,13 @@ define <32 x float> @uitofpv32i16v32floa
   ; SSE2: cost of 60 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv32i16v32float
-  ; AVX1: cost of 88 {{.*}} uitofp
+  ; AVX1: cost of 23 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv32i16v32float
-  ; AVX2: cost of 88 {{.*}} uitofp
+  ; AVX2: cost of 23 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv32i16v32float
-  ; AVX512F: cost of 92 {{.*}} uitofp
+  ; AVX512F: cost of 5 {{.*}} uitofp
   %1 = uitofp <32 x i16> %a to <32 x float>
   ret <32 x float> %1
 }
@@ -552,10 +552,10 @@ define <16 x float> @uitofpv16i32v16floa
   ; SSE2: cost of 32 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv16i32v16float
-  ; AVX1: cost of 44 {{.*}} uitofp
+  ; AVX1: cost of 19 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv16i32v16float
-  ; AVX2: cost of 44 {{.*}} uitofp
+  ; AVX2: cost of 17 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv16i32v16float
   ; AVX512F: cost of 1 {{.*}} uitofp
@@ -568,13 +568,13 @@ define <32 x float> @uitofpv32i32v32floa
   ; SSE2: cost of 64 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv32i32v32float
-  ; AVX1: cost of 88 {{.*}} uitofp
+  ; AVX1: cost of 39 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv32i32v32float
-  ; AVX2: cost of 88 {{.*}} uitofp
+  ; AVX2: cost of 35 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv32i32v32float
-  ; AVX512F: cost of 92 {{.*}} uitofp
+  ; AVX512F: cost of 3 {{.*}} uitofp
   %1 = uitofp <32 x i32> %a to <32 x float>
   ret <32 x float> %1
 }
@@ -616,10 +616,10 @@ define <8 x float> @uitofpv8i64v8float(<
   ; SSE2: cost of 60 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv8i64v8float
-  ; AVX1: cost of 22 {{.*}} uitofp
+  ; AVX1: cost of 21 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv8i64v8float
-  ; AVX2: cost of 22 {{.*}} uitofp
+  ; AVX2: cost of 21 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv8i64v8float
   ; AVX512F: cost of 22 {{.*}} uitofp
@@ -632,13 +632,13 @@ define <16 x float> @uitofpv16i64v16floa
   ; SSE2: cost of 120 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv16i64v16float
-  ; AVX1: cost of 44 {{.*}} uitofp
+  ; AVX1: cost of 43 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv16i64v16float
-  ; AVX2: cost of 44 {{.*}} uitofp
+  ; AVX2: cost of 43 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv16i64v16float
-  ; AVX512F: cost of 46 {{.*}} uitofp
+  ; AVX512F: cost of 45 {{.*}} uitofp
   %1 = uitofp <16 x i64> %a to <16 x float>
   ret <16 x float> %1
 }
@@ -648,13 +648,13 @@ define <32 x float> @uitofpv32i64v32floa
   ; SSE2: cost of 240 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv32i64v32float
-  ; AVX1: cost of 88 {{.*}} uitofp
+  ; AVX1: cost of 87 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv32i64v32float
-  ; AVX2: cost of 88 {{.*}} uitofp
+  ; AVX2: cost of 87 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv32i64v32float
-  ; AVX512F: cost of 92 {{.*}} uitofp
+  ; AVX512F: cost of 91 {{.*}} uitofp
   %1 = uitofp <32 x i64> %a to <32 x float>
   ret <32 x float> %1
 }

Modified: llvm/trunk/test/Transforms/LoopVectorize/X86/gather_scatter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/X86/gather_scatter.ll?rev=274642&r1=274641&r2=274642&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/X86/gather_scatter.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/X86/gather_scatter.ll Wed Jul  6 12:30:56 2016
@@ -17,9 +17,9 @@ target triple = "x86_64-pc_linux"
 ;}
 
 ;AVX512-LABEL: @foo1
-;AVX512:  llvm.masked.load.v8i32
-;AVX512: llvm.masked.gather.v8f32
-;AVX512: llvm.masked.store.v8f32
+;AVX512: llvm.masked.load.v16i32
+;AVX512: llvm.masked.gather.v16f32
+;AVX512: llvm.masked.store.v16f32
 ;AVX512: ret void
 
 ; Function Attrs: nounwind uwtable




More information about the llvm-commits mailing list