[PATCH] Include legalization cost when computing scalarization cost. When analyzing vectors of element type that require legalization, the legalization cost must be included to get an accurate estimation of the total cost of the scalarized vector. The...

Nadav Rotem nrotem at apple.com
Thu Mar 6 14:26:55 PST 2014


+    }
+    if (Extract) {
+      Cost += LT.first *
+              TopTTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);

I think that the type in this function call should be LT.second because we are evaluating the cost of the legalized type.



On Mar 6, 2014, at 2:23 PM, Raul Silvera <rsilvera at google.com> wrote:

> Hi hfinkel, nadav,
> 
> ...inaccurate cost triggered some unprofitable SLP vectorization on 32-bit x86.
> 
> Include legalization cost when computing scalarization cost
> 
> http://llvm-reviews.chandlerc.com/D2992
> 
> Files:
>  lib/CodeGen/BasicTargetTransformInfo.cpp
>  test/Analysis/CostModel/ARM/cast.ll
> 
> Index: lib/CodeGen/BasicTargetTransformInfo.cpp
> ===================================================================
> --- lib/CodeGen/BasicTargetTransformInfo.cpp
> +++ lib/CodeGen/BasicTargetTransformInfo.cpp
> @@ -20,7 +20,6 @@
> #include "llvm/Analysis/TargetTransformInfo.h"
> #include "llvm/Target/TargetLowering.h"
> #include <utility>
> -
> using namespace llvm;
> 
> namespace {
> @@ -206,12 +205,19 @@
>                                             bool Extract) const {
>   assert (Ty->isVectorTy() && "Can only scalarize vectors");
>   unsigned Cost = 0;
> -
> -  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
> -    if (Insert)
> -      Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
> -    if (Extract)
> -      Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
> +  VectorType *VecTy = cast<VectorType>(Ty);
> +  std::pair<unsigned, MVT> LT =
> +      getTLI()->getTypeLegalizationCost(VecTy->getElementType());
> +
> +  for (int i = 0, e = VecTy->getVectorNumElements(); i < e; ++i) {
> +    if (Insert) {
> +      Cost += LT.first *
> +              TopTTI->getVectorInstrCost(Instruction::InsertElement, VecTy, i);
> +    }
> +    if (Extract) {
> +      Cost += LT.first *
> +              TopTTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);
> +    }
>   }
> 
>   return Cost;
> Index: test/Analysis/CostModel/ARM/cast.ll
> ===================================================================
> --- test/Analysis/CostModel/ARM/cast.ll
> +++ test/Analysis/CostModel/ARM/cast.ll
> @@ -221,9 +221,9 @@
>   %r96 = fptoui <2 x float> undef to <2 x i32>
>   ; CHECK: cost of 1 {{.*}} fptosi
>   %r97 = fptosi <2 x float> undef to <2 x i32>
> -  ; CHECK: cost of 24 {{.*}} fptoui
> +  ; CHECK: cost of 28 {{.*}} fptoui
>   %r98 = fptoui <2 x float> undef to <2 x i64>
> -  ; CHECK: cost of 24 {{.*}} fptosi
> +  ; CHECK: cost of 28 {{.*}} fptosi
>   %r99 = fptosi <2 x float> undef to <2 x i64>
> 
>   ; CHECK: cost of 8 {{.*}} fptoui
> @@ -242,9 +242,9 @@
>   %r106 = fptoui <2 x double> undef to <2 x i32>
>   ; CHECK: cost of 2 {{.*}} fptosi
>   %r107 = fptosi <2 x double> undef to <2 x i32>
> -  ; CHECK: cost of 24 {{.*}} fptoui
> +  ; CHECK: cost of 28 {{.*}} fptoui
>   %r108 = fptoui <2 x double> undef to <2 x i64>
> -  ; CHECK: cost of 24 {{.*}} fptosi
> +  ; CHECK: cost of 28 {{.*}} fptosi
>   %r109 = fptosi <2 x double> undef to <2 x i64>
> 
>   ; CHECK: cost of 16 {{.*}} fptoui
> @@ -263,9 +263,9 @@
>   %r116 = fptoui <4 x float> undef to <4 x i32>
>   ; CHECK: cost of 1 {{.*}} fptosi
>   %r117 = fptosi <4 x float> undef to <4 x i32>
> -  ; CHECK: cost of 48 {{.*}} fptoui
> +  ; CHECK: cost of 56 {{.*}} fptoui
>   %r118 = fptoui <4 x float> undef to <4 x i64>
> -  ; CHECK: cost of 48 {{.*}} fptosi
> +  ; CHECK: cost of 56 {{.*}} fptosi
>   %r119 = fptosi <4 x float> undef to <4 x i64>
> 
>   ; CHECK: cost of 16 {{.*}} fptoui
> @@ -284,9 +284,9 @@
>   %r126 = fptoui <4 x double> undef to <4 x i32>
>   ; CHECK: cost of 16 {{.*}} fptosi
>   %r127 = fptosi <4 x double> undef to <4 x i32>
> -  ; CHECK: cost of 48 {{.*}} fptoui
> +  ; CHECK: cost of 56 {{.*}} fptoui
>   %r128 = fptoui <4 x double> undef to <4 x i64>
> -  ; CHECK: cost of 48 {{.*}} fptosi
> +  ; CHECK: cost of 56 {{.*}} fptosi
>   %r129 = fptosi <4 x double> undef to <4 x i64>
> 
>   ; CHECK: cost of 32 {{.*}} fptoui
> @@ -305,9 +305,9 @@
>   %r136 = fptoui <8 x float> undef to <8 x i32>
>   ; CHECK: cost of 2 {{.*}} fptosi
>   %r137 = fptosi <8 x float> undef to <8 x i32>
> -  ; CHECK: cost of 96 {{.*}} fptoui
> +  ; CHECK: cost of 112 {{.*}} fptoui
>   %r138 = fptoui <8 x float> undef to <8 x i64>
> -  ; CHECK: cost of 96 {{.*}} fptosi
> +  ; CHECK: cost of 112 {{.*}} fptosi
>   %r139 = fptosi <8 x float> undef to <8 x i64>
> 
>   ; CHECK: cost of 32 {{.*}} fptoui
> @@ -326,9 +326,9 @@
>   %r146 = fptoui <8 x double> undef to <8 x i32>
>   ; CHECK: cost of 32 {{.*}} fptosi
>   %r147 = fptosi <8 x double> undef to <8 x i32>
> -  ; CHECK: cost of 96 {{.*}} fptoui
> +  ; CHECK: cost of 112 {{.*}} fptoui
>   %r148 = fptoui <8 x double> undef to <8 x i64>
> -  ; CHECK: cost of 96 {{.*}} fptosi
> +  ; CHECK: cost of 112 {{.*}} fptosi
>   %r149 = fptosi <8 x double> undef to <8 x i64>
> 
>   ; CHECK: cost of 64 {{.*}} fptoui
> @@ -347,9 +347,9 @@
>   %r156 = fptoui <16 x float> undef to <16 x i32>
>   ; CHECK: cost of 4 {{.*}} fptosi
>   %r157 = fptosi <16 x float> undef to <16 x i32>
> -  ; CHECK: cost of 192 {{.*}} fptoui
> +  ; CHECK: cost of 224 {{.*}} fptoui
>   %r158 = fptoui <16 x float> undef to <16 x i64>
> -  ; CHECK: cost of 192 {{.*}} fptosi
> +  ; CHECK: cost of 224 {{.*}} fptosi
>   %r159 = fptosi <16 x float> undef to <16 x i64>
> 
>   ; CHECK: cost of 64 {{.*}} fptoui
> @@ -368,9 +368,9 @@
>   %r166 = fptoui <16 x double> undef to <16 x i32>
>   ; CHECK: cost of 64 {{.*}} fptosi
>   %r167 = fptosi <16 x double> undef to <16 x i32>
> -  ; CHECK: cost of 192 {{.*}} fptoui
> +  ; CHECK: cost of 224 {{.*}} fptoui
>   %r168 = fptoui <16 x double> undef to <16 x i64>
> -  ; CHECK: cost of 192 {{.*}} fptosi
> +  ; CHECK: cost of 224 {{.*}} fptosi
>   %r169 = fptosi <16 x double> undef to <16 x i64>
> 
>   ; CHECK: cost of 8 {{.*}} uitofp
> <D2992.1.patch>




More information about the llvm-commits mailing list