[llvm] r203509 - When analyzing vectors of element type that require legalization,

Raul E. Silvera rsilvera at google.com
Mon Mar 10 15:59:13 PDT 2014


Author: rsilvera
Date: Mon Mar 10 17:59:13 2014
New Revision: 203509

URL: http://llvm.org/viewvc/llvm-project?rev=203509&view=rev
Log:
When analyzing vectors of element type that require legalization,
the legalization cost must be included to get an accurate
estimation of the total cost of the scalarized vector.
The inaccurate cost triggered unprofitable SLP vectorization on
32-bit X86.

Summary:
Include legalization overhead when computing scalarization cost

Reviewers: hfinkel, nadav

CC: chandlerc, rnk, llvm-commits

Differential Revision: http://llvm-reviews.chandlerc.com/D2992

Added:
    llvm/trunk/test/Analysis/CostModel/X86/scalarize.ll
Modified:
    llvm/trunk/lib/CodeGen/BasicTargetTransformInfo.cpp
    llvm/trunk/test/Analysis/CostModel/ARM/cast.ll

Modified: llvm/trunk/lib/CodeGen/BasicTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/BasicTargetTransformInfo.cpp?rev=203509&r1=203508&r2=203509&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/BasicTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/CodeGen/BasicTargetTransformInfo.cpp Mon Mar 10 17:59:13 2014
@@ -20,7 +20,6 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include <utility>
-
 using namespace llvm;
 
 namespace {
@@ -405,7 +404,9 @@ unsigned BasicTTI::getCmpSelInstrCost(un
 
 unsigned BasicTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
                                       unsigned Index) const {
-  return 1;
+  std::pair<unsigned, MVT> LT =  getTLI()->getTypeLegalizationCost(Val->getScalarType());
+
+  return LT.first;
 }
 
 unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src,

Modified: llvm/trunk/test/Analysis/CostModel/ARM/cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/ARM/cast.ll?rev=203509&r1=203508&r2=203509&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/ARM/cast.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/ARM/cast.ll Mon Mar 10 17:59:13 2014
@@ -221,9 +221,9 @@ define i32 @casts() {
   %r96 = fptoui <2 x float> undef to <2 x i32>
   ; CHECK: cost of 1 {{.*}} fptosi
   %r97 = fptosi <2 x float> undef to <2 x i32>
-  ; CHECK: cost of 24 {{.*}} fptoui
+  ; CHECK: cost of 28 {{.*}} fptoui
   %r98 = fptoui <2 x float> undef to <2 x i64>
-  ; CHECK: cost of 24 {{.*}} fptosi
+  ; CHECK: cost of 28 {{.*}} fptosi
   %r99 = fptosi <2 x float> undef to <2 x i64>
 
   ; CHECK: cost of 8 {{.*}} fptoui
@@ -242,9 +242,9 @@ define i32 @casts() {
   %r106 = fptoui <2 x double> undef to <2 x i32>
   ; CHECK: cost of 2 {{.*}} fptosi
   %r107 = fptosi <2 x double> undef to <2 x i32>
-  ; CHECK: cost of 24 {{.*}} fptoui
+  ; CHECK: cost of 28 {{.*}} fptoui
   %r108 = fptoui <2 x double> undef to <2 x i64>
-  ; CHECK: cost of 24 {{.*}} fptosi
+  ; CHECK: cost of 28 {{.*}} fptosi
   %r109 = fptosi <2 x double> undef to <2 x i64>
 
   ; CHECK: cost of 16 {{.*}} fptoui
@@ -263,9 +263,9 @@ define i32 @casts() {
   %r116 = fptoui <4 x float> undef to <4 x i32>
   ; CHECK: cost of 1 {{.*}} fptosi
   %r117 = fptosi <4 x float> undef to <4 x i32>
-  ; CHECK: cost of 48 {{.*}} fptoui
+  ; CHECK: cost of 56 {{.*}} fptoui
   %r118 = fptoui <4 x float> undef to <4 x i64>
-  ; CHECK: cost of 48 {{.*}} fptosi
+  ; CHECK: cost of 56 {{.*}} fptosi
   %r119 = fptosi <4 x float> undef to <4 x i64>
 
   ; CHECK: cost of 16 {{.*}} fptoui
@@ -284,9 +284,9 @@ define i32 @casts() {
   %r126 = fptoui <4 x double> undef to <4 x i32>
   ; CHECK: cost of 16 {{.*}} fptosi
   %r127 = fptosi <4 x double> undef to <4 x i32>
-  ; CHECK: cost of 48 {{.*}} fptoui
+  ; CHECK: cost of 56 {{.*}} fptoui
   %r128 = fptoui <4 x double> undef to <4 x i64>
-  ; CHECK: cost of 48 {{.*}} fptosi
+  ; CHECK: cost of 56 {{.*}} fptosi
   %r129 = fptosi <4 x double> undef to <4 x i64>
 
   ; CHECK: cost of 32 {{.*}} fptoui
@@ -305,9 +305,9 @@ define i32 @casts() {
   %r136 = fptoui <8 x float> undef to <8 x i32>
   ; CHECK: cost of 2 {{.*}} fptosi
   %r137 = fptosi <8 x float> undef to <8 x i32>
-  ; CHECK: cost of 96 {{.*}} fptoui
+  ; CHECK: cost of 112 {{.*}} fptoui
   %r138 = fptoui <8 x float> undef to <8 x i64>
-  ; CHECK: cost of 96 {{.*}} fptosi
+  ; CHECK: cost of 112 {{.*}} fptosi
   %r139 = fptosi <8 x float> undef to <8 x i64>
 
   ; CHECK: cost of 32 {{.*}} fptoui
@@ -326,9 +326,9 @@ define i32 @casts() {
   %r146 = fptoui <8 x double> undef to <8 x i32>
   ; CHECK: cost of 32 {{.*}} fptosi
   %r147 = fptosi <8 x double> undef to <8 x i32>
-  ; CHECK: cost of 96 {{.*}} fptoui
+  ; CHECK: cost of 112 {{.*}} fptoui
   %r148 = fptoui <8 x double> undef to <8 x i64>
-  ; CHECK: cost of 96 {{.*}} fptosi
+  ; CHECK: cost of 112 {{.*}} fptosi
   %r149 = fptosi <8 x double> undef to <8 x i64>
 
   ; CHECK: cost of 64 {{.*}} fptoui
@@ -347,9 +347,9 @@ define i32 @casts() {
   %r156 = fptoui <16 x float> undef to <16 x i32>
   ; CHECK: cost of 4 {{.*}} fptosi
   %r157 = fptosi <16 x float> undef to <16 x i32>
-  ; CHECK: cost of 192 {{.*}} fptoui
+  ; CHECK: cost of 224 {{.*}} fptoui
   %r158 = fptoui <16 x float> undef to <16 x i64>
-  ; CHECK: cost of 192 {{.*}} fptosi
+  ; CHECK: cost of 224 {{.*}} fptosi
   %r159 = fptosi <16 x float> undef to <16 x i64>
 
   ; CHECK: cost of 64 {{.*}} fptoui
@@ -368,9 +368,9 @@ define i32 @casts() {
   %r166 = fptoui <16 x double> undef to <16 x i32>
   ; CHECK: cost of 64 {{.*}} fptosi
   %r167 = fptosi <16 x double> undef to <16 x i32>
-  ; CHECK: cost of 192 {{.*}} fptoui
+  ; CHECK: cost of 224 {{.*}} fptoui
   %r168 = fptoui <16 x double> undef to <16 x i64>
-  ; CHECK: cost of 192 {{.*}} fptosi
+  ; CHECK: cost of 224 {{.*}} fptosi
   %r169 = fptosi <16 x double> undef to <16 x i64>
 
   ; CHECK: cost of 8 {{.*}} uitofp

Added: llvm/trunk/test/Analysis/CostModel/X86/scalarize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/scalarize.ll?rev=203509&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/scalarize.ll (added)
+++ llvm/trunk/test/Analysis/CostModel/X86/scalarize.ll Mon Mar 10 17:59:13 2014
@@ -0,0 +1,41 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=i386 -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK32
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK64
+
+; Test vector scalarization costs.
+; RUN: llc < %s -march=x86 -mcpu=i386
+; RUN: llc < %s -march=x86 -mcpu=yonah
+
+%i4 = type <4 x i32>
+%i8 = type <2 x i64>
+
+;;; TEST HANDLING OF VARIOUS VECTOR SIZES
+
+declare %i4 @llvm.bswap.v4i32(%i4)
+declare %i8 @llvm.bswap.v2i64(%i8)
+
+declare %i4 @llvm.ctpop.v4i32(%i4)
+declare %i8 @llvm.ctpop.v2i64(%i8)
+
+; CHECK32-LABEL: test_scalarized_intrinsics
+; CHECK64-LABEL: test_scalarized_intrinsics
+define void @test_scalarized_intrinsics() {
+        %r1 = add %i8 undef, undef
+
+; CHECK32: cost of 12 {{.*}}bswap.v4i32
+; CHECK64: cost of 12 {{.*}}bswap.v4i32
+        %r2 = call %i4 @llvm.bswap.v4i32(%i4 undef)
+; CHECK32: cost of 10 {{.*}}bswap.v2i64
+; CHECK64: cost of 6 {{.*}}bswap.v2i64
+        %r3 = call %i8 @llvm.bswap.v2i64(%i8 undef)
+
+; CHECK32: cost of 12 {{.*}}ctpop.v4i32
+; CHECK64: cost of 12 {{.*}}ctpop.v4i32
+        %r4 = call %i4 @llvm.ctpop.v4i32(%i4 undef)
+; CHECK32: cost of 10 {{.*}}ctpop.v2i64
+; CHECK64: cost of 6 {{.*}}ctpop.v2i64
+        %r5 = call %i8 @llvm.ctpop.v2i64(%i8 undef)
+
+; CHECK32: ret
+; CHECK64: ret
+        ret void
+}





More information about the llvm-commits mailing list