[PATCH] Include legalization cost when computing scalarization cost. When analyzing vectors of element type that require legalization, the legalization cost must be included to get an accurate estimation of the total cost of the scalarized vector. The...
Raul Silvera
rsilvera at google.com
Thu Mar 6 17:11:29 PST 2014
Added testcase to check for scalarization cost of bswap and ctpop
on 32-bit and 64-bit.
Hi hfinkel, nadav,
http://llvm-reviews.chandlerc.com/D2992
CHANGE SINCE LAST DIFF
http://llvm-reviews.chandlerc.com/D2992?vs=7611&id=7620#toc
Files:
lib/CodeGen/BasicTargetTransformInfo.cpp
test/Analysis/CostModel/ARM/cast.ll
test/Analysis/CostModel/X86/scalarize.ll
Index: lib/CodeGen/BasicTargetTransformInfo.cpp
===================================================================
--- lib/CodeGen/BasicTargetTransformInfo.cpp
+++ lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -20,7 +20,6 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Target/TargetLowering.h"
#include <utility>
-
using namespace llvm;
namespace {
@@ -206,12 +205,19 @@
bool Extract) const {
assert (Ty->isVectorTy() && "Can only scalarize vectors");
unsigned Cost = 0;
-
- for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
- if (Insert)
- Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
- if (Extract)
- Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
+ VectorType *VecTy = cast<VectorType>(Ty);
+ std::pair<unsigned, MVT> LT =
+ getTLI()->getTypeLegalizationCost(VecTy->getElementType());
+
+ for (int i = 0, e = VecTy->getVectorNumElements(); i < e; ++i) {
+ if (Insert) {
+ Cost += LT.first *
+ TopTTI->getVectorInstrCost(Instruction::InsertElement, VecTy, i);
+ }
+ if (Extract) {
+ Cost += LT.first *
+ TopTTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);
+ }
}
return Cost;
Index: test/Analysis/CostModel/ARM/cast.ll
===================================================================
--- test/Analysis/CostModel/ARM/cast.ll
+++ test/Analysis/CostModel/ARM/cast.ll
@@ -221,9 +221,9 @@
%r96 = fptoui <2 x float> undef to <2 x i32>
; CHECK: cost of 1 {{.*}} fptosi
%r97 = fptosi <2 x float> undef to <2 x i32>
- ; CHECK: cost of 24 {{.*}} fptoui
+ ; CHECK: cost of 28 {{.*}} fptoui
%r98 = fptoui <2 x float> undef to <2 x i64>
- ; CHECK: cost of 24 {{.*}} fptosi
+ ; CHECK: cost of 28 {{.*}} fptosi
%r99 = fptosi <2 x float> undef to <2 x i64>
; CHECK: cost of 8 {{.*}} fptoui
@@ -242,9 +242,9 @@
%r106 = fptoui <2 x double> undef to <2 x i32>
; CHECK: cost of 2 {{.*}} fptosi
%r107 = fptosi <2 x double> undef to <2 x i32>
- ; CHECK: cost of 24 {{.*}} fptoui
+ ; CHECK: cost of 28 {{.*}} fptoui
%r108 = fptoui <2 x double> undef to <2 x i64>
- ; CHECK: cost of 24 {{.*}} fptosi
+ ; CHECK: cost of 28 {{.*}} fptosi
%r109 = fptosi <2 x double> undef to <2 x i64>
; CHECK: cost of 16 {{.*}} fptoui
@@ -263,9 +263,9 @@
%r116 = fptoui <4 x float> undef to <4 x i32>
; CHECK: cost of 1 {{.*}} fptosi
%r117 = fptosi <4 x float> undef to <4 x i32>
- ; CHECK: cost of 48 {{.*}} fptoui
+ ; CHECK: cost of 56 {{.*}} fptoui
%r118 = fptoui <4 x float> undef to <4 x i64>
- ; CHECK: cost of 48 {{.*}} fptosi
+ ; CHECK: cost of 56 {{.*}} fptosi
%r119 = fptosi <4 x float> undef to <4 x i64>
; CHECK: cost of 16 {{.*}} fptoui
@@ -284,9 +284,9 @@
%r126 = fptoui <4 x double> undef to <4 x i32>
; CHECK: cost of 16 {{.*}} fptosi
%r127 = fptosi <4 x double> undef to <4 x i32>
- ; CHECK: cost of 48 {{.*}} fptoui
+ ; CHECK: cost of 56 {{.*}} fptoui
%r128 = fptoui <4 x double> undef to <4 x i64>
- ; CHECK: cost of 48 {{.*}} fptosi
+ ; CHECK: cost of 56 {{.*}} fptosi
%r129 = fptosi <4 x double> undef to <4 x i64>
; CHECK: cost of 32 {{.*}} fptoui
@@ -305,9 +305,9 @@
%r136 = fptoui <8 x float> undef to <8 x i32>
; CHECK: cost of 2 {{.*}} fptosi
%r137 = fptosi <8 x float> undef to <8 x i32>
- ; CHECK: cost of 96 {{.*}} fptoui
+ ; CHECK: cost of 112 {{.*}} fptoui
%r138 = fptoui <8 x float> undef to <8 x i64>
- ; CHECK: cost of 96 {{.*}} fptosi
+ ; CHECK: cost of 112 {{.*}} fptosi
%r139 = fptosi <8 x float> undef to <8 x i64>
; CHECK: cost of 32 {{.*}} fptoui
@@ -326,9 +326,9 @@
%r146 = fptoui <8 x double> undef to <8 x i32>
; CHECK: cost of 32 {{.*}} fptosi
%r147 = fptosi <8 x double> undef to <8 x i32>
- ; CHECK: cost of 96 {{.*}} fptoui
+ ; CHECK: cost of 112 {{.*}} fptoui
%r148 = fptoui <8 x double> undef to <8 x i64>
- ; CHECK: cost of 96 {{.*}} fptosi
+ ; CHECK: cost of 112 {{.*}} fptosi
%r149 = fptosi <8 x double> undef to <8 x i64>
; CHECK: cost of 64 {{.*}} fptoui
@@ -347,9 +347,9 @@
%r156 = fptoui <16 x float> undef to <16 x i32>
; CHECK: cost of 4 {{.*}} fptosi
%r157 = fptosi <16 x float> undef to <16 x i32>
- ; CHECK: cost of 192 {{.*}} fptoui
+ ; CHECK: cost of 224 {{.*}} fptoui
%r158 = fptoui <16 x float> undef to <16 x i64>
- ; CHECK: cost of 192 {{.*}} fptosi
+ ; CHECK: cost of 224 {{.*}} fptosi
%r159 = fptosi <16 x float> undef to <16 x i64>
; CHECK: cost of 64 {{.*}} fptoui
@@ -368,9 +368,9 @@
%r166 = fptoui <16 x double> undef to <16 x i32>
; CHECK: cost of 64 {{.*}} fptosi
%r167 = fptosi <16 x double> undef to <16 x i32>
- ; CHECK: cost of 192 {{.*}} fptoui
+ ; CHECK: cost of 224 {{.*}} fptoui
%r168 = fptoui <16 x double> undef to <16 x i64>
- ; CHECK: cost of 192 {{.*}} fptosi
+ ; CHECK: cost of 224 {{.*}} fptosi
%r169 = fptosi <16 x double> undef to <16 x i64>
; CHECK: cost of 8 {{.*}} uitofp
Index: test/Analysis/CostModel/X86/scalarize.ll
===================================================================
--- /dev/null
+++ test/Analysis/CostModel/X86/scalarize.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=i386 -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK32
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK64
+
+; Test vector scalarization costs.
+; RUN: llc < %s -march=x86 -mcpu=i386
+; RUN: llc < %s -march=x86 -mcpu=yonah
+
+%i4 = type <4 x i32>
+%i8 = type <2 x i64>
+
+;;; TEST HANDLING OF VARIOUS VECTOR SIZES
+
+declare %i4 @llvm.bswap.v4i32(%i4)
+declare %i8 @llvm.bswap.v2i64(%i8)
+
+declare %i4 @llvm.ctpop.v4i32(%i4)
+declare %i8 @llvm.ctpop.v2i64(%i8)
+
+; CHECK32-LABEL: test_scalarized_intrinsics
+; CHECK64-LABEL: test_scalarized_intrinsics
+define void @test_scalarized_intrinsics() {
+ %r1 = add %i8 undef, undef
+
+; CHECK32: cost of 12 {{.*}}bswap.v4i32
+; CHECK64: cost of 12 {{.*}}bswap.v4i32
+ %r2 = call %i4 @llvm.bswap.v4i32(%i4 undef)
+; CHECK32: cost of 10 {{.*}}bswap.v2i64
+; CHECK64: cost of 6 {{.*}}bswap.v2i64
+ %r3 = call %i8 @llvm.bswap.v2i64(%i8 undef)
+
+; CHECK32: cost of 12 {{.*}}ctpop.v4i32
+; CHECK64: cost of 12 {{.*}}ctpop.v4i32
+ %r4 = call %i4 @llvm.ctpop.v4i32(%i4 undef)
+; CHECK32: cost of 10 {{.*}}ctpop.v2i64
+; CHECK64: cost of 6 {{.*}}ctpop.v2i64
+ %r5 = call %i8 @llvm.ctpop.v2i64(%i8 undef)
+
+; CHECK32: ret
+; CHECK64: ret
+ ret void
+}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D2992.2.patch
Type: text/x-patch
Size: 6608 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140306/4a056787/attachment.bin>
More information about the llvm-commits
mailing list