[llvm] 8ef67fa - [CostModel][X86] Account for older SSE targets with slow fp->int conversions

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 8 10:08:44 PDT 2021


Author: Simon Pilgrim
Date: 2021-07-08T18:08:24+01:00
New Revision: 8ef67fa9d22952dba578fed318a7cb520c50a09b

URL: https://github.com/llvm/llvm-project/commit/8ef67fa9d22952dba578fed318a7cb520c50a09b
DIFF: https://github.com/llvm/llvm-project/commit/8ef67fa9d22952dba578fed318a7cb520c50a09b.diff

LOG: [CostModel][X86] Account for older SSE targets with slow fp->int conversions

Both the fp->int conversion cost and the xmm->gpr transfer cost tend to be a lot higher on early SSE targets.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/X86/fptosi.ll
    llvm/test/Analysis/CostModel/X86/fptoui.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 33cfa2df53917..d8de5249af302 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2076,9 +2076,15 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
     { ISD::UINT_TO_FP,  MVT::v4f32,  MVT::v4i64, 22 },
     { ISD::UINT_TO_FP,  MVT::v2f64,  MVT::v2i64,  4 },
 
+    { ISD::FP_TO_SINT,  MVT::i32,    MVT::f32,    1 },
+    { ISD::FP_TO_SINT,  MVT::i64,    MVT::f32,    1 },
+    { ISD::FP_TO_SINT,  MVT::i32,    MVT::f64,    1 },
+    { ISD::FP_TO_SINT,  MVT::i64,    MVT::f64,    1 },
     { ISD::FP_TO_SINT,  MVT::v2i8,   MVT::v2f32,  3 },
     { ISD::FP_TO_SINT,  MVT::v2i8,   MVT::v2f64,  3 },
 
+    { ISD::FP_TO_UINT,  MVT::i32,    MVT::f32,    1 },
+    { ISD::FP_TO_UINT,  MVT::i32,    MVT::f64,    1 },
     { ISD::FP_TO_UINT,  MVT::v2i8,   MVT::v2f32,  3 },
     { ISD::FP_TO_UINT,  MVT::v2i8,   MVT::v2f64,  3 },
     { ISD::FP_TO_UINT,  MVT::v4i16,  MVT::v4f32,  2 },
@@ -2115,6 +2121,10 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
     { ISD::UINT_TO_FP,  MVT::v2f64,  MVT::v2i64, 15 },
     { ISD::UINT_TO_FP,  MVT::v4f32,  MVT::v2i64, 18 },
 
+    { ISD::FP_TO_SINT,  MVT::i32,    MVT::f32,    4 },
+    { ISD::FP_TO_SINT,  MVT::i64,    MVT::f32,    4 },
+    { ISD::FP_TO_SINT,  MVT::i32,    MVT::f64,    4 },
+    { ISD::FP_TO_SINT,  MVT::i64,    MVT::f64,    4 },
     { ISD::FP_TO_SINT,  MVT::v2i8,   MVT::v2f32,  4 },
     { ISD::FP_TO_SINT,  MVT::v2i16,  MVT::v2f32,  2 },
     { ISD::FP_TO_SINT,  MVT::v4i8,   MVT::v4f32,  3 },
@@ -2123,7 +2133,9 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
     { ISD::FP_TO_SINT,  MVT::v2i8,   MVT::v2f64,  4 },
     { ISD::FP_TO_SINT,  MVT::v2i32,  MVT::v2f64,  1 },
 
+    { ISD::FP_TO_UINT,  MVT::i32,    MVT::f32,    4 },
     { ISD::FP_TO_UINT,  MVT::i64,    MVT::f32,    4 },
+    { ISD::FP_TO_UINT,  MVT::i32,    MVT::f64,    4 },
     { ISD::FP_TO_UINT,  MVT::i64,    MVT::f64,   15 },
     { ISD::FP_TO_UINT,  MVT::v2i8,   MVT::v2f32,  4 },
     { ISD::FP_TO_UINT,  MVT::v2i8,   MVT::v2f64,  4 },

diff  --git a/llvm/test/Analysis/CostModel/X86/fptosi.ll b/llvm/test/Analysis/CostModel/X86/fptosi.ll
index 4c9aa6cafc8ee..904320b06ee91 100644
--- a/llvm/test/Analysis/CostModel/X86/fptosi.ll
+++ b/llvm/test/Analysis/CostModel/X86/fptosi.ll
@@ -1,21 +1,21 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
-; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
+; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
 ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx  | FileCheck %s --check-prefixes=AVX,AVX1
 ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
 ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
 ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
 ;
-; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SSE,SLM
-; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42
+; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SLM
+; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=SSE42
 ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1
 
 define i32 @fptosi_double_i64(i32 %arg) {
 ; SSE2-LABEL: 'fptosi_double_i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptosi double undef to i64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'fptosi_double_i64'
@@ -61,12 +61,19 @@ define i32 @fptosi_double_i64(i32 %arg) {
 }
 
 define i32 @fptosi_double_i32(i32 %arg) {
-; SSE-LABEL: 'fptosi_double_i32'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE2-LABEL: 'fptosi_double_i32'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = fptosi double undef to i32
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'fptosi_double_i32'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'fptosi_double_i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32
@@ -88,6 +95,13 @@ define i32 @fptosi_double_i32(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SLM-LABEL: 'fptosi_double_i32'
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I32 = fptosi double undef to i32
   %V2I32 = fptosi <2 x double> undef to <2 x i32>
@@ -98,7 +112,7 @@ define i32 @fptosi_double_i32(i32 %arg) {
 
 define i32 @fptosi_double_i16(i32 %arg) {
 ; SSE2-LABEL: 'fptosi_double_i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = fptosi double undef to i16
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
@@ -148,7 +162,7 @@ define i32 @fptosi_double_i16(i32 %arg) {
 
 define i32 @fptosi_double_i8(i32 %arg) {
 ; SSE2-LABEL: 'fptosi_double_i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = fptosi double undef to i8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
@@ -198,11 +212,11 @@ define i32 @fptosi_double_i8(i32 %arg) {
 
 define i32 @fptosi_float_i64(i32 %arg) {
 ; SSE2-LABEL: 'fptosi_float_i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptosi float undef to i64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 116 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'fptosi_float_i64'
@@ -254,13 +268,21 @@ define i32 @fptosi_float_i64(i32 %arg) {
 }
 
 define i32 @fptosi_float_i32(i32 %arg) {
-; SSE-LABEL: 'fptosi_float_i32'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x float> undef to <2 x i32>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE2-LABEL: 'fptosi_float_i32'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = fptosi float undef to i32
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x float> undef to <2 x i32>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'fptosi_float_i32'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x float> undef to <2 x i32>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'fptosi_float_i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32
@@ -285,6 +307,14 @@ define i32 @fptosi_float_i32(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SLM-LABEL: 'fptosi_float_i32'
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x float> undef to <2 x i32>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I32 = fptosi float undef to i32
   %V2I32 = fptosi <2 x float> undef to <2 x i32>
@@ -296,7 +326,7 @@ define i32 @fptosi_float_i32(i32 %arg) {
 
 define i32 @fptosi_float_i16(i32 %arg) {
 ; SSE2-LABEL: 'fptosi_float_i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = fptosi float undef to i16
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
@@ -353,7 +383,7 @@ define i32 @fptosi_float_i16(i32 %arg) {
 
 define i32 @fptosi_float_i8(i32 %arg) {
 ; SSE2-LABEL: 'fptosi_float_i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = fptosi float undef to i8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>

diff  --git a/llvm/test/Analysis/CostModel/X86/fptoui.ll b/llvm/test/Analysis/CostModel/X86/fptoui.ll
index dbe62a3e80314..d09a74b431d22 100644
--- a/llvm/test/Analysis/CostModel/X86/fptoui.ll
+++ b/llvm/test/Analysis/CostModel/X86/fptoui.ll
@@ -1,13 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
-; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
+; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
 ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx  | FileCheck %s --check-prefixes=AVX,AVX1
 ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
 ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
 ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
 ;
-; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SSE,SLM
-; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42
+; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SLM
+; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=SSE42
 ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1
 
 define i32 @fptoui_double_i64(i32 %arg) {
@@ -62,10 +62,10 @@ define i32 @fptoui_double_i64(i32 %arg) {
 
 define i32 @fptoui_double_i32(i32 %arg) {
 ; SSE2-LABEL: 'fptoui_double_i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = fptoui double undef to i32
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'fptoui_double_i32'
@@ -112,7 +112,7 @@ define i32 @fptoui_double_i32(i32 %arg) {
 
 define i32 @fptoui_double_i16(i32 %arg) {
 ; SSE2-LABEL: 'fptoui_double_i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = fptoui double undef to i16
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
@@ -162,7 +162,7 @@ define i32 @fptoui_double_i16(i32 %arg) {
 
 define i32 @fptoui_double_i8(i32 %arg) {
 ; SSE2-LABEL: 'fptoui_double_i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = fptoui double undef to i8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
@@ -268,13 +268,21 @@ define i32 @fptoui_float_i64(i32 %arg) {
 }
 
 define i32 @fptoui_float_i32(i32 %arg) {
-; SSE-LABEL: 'fptoui_float_i32'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE2-LABEL: 'fptoui_float_i32'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = fptoui float undef to i32
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'fptoui_float_i32'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'fptoui_float_i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32
@@ -299,6 +307,14 @@ define i32 @fptoui_float_i32(i32 %arg) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SLM-LABEL: 'fptoui_float_i32'
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I32 = fptoui float undef to i32
   %V2I32 = fptoui <2 x float> undef to <2 x i32>
@@ -310,7 +326,7 @@ define i32 @fptoui_float_i32(i32 %arg) {
 
 define i32 @fptoui_float_i16(i32 %arg) {
 ; SSE2-LABEL: 'fptoui_float_i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = fptoui float undef to i16
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
@@ -367,7 +383,7 @@ define i32 @fptoui_float_i16(i32 %arg) {
 
 define i32 @fptoui_float_i8(i32 %arg) {
 ; SSE2-LABEL: 'fptoui_float_i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = fptoui float undef to i8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>


        


More information about the llvm-commits mailing list