[llvm] r345261 - [CostModel][X86] Add realistic i64 uitofp f64 scalar costs
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 25 05:42:10 PDT 2018
Author: rksimon
Date: Thu Oct 25 05:42:10 2018
New Revision: 345261
URL: http://llvm.org/viewvc/llvm-project?rev=345261&view=rev
Log:
[CostModel][X86] Add realistic i64 uitofp f64 scalar costs
Modified:
llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/trunk/test/Analysis/CostModel/X86/uitofp.ll
llvm/trunk/test/Transforms/SLPVectorizer/X86/uitofp.ll
Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp?rev=345261&r1=345260&r2=345261&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp Thu Oct 25 05:42:10 2018
@@ -1291,6 +1291,8 @@ int X86TTIImpl::getCastInstrCost(unsigne
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 12 },
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 26 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 1 },
+
{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 1 },
@@ -1444,6 +1446,7 @@ int X86TTIImpl::getCastInstrCost(unsigne
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 3 },
{ ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 4 },
};
static const TypeConversionCostTblEntry SSE2ConversionTbl[] = {
@@ -1470,6 +1473,8 @@ int X86TTIImpl::getCastInstrCost(unsigne
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 3 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 6 },
+
{ ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
Modified: llvm/trunk/test/Analysis/CostModel/X86/uitofp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/uitofp.ll?rev=345261&r1=345260&r2=345261&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/uitofp.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/uitofp.ll Thu Oct 25 05:42:10 2018
@@ -120,14 +120,14 @@ define i32 @uitofp_i32_double() {
define i32 @uitofp_i64_double() {
; SSE-LABEL: 'uitofp_i64_double'
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = uitofp i64 undef to double
+; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_i64_f64 = uitofp i64 undef to double
; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i64_v2f64 = uitofp <2 x i64> undef to <2 x double>
; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cvt_v4i64_v4f64 = uitofp <4 x i64> undef to <4 x double>
; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v8i64_v8f64 = uitofp <8 x i64> undef to <8 x double>
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'uitofp_i64_double'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = uitofp i64 undef to double
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_i64_f64 = uitofp i64 undef to double
; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v2i64_v2f64 = uitofp <2 x i64> undef to <2 x double>
; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v4i64_v4f64 = uitofp <4 x i64> undef to <4 x double>
; AVX-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %cvt_v8i64_v8f64 = uitofp <8 x i64> undef to <8 x double>
@@ -148,7 +148,7 @@ define i32 @uitofp_i64_double() {
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; BTVER2-LABEL: 'uitofp_i64_double'
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = uitofp i64 undef to double
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_i64_f64 = uitofp i64 undef to double
; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v2i64_v2f64 = uitofp <2 x i64> undef to <2 x double>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v4i64_v4f64 = uitofp <4 x i64> undef to <4 x double>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %cvt_v8i64_v8f64 = uitofp <8 x i64> undef to <8 x double>
Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/uitofp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/uitofp.ll?rev=345261&r1=345260&r2=345261&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/uitofp.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/uitofp.ll Thu Oct 25 05:42:10 2018
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX1
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=XOP
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -69,20 +69,32 @@ define void @uitofp_4i64_4f64() #0 {
; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
; SSE-NEXT: ret void
;
-; AVX256-LABEL: @uitofp_4i64_4f64(
-; AVX256-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; AVX256-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-; AVX256-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-; AVX256-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
-; AVX256-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to double
-; AVX256-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to double
-; AVX256-NEXT: [[CVT2:%.*]] = uitofp i64 [[LD2]] to double
-; AVX256-NEXT: [[CVT3:%.*]] = uitofp i64 [[LD3]] to double
-; AVX256-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; AVX256-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; AVX256-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-; AVX256-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-; AVX256-NEXT: ret void
+; AVX1-LABEL: @uitofp_4i64_4f64(
+; AVX1-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
+; AVX1-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
+; AVX1-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
+; AVX1-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
+; AVX1-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to double
+; AVX1-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to double
+; AVX1-NEXT: [[CVT2:%.*]] = uitofp i64 [[LD2]] to double
+; AVX1-NEXT: [[CVT3:%.*]] = uitofp i64 [[LD3]] to double
+; AVX1-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
+; AVX1-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; AVX1-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
+; AVX1-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+; AVX1-NEXT: ret void
+;
+; XOP-LABEL: @uitofp_4i64_4f64(
+; XOP-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
+; XOP-NEXT: [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x double>
+; XOP-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
+; XOP-NEXT: ret void
+;
+; AVX2-LABEL: @uitofp_4i64_4f64(
+; AVX2-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
+; AVX2-NEXT: [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x double>
+; AVX2-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
+; AVX2-NEXT: ret void
;
; AVX512-LABEL: @uitofp_4i64_4f64(
; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
@@ -133,32 +145,50 @@ define void @uitofp_8i64_8f64() #0 {
; SSE-NEXT: store double [[CVT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
; SSE-NEXT: ret void
;
-; AVX256-LABEL: @uitofp_8i64_8f64(
-; AVX256-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; AVX256-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-; AVX256-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-; AVX256-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
-; AVX256-NEXT: [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
-; AVX256-NEXT: [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
-; AVX256-NEXT: [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
-; AVX256-NEXT: [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
-; AVX256-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to double
-; AVX256-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to double
-; AVX256-NEXT: [[CVT2:%.*]] = uitofp i64 [[LD2]] to double
-; AVX256-NEXT: [[CVT3:%.*]] = uitofp i64 [[LD3]] to double
-; AVX256-NEXT: [[CVT4:%.*]] = uitofp i64 [[LD4]] to double
-; AVX256-NEXT: [[CVT5:%.*]] = uitofp i64 [[LD5]] to double
-; AVX256-NEXT: [[CVT6:%.*]] = uitofp i64 [[LD6]] to double
-; AVX256-NEXT: [[CVT7:%.*]] = uitofp i64 [[LD7]] to double
-; AVX256-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; AVX256-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; AVX256-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-; AVX256-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-; AVX256-NEXT: store double [[CVT4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
-; AVX256-NEXT: store double [[CVT5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-; AVX256-NEXT: store double [[CVT6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
-; AVX256-NEXT: store double [[CVT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
-; AVX256-NEXT: ret void
+; AVX1-LABEL: @uitofp_8i64_8f64(
+; AVX1-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
+; AVX1-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
+; AVX1-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
+; AVX1-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
+; AVX1-NEXT: [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
+; AVX1-NEXT: [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
+; AVX1-NEXT: [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
+; AVX1-NEXT: [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
+; AVX1-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to double
+; AVX1-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to double
+; AVX1-NEXT: [[CVT2:%.*]] = uitofp i64 [[LD2]] to double
+; AVX1-NEXT: [[CVT3:%.*]] = uitofp i64 [[LD3]] to double
+; AVX1-NEXT: [[CVT4:%.*]] = uitofp i64 [[LD4]] to double
+; AVX1-NEXT: [[CVT5:%.*]] = uitofp i64 [[LD5]] to double
+; AVX1-NEXT: [[CVT6:%.*]] = uitofp i64 [[LD6]] to double
+; AVX1-NEXT: [[CVT7:%.*]] = uitofp i64 [[LD7]] to double
+; AVX1-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
+; AVX1-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; AVX1-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
+; AVX1-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+; AVX1-NEXT: store double [[CVT4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
+; AVX1-NEXT: store double [[CVT5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
+; AVX1-NEXT: store double [[CVT6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
+; AVX1-NEXT: store double [[CVT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+; AVX1-NEXT: ret void
+;
+; XOP-LABEL: @uitofp_8i64_8f64(
+; XOP-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
+; XOP-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4) to <4 x i64>*), align 32
+; XOP-NEXT: [[TMP3:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x double>
+; XOP-NEXT: [[TMP4:%.*]] = uitofp <4 x i64> [[TMP2]] to <4 x double>
+; XOP-NEXT: store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
+; XOP-NEXT: store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
+; XOP-NEXT: ret void
+;
+; AVX2-LABEL: @uitofp_8i64_8f64(
+; AVX2-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
+; AVX2-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4) to <4 x i64>*), align 32
+; AVX2-NEXT: [[TMP3:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x double>
+; AVX2-NEXT: [[TMP4:%.*]] = uitofp <4 x i64> [[TMP2]] to <4 x double>
+; AVX2-NEXT: store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
+; AVX2-NEXT: store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
+; AVX2-NEXT: ret void
;
; AVX512-LABEL: @uitofp_8i64_8f64(
; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
More information about the llvm-commits
mailing list