[llvm] r217863 - AVX-512: added cost for some AVX-512 instructions
Elena Demikhovsky
elena.demikhovsky at intel.com
Tue Sep 16 00:57:38 PDT 2014
Author: delena
Date: Tue Sep 16 02:57:37 2014
New Revision: 217863
URL: http://llvm.org/viewvc/llvm-project?rev=217863&view=rev
Log:
AVX-512: added cost for some AVX-512 instructions
Modified:
llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/trunk/test/Analysis/CostModel/X86/cast.ll
llvm/trunk/test/Analysis/CostModel/X86/cmp.ll
Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp?rev=217863&r1=217862&r2=217863&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp Tue Sep 16 02:57:37 2014
@@ -225,6 +225,15 @@ unsigned X86TTI::getArithmeticInstrCost(
return LT.first * AVX2UniformConstCostTable[Idx].Cost;
}
+ static const CostTblEntry<MVT::SimpleValueType> AVX512CostTable[] = {
+ { ISD::SHL, MVT::v16i32, 1 },
+ { ISD::SRL, MVT::v16i32, 1 },
+ { ISD::SRA, MVT::v16i32, 1 },
+ { ISD::SHL, MVT::v8i64, 1 },
+ { ISD::SRL, MVT::v8i64, 1 },
+ { ISD::SRA, MVT::v8i64, 1 },
+ };
+
static const CostTblEntry<MVT::SimpleValueType> AVX2CostTable[] = {
// Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to
// customize them to detect the cases where shift amount is a scalar one.
@@ -260,6 +269,11 @@ unsigned X86TTI::getArithmeticInstrCost(
{ ISD::UDIV, MVT::v4i64, 4*20 },
};
+ if (ST->hasAVX512()) {
+ int Idx = CostTableLookup(AVX512CostTable, ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * AVX512CostTable[Idx].Cost;
+ }
// Look for AVX2 lowering tricks.
if (ST->hasAVX2()) {
if (ISD == ISD::SHL && LT.second == MVT::v16i16 &&
@@ -580,6 +594,38 @@ unsigned X86TTI::getCastInstrCost(unsign
return LTSrc.first * SSE2ConvTbl[Idx].Cost;
}
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ AVX512ConversionTbl[] = {
+ { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 },
+ { ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 },
+ { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 1 },
+ { ISD::FP_ROUND, MVT::v16f32, MVT::v8f64, 3 },
+
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 1 },
+ { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 1 },
+ { ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 1 },
+ { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 1 },
+ { ISD::TRUNCATE, MVT::v16i32, MVT::v8i64, 4 },
+
+ // v16i1 -> v16i32 - load + broadcast
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
+
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v16i32, 3 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v16i32, 3 },
+
+ };
+
+ if (ST->hasAVX512()) {
+ int Idx = ConvertCostTableLookup(AVX512ConversionTbl, ISD, LTDest.second,
+ LTSrc.second);
+ if (Idx != -1)
+ return AVX512ConversionTbl[Idx].Cost;
+ }
EVT SrcTy = TLI->getValueType(Src);
EVT DstTy = TLI->getValueType(Dst);
@@ -612,6 +658,9 @@ unsigned X86TTI::getCastInstrCost(unsign
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2 },
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2 },
{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 4 },
+
+ { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 3 },
+ { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 3 },
};
static const TypeConversionCostTblEntry<MVT::SimpleValueType>
@@ -738,6 +787,19 @@ unsigned X86TTI::getCmpSelInstrCost(unsi
{ ISD::SETCC, MVT::v32i8, 1 },
};
+ static const CostTblEntry<MVT::SimpleValueType> AVX512CostTbl[] = {
+ { ISD::SETCC, MVT::v8i64, 1 },
+ { ISD::SETCC, MVT::v16i32, 1 },
+ { ISD::SETCC, MVT::v8f64, 1 },
+ { ISD::SETCC, MVT::v16f32, 1 },
+ };
+
+ if (ST->hasAVX512()) {
+ int Idx = CostTableLookup(AVX512CostTbl, ISD, MTy);
+ if (Idx != -1)
+ return LT.first * AVX512CostTbl[Idx].Cost;
+ }
+
if (ST->hasAVX2()) {
int Idx = CostTableLookup(AVX2CostTbl, ISD, MTy);
if (Idx != -1)
Modified: llvm/trunk/test/Analysis/CostModel/X86/cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/cast.ll?rev=217863&r1=217862&r2=217863&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/cast.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/cast.ll Tue Sep 16 02:57:37 2014
@@ -1,3 +1,4 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX512
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX
@@ -83,6 +84,19 @@ define i32 @zext_sext(<8 x i1> %in) {
;CHECK-AVX: cost of 4 {{.*}} zext
%D = zext <4 x i32> undef to <4 x i64>
+ ;CHECK-AVX512: cost of 3 {{.*}} %D1 = zext
+ %D1 = zext <16 x i32> undef to <16 x i64>
+
+ ;CHECK-AVX512: cost of 3 {{.*}} %D2 = sext
+ %D2 = sext <16 x i32> undef to <16 x i64>
+
+ ;CHECK-AVX512: cost of 1 {{.*}} %D3 = zext
+ %D3 = zext <16 x i16> undef to <16 x i32>
+ ;CHECK-AVX512: cost of 1 {{.*}} %D4 = zext
+ %D4 = zext <16 x i8> undef to <16 x i32>
+ ;CHECK-AVX512: cost of 2 {{.*}} %D5 = zext
+ %D5 = zext <16 x i1> undef to <16 x i32>
+
;CHECK-AVX2: cost of 2 {{.*}} trunc
;CHECK-AVX: cost of 4 {{.*}} trunc
%E = trunc <4 x i64> undef to <4 x i32>
@@ -101,8 +115,12 @@ define i32 @zext_sext(<8 x i1> %in) {
;CHECK-AVX2: cost of 4 {{.*}} trunc
;CHECK-AVX: cost of 9 {{.*}} trunc
+ ;CHECK_AVX512: cost of 1 {{.*}} G = trunc
%G = trunc <8 x i64> undef to <8 x i32>
+ ;CHECK-AVX512: cost of 4 {{.*}} %G1 = trunc
+ %G1 = trunc <16 x i64> undef to <16 x i32>
+
ret i32 undef
}
@@ -211,3 +229,24 @@ define void @uitofp8(<8 x i1> %a, <8 x i
%D1 = uitofp <8 x i32> %d to <8 x float>
ret void
}
+
+define void @fp_conv(<8 x float> %a, <16 x float>%b) {
+;CHECK-LABEL: for function 'fp_conv'
+ ; CHECK-AVX512: cost of 1 {{.*}} fpext
+ %A1 = fpext <8 x float> %a to <8 x double>
+
+ ; CHECK-AVX512: cost of 3 {{.*}} fpext
+ %A2 = fpext <16 x float> %b to <16 x double>
+
+ ; CHECK-AVX2: cost of 3 {{.*}} %A3 = fpext
+ ; CHECK-AVX512: cost of 1 {{.*}} %A3 = fpext
+ %A3 = fpext <8 x float> %a to <8 x double>
+
+ ; CHECK-AVX2: cost of 3 {{.*}} %A4 = fptrunc
+ ; CHECK-AVX512: cost of 1 {{.*}} %A4 = fptrunc
+ %A4 = fptrunc <8 x double> undef to <8 x float>
+
+ ; CHECK-AVX512: cost of 3 {{.*}} %A5 = fptrunc
+ %A5 = fptrunc <16 x double> undef to <16 x float>
+ ret void
+}
Modified: llvm/trunk/test/Analysis/CostModel/X86/cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/cmp.ll?rev=217863&r1=217862&r2=217863&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/cmp.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/cmp.ll Tue Sep 16 02:57:37 2014
@@ -1,5 +1,6 @@
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck -check-prefix=CHECK -check-prefix=AVX1 %s
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck -check-prefix=CHECK -check-prefix=AVX2 %s
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck -check-prefix=CHECK -check-prefix=AVX512 %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
@@ -22,6 +23,11 @@ define i32 @cmp(i32 %arg) {
;AVX2: cost of 1 {{.*}} fcmp
%E = fcmp olt <4 x double> undef, undef
+ ; AVX512: cost of 1 {{.*}} %E1 = fcmp
+ %E1 = fcmp olt <16 x float> undef, undef
+ ; AVX512: cost of 2 {{.*}} %E2 = fcmp
+ %E2 = fcmp olt <16 x double> undef, undef
+
; -- integers --
;AVX1: cost of 1 {{.*}} icmp
@@ -49,6 +55,11 @@ define i32 @cmp(i32 %arg) {
;AVX2: cost of 1 {{.*}} icmp
%M = icmp eq <32 x i8> undef, undef
+ ; AVX512: cost of 1 {{.*}} %M1 = icmp
+ %M1 = icmp eq <16 x i32> undef, undef
+ ; AVX512: cost of 2 {{.*}} %M2 = icmp
+ %M2 = icmp eq <16 x i64> undef, undef
+
;CHECK: cost of 0 {{.*}} ret
ret i32 undef
}
More information about the llvm-commits
mailing list