[llvm] r291390 - [CostModel][X86] Moved legal uniform shift costs earlier.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 8 05:12:04 PST 2017
Author: rksimon
Date: Sun Jan 8 07:12:03 2017
New Revision: 291390
URL: http://llvm.org/viewvc/llvm-project?rev=291390&view=rev
Log:
[CostModel][X86] Moved legal uniform shift costs earlier.
XOP was prematurely matching, doubling the cost of ashr/lshr uniform shifts.
Modified:
llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/trunk/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
llvm/trunk/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp?rev=291390&r1=291389&r2=291390&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp Sun Jan 8 07:12:03 2017
@@ -207,6 +207,43 @@ int X86TTIImpl::getArithmeticInstrCost(
return LT.first * Entry->Cost;
}
+ static const CostTblEntry AVX2UniformCostTable[] = {
+ // Uniform splats are cheaper for the following instructions.
+ { ISD::SHL, MVT::v16i16, 1 }, // psllw.
+ { ISD::SRL, MVT::v16i16, 1 }, // psrlw.
+ { ISD::SRA, MVT::v16i16, 1 }, // psraw.
+ };
+
+ if (ST->hasAVX2() &&
+ ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
+ (Op2Info == TargetTransformInfo::OK_UniformValue))) {
+ if (const auto *Entry =
+ CostTableLookup(AVX2UniformCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+
+ static const CostTblEntry SSE2UniformCostTable[] = {
+ // Uniform splats are cheaper for the following instructions.
+ { ISD::SHL, MVT::v8i16, 1 }, // psllw.
+ { ISD::SHL, MVT::v4i32, 1 }, // pslld
+ { ISD::SHL, MVT::v2i64, 1 }, // psllq.
+
+ { ISD::SRL, MVT::v8i16, 1 }, // psrlw.
+ { ISD::SRL, MVT::v4i32, 1 }, // psrld.
+ { ISD::SRL, MVT::v2i64, 1 }, // psrlq.
+
+ { ISD::SRA, MVT::v8i16, 1 }, // psraw.
+ { ISD::SRA, MVT::v4i32, 1 }, // psrad.
+ };
+
+ if (ST->hasSSE2() &&
+ ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
+ (Op2Info == TargetTransformInfo::OK_UniformValue))) {
+ if (const auto *Entry =
+ CostTableLookup(SSE2UniformCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+
static const CostTblEntry AVX512DQCostTable[] = {
{ ISD::MUL, MVT::v2i64, 1 },
{ ISD::MUL, MVT::v4i64, 1 },
@@ -291,20 +328,6 @@ int X86TTIImpl::getArithmeticInstrCost(
return LT.first * Entry->Cost;
}
- static const CostTblEntry AVX2UniformCostTable[] = {
- // Uniform splats are cheaper for the following instructions.
- { ISD::SRL, MVT::v16i16, 1 }, // psrlw.
- { ISD::SRA, MVT::v16i16, 1 }, // psraw.
- };
-
- if (ST->hasAVX2() &&
- ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
- (Op2Info == TargetTransformInfo::OK_UniformValue))) {
- if (const auto *Entry =
- CostTableLookup(AVX2UniformCostTable, ISD, LT.second))
- return LT.first * Entry->Cost;
- }
-
static const CostTblEntry XOPShiftCostTable[] = {
// 128bit shifts take 1cy, but right shifts require negation beforehand.
{ ISD::SHL, MVT::v16i8, 1 },
@@ -339,31 +362,23 @@ int X86TTIImpl::getArithmeticInstrCost(
if (const auto *Entry = CostTableLookup(XOPShiftCostTable, ISD, LT.second))
return LT.first * Entry->Cost;
- static const CostTblEntry SSE2UniformCostTable[] = {
+ static const CostTblEntry SSE2UniformShiftCostTable[] = {
// Uniform splats are cheaper for the following instructions.
{ ISD::SHL, MVT::v16i8, 1 }, // psllw.
{ ISD::SHL, MVT::v32i8, 2 }, // psllw.
- { ISD::SHL, MVT::v8i16, 1 }, // psllw.
{ ISD::SHL, MVT::v16i16, 2 }, // psllw.
- { ISD::SHL, MVT::v4i32, 1 }, // pslld
{ ISD::SHL, MVT::v8i32, 2 }, // pslld
- { ISD::SHL, MVT::v2i64, 1 }, // psllq.
{ ISD::SHL, MVT::v4i64, 2 }, // psllq.
{ ISD::SRL, MVT::v16i8, 1 }, // psrlw.
{ ISD::SRL, MVT::v32i8, 2 }, // psrlw.
- { ISD::SRL, MVT::v8i16, 1 }, // psrlw.
{ ISD::SRL, MVT::v16i16, 2 }, // psrlw.
- { ISD::SRL, MVT::v4i32, 1 }, // psrld.
{ ISD::SRL, MVT::v8i32, 2 }, // psrld.
- { ISD::SRL, MVT::v2i64, 1 }, // psrlq.
{ ISD::SRL, MVT::v4i64, 2 }, // psrlq.
{ ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb.
{ ISD::SRA, MVT::v32i8, 8 }, // psrlw, pand, pxor, psubb.
- { ISD::SRA, MVT::v8i16, 1 }, // psraw.
{ ISD::SRA, MVT::v16i16, 2 }, // psraw.
- { ISD::SRA, MVT::v4i32, 1 }, // psrad.
{ ISD::SRA, MVT::v8i32, 2 }, // psrad.
{ ISD::SRA, MVT::v2i64, 4 }, // 2 x psrad + shuffle.
{ ISD::SRA, MVT::v4i64, 8 }, // 2 x psrad + shuffle.
@@ -373,7 +388,7 @@ int X86TTIImpl::getArithmeticInstrCost(
((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
(Op2Info == TargetTransformInfo::OK_UniformValue))) {
if (const auto *Entry =
- CostTableLookup(SSE2UniformCostTable, ISD, LT.second))
+ CostTableLookup(SSE2UniformShiftCostTable, ISD, LT.second))
return LT.first * Entry->Cost;
}
Modified: llvm/trunk/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/vshift-ashr-cost.ll?rev=291390&r1=291389&r2=291390&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/vshift-ashr-cost.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/vshift-ashr-cost.ll Sun Jan 8 07:12:03 2017
@@ -529,8 +529,7 @@ define <4 x i32> @splatconstant_shift_v4
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; AVX512: Found an estimated cost of 1 for instruction: %shift
-; XOPAVX: Found an estimated cost of 2 for instruction: %shift
-; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
ret <4 x i32> %shift
}
@@ -568,7 +567,7 @@ define <8 x i16> @splatconstant_shift_v8
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; AVX512: Found an estimated cost of 1 for instruction: %shift
-; XOP: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <8 x i16> %shift
}
Modified: llvm/trunk/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/vshift-lshr-cost.ll?rev=291390&r1=291389&r2=291390&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/vshift-lshr-cost.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/vshift-lshr-cost.ll Sun Jan 8 07:12:03 2017
@@ -501,8 +501,7 @@ define <2 x i64> @splatconstant_shift_v2
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; AVX512: Found an estimated cost of 1 for instruction: %shift
-; XOPAVX: Found an estimated cost of 2 for instruction: %shift
-; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <2 x i64> %a, <i64 7, i64 7>
ret <2 x i64> %shift
}
@@ -540,8 +539,7 @@ define <4 x i32> @splatconstant_shift_v4
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; AVX512: Found an estimated cost of 1 for instruction: %shift
-; XOPAVX: Found an estimated cost of 2 for instruction: %shift
-; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
ret <4 x i32> %shift
}
@@ -579,7 +577,7 @@ define <8 x i16> @splatconstant_shift_v8
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; AVX512: Found an estimated cost of 1 for instruction: %shift
-; XOP: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <8 x i16> %shift
}
More information about the llvm-commits
mailing list