[llvm] r291352 - [CostModel][X86] Reordered AVX1 arithmetic cost LUT into descending target order. NFCI.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Jan 7 09:03:51 PST 2017
Author: rksimon
Date: Sat Jan 7 11:03:51 2017
New Revision: 291352
URL: http://llvm.org/viewvc/llvm-project?rev=291352&view=rev
Log:
[CostModel][X86] Reordered AVX1 arithmetic cost LUT into descending target order. NFCI.
Modified:
llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp?rev=291352&r1=291351&r2=291352&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp Sat Jan 7 11:03:51 2017
@@ -440,6 +440,33 @@ int X86TTIImpl::getArithmeticInstrCost(
ISD = ISD::MUL;
}
+ static const CostTblEntry AVX1CostTable[] = {
+ // We don't have to scalarize unsupported ops. We can issue two half-sized
+ // operations and we only need to extract the upper YMM half.
+ // Two ops + 1 extract + 1 insert = 4.
+ { ISD::MUL, MVT::v16i16, 4 },
+ { ISD::MUL, MVT::v8i32, 4 },
+ { ISD::SUB, MVT::v32i8, 4 },
+ { ISD::ADD, MVT::v32i8, 4 },
+ { ISD::SUB, MVT::v16i16, 4 },
+ { ISD::ADD, MVT::v16i16, 4 },
+ { ISD::SUB, MVT::v8i32, 4 },
+ { ISD::ADD, MVT::v8i32, 4 },
+ { ISD::SUB, MVT::v4i64, 4 },
+ { ISD::ADD, MVT::v4i64, 4 },
+
+ // A v4i64 multiply is custom lowered as two split v2i64 vectors that then
+ // are lowered as a series of long multiplies(3), shifts(3) and adds(2)
+ // Because we believe v4i64 to be a legal type, we must also include the
+ // extract+insert in the cost table. Therefore, the cost here is 18
+ // instead of 8.
+ { ISD::MUL, MVT::v4i64, 18 },
+ };
+
+ if (ST->hasAVX() && !ST->hasAVX2())
+ if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+
static const CostTblEntry SSE42CostTable[] = {
{ ISD::FDIV, MVT::f32, 14 }, // Nehalem from http://www.agner.org/
{ ISD::FDIV, MVT::v4f32, 14 }, // Nehalem from http://www.agner.org/
@@ -529,33 +556,6 @@ int X86TTIImpl::getArithmeticInstrCost(
if (const auto *Entry = CostTableLookup(SSE2CostTable, ISD, LT.second))
return LT.first * Entry->Cost;
- static const CostTblEntry AVX1CostTable[] = {
- // We don't have to scalarize unsupported ops. We can issue two half-sized
- // operations and we only need to extract the upper YMM half.
- // Two ops + 1 extract + 1 insert = 4.
- { ISD::MUL, MVT::v16i16, 4 },
- { ISD::MUL, MVT::v8i32, 4 },
- { ISD::SUB, MVT::v32i8, 4 },
- { ISD::ADD, MVT::v32i8, 4 },
- { ISD::SUB, MVT::v16i16, 4 },
- { ISD::ADD, MVT::v16i16, 4 },
- { ISD::SUB, MVT::v8i32, 4 },
- { ISD::ADD, MVT::v8i32, 4 },
- { ISD::SUB, MVT::v4i64, 4 },
- { ISD::ADD, MVT::v4i64, 4 },
- // A v4i64 multiply is custom lowered as two split v2i64 vectors that then
- // are lowered as a series of long multiplies(3), shifts(3) and adds(2)
- // Because we believe v4i64 to be a legal type, we must also include the
- // extract+insert in the cost table. Therefore, the cost here is 18
- // instead of 8.
- { ISD::MUL, MVT::v4i64, 18 },
- };
-
- // Look for AVX1 lowering tricks.
- if (ST->hasAVX() && !ST->hasAVX2())
- if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, LT.second))
- return LT.first * Entry->Cost;
-
static const CostTblEntry SSE1CostTable[] = {
{ ISD::FDIV, MVT::f32, 17 }, // Pentium III from http://www.agner.org/
{ ISD::FDIV, MVT::v4f32, 34 }, // Pentium III from http://www.agner.org/
More information about the llvm-commits
mailing list