[llvm] r291364 - [CostModel][X86] Generalized cost calculation of SHL by constant -> MUL conversion.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Jan 7 13:33:00 PST 2017
Author: rksimon
Date: Sat Jan 7 15:33:00 2017
New Revision: 291364
URL: http://llvm.org/viewvc/llvm-project?rev=291364&view=rev
Log:
[CostModel][X86] Generalized cost calculation of SHL by constant -> MUL conversion.
Modified:
llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp?rev=291364&r1=291363&r2=291364&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp Sat Jan 7 15:33:00 2017
@@ -409,21 +409,9 @@ int X86TTIImpl::getArithmeticInstrCost(
Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) {
MVT VT = LT.second;
// Vector shift left by non uniform constant can be lowered
- // into vector multiply (pmullw/pmulld).
- if ((VT == MVT::v8i16 && ST->hasSSE2()) ||
- (VT == MVT::v4i32 && ST->hasSSE41()))
- return LT.first;
-
- // v16i16 and v8i32 shifts by non-uniform constants are lowered into a
- // sequence of extract + two vector multiply + insert.
- if ((VT == MVT::v8i32 || VT == MVT::v16i16) &&
- (ST->hasAVX() && !ST->hasAVX2()))
- ISD = ISD::MUL;
-
- // A vector shift left by non uniform constant is converted
- // into a vector multiply; the new multiply is eventually
- // lowered into a sequence of shuffles and 2 x pmuludq.
- if (VT == MVT::v4i32 && ST->hasSSE2())
+ // into vector multiply.
+ if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->hasSSE2()) ||
+ ((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->hasAVX()))
ISD = ISD::MUL;
}
@@ -534,6 +522,7 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v4i64, 2*12 }, // srl/xor/sub sequence.
{ ISD::MUL, MVT::v16i8, 12 }, // extend/pmullw/trunc sequence.
+ { ISD::MUL, MVT::v8i16, 1 }, // pmullw
{ ISD::MUL, MVT::v4i32, 6 }, // 3*pmuludq/4*shuffle
{ ISD::MUL, MVT::v2i64, 8 }, // 3*pmuludq/3*shift/2*add
@@ -549,13 +538,13 @@ int X86TTIImpl::getArithmeticInstrCost(
// generally a bad idea. Assume somewhat arbitrarily that we have to be able
// to hide "20 cycles" for each lane.
{ ISD::SDIV, MVT::v16i8, 16*20 },
- { ISD::SDIV, MVT::v8i16, 8*20 },
- { ISD::SDIV, MVT::v4i32, 4*20 },
- { ISD::SDIV, MVT::v2i64, 2*20 },
+ { ISD::SDIV, MVT::v8i16, 8*20 },
+ { ISD::SDIV, MVT::v4i32, 4*20 },
+ { ISD::SDIV, MVT::v2i64, 2*20 },
{ ISD::UDIV, MVT::v16i8, 16*20 },
- { ISD::UDIV, MVT::v8i16, 8*20 },
- { ISD::UDIV, MVT::v4i32, 4*20 },
- { ISD::UDIV, MVT::v2i64, 2*20 },
+ { ISD::UDIV, MVT::v8i16, 8*20 },
+ { ISD::UDIV, MVT::v4i32, 4*20 },
+ { ISD::UDIV, MVT::v2i64, 2*20 },
};
if (ST->hasSSE2())
More information about the llvm-commits
mailing list