[llvm] r291657 - [X86] updating TTI costs for arithmetic instructions on X86\SLM arch.
Mohammed Agabaria via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 11 00:23:38 PST 2017
Author: magabari
Date: Wed Jan 11 02:23:37 2017
New Revision: 291657
URL: http://llvm.org/viewvc/llvm-project?rev=291657&view=rev
Log:
[X86] updating TTI costs for arithmetic instructions on X86\SLM arch.
updated instructions:
pmulld, pmullw, pmulhw, mulsd, mulps, mulpd, divss, divps, divsd, divpd, addpd and subpd.
Special optimization case: pmulld is replaced with a pmullw\pmulhw\pshuf sequence
when the real operands' bitwidth is <= 16.
Differential Revision: https://reviews.llvm.org/D28104
Added:
llvm/trunk/test/Analysis/CostModel/X86/slm-arith-costs.ll
llvm/trunk/test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll
Modified:
llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h
llvm/trunk/lib/Analysis/CostModel.cpp
llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp
llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.h
llvm/trunk/lib/Target/Lanai/LanaiTargetTransformInfo.h
llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.h
llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h
llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h?rev=291657&r1=291656&r2=291657&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h Wed Jan 11 02:23:37 2017
@@ -518,11 +518,15 @@ public:
unsigned getMaxInterleaveFactor(unsigned VF) const;
/// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc.
+ /// \p Args is an optional argument which holds the instruction operand
+ /// values so the TTI can analyze those values, searching for special
+ /// cases\optimizations based on them.
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
OperandValueKind Opd2Info = OK_AnyValue,
OperandValueProperties Opd1PropInfo = OP_None,
- OperandValueProperties Opd2PropInfo = OP_None) const;
+ OperandValueProperties Opd2PropInfo = OP_None,
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const;
/// \return The cost of a shuffle instruction of kind Kind and of type Tp.
/// The index and subtype parameters are used by the subvector insertion and
@@ -763,7 +767,8 @@ public:
getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
OperandValueKind Opd2Info,
OperandValueProperties Opd1PropInfo,
- OperandValueProperties Opd2PropInfo) = 0;
+ OperandValueProperties Opd2PropInfo,
+ ArrayRef<const Value *> Args) = 0;
virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) = 0;
virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) = 0;
@@ -984,9 +989,10 @@ public:
getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
OperandValueKind Opd2Info,
OperandValueProperties Opd1PropInfo,
- OperandValueProperties Opd2PropInfo) override {
+ OperandValueProperties Opd2PropInfo,
+ ArrayRef<const Value *> Args) override {
return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
- Opd1PropInfo, Opd2PropInfo);
+ Opd1PropInfo, Opd2PropInfo, Args);
}
int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) override {
Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h?rev=291657&r1=291656&r2=291657&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h Wed Jan 11 02:23:37 2017
@@ -306,7 +306,8 @@ public:
TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info,
TTI::OperandValueProperties Opd1PropInfo,
- TTI::OperandValueProperties Opd2PropInfo) {
+ TTI::OperandValueProperties Opd2PropInfo,
+ ArrayRef<const Value *> Args) {
return 1;
}
@@ -427,6 +428,63 @@ public:
return VF;
}
protected:
+ // Obtain the minimum number of bits required to hold the value (not counting
+ // the sign bit). For a vector, return the size required per element.
+ unsigned minRequiredElementSize(const Value* Val, bool &isSigned) {
+ if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
+ const auto* VectorValue = cast<Constant>(Val);
+
+ // For a vector, pick the maximum of the minimum required sizes of
+ // its elements.
+ auto *VT = cast<VectorType>(Val->getType());
+
+ // Assume unsigned elements
+ isSigned = false;
+
+ // The max required size is the total vector width divided by num
+ // of elements in the vector
+ unsigned MaxRequiredSize = VT->getBitWidth() / VT->getNumElements();
+
+ unsigned MinRequiredSize = 0;
+ for(unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
+ if (auto* IntElement =
+ dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
+ bool signedElement = IntElement->getValue().isNegative();
+ // Get the element min required size.
+ unsigned ElementMinRequiredSize =
+ IntElement->getValue().getMinSignedBits() - 1;
+ // If any element is negative, the whole vector is treated as signed.
+ isSigned |= signedElement;
+ // Save the max required bit size between all the elements.
+ MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
+ }
+ else {
+ // not an int constant element
+ return MaxRequiredSize;
+ }
+ }
+ return MinRequiredSize;
+ }
+
+ if (const auto* CI = dyn_cast<ConstantInt>(Val)) {
+ isSigned = CI->getValue().isNegative();
+ return CI->getValue().getMinSignedBits() - 1;
+ }
+
+ if (const auto* Cast = dyn_cast<SExtInst>(Val)) {
+ isSigned = true;
+ return Cast->getSrcTy()->getScalarSizeInBits() - 1;
+ }
+
+ if (const auto* Cast = dyn_cast<ZExtInst>(Val)) {
+ isSigned = false;
+ return Cast->getSrcTy()->getScalarSizeInBits();
+ }
+
+ isSigned = false;
+ return Val->getType()->getScalarSizeInBits();
+ }
+
bool isStridedAccess(const SCEV *Ptr) {
return Ptr && isa<SCEVAddRecExpr>(Ptr);
}
Modified: llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h?rev=291657&r1=291656&r2=291657&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h (original)
+++ llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h Wed Jan 11 02:23:37 2017
@@ -308,7 +308,8 @@ public:
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
- TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None) {
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
// Check if any of the operands are vector operands.
const TargetLoweringBase *TLI = getTLI();
int ISD = TLI->InstructionOpcodeToISD(Opcode);
Modified: llvm/trunk/lib/Analysis/CostModel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/CostModel.cpp?rev=291657&r1=291656&r2=291657&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/CostModel.cpp (original)
+++ llvm/trunk/lib/Analysis/CostModel.cpp Wed Jan 11 02:23:37 2017
@@ -438,8 +438,11 @@ unsigned CostModelAnalysis::getInstructi
getOperandInfo(I->getOperand(0));
TargetTransformInfo::OperandValueKind Op2VK =
getOperandInfo(I->getOperand(1));
+ SmallVector<const Value*, 2> Operands(I->operand_values());
return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK,
- Op2VK);
+ Op2VK, TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None,
+ Operands);
}
case Instruction::Select: {
const SelectInst *SI = cast<SelectInst>(I);
Modified: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/TargetTransformInfo.cpp?rev=291657&r1=291656&r2=291657&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp Wed Jan 11 02:23:37 2017
@@ -277,9 +277,10 @@ unsigned TargetTransformInfo::getMaxInte
int TargetTransformInfo::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
- OperandValueProperties Opd2PropInfo) const {
+ OperandValueProperties Opd2PropInfo,
+ ArrayRef<const Value *> Args) const {
int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
- Opd1PropInfo, Opd2PropInfo);
+ Opd1PropInfo, Opd2PropInfo, Args);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp?rev=291657&r1=291656&r2=291657&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp Wed Jan 11 02:23:37 2017
@@ -374,7 +374,7 @@ int AArch64TTIImpl::getVectorInstrCost(u
int AArch64TTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
- TTI::OperandValueProperties Opd2PropInfo) {
+ TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h?rev=291657&r1=291656&r2=291657&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h Wed Jan 11 02:23:37 2017
@@ -102,7 +102,8 @@ public:
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
- TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>());
int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr);
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp?rev=291657&r1=291656&r2=291657&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp Wed Jan 11 02:23:37 2017
@@ -110,7 +110,7 @@ unsigned AMDGPUTTIImpl::getMaxInterleave
int AMDGPUTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
- TTI::OperandValueProperties Opd2PropInfo) {
+ TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args ) {
EVT OrigTy = TLI->getValueType(DL, Ty);
if (!OrigTy.isSimple()) {
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h?rev=291657&r1=291656&r2=291657&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h Wed Jan 11 02:23:37 2017
@@ -83,7 +83,8 @@ public:
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
- TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>());
unsigned getCFInstrCost(unsigned Opcode);
Modified: llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp?rev=291657&r1=291656&r2=291657&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp Wed Jan 11 02:23:37 2017
@@ -433,7 +433,8 @@ int ARMTTIImpl::getShuffleCost(TTI::Shuf
int ARMTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
- TTI::OperandValueProperties Opd2PropInfo) {
+ TTI::OperandValueProperties Opd2PropInfo,
+ ArrayRef<const Value *> Args) {
int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
Modified: llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.h?rev=291657&r1=291656&r2=291657&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.h Wed Jan 11 02:23:37 2017
@@ -114,7 +114,8 @@ public:
TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
- TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>());
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace);
Modified: llvm/trunk/lib/Target/Lanai/LanaiTargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Lanai/LanaiTargetTransformInfo.h?rev=291657&r1=291656&r2=291657&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Lanai/LanaiTargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/Lanai/LanaiTargetTransformInfo.h Wed Jan 11 02:23:37 2017
@@ -54,7 +54,8 @@ public:
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
- TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None) {
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
switch (ISD) {
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp?rev=291657&r1=291656&r2=291657&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp Wed Jan 11 02:23:37 2017
@@ -115,7 +115,7 @@ bool NVPTXTTIImpl::isSourceOfDivergence(
int NVPTXTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
- TTI::OperandValueProperties Opd2PropInfo) {
+ TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.h?rev=291657&r1=291656&r2=291657&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.h Wed Jan 11 02:23:37 2017
@@ -54,7 +54,8 @@ public:
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
- TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>());
void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
};
Modified: llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp?rev=291657&r1=291656&r2=291657&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp Wed Jan 11 02:23:37 2017
@@ -281,7 +281,7 @@ unsigned PPCTTIImpl::getMaxInterleaveFac
int PPCTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
- TTI::OperandValueProperties Opd2PropInfo) {
+ TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
// Fallback to the default implementation.
Modified: llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.h?rev=291657&r1=291656&r2=291657&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.h Wed Jan 11 02:23:37 2017
@@ -71,7 +71,8 @@ public:
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
- TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>());
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
Modified: llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp?rev=291657&r1=291656&r2=291657&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp Wed Jan 11 02:23:37 2017
@@ -46,7 +46,7 @@ unsigned WebAssemblyTTIImpl::getRegister
unsigned WebAssemblyTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
- TTI::OperandValueProperties Opd2PropInfo) {
+ TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
unsigned Cost = BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost(
Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
Modified: llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h?rev=291657&r1=291656&r2=291657&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h Wed Jan 11 02:23:37 2017
@@ -61,7 +61,8 @@ public:
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
- TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>());
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
/// @}
Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp?rev=291657&r1=291656&r2=291657&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp Wed Jan 11 02:23:37 2017
@@ -114,15 +114,62 @@ unsigned X86TTIImpl::getMaxInterleaveFac
}
int X86TTIImpl::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
- TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
- TTI::OperandValueProperties Opd2PropInfo) {
+ unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
+ TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo,
+ ArrayRef<const Value *> Args) {
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ static const CostTblEntry SLMCostTable[] = {
+ { ISD::MUL, MVT::v4i32, 11 }, // pmulld
+ { ISD::MUL, MVT::v8i16, 2 }, // pmullw
+ { ISD::MUL, MVT::v16i8, 14 }, // extend/pmullw/trunc sequence.
+ { ISD::FMUL, MVT::f64, 2 }, // mulsd
+ { ISD::FMUL, MVT::v2f64, 4 }, // mulpd
+ { ISD::FMUL, MVT::v4f32, 2 }, // mulps
+ { ISD::FDIV, MVT::f32, 17 }, // divss
+ { ISD::FDIV, MVT::v4f32, 39 }, // divps
+ { ISD::FDIV, MVT::f64, 32 }, // divsd
+ { ISD::FDIV, MVT::v2f64, 69 }, // divpd
+ { ISD::FADD, MVT::v2f64, 2 }, // addpd
+ { ISD::FSUB, MVT::v2f64, 2 }, // subpd
+ // v2i64/v4i64 mul is custom lowered as a series of long
+ // multiplies(3), shifts(3) and adds(2).
+ // slm muldq version throughput is 2
+ { ISD::MUL, MVT::v2i64, 11 },
+ };
+
+ if (ST->isSLM()) {
+ if (Args.size() == 2 && ISD == ISD::MUL && LT.second == MVT::v4i32) {
+ // Check if the operands can be shrunk into a smaller datatype.
+ bool Op1Signed = false;
+ unsigned Op1MinSize = BaseT::minRequiredElementSize(Args[0], Op1Signed);
+ bool Op2Signed = false;
+ unsigned Op2MinSize = BaseT::minRequiredElementSize(Args[1], Op2Signed);
+
+ bool signedMode = Op1Signed | Op2Signed;
+ unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
+
+ if (OpMinSize <= 7)
+ return LT.first * 3; // pmullw/sext
+ if (!signedMode && OpMinSize <= 8)
+ return LT.first * 3; // pmullw/zext
+ if (OpMinSize <= 15)
+ return LT.first * 5; // pmullw/pmulhw/pshuf
+ if (!signedMode && OpMinSize <= 16)
+ return LT.first * 5; // pmullw/pmulhw/pshuf
+ }
+ if (const auto *Entry = CostTableLookup(SLMCostTable, ISD,
+ LT.second)) {
+ return LT.first * Entry->Cost;
+ }
+ }
+
if (ISD == ISD::SDIV &&
Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h?rev=291657&r1=291656&r2=291657&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h Wed Jan 11 02:23:37 2017
@@ -60,7 +60,8 @@ public:
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
- TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>());
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=291657&r1=291656&r2=291657&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Wed Jan 11 02:23:37 2017
@@ -80,6 +80,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/Verifier.h"
@@ -6949,9 +6950,9 @@ unsigned LoopVectorizationCostModel::get
} else if (Legal->isUniform(Op2)) {
Op2VK = TargetTransformInfo::OK_UniformValue;
}
-
- return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, Op2VK,
- Op1VP, Op2VP);
+ SmallVector<const Value *, 4> Operands(I->operand_values());
+ return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK,
+ Op2VK, Op1VP, Op2VP, Operands);
}
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
Added: llvm/trunk/test/Analysis/CostModel/X86/slm-arith-costs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/slm-arith-costs.ll?rev=291657&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/slm-arith-costs.ll (added)
+++ llvm/trunk/test/Analysis/CostModel/X86/slm-arith-costs.ll Wed Jan 11 02:23:37 2017
@@ -0,0 +1,317 @@
+; RUN: opt < %s -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefix=SLM
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; 8bit mul
+define i8 @slm-costs_8_scalar_mul(i8 %a, i8 %b) {
+entry:
+; SLM: cost of 1 {{.*}} mul nsw i8
+ %res = mul nsw i8 %a, %b
+ ret i8 %res
+}
+
+define <2 x i8> @slm-costs_8_v2_mul(<2 x i8> %a, <2 x i8> %b) {
+entry:
+; SLM: cost of 11 {{.*}} mul nsw <2 x i8>
+ %res = mul nsw <2 x i8> %a, %b
+ ret <2 x i8> %res
+}
+
+define <4 x i8> @slm-costs_8_v4_mul(<4 x i8> %a, <4 x i8> %b) {
+entry:
+; SLM: cost of 3 {{.*}} mul nsw <4 x i8>
+ %res = mul nsw <4 x i8> %a, %b
+ ret <4 x i8> %res
+}
+
+define <4 x i32> @slm-costs_8_v4_zext_mul(<4 x i8> %a) {
+entry:
+; SLM: cost of 3 {{.*}} mul nsw <4 x i32>
+ %zext = zext <4 x i8> %a to <4 x i32>
+ %res = mul nsw <4 x i32> %zext, <i32 255, i32 255, i32 255, i32 255>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @slm-costs_8_v4_zext_mul_fail(<4 x i8> %a) {
+entry:
+; SLM: cost of 5 {{.*}} mul nsw <4 x i32>
+ %zext = zext <4 x i8> %a to <4 x i32>
+ %res = mul nsw <4 x i32> %zext, <i32 255, i32 255, i32 -1, i32 255>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @slm-costs_8_v4_zext_mul_fail_2(<4 x i8> %a) {
+entry:
+; SLM: cost of 5 {{.*}} mul nsw <4 x i32>
+ %zext = zext <4 x i8> %a to <4 x i32>
+ %res = mul nsw <4 x i32> %zext, <i32 255, i32 256, i32 255, i32 255>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @slm-costs_8_v4_sext_mul(<4 x i8> %a) {
+entry:
+; SLM: cost of 3 {{.*}} mul nsw <4 x i32>
+ %sext = sext <4 x i8> %a to <4 x i32>
+ %res = mul nsw <4 x i32> %sext, <i32 127, i32 -128, i32 127, i32 -128>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @slm-costs_8_v4_sext_mul_fail(<4 x i8> %a) {
+entry:
+; SLM: cost of 5 {{.*}} mul nsw <4 x i32>
+ %sext = sext <4 x i8> %a to <4 x i32>
+ %res = mul nsw <4 x i32> %sext, <i32 127, i32 -128, i32 128, i32 -128>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @slm-costs_8_v4_sext_mul_fail_2(<4 x i8> %a) {
+entry:
+; SLM: cost of 5 {{.*}} mul nsw <4 x i32>
+ %sext = sext <4 x i8> %a to <4 x i32>
+ %res = mul nsw <4 x i32> %sext, <i32 127, i32 -129, i32 127, i32 -128>
+ ret <4 x i32> %res
+}
+
+define <8 x i8> @slm-costs_8_v8_mul(<8 x i8> %a, <8 x i8> %b) {
+entry:
+; SLM: cost of 2 {{.*}} mul nsw <8 x i8>
+ %res = mul nsw <8 x i8> %a, %b
+ ret <8 x i8> %res
+}
+
+define <16 x i8> @slm-costs_8_v16_mul(<16 x i8> %a, <16 x i8> %b) {
+entry:
+; SLM: cost of 14 {{.*}} mul nsw <16 x i8>
+ %res = mul nsw <16 x i8> %a, %b
+ ret <16 x i8> %res
+}
+
+; 16bit mul
+define i16 @slm-costs_16_scalar_mul(i16 %a, i16 %b) {
+entry:
+; SLM: cost of 1 {{.*}} mul nsw i16
+ %res = mul nsw i16 %a, %b
+ ret i16 %res
+}
+
+define <2 x i16> @slm-costs_16_v2_mul(<2 x i16> %a, <2 x i16> %b) {
+entry:
+; SLM: cost of 11 {{.*}} mul nsw <2 x i16>
+ %res = mul nsw <2 x i16> %a, %b
+ ret <2 x i16> %res
+}
+
+define <4 x i16> @slm-costs_16_v4_mul(<4 x i16> %a, <4 x i16> %b) {
+entry:
+; SLM: cost of 5 {{.*}} mul nsw <4 x i16>
+ %res = mul nsw <4 x i16> %a, %b
+ ret <4 x i16> %res
+}
+
+define <4 x i32> @slm-costs_16_v4_zext_mul(<4 x i16> %a) {
+entry:
+; SLM: cost of 5 {{.*}} mul nsw <4 x i32>
+ %zext = zext <4 x i16> %a to <4 x i32>
+ %res = mul nsw <4 x i32> %zext, <i32 65535, i32 65535, i32 65535, i32 65535>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @slm-costs_16_v4_zext_mul_fail(<4 x i16> %a) {
+entry:
+; SLM: cost of 11 {{.*}} mul nsw <4 x i32>
+ %zext = zext <4 x i16> %a to <4 x i32>
+ %res = mul nsw <4 x i32> %zext, <i32 -1, i32 65535, i32 65535, i32 65535>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @slm-costs_16_v4_zext_mul_fail_2(<4 x i16> %a) {
+entry:
+; SLM: cost of 11 {{.*}} mul nsw <4 x i32>
+ %zext = zext <4 x i16> %a to <4 x i32>
+ %res = mul nsw <4 x i32> %zext, <i32 65536, i32 65535, i32 65535, i32 65535>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @slm-costs_16_v4_sext_mul(<4 x i16> %a) {
+entry:
+; SLM: cost of 5 {{.*}} mul nsw <4 x i32>
+ %sext = sext <4 x i16> %a to <4 x i32>
+ %res = mul nsw <4 x i32> %sext, <i32 32767, i32 -32768, i32 32767, i32 -32768>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @slm-costs_16_v4_sext_mul_fail(<4 x i16> %a) {
+entry:
+; SLM: cost of 11 {{.*}} mul nsw <4 x i32>
+ %sext = sext <4 x i16> %a to <4 x i32>
+ %res = mul nsw <4 x i32> %sext, <i32 32767, i32 -32768, i32 32768, i32 -32768>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @slm-costs_16_v4_sext_mul_fail_2(<4 x i16> %a) {
+entry:
+; SLM: cost of 11 {{.*}} mul nsw <4 x i32>
+ %sext = sext <4 x i16> %a to <4 x i32>
+ %res = mul nsw <4 x i32> %sext, <i32 32767, i32 -32768, i32 32767, i32 -32769>
+ ret <4 x i32> %res
+}
+
+define <8 x i16> @slm-costs_16_v8_mul(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; SLM: cost of 2 {{.*}} mul nsw <8 x i16>
+ %res = mul nsw <8 x i16> %a, %b
+ ret <8 x i16> %res
+}
+
+define <16 x i16> @slm-costs_16_v16_mul(<16 x i16> %a, <16 x i16> %b) {
+entry:
+; SLM: cost of 4 {{.*}} mul nsw <16 x i16>
+ %res = mul nsw <16 x i16> %a, %b
+ ret <16 x i16> %res
+}
+
+; 32bit mul
+define i32 @slm-costs_32_scalar_mul(i32 %a, i32 %b) {
+entry:
+; SLM: cost of 1 {{.*}} mul nsw i32
+ %res = mul nsw i32 %a, %b
+ ret i32 %res
+}
+
+define <2 x i32> @slm-costs_32_v2_mul(<2 x i32> %a, <2 x i32> %b) {
+entry:
+; SLM: cost of 11 {{.*}} mul nsw <2 x i32>
+ %res = mul nsw <2 x i32> %a, %b
+ ret <2 x i32> %res
+}
+
+define <4 x i32> @slm-costs_32_v4_mul(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; SLM: cost of 11 {{.*}} mul nsw <4 x i32>
+ %res = mul nsw <4 x i32> %a, %b
+ ret <4 x i32> %res
+}
+
+define <8 x i32> @slm-costs_32_v8_mul(<8 x i32> %a, <8 x i32> %b) {
+entry:
+; SLM: cost of 22 {{.*}} mul nsw <8 x i32>
+ %res = mul nsw <8 x i32> %a, %b
+ ret <8 x i32> %res
+}
+
+define <16 x i32> @slm-costs_32_v16_mul(<16 x i32> %a, <16 x i32> %b) {
+entry:
+; SLM: cost of 44 {{.*}} mul nsw <16 x i32>
+ %res = mul nsw <16 x i32> %a, %b
+ ret <16 x i32> %res
+}
+
+; 64bit mul
+define i64 @slm-costs_64_scalar_mul(i64 %a, i64 %b) {
+entry:
+; SLM: cost of 1 {{.*}} mul nsw i64
+ %res = mul nsw i64 %a, %b
+ ret i64 %res
+}
+
+define <2 x i64> @slm-costs_64_v2_mul(<2 x i64> %a, <2 x i64> %b) {
+entry:
+; SLM: cost of 11 {{.*}} mul nsw <2 x i64>
+ %res = mul nsw <2 x i64> %a, %b
+ ret <2 x i64> %res
+}
+
+define <4 x i64> @slm-costs_64_v4_mul(<4 x i64> %a, <4 x i64> %b) {
+entry:
+; SLM: cost of 22 {{.*}} mul nsw <4 x i64>
+ %res = mul nsw <4 x i64> %a, %b
+ ret <4 x i64> %res
+}
+
+define <8 x i64> @slm-costs_64_v8_mul(<8 x i64> %a, <8 x i64> %b) {
+entry:
+; SLM: cost of 44 {{.*}} mul nsw <8 x i64>
+ %res = mul nsw <8 x i64> %a, %b
+ ret <8 x i64> %res
+}
+
+define <16 x i64> @slm-costs_64_v16_mul(<16 x i64> %a, <16 x i64> %b) {
+entry:
+; SLM: cost of 88 {{.*}} mul nsw <16 x i64>
+ %res = mul nsw <16 x i64> %a, %b
+ ret <16 x i64> %res
+}
+
+; mulsd
+define double @slm-costs_mulsd(double %a, double %b) {
+entry:
+; SLM: cost of 2 {{.*}} fmul double
+ %res = fmul double %a, %b
+ ret double %res
+}
+
+; mulpd
+define <2 x double> @slm-costs_mulpd(<2 x double> %a, <2 x double> %b) {
+entry:
+; SLM: cost of 4 {{.*}} fmul <2 x double>
+ %res = fmul <2 x double> %a, %b
+ ret <2 x double> %res
+}
+
+; mulps
+define <4 x float> @slm-costs_mulps(<4 x float> %a, <4 x float> %b) {
+entry:
+; SLM: cost of 2 {{.*}} fmul <4 x float>
+ %res = fmul <4 x float> %a, %b
+ ret <4 x float> %res
+}
+
+; divss
+define float @slm-costs_divss(float %a, float %b) {
+entry:
+; SLM: cost of 17 {{.*}} fdiv float
+ %res = fdiv float %a, %b
+ ret float %res
+}
+
+; divps
+define <4 x float> @slm-costs_divps(<4 x float> %a, <4 x float> %b) {
+entry:
+; SLM: cost of 39 {{.*}} fdiv <4 x float>
+ %res = fdiv <4 x float> %a, %b
+ ret <4 x float> %res
+}
+
+; divsd
+define double @slm-costs_divsd(double %a, double %b) {
+entry:
+; SLM: cost of 32 {{.*}} fdiv double
+ %res = fdiv double %a, %b
+ ret double %res
+}
+
+; divpd
+define <2 x double> @slm-costs_divpd(<2 x double> %a, <2 x double> %b) {
+entry:
+; SLM: cost of 69 {{.*}} fdiv <2 x double>
+ %res = fdiv <2 x double> %a, %b
+ ret <2 x double> %res
+}
+
+; addpd
+define <2 x double> @slm-costs_addpd(<2 x double> %a, <2 x double> %b) {
+entry:
+; SLM: cost of 2 {{.*}} fadd <2 x double>
+ %res = fadd <2 x double> %a, %b
+ ret <2 x double> %res
+}
+
+; subpd
+define <2 x double> @slm-costs_subpd(<2 x double> %a, <2 x double> %b) {
+entry:
+; SLM: cost of 2 {{.*}} fsub <2 x double>
+ %res = fsub <2 x double> %a, %b
+ ret <2 x double> %res
+}
+
Added: llvm/trunk/test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll?rev=291657&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll Wed Jan 11 02:23:37 2017
@@ -0,0 +1,144 @@
+; RUN: opt < %s -S -debug -loop-vectorize -mcpu=slm 2>&1 | FileCheck %s --check-prefix=SLM
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+; mul_i8: accumulating loop over i8 loads; each check line gives the expected VF 4 cost of the i32 mul named in the note above it.
+define i8 @mul_i8(i8* %dataA, i8* %dataB, i32 %N) {
+entry:
+ %cmp12 = icmp eq i32 %N, 0
+ br i1 %cmp12, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ %wide.trip.count = zext i32 %N to i64
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %for.body
+ %phitmp = trunc i32 %add4 to i8
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ %acc.0.lcssa = phi i8 [ 0, %entry ], [ %phitmp, %for.cond.cleanup.loopexit ]
+ ret i8 %acc.0.lcssa
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %acc.013 = phi i32 [ %add4, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i8, i8* %dataA, i64 %indvars.iv
+ %0 = load i8, i8* %arrayidx, align 1
+ %conv = sext i8 %0 to i32
+ %arrayidx2 = getelementptr inbounds i8, i8* %dataB, i64 %indvars.iv
+ %1 = load i8, i8* %arrayidx2, align 1
+ %conv3 = sext i8 %1 to i32
+; %mul: both operands are sext from i8.
+; Costed as a pmullw + sext sequence.
+; SLM: cost of 3 for VF 4 {{.*}} mul nsw i32
+ %mul = mul nsw i32 %conv3, %conv
+; %mul2: operands are zext and sext from i8.
+; Costed as a pmulhw + pmullw + pshuf sequence.
+; SLM: cost of 5 for VF 4 {{.*}} mul nsw i32
+ %conv4 = zext i8 %1 to i32
+ %mul2 = mul nsw i32 %conv4, %conv
+ %sum0 = add i32 %mul, %mul2
+; %mul3: both operands are zext from i8.
+; Costed as a pmullw + zext sequence.
+; SLM: cost of 3 for VF 4 {{.*}} mul nsw i32
+ %conv5 = zext i8 %0 to i32
+ %mul3 = mul nsw i32 %conv5, %conv4
+ %sum1 = add i32 %sum0, %mul3
+; %mul4: operands are sext from i8 and the constant -120.
+; Costed as a pmullw + sext sequence.
+; SLM: cost of 3 for VF 4 {{.*}} mul nsw i32
+ %mul4 = mul nsw i32 -120, %conv3
+ %sum2 = add i32 %sum1, %mul4
+; %mul5: operands are sext from i8 and the constant 250.
+; Costed as a pmulhw + pmullw + pshuf sequence.
+; SLM: cost of 5 for VF 4 {{.*}} mul nsw i32
+ %mul5 = mul nsw i32 250, %conv3
+ %sum3 = add i32 %sum2, %mul5
+; %mul6: operands are zext from i8 and the constant -120.
+; Costed as a pmulhw + pmullw + pshuf sequence.
+; SLM: cost of 5 for VF 4 {{.*}} mul nsw i32
+ %mul6 = mul nsw i32 -120, %conv4
+ %sum4 = add i32 %sum3, %mul6
+; %mul7: operands are zext from i8 and the constant 250.
+; Costed as a pmullw + zext sequence.
+; SLM: cost of 3 for VF 4 {{.*}} mul nsw i32
+ %mul7 = mul nsw i32 250, %conv4
+ %sum5 = add i32 %sum4, %mul7
+ %add = add i32 %acc.013, 5
+ %add4 = add i32 %add, %sum5
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+ br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+; mul_i16: accumulating loop over i16 loads; each check line gives the expected VF 4 cost of the i32 mul named in the note above it.
+define i16 @mul_i16(i16* %dataA, i16* %dataB, i32 %N) {
+entry:
+ %cmp12 = icmp eq i32 %N, 0
+ br i1 %cmp12, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ %wide.trip.count = zext i32 %N to i64
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %for.body
+ %phitmp = trunc i32 %add4 to i16
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ %acc.0.lcssa = phi i16 [ 0, %entry ], [ %phitmp, %for.cond.cleanup.loopexit ]
+ ret i16 %acc.0.lcssa
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %acc.013 = phi i32 [ %add4, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i16, i16* %dataA, i64 %indvars.iv
+ %0 = load i16, i16* %arrayidx, align 1
+ %conv = sext i16 %0 to i32
+ %arrayidx2 = getelementptr inbounds i16, i16* %dataB, i64 %indvars.iv
+ %1 = load i16, i16* %arrayidx2, align 1
+ %conv3 = sext i16 %1 to i32
+; %mul: both operands are sext from i16.
+; Costed as a pmulhw + pmullw + pshuf sequence.
+; SLM: cost of 5 for VF 4 {{.*}} mul nsw i32
+ %mul = mul nsw i32 %conv3, %conv
+; %mul2: operands are zext and sext from i16.
+; Costed as pmulld.
+; SLM: cost of 11 for VF 4 {{.*}} mul nsw i32
+ %conv4 = zext i16 %1 to i32
+ %mul2 = mul nsw i32 %conv4, %conv
+ %sum0 = add i32 %mul, %mul2
+; %mul3: both operands are zext from i16.
+; Costed as a pmulhw + pmullw + zext sequence.
+; SLM: cost of 5 for VF 4 {{.*}} mul nsw i32
+ %conv5 = zext i16 %0 to i32
+ %mul3 = mul nsw i32 %conv5, %conv4
+ %sum1 = add i32 %sum0, %mul3
+; %mul4: operands are sext from i16 and the constant -32000.
+; Costed as a pmulhw + pmullw + sext sequence.
+; SLM: cost of 5 for VF 4 {{.*}} mul nsw i32
+ %mul4 = mul nsw i32 -32000, %conv3
+ %sum2 = add i32 %sum1, %mul4
+; %mul5: operands are sext from i16 and the constant 64000.
+; Costed as pmulld.
+; SLM: cost of 11 for VF 4 {{.*}} mul nsw i32
+ %mul5 = mul nsw i32 64000, %conv3
+ %sum3 = add i32 %sum2, %mul5
+; %mul6: operands are zext from i16 and the constant -32000.
+; Costed as pmulld.
+; SLM: cost of 11 for VF 4 {{.*}} mul nsw i32
+ %mul6 = mul nsw i32 -32000, %conv4
+ %sum4 = add i32 %sum3, %mul6
+; %mul7: operands are zext from i16 and the constant 64000 (fits in 16 unsigned bits).
+; Costed as a pmulhw + pmullw + zext sequence.
+; SLM: cost of 5 for VF 4 {{.*}} mul nsw i32
+ %mul7 = mul nsw i32 64000, %conv4
+ %sum5 = add i32 %sum4, %mul7
+ %add = add i32 %acc.013, 5
+ %add4 = add i32 %add, %sum5
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+ br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+
More information about the llvm-commits
mailing list