[llvm] 8aee783 - [VP] Cost model for VPMemory operations on PowerPC.
Bardia Mahjour via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 7 11:20:19 PST 2021
Author: Bardia Mahjour
Date: 2021-12-07T14:19:09-05:00
New Revision: 8aee78336691d1009cf0a62b806b31c2dc7c2cf3
URL: https://github.com/llvm/llvm-project/commit/8aee78336691d1009cf0a62b806b31c2dc7c2cf3
DIFF: https://github.com/llvm/llvm-project/commit/8aee78336691d1009cf0a62b806b31c2dc7c2cf3.diff
LOG: [VP] Cost model for VPMemory operations on PowerPC.
PPC Implementation of getVPMemoryOpCost and hasActiveVectorLength.
Reviewed By: Roland Froese
Differential Revision: https://reviews.llvm.org/D109417
Added:
Modified:
llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 5d6f58a77a398..aab6591d5f187 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1388,3 +1388,86 @@ bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
return false;
}
+
+bool PPCTTIImpl::hasActiveVectorLength(unsigned Opcode, Type *DataType,
+ Align Alignment) const {
+ // Only load and stores instructions can have variable vector length on Power.
+ if (Opcode != Instruction::Load && Opcode != Instruction::Store)
+ return false;
+ // Loads/stores with length instructions use bits 0-7 of the GPR operand and
+ // therefore cannot be used in 32-bit mode.
+ if ((!ST->hasP9Vector() && !ST->hasP10Vector()) || !ST->isPPC64())
+ return false;
+ if (auto *VecTy = dyn_cast<FixedVectorType>(DataType)) {
+ unsigned VecWidth = DataType->getPrimitiveSizeInBits();
+ return VecWidth == 128;
+ }
+ Type *ScalarTy = DataType->getScalarType();
+
+ if (ScalarTy->isPointerTy())
+ return true;
+
+ if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
+ return true;
+
+ if (!ScalarTy->isIntegerTy())
+ return false;
+
+ unsigned IntWidth = ScalarTy->getIntegerBitWidth();
+ return IntWidth == 8 || IntWidth == 16 || IntWidth == 32 || IntWidth == 64;
+}
+
+InstructionCost PPCTTIImpl::getVPMemoryOpCost(unsigned Opcode, Type *Src,
+ Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) {
+ InstructionCost Cost = BaseT::getVPMemoryOpCost(Opcode, Src, Alignment,
+ AddressSpace, CostKind, I);
+ if (TLI->getValueType(DL, Src, true) == MVT::Other)
+ return Cost;
+ // TODO: Handle other cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return Cost;
+
+ assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
+ "Invalid Opcode");
+
+ auto *SrcVTy = dyn_cast<FixedVectorType>(Src);
+ assert(SrcVTy && "Expected a vector type for VP memory operations");
+
+ if (hasActiveVectorLength(Opcode, Src, Alignment)) {
+ std::pair<InstructionCost, MVT> LT =
+ TLI->getTypeLegalizationCost(DL, SrcVTy);
+
+ InstructionCost CostFactor =
+ vectorCostAdjustmentFactor(Opcode, Src, nullptr);
+ if (!CostFactor.isValid())
+ return InstructionCost::getMax();
+
+ InstructionCost Cost = LT.first * CostFactor;
+ assert(Cost.isValid() && "Expected valid cost");
+
+ // On P9 but not on P10, if the op is misaligned then it will cause a
+ // pipeline flush. Otherwise the VSX masked memops cost the same as unmasked
+ // ones.
+ const Align DesiredAlignment(16);
+ if (Alignment >= DesiredAlignment || ST->getCPUDirective() != PPC::DIR_PWR9)
+ return Cost;
+
+ // Since alignment may be under estimated, we try to compute the probability
+ // that the actual address is aligned to the desired boundary. For example
+ // an 8-byte aligned load is assumed to be actually 16-byte aligned half the
+ // time, while a 4-byte aligned load has a 25% chance of being 16-byte
+ // aligned.
+ float AlignmentProb = ((float)Alignment.value()) / DesiredAlignment.value();
+ float MisalignmentProb = 1.0 - AlignmentProb;
+ return (MisalignmentProb * P9PipelineFlushEstimate) +
+ (AlignmentProb * *Cost.getValue());
+ }
+
+ // Usually we should not get to this point, but the following is an attempt to
+ // model the cost of legalization. Currently we can only lower intrinsics with
+ // evl but no mask, on Power 9/10. Otherwise, we must scalarize.
+ return getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
+}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 7aeb0c59d5036..ad926705a330a 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -137,6 +137,17 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
bool areFunctionArgsABICompatible(const Function *Caller,
const Function *Callee,
SmallPtrSetImpl<Argument *> &Args) const;
+ bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
+ Align Alignment) const;
+ InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
+
+private:
+ // The following constant is used for estimating costs on power9.
+ static const InstructionCost::CostType P9PipelineFlushEstimate = 80;
+
/// @}
};
More information about the llvm-commits
mailing list