[llvm] 8aee783 - [VP] Cost model for VPMemory operations on PowerPC.

Bardia Mahjour via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 7 11:20:19 PST 2021


Author: Bardia Mahjour
Date: 2021-12-07T14:19:09-05:00
New Revision: 8aee78336691d1009cf0a62b806b31c2dc7c2cf3

URL: https://github.com/llvm/llvm-project/commit/8aee78336691d1009cf0a62b806b31c2dc7c2cf3
DIFF: https://github.com/llvm/llvm-project/commit/8aee78336691d1009cf0a62b806b31c2dc7c2cf3.diff

LOG: [VP] Cost model for VPMemory operations on PowerPC.

PPC Implementation of getVPMemoryOpCost and hasActiveVectorLength.

Reviewed By: Roland Froese

Differential Revision: https://reviews.llvm.org/D109417

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
    llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 5d6f58a77a398..aab6591d5f187 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1388,3 +1388,86 @@ bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
 
   return false;
 }
+
+bool PPCTTIImpl::hasActiveVectorLength(unsigned Opcode, Type *DataType,
+                                       Align Alignment) const {
+  // Only load and stores instructions can have variable vector length on Power.
+  if (Opcode != Instruction::Load && Opcode != Instruction::Store)
+    return false;
+  // Loads/stores with length instructions use bits 0-7 of the GPR operand and
+  // therefore cannot be used in 32-bit mode.
+  if ((!ST->hasP9Vector() && !ST->hasP10Vector()) || !ST->isPPC64())
+    return false;
+  if (auto *VecTy = dyn_cast<FixedVectorType>(DataType)) {
+    unsigned VecWidth = DataType->getPrimitiveSizeInBits();
+    return VecWidth == 128;
+  }
+  Type *ScalarTy = DataType->getScalarType();
+
+  if (ScalarTy->isPointerTy())
+    return true;
+
+  if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
+    return true;
+
+  if (!ScalarTy->isIntegerTy())
+    return false;
+
+  unsigned IntWidth = ScalarTy->getIntegerBitWidth();
+  return IntWidth == 8 || IntWidth == 16 || IntWidth == 32 || IntWidth == 64;
+}
+
+InstructionCost PPCTTIImpl::getVPMemoryOpCost(unsigned Opcode, Type *Src,
+                                              Align Alignment,
+                                              unsigned AddressSpace,
+                                              TTI::TargetCostKind CostKind,
+                                              const Instruction *I) {
+  InstructionCost Cost = BaseT::getVPMemoryOpCost(Opcode, Src, Alignment,
+                                                  AddressSpace, CostKind, I);
+  if (TLI->getValueType(DL, Src, true) == MVT::Other)
+    return Cost;
+  // TODO: Handle other cost kinds.
+  if (CostKind != TTI::TCK_RecipThroughput)
+    return Cost;
+
+  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
+         "Invalid Opcode");
+
+  auto *SrcVTy = dyn_cast<FixedVectorType>(Src);
+  assert(SrcVTy && "Expected a vector type for VP memory operations");
+
+  if (hasActiveVectorLength(Opcode, Src, Alignment)) {
+    std::pair<InstructionCost, MVT> LT =
+        TLI->getTypeLegalizationCost(DL, SrcVTy);
+
+    InstructionCost CostFactor =
+        vectorCostAdjustmentFactor(Opcode, Src, nullptr);
+    if (!CostFactor.isValid())
+      return InstructionCost::getMax();
+
+    InstructionCost Cost = LT.first * CostFactor;
+    assert(Cost.isValid() && "Expected valid cost");
+
+    // On P9 but not on P10, if the op is misaligned then it will cause a
+    // pipeline flush. Otherwise the VSX masked memops cost the same as unmasked
+    // ones.
+    const Align DesiredAlignment(16);
+    if (Alignment >= DesiredAlignment || ST->getCPUDirective() != PPC::DIR_PWR9)
+      return Cost;
+
+    // Since alignment may be under estimated, we try to compute the probability
+    // that the actual address is aligned to the desired boundary. For example
+    // an 8-byte aligned load is assumed to be actually 16-byte aligned half the
+    // time, while a 4-byte aligned load has a 25% chance of being 16-byte
+    // aligned.
+    float AlignmentProb = ((float)Alignment.value()) / DesiredAlignment.value();
+    float MisalignmentProb = 1.0 - AlignmentProb;
+    return (MisalignmentProb * P9PipelineFlushEstimate) +
+           (AlignmentProb * *Cost.getValue());
+  }
+
+  // Usually we should not get to this point, but the following is an attempt to
+  // model the cost of legalization. Currently we can only lower intrinsics with
+  // evl but no mask, on Power 9/10. Otherwise, we must scalarize.
+  return getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
+}

diff  --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 7aeb0c59d5036..ad926705a330a 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -137,6 +137,17 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
   bool areFunctionArgsABICompatible(const Function *Caller,
                                     const Function *Callee,
                                     SmallPtrSetImpl<Argument *> &Args) const;
+  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
+                             Align Alignment) const;
+  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
+                                    unsigned AddressSpace,
+                                    TTI::TargetCostKind CostKind,
+                                    const Instruction *I = nullptr);
+
+private:
+  // The following constant is used for estimating costs on power9.
+  static const InstructionCost::CostType P9PipelineFlushEstimate = 80;
+
   /// @}
 };
 


        


More information about the llvm-commits mailing list