[PATCH] D109417: Cost model for VPMemory operations on PowerPC.

Bardia Mahjour via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 26 13:19:48 PST 2021


bmahjour updated this revision to Diff 390113.
bmahjour added a comment.

Made the following changes:

- Made `hasActiveVectorLength()` return false for anything other than load/stores.
- Only halve the cost of pipeline flush for 8-byte aligned accesses. For smaller alignments we consider the full cost.
- Replaced the calculation of the scalarization cost with `getMaskedMemoryOpCost()`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109417/new/

https://reviews.llvm.org/D109417

Files:
  llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
  llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h


Index: llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -137,6 +137,17 @@
   bool areFunctionArgsABICompatible(const Function *Caller,
                                     const Function *Callee,
                                     SmallPtrSetImpl<Argument *> &Args) const;
+  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
+                             Align Alignment) const;
+  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
+                                    unsigned AddressSpace,
+                                    TTI::TargetCostKind CostKind,
+                                    const Instruction *I = nullptr);
+
+private:
+  // Estimated cost of a pipeline flush on Power9, used by getVPMemoryOpCost().
+  static const InstructionCost::CostType P9PipelineFlushEstimate = 80;
+
   /// @}
 };
 
Index: llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1337,3 +1337,71 @@
 
   return false;
 }
+
+/// Returns true if a VP load/store of \p DataType can be lowered using the
+/// load/store-with-length instructions (P9/P10 vector, 64-bit mode only).
+bool PPCTTIImpl::hasActiveVectorLength(unsigned Opcode, Type *DataType,
+                                       Align Alignment) const {
+  // Only load and store instructions can have variable vector length on Power.
+  if (Opcode != Instruction::Load && Opcode != Instruction::Store)
+    return false;
+  // Loads/stores with length instructions use bits 0-7 of the GPR operand and
+  // therefore cannot be used in 32-bit mode.
+  if ((!ST->hasP9Vector() && !ST->hasP10Vector()) || !ST->isPPC64())
+    return false;
+  // Fixed-width vectors qualify only when they are exactly 128 bits wide.
+  if (isa<FixedVectorType>(DataType)) {
+    unsigned VecWidth = DataType->getPrimitiveSizeInBits();
+    return VecWidth == 128;
+  }
+
+  Type *ScalarTy = DataType->getScalarType();
+  if (ScalarTy->isPointerTy() || ScalarTy->isFloatTy() ||
+      ScalarTy->isDoubleTy())
+    return true;
+  if (!ScalarTy->isIntegerTy())
+    return false;
+  unsigned IntWidth = ScalarTy->getIntegerBitWidth();
+  return IntWidth == 8 || IntWidth == 16 || IntWidth == 32 || IntWidth == 64;
+}
+
+/// Returns the cost of a VP load/store of \p Src. The BaseT estimate is used
+/// unless \p Src has a value type and \p CostKind is TCK_RecipThroughput.
+InstructionCost PPCTTIImpl::getVPMemoryOpCost(unsigned Opcode, Type *Src,
+                                              Align Alignment,
+                                              unsigned AddressSpace,
+                                              TTI::TargetCostKind CostKind,
+                                              const Instruction *I) {
+  InstructionCost BaseCost = BaseT::getVPMemoryOpCost(
+      Opcode, Src, Alignment, AddressSpace, CostKind, I);
+  // TODO: Handle other cost kinds.
+  if (TLI->getValueType(DL, Src, true) == MVT::Other ||
+      CostKind != TTI::TCK_RecipThroughput)
+    return BaseCost;
+
+  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
+         "Invalid Opcode");
+  auto *SrcVTy = dyn_cast<FixedVectorType>(Src);
+  assert(SrcVTy && "Expected a vector type for VP memory operations");
+
+  // Usually this fallback is not reached; it attempts to model the cost of
+  // legalization. Currently we can only lower intrinsics with evl but no mask,
+  // on Power 9/10. Otherwise, we must scalarize.
+  if (!hasActiveVectorLength(Opcode, Src, Alignment))
+    return getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+                                 CostKind);
+
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, SrcVTy);
+  InstructionCost VPCost = vectorCostAdjustment(LT.first, Opcode, Src, nullptr);
+
+  // A misaligned op causes a pipeline flush on P9 (but not on P10); otherwise
+  // the VSX masked memops cost the same as unmasked ones.
+  if (Alignment >= 16 || ST->getCPUDirective() != PPC::DIR_PWR9)
+    return VPCost;
+
+  // 8-byte aligned data is assumed to actually be 16-byte aligned half the
+  // time, so only half of the flush estimate is charged in that case.
+  if (Alignment == 8)
+    return P9PipelineFlushEstimate / 2;
+  return P9PipelineFlushEstimate;
+}


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D109417.390113.patch
Type: text/x-patch
Size: 4269 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20211126/bb01f033/attachment.bin>


More information about the llvm-commits mailing list