[llvm] [PowerPC] cost modeling for length type VP intrinsic load/store (PR #168938)

via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 20 11:51:56 PST 2025


https://github.com/RolandF77 created https://github.com/llvm/llvm-project/pull/168938

Override and implement the PPC target hooks that allow opt to cost loads and stores using length-style (EVL) VP intrinsics.

>From 2fa5e1ed9a81bd86681a3284eb14d56d0d8c7c93 Mon Sep 17 00:00:00 2001
From: Roland Froese <froese at ca.ibm.com>
Date: Thu, 20 Nov 2025 19:40:16 +0000
Subject: [PATCH] cost modeling for EVL vp.load/vp.store

---
 .../Target/PowerPC/PPCTargetTransformInfo.cpp | 63 +++++++++++++++++++
 .../Target/PowerPC/PPCTargetTransformInfo.h   | 12 ++++
 .../CostModel/PowerPC/ld-st-with-length.ll    | 19 ++++++
 3 files changed, 94 insertions(+)
 create mode 100644 llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll

diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index fbed34277dbab..f17a7a87313c6 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -24,6 +24,10 @@ using namespace llvm;
 
 #define DEBUG_TYPE "ppctti"
 
+static cl::opt<bool> PPCEVL("ppc-evl",
+                            cl::desc("Allow EVL type vp.load/vp.store"),
+                            cl::init(false), cl::Hidden);
+
 static cl::opt<bool> Pwr9EVL("ppc-pwr9-evl",
                              cl::desc("Allow vp.load and vp.store for pwr9"),
                              cl::init(false), cl::Hidden);
@@ -1078,3 +1082,62 @@ PPCTTIImpl::getVPLegalizationStrategy(const VPIntrinsic &PI) const {
 
   return VPLegalization(VPLegalization::Legal, VPLegalization::Legal);
 }
+
+// Reports active vector length (EVL) support: requires -ppc-evl, and a pwr10
+// or future CPU, or pwr9 when -ppc-pwr9-evl is also given.
+bool PPCTTIImpl::hasActiveVectorLength() const {
+  if (!PPCEVL)
+    return false;
+  unsigned CPU = ST->getCPUDirective();
+  return CPU == PPC::DIR_PWR10 || CPU == PPC::DIR_PWR_FUTURE ||
+         (Pwr9EVL && CPU == PPC::DIR_PWR9);
+}
+
+// Element types accepted by the length-type load/store modeling in this
+// file: the scalar integers i8, i16, i32 and i64. Any other EVT (including
+// floating point) is rejected.
+static inline bool isLegalLoadWithLengthType(EVT VT) {
+  return VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8;
+}
+
+// Masked loads are legal only when hasActiveVectorLength() holds and DataType
+// maps to i8/i16/i32/i64; Alignment and AddressSpace are not consulted.
+bool PPCTTIImpl::isLegalMaskedLoad(Type *DataType, Align Alignment,
+                                   unsigned AddressSpace) const {
+  return hasActiveVectorLength() &&
+         isLegalLoadWithLengthType(TLI->getValueType(DL, DataType, true));
+}
+
+// Masked stores are legal under exactly the same conditions as masked loads.
+bool PPCTTIImpl::isLegalMaskedStore(Type *DataType, Align Alignment,
+                                    unsigned AddressSpace) const {
+  return isLegalMaskedLoad(DataType, Alignment, AddressSpace);
+}
+
+// Cost of a masked load/store. Loads and stores of fixed vectors that are at
+// most 128 bits wide and whose element type passes isLegalMaskedLoad get a
+// PPC-specific estimate; every other case keeps the generic BaseT cost.
+InstructionCost
+PPCTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy,
+                                  Align Alignment, unsigned AddressSpace,
+                                  TTI::TargetCostKind CostKind) const {
+  InstructionCost GenericCost = BaseT::getMaskedMemoryOpCost(
+      Opcode, DataTy, Alignment, AddressSpace, CostKind);
+
+  // Anything that is not a plain load or store is left to the generic model.
+  if (Opcode != Instruction::Load && Opcode != Instruction::Store)
+    return GenericCost;
+  auto *VecTy = dyn_cast<FixedVectorType>(DataTy);
+  if (!VecTy ||
+      !isLegalMaskedLoad(VecTy->getScalarType(), Alignment, AddressSpace) ||
+      VecTy->getPrimitiveSizeInBits() > 128)
+    return GenericCost;
+
+  // Modeled as scalar compare + select + vector load, plus one more for a
+  // shift that is needed unless the CPU is "future" and the elements are
+  // 8 bits wide.
+  InstructionCost Adj = vectorCostAdjustmentFactor(Opcode, DataTy, nullptr);
+  bool NeedsShift = ST->getCPUDirective() != PPC::DIR_PWR_FUTURE ||
+                    VecTy->getScalarSizeInBits() != 8;
+  return Adj + (NeedsShift ? 3 : 2);
+}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index f80ebdbce7f64..f7fd5da94b1cb 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -153,6 +153,18 @@ class PPCTTIImpl final : public BasicTTIImplBase<PPCTTIImpl> {
   TargetTransformInfo::VPLegalization
   getVPLegalizationStrategy(const VPIntrinsic &PI) const override;
 
+  bool hasActiveVectorLength() const override;
+
+  bool isLegalMaskedStore(Type *DataType, Align Alignment,
+                          unsigned AddressSpace) const override;
+  bool isLegalMaskedLoad(Type *DataType, Align Alignment,
+                         unsigned AddressSpace) const override;
+
+  InstructionCost
+  getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment,
+                        unsigned AddressSpace,
+                        TTI::TargetCostKind CostKind) const override;
+
 private:
   // The following constant is used for estimating costs on power9.
   static const InstructionCost::CostType P9PipelineFlushEstimate = 80;
diff --git a/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll b/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll
new file mode 100644
index 0000000000000..df5b5b68677a4
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -mcpu=pwr9 -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=P9
+; RUN: opt < %s -mcpu=pwr10 -ppc-evl -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=P10
+; RUN: opt < %s -mcpu=future -ppc-evl -passes="print<cost-model>" 2>&1 -disable-output  | FileCheck %s --check-prefix=FUTURE
+target datalayout = "e-m:e-Fn32-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+define void @bar(ptr %base, <2 x ptr> %base.vec) {
+; P9: cost of 16 for {{.*}} @llvm.masked.load.v2i8.p0
+; P10: cost of 4 for {{.*}} @llvm.masked.load.v2i8.p0
+; FUTURE: cost of 3 for {{.*}} @llvm.masked.load.v2i8.p0
+; P9: cost of 12 for {{.*}} @llvm.masked.store.v2i8.p0
+; P10: cost of 4 for {{.*}} @llvm.masked.store.v2i8.p0
+; FUTURE: cost of 3 for {{.*}} @llvm.masked.store.v2i8.p0
+  %x2 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr %base, i32 1, <2 x i1> undef, <2 x i8> undef)
+
+  call void @llvm.masked.store.v2i8.p0(<2 x i8> undef, ptr %base, i32 1, <2 x i1> undef)
+
+  ret void
+}



More information about the llvm-commits mailing list