[llvm] [PowerPC] cost modeling for length type VP intrinsic load/store (PR #168938)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 20 11:51:56 PST 2025
https://github.com/RolandF77 created https://github.com/llvm/llvm-project/pull/168938
Override and fill in the PowerPC target hooks that allow opt to cost length-style (EVL) VP intrinsics for load/store.
>From 2fa5e1ed9a81bd86681a3284eb14d56d0d8c7c93 Mon Sep 17 00:00:00 2001
From: Roland Froese <froese at ca.ibm.com>
Date: Thu, 20 Nov 2025 19:40:16 +0000
Subject: [PATCH] cost modeling for EVL vp.load/vp.store
---
.../Target/PowerPC/PPCTargetTransformInfo.cpp | 63 +++++++++++++++++++
.../Target/PowerPC/PPCTargetTransformInfo.h | 12 ++++
.../CostModel/PowerPC/ld-st-with-length.ll | 19 ++++++
3 files changed, 94 insertions(+)
create mode 100644 llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index fbed34277dbab..f17a7a87313c6 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -24,6 +24,10 @@ using namespace llvm;
#define DEBUG_TYPE "ppctti"
+static cl::opt<bool> PPCEVL("ppc-evl", // gates hasActiveVectorLength()
+ cl::desc("Allow EVL type vp.load/vp.store"),
+ cl::init(false), cl::Hidden); // defaults to false
+
static cl::opt<bool> Pwr9EVL("ppc-pwr9-evl",
cl::desc("Allow vp.load and vp.store for pwr9"),
cl::init(false), cl::Hidden);
@@ -1078,3 +1082,62 @@ PPCTTIImpl::getVPLegalizationStrategy(const VPIntrinsic &PI) const {
return VPLegalization(VPLegalization::Legal, VPLegalization::Legal);
}
+
+bool PPCTTIImpl::hasActiveVectorLength() const { // opt-in, CPU-dependent
+ unsigned CPU = ST->getCPUDirective();
+ if (!PPCEVL) // -ppc-evl not set: never report EVL support
+ return false;
+ if (CPU == PPC::DIR_PWR10 || CPU == PPC::DIR_PWR_FUTURE ||
+ (Pwr9EVL && CPU == PPC::DIR_PWR9)) // pwr9 also needs -ppc-pwr9-evl
+ return true;
+ return false; // every other CPU directive
+}
+
+static inline bool isLegalLoadWithLengthType(EVT VT) { // i8/i16/i32/i64 only
+ if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
+ VT != MVT::i8)
+ return false; // VT is none of the four accepted integer types
+ return true;
+}
+
+bool PPCTTIImpl::isLegalMaskedLoad(Type *DataType, Align Alignment,
+ unsigned AddressSpace) const { // Alignment/AddressSpace are not read
+ if (!hasActiveVectorLength())
+ return false; // EVL not enabled for this option/CPU combination
+ if (!isLegalLoadWithLengthType(TLI->getValueType(DL, DataType, true)))
+ return false; // value type is not i8/i16/i32/i64
+ return true; // NOTE(review): getMaskedMemoryOpCost passes the element type
+}
+
+bool PPCTTIImpl::isLegalMaskedStore(Type *DataType, Align Alignment,
+ unsigned AddressSpace) const {
+ return isLegalMaskedLoad(DataType, Alignment, AddressSpace); // same rule
+}
+
+InstructionCost
+PPCTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy,
+ Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind) const {
+ InstructionCost BaseCost =
+ BaseT::getMaskedMemoryOpCost(Opcode, DataTy, Alignment, AddressSpace,
+ CostKind); // returned for every case rejected below
+
+ if (Opcode != Instruction::Load && Opcode != Instruction::Store)
+ return BaseCost;
+ auto VecTy = dyn_cast<FixedVectorType>(DataTy);
+ if (!VecTy)
+ return BaseCost; // not a fixed-width vector type
+ if (!isLegalMaskedLoad(VecTy->getScalarType(), Alignment, AddressSpace))
+ return BaseCost; // EVL disabled or element type not accepted
+ if (VecTy->getPrimitiveSizeInBits() > 128)
+ return BaseCost; // wider than 128 bits
+
+ // Is scalar compare + select + maybe shift + vector load/store
+ InstructionCost Adj = vectorCostAdjustmentFactor(Opcode, DataTy, nullptr);
+ InstructionCost Cost = 2 + Adj;
+ if (ST->getCPUDirective() != PPC::DIR_PWR_FUTURE ||
+ VecTy->getScalarSizeInBits() != 8)
+ Cost += 1; // need shift (all but 8-bit elements on DIR_PWR_FUTURE)
+ return Cost;
+}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index f80ebdbce7f64..f7fd5da94b1cb 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -153,6 +153,18 @@ class PPCTTIImpl final : public BasicTTIImplBase<PPCTTIImpl> {
TargetTransformInfo::VPLegalization
getVPLegalizationStrategy(const VPIntrinsic &PI) const override;
+ bool hasActiveVectorLength() const override; // gated on -ppc-evl and CPU
+
+ bool isLegalMaskedStore(Type *DataType, Align Alignment,
+ unsigned AddressSpace) const override; // defers to the load check
+ bool isLegalMaskedLoad(Type *DataType, Align Alignment,
+ unsigned AddressSpace) const override; // i8/i16/i32/i64 only
+
+ InstructionCost
+ getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind) const override;
+
private:
// The following constant is used for estimating costs on power9.
static const InstructionCost::CostType P9PipelineFlushEstimate = 80;
diff --git a/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll b/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll
new file mode 100644
index 0000000000000..df5b5b68677a4
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -mcpu=pwr9 -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=P9
+; RUN: opt < %s -mcpu=pwr10 -ppc-evl -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=P10
+; RUN: opt < %s -mcpu=future -ppc-evl -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=FUTURE
+target datalayout = "e-m:e-Fn32-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+define void @bar(ptr %base, <2 x ptr> %base.vec) { ; only the two call costs are checked
+; P9: cost of 16 for {{.*}} @llvm.masked.load.v2i8.p0
+; P10: cost of 4 for {{.*}} @llvm.masked.load.v2i8.p0
+; FUTURE: cost of 3 for {{.*}} @llvm.masked.load.v2i8.p0
+; P9: cost of 12 for {{.*}} @llvm.masked.store.v2i8.p0
+; P10: cost of 4 for {{.*}} @llvm.masked.store.v2i8.p0
+; FUTURE: cost of 3 for {{.*}} @llvm.masked.store.v2i8.p0
+ %x2 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr %base, i32 1, <2 x i1> poison, <2 x i8> poison)
+
+ call void @llvm.masked.store.v2i8.p0(<2 x i8> poison, ptr %base, i32 1, <2 x i1> poison)
+
+ ret void
+}
More information about the llvm-commits
mailing list