[llvm] [PowerPC] cost modeling for length type VP intrinsic load/store (PR #168938)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 3 11:54:23 PST 2025
https://github.com/RolandF77 updated https://github.com/llvm/llvm-project/pull/168938
>From 2fa5e1ed9a81bd86681a3284eb14d56d0d8c7c93 Mon Sep 17 00:00:00 2001
From: Roland Froese <froese at ca.ibm.com>
Date: Thu, 20 Nov 2025 19:40:16 +0000
Subject: [PATCH 1/5] cost modeling for EVL vp.load/vp.store
---
.../Target/PowerPC/PPCTargetTransformInfo.cpp | 63 +++++++++++++++++++
.../Target/PowerPC/PPCTargetTransformInfo.h | 12 ++++
.../CostModel/PowerPC/ld-st-with-length.ll | 19 ++++++
3 files changed, 94 insertions(+)
create mode 100644 llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index fbed34277dbab..f17a7a87313c6 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -24,6 +24,10 @@ using namespace llvm;
#define DEBUG_TYPE "ppctti"
+static cl::opt<bool> PPCEVL("ppc-evl",
+ cl::desc("Allow EVL type vp.load/vp.store"),
+ cl::init(false), cl::Hidden);
+
static cl::opt<bool> Pwr9EVL("ppc-pwr9-evl",
cl::desc("Allow vp.load and vp.store for pwr9"),
cl::init(false), cl::Hidden);
@@ -1078,3 +1082,62 @@ PPCTTIImpl::getVPLegalizationStrategy(const VPIntrinsic &PI) const {
return VPLegalization(VPLegalization::Legal, VPLegalization::Legal);
}
+
+bool PPCTTIImpl::hasActiveVectorLength() const {
+ unsigned CPU = ST->getCPUDirective();
+ if (!PPCEVL)
+ return false;
+ if (CPU == PPC::DIR_PWR10 || CPU == PPC::DIR_PWR_FUTURE ||
+ (Pwr9EVL && CPU == PPC::DIR_PWR9))
+ return true;
+ return false;
+}
+
+static inline bool isLegalLoadWithLengthType(EVT VT) {
+ if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
+ VT != MVT::i8)
+ return false;
+ return true;
+}
+
+bool PPCTTIImpl::isLegalMaskedLoad(Type *DataType, Align Alignment,
+ unsigned AddressSpace) const {
+ if (!hasActiveVectorLength())
+ return false;
+ if (!isLegalLoadWithLengthType(TLI->getValueType(DL, DataType, true)))
+ return false;
+ return true;
+}
+
+bool PPCTTIImpl::isLegalMaskedStore(Type *DataType, Align Alignment,
+ unsigned AddressSpace) const {
+ return isLegalMaskedLoad(DataType, Alignment, AddressSpace);
+}
+
+InstructionCost
+PPCTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy,
+ Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind) const {
+ InstructionCost BaseCost =
+ BaseT::getMaskedMemoryOpCost(Opcode, DataTy, Alignment, AddressSpace,
+ CostKind);
+
+ if (Opcode != Instruction::Load && Opcode != Instruction::Store)
+ return BaseCost;
+ auto VecTy = dyn_cast<FixedVectorType>(DataTy);
+ if (!VecTy)
+ return BaseCost;
+ if (!isLegalMaskedLoad(VecTy->getScalarType(), Alignment, AddressSpace))
+ return BaseCost;
+ if (VecTy->getPrimitiveSizeInBits() > 128)
+ return BaseCost;
+
+ // Is scalar compare + select + maybe shift + vector load
+ InstructionCost Adj = vectorCostAdjustmentFactor(Opcode, DataTy, nullptr);
+ InstructionCost Cost = 2 + Adj;
+ if (ST->getCPUDirective() != PPC::DIR_PWR_FUTURE ||
+ VecTy->getScalarSizeInBits() != 8)
+ Cost += 1; // need shift
+ return Cost;
+}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index f80ebdbce7f64..f7fd5da94b1cb 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -153,6 +153,18 @@ class PPCTTIImpl final : public BasicTTIImplBase<PPCTTIImpl> {
TargetTransformInfo::VPLegalization
getVPLegalizationStrategy(const VPIntrinsic &PI) const override;
+ bool hasActiveVectorLength() const override;
+
+ bool isLegalMaskedStore(Type *DataType, Align Alignment,
+ unsigned AddressSpace) const override;
+ bool isLegalMaskedLoad(Type *DataType, Align Alignment,
+ unsigned AddressSpace) const override;
+
+ InstructionCost
+ getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind) const override;
+
private:
// The following constant is used for estimating costs on power9.
static const InstructionCost::CostType P9PipelineFlushEstimate = 80;
diff --git a/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll b/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll
new file mode 100644
index 0000000000000..df5b5b68677a4
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -mcpu=pwr9 -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=P9
+; RUN: opt < %s -mcpu=pwr10 -ppc-evl -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=P10
+; RUN: opt < %s -mcpu=future -ppc-evl -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=FUTURE
+target datalayout = "e-m:e-Fn32-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+define void @bar(ptr %base, <2 x ptr> %base.vec) {
+; P9: cost of 16 for {{.*}} @llvm.masked.load.v2i8.p0
+; P10: cost of 4 for {{.*}} @llvm.masked.load.v2i8.p0
+; FUTURE: cost of 3 for {{.*}} @llvm.masked.load.v2i8.p0
+; P9: cost of 12 for {{.*}} @llvm.masked.store.v2i8.p0
+; P10: cost of 4 for {{.*}} @llvm.masked.store.v2i8.p0
+; FUTURE: cost of 3 for {{.*}} @llvm.masked.store.v2i8.p0
+ %x2 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr %base, i32 1, <2 x i1> undef, <2 x i8> undef)
+
+ call void @llvm.masked.store.v2i8.p0(<2 x i8> undef, ptr %base, i32 1, <2 x i1> undef)
+
+ ret void
+}
>From 5d98a9b96ffcc808319ccc546d5b33a64a2f7eda Mon Sep 17 00:00:00 2001
From: Roland Froese <froese at ca.ibm.com>
Date: Fri, 21 Nov 2025 19:44:19 +0000
Subject: [PATCH 2/5] fix checks
---
.../Target/PowerPC/PPCTargetTransformInfo.cpp | 22 +++++++++----------
.../Target/PowerPC/PPCTargetTransformInfo.h | 3 +--
.../CostModel/PowerPC/ld-st-with-length.ll | 6 ++---
3 files changed, 15 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index f17a7a87313c6..fe4c6b73900d6 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1084,7 +1084,7 @@ PPCTTIImpl::getVPLegalizationStrategy(const VPIntrinsic &PI) const {
}
bool PPCTTIImpl::hasActiveVectorLength() const {
- unsigned CPU = ST->getCPUDirective();
+ unsigned CPU = ST->getCPUDirective();
if (!PPCEVL)
return false;
if (CPU == PPC::DIR_PWR10 || CPU == PPC::DIR_PWR_FUTURE ||
@@ -1094,8 +1094,7 @@ bool PPCTTIImpl::hasActiveVectorLength() const {
}
static inline bool isLegalLoadWithLengthType(EVT VT) {
- if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
- VT != MVT::i8)
+ if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
return false;
return true;
}
@@ -1115,16 +1114,17 @@ bool PPCTTIImpl::isLegalMaskedStore(Type *DataType, Align Alignment,
}
InstructionCost
-PPCTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy,
- Align Alignment,
- unsigned AddressSpace,
+PPCTTIImpl::getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
TTI::TargetCostKind CostKind) const {
- InstructionCost BaseCost =
- BaseT::getMaskedMemoryOpCost(Opcode, DataTy, Alignment, AddressSpace,
- CostKind);
+ Type *DataTy = MICA.getDataType();
+ Align Alignment = MICA.getAlignment();
+ unsigned Opcode = MICA.getID() == Intrinsic::masked_load
+ ? Instruction::Load
+ : Instruction::Store;
+ unsigned AddressSpace = MICA.getAddressSpace();
+
+ InstructionCost BaseCost = BaseT::getMaskedMemoryOpCost(MICA, CostKind);
- if (Opcode != Instruction::Load && Opcode != Instruction::Store)
- return BaseCost;
auto VecTy = dyn_cast<FixedVectorType>(DataTy);
if (!VecTy)
return BaseCost;
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index f7fd5da94b1cb..ac665f99a5ddc 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -161,8 +161,7 @@ class PPCTTIImpl final : public BasicTTIImplBase<PPCTTIImpl> {
unsigned AddressSpace) const override;
InstructionCost
- getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment,
- unsigned AddressSpace,
+ getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
TTI::TargetCostKind CostKind) const override;
private:
diff --git a/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll b/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll
index df5b5b68677a4..fe0b035e062e0 100644
--- a/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll
+++ b/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll
@@ -4,16 +4,16 @@
target datalayout = "e-m:e-Fn32-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512"
target triple = "powerpc64le-unknown-linux-gnu"
-define void @bar(ptr %base, <2 x ptr> %base.vec) {
+define void @bar(ptr %base, <2 x i8> %val) {
; P9: cost of 16 for {{.*}} @llvm.masked.load.v2i8.p0
; P10: cost of 4 for {{.*}} @llvm.masked.load.v2i8.p0
; FUTURE: cost of 3 for {{.*}} @llvm.masked.load.v2i8.p0
; P9: cost of 12 for {{.*}} @llvm.masked.store.v2i8.p0
; P10: cost of 4 for {{.*}} @llvm.masked.store.v2i8.p0
; FUTURE: cost of 3 for {{.*}} @llvm.masked.store.v2i8.p0
- %x2 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr %base, i32 1, <2 x i1> undef, <2 x i8> undef)
+ %x2 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr %base, i32 1, <2 x i1> <i1 1, i1 1>, <2 x i8> %val)
- call void @llvm.masked.store.v2i8.p0(<2 x i8> undef, ptr %base, i32 1, <2 x i1> undef)
+ call void @llvm.masked.store.v2i8.p0(<2 x i8> %x2, ptr %base, i32 1, <2 x i1> <i1 1, i1 1>)
ret void
}
>From 5c7d0b1e5a6917d80f8e3b3a06853e8969d3c82d Mon Sep 17 00:00:00 2001
From: Roland Froese <froese at ca.ibm.com>
Date: Fri, 21 Nov 2025 20:49:30 +0000
Subject: [PATCH 3/5] formatting
---
llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index fe4c6b73900d6..2c547eb68437e 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1118,9 +1118,8 @@ PPCTTIImpl::getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
TTI::TargetCostKind CostKind) const {
Type *DataTy = MICA.getDataType();
Align Alignment = MICA.getAlignment();
- unsigned Opcode = MICA.getID() == Intrinsic::masked_load
- ? Instruction::Load
- : Instruction::Store;
+ unsigned Opcode = MICA.getID() == Intrinsic::masked_load ? Instruction::Load
+ : Instruction::Store;
unsigned AddressSpace = MICA.getAddressSpace();
InstructionCost BaseCost = BaseT::getMaskedMemoryOpCost(MICA, CostKind);
>From 000794a7f1ca084cfc4660a663e824a34b9a1bc7 Mon Sep 17 00:00:00 2001
From: Roland Froese <froese at ca.ibm.com>
Date: Fri, 28 Nov 2025 19:29:06 +0000
Subject: [PATCH 4/5] address comments
---
.../Target/PowerPC/PPCTargetTransformInfo.cpp | 34 +++++++++++--------
1 file changed, 19 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 2c547eb68437e..11ac63ebe0302 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1093,19 +1093,16 @@ bool PPCTTIImpl::hasActiveVectorLength() const {
return false;
}
-static inline bool isLegalLoadWithLengthType(EVT VT) {
- if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
- return false;
- return true;
-}
-
bool PPCTTIImpl::isLegalMaskedLoad(Type *DataType, Align Alignment,
unsigned AddressSpace) const {
if (!hasActiveVectorLength())
return false;
- if (!isLegalLoadWithLengthType(TLI->getValueType(DL, DataType, true)))
- return false;
- return true;
+ auto IsLegalLoadWithLengthType = [](EVT VT) {
+ if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
+ return false;
+ return true;
+ };
+ return IsLegalLoadWithLengthType(TLI->getValueType(DL, DataType, true));
}
bool PPCTTIImpl::isLegalMaskedStore(Type *DataType, Align Alignment,
@@ -1127,16 +1124,23 @@ PPCTTIImpl::getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
auto VecTy = dyn_cast<FixedVectorType>(DataTy);
if (!VecTy)
return BaseCost;
- if (!isLegalMaskedLoad(VecTy->getScalarType(), Alignment, AddressSpace))
- return BaseCost;
+ if (Opcode == Instruction::Load) {
+ if (!isLegalMaskedLoad(VecTy->getScalarType(), Alignment, AddressSpace))
+ return BaseCost;
+ } else {
+ if (!isLegalMaskedStore(VecTy->getScalarType(), Alignment, AddressSpace))
+ return BaseCost;
+ }
if (VecTy->getPrimitiveSizeInBits() > 128)
return BaseCost;
- // Is scalar compare + select + maybe shift + vector load
- InstructionCost Adj = vectorCostAdjustmentFactor(Opcode, DataTy, nullptr);
- InstructionCost Cost = 2 + Adj;
+ // Cost is 1 (scalar compare) + 1 (scalar select) +
+ // 1 * vectorCostAdjustmentFactor (vector load with length)
+ // Maybe + 1 (scalar shift)
+ InstructionCost Cost = 1 + 1 +
+ vectorCostAdjustmentFactor(Opcode, DataTy, nullptr);
if (ST->getCPUDirective() != PPC::DIR_PWR_FUTURE ||
VecTy->getScalarSizeInBits() != 8)
- Cost += 1; // need shift
+ Cost += 1; // need shift for length
return Cost;
}
>From 290a878ac14c670e2ea1b637fa99cecc540ad113 Mon Sep 17 00:00:00 2001
From: Roland Froese <froese at ca.ibm.com>
Date: Wed, 3 Dec 2025 19:53:27 +0000
Subject: [PATCH 5/5] update
---
.../Target/PowerPC/PPCTargetTransformInfo.cpp | 29 ++++++++---
.../Target/PowerPC/PPCTargetTransformInfo.h | 12 +++--
.../CostModel/PowerPC/ld-st-with-length.ll | 50 ++++++++++++++++---
3 files changed, 73 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 11ac63ebe0302..44ef8597d39ee 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -905,6 +905,17 @@ InstructionCost PPCTTIImpl::getInterleavedMemoryOpCost(
InstructionCost
PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) const {
+/*
+ if (ICA.getID() == Intrinsic::vp_load) {
+ MemIntrinsicCostAttributes MICA(Intrinsic::masked_load, ICA.getReturnType(), Align(1), 0);
+ return getMaskedMemoryOpCost(MICA, CostKind);
+ }
+ if (ICA.getID() == Intrinsic::vp_store) {
+ Type *Ty = ICA.getArgTypes()[0];
+ MemIntrinsicCostAttributes MICA(Intrinsic::masked_store, ICA.getArgTypes()[0], Align(1), 0);
+ return getMaskedMemoryOpCost(MICA, CostKind);
+ }
+*/
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}
@@ -1087,26 +1098,28 @@ bool PPCTTIImpl::hasActiveVectorLength() const {
unsigned CPU = ST->getCPUDirective();
if (!PPCEVL)
return false;
- if (CPU == PPC::DIR_PWR10 || CPU == PPC::DIR_PWR_FUTURE ||
- (Pwr9EVL && CPU == PPC::DIR_PWR9))
- return true;
- return false;
+ return CPU == PPC::DIR_PWR10 || CPU == PPC::DIR_PWR_FUTURE ||
+ (Pwr9EVL && CPU == PPC::DIR_PWR9);
}
bool PPCTTIImpl::isLegalMaskedLoad(Type *DataType, Align Alignment,
- unsigned AddressSpace) const {
+ unsigned AddressSpace,
+ TTI::MaskKind MaskKind) const {
if (!hasActiveVectorLength())
return false;
+
auto IsLegalLoadWithLengthType = [](EVT VT) {
if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
return false;
return true;
};
+
return IsLegalLoadWithLengthType(TLI->getValueType(DL, DataType, true));
}
bool PPCTTIImpl::isLegalMaskedStore(Type *DataType, Align Alignment,
- unsigned AddressSpace) const {
+ unsigned AddressSpace,
+ TTI::MaskKind MaskKind) const {
return isLegalMaskedLoad(DataType, Alignment, AddressSpace);
}
@@ -1137,8 +1150,8 @@ PPCTTIImpl::getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
// Cost is 1 (scalar compare) + 1 (scalar select) +
// 1 * vectorCostAdjustmentFactor (vector load with length)
// Maybe + 1 (scalar shift)
- InstructionCost Cost = 1 + 1 +
- vectorCostAdjustmentFactor(Opcode, DataTy, nullptr);
+ InstructionCost Cost =
+ 1 + 1 + vectorCostAdjustmentFactor(Opcode, DataTy, nullptr);
if (ST->getCPUDirective() != PPC::DIR_PWR_FUTURE ||
VecTy->getScalarSizeInBits() != 8)
Cost += 1; // need shift for length
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index ac665f99a5ddc..3aec5841190b3 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -155,10 +155,14 @@ class PPCTTIImpl final : public BasicTTIImplBase<PPCTTIImpl> {
bool hasActiveVectorLength() const override;
- bool isLegalMaskedStore(Type *DataType, Align Alignment,
- unsigned AddressSpace) const override;
- bool isLegalMaskedLoad(Type *DataType, Align Alignment,
- unsigned AddressSpace) const override;
+ bool
+ isLegalMaskedStore(Type *DataType, Align Alignment, unsigned AddressSpace,
+ TTI::MaskKind MaskKind =
+ TTI::MaskKind::VariableOrConstantMask) const override;
+ bool
+ isLegalMaskedLoad(Type *DataType, Align Alignment, unsigned AddressSpace,
+ TTI::MaskKind MaskKind =
+ TTI::MaskKind::VariableOrConstantMask) const override;
InstructionCost
getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
diff --git a/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll b/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll
index fe0b035e062e0..58631e8af22c0 100644
--- a/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll
+++ b/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll
@@ -6,14 +6,52 @@ target triple = "powerpc64le-unknown-linux-gnu"
define void @bar(ptr %base, <2 x i8> %val) {
; P9: cost of 16 for {{.*}} @llvm.masked.load.v2i8.p0
-; P10: cost of 4 for {{.*}} @llvm.masked.load.v2i8.p0
-; FUTURE: cost of 3 for {{.*}} @llvm.masked.load.v2i8.p0
; P9: cost of 12 for {{.*}} @llvm.masked.store.v2i8.p0
+; P9: cost of 16 for {{.*}} @llvm.masked.load.v2i16.p0
+; P9: cost of 12 for {{.*}} @llvm.masked.store.v2i16.p0
+; P9: cost of 16 for {{.*}} @llvm.masked.load.v2i32.p0
+; P9: cost of 12 for {{.*}} @llvm.masked.store.v2i32.p0
+; P9: cost of 12 for {{.*}} @llvm.masked.load.v2i64.p0
+; P9: cost of 10 for {{.*}} @llvm.masked.store.v2i64.p0
+; P9: cost of 36 for {{.*}} @llvm.masked.load.v3i64.p0
+; P9: cost of 15 for {{.*}} @llvm.masked.store.v3i64.p0
+; P9: cost of 32 for {{.*}} @llvm.masked.load.v4i15.p0
+; P9: cost of 24 for {{.*}} @llvm.masked.store.v4i15.p0
+; P10: cost of 4 for {{.*}} @llvm.masked.load.v2i8.p0
; P10: cost of 4 for {{.*}} @llvm.masked.store.v2i8.p0
+; P10: cost of 4 for {{.*}} @llvm.masked.load.v2i16.p0
+; P10: cost of 4 for {{.*}} @llvm.masked.store.v2i16.p0
+; P10: cost of 4 for {{.*}} @llvm.masked.load.v2i32.p0
+; P10: cost of 4 for {{.*}} @llvm.masked.store.v2i32.p0
+; P10: cost of 4 for {{.*}} @llvm.masked.load.v2i64.p0
+; P10: cost of 4 for {{.*}} @llvm.masked.store.v2i64.p0
+; P10: cost of 24 for {{.*}} @llvm.masked.load.v3i64.p0
+; P10: cost of 12 for {{.*}} @llvm.masked.store.v3i64.p0
+; P10: cost of 16 for {{.*}} @llvm.masked.load.v4i15.p0
+; P10: cost of 16 for {{.*}} @llvm.masked.store.v4i15.p0
+; FUTURE: cost of 3 for {{.*}} @llvm.masked.load.v2i8.p0
; FUTURE: cost of 3 for {{.*}} @llvm.masked.store.v2i8.p0
- %x2 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr %base, i32 1, <2 x i1> <i1 1, i1 1>, <2 x i8> %val)
-
- call void @llvm.masked.store.v2i8.p0(<2 x i8> %x2, ptr %base, i32 1, <2 x i1> <i1 1, i1 1>)
-
+; FUTURE: cost of 4 for {{.*}} @llvm.masked.load.v2i16.p0
+; FUTURE: cost of 4 for {{.*}} @llvm.masked.store.v2i16.p0
+; FUTURE: cost of 4 for {{.*}} @llvm.masked.load.v2i32.p0
+; FUTURE: cost of 4 for {{.*}} @llvm.masked.store.v2i32.p0
+; FUTURE: cost of 4 for {{.*}} @llvm.masked.load.v2i64.p0
+; FUTURE: cost of 4 for {{.*}} @llvm.masked.store.v2i64.p0
+; FUTURE: cost of 24 for {{.*}} @llvm.masked.load.v3i64.p0
+; FUTURE: cost of 12 for {{.*}} @llvm.masked.store.v3i64.p0
+; FUTURE: cost of 16 for {{.*}} @llvm.masked.load.v4i15.p0
+; FUTURE: cost of 16 for {{.*}} @llvm.masked.store.v4i15.p0
+ %x1 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr %base, i32 1, <2 x i1> <i1 1, i1 1>, <2 x i8> %val)
+ call void @llvm.masked.store.v2i8.p0(<2 x i8> %x1, ptr %base, i32 1, <2 x i1> <i1 1, i1 1>)
+ %x2 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr %base, i32 1, <2 x i1> <i1 1, i1 1>, <2 x i16> <i16 0, i16 0>)
+ call void @llvm.masked.store.v2i16.p0(<2 x i16> %x2, ptr %base, i32 1, <2 x i1> <i1 1, i1 1>)
+ %x3 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %base, i32 1, <2 x i1> <i1 1, i1 1>, <2 x i32> <i32 0, i32 0>)
+ call void @llvm.masked.store.v2i32.p0(<2 x i32> %x3, ptr %base, i32 1, <2 x i1> <i1 1, i1 1>)
+ %x4 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr %base, i32 1, <2 x i1> <i1 1, i1 1>, <2 x i64> <i64 0, i64 0>)
+ call void @llvm.masked.store.v2i64.p0(<2 x i64> %x4, ptr %base, i32 1, <2 x i1> <i1 1, i1 1>)
+ %x5 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr %base, i32 1, <3 x i1> <i1 1, i1 1, i1 1>, <3 x i64> <i64 0, i64 0, i64 0>)
+ call void @llvm.masked.store.v3i64.p0(<3 x i64> %x5, ptr %base, i32 1, <3 x i1> <i1 1, i1 1, i1 1>)
+ %x6 = call <4 x i15> @llvm.masked.load.v4i15.p0(ptr %base, i32 1, <4 x i1> <i1 1, i1 1, i1 1, i1 1>, <4 x i15> <i15 0, i15 0, i15 0, i15 0>)
+ call void @llvm.masked.store.v4i15.p0(<4 x i15> %x6, ptr %base, i32 1, <4 x i1> <i1 1, i1 1, i1 1, i1 1>)
ret void
}
More information about the llvm-commits
mailing list