[llvm] [TTI][CostModel] Add cost modeling for expandload and compressstore intrinsics (PR #122882)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 14 02:16:53 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-analysis
Author: Sergey Kachkov (skachkov-sc)
<details>
<summary>Changes</summary>
This patch adds methods for estimating the cost of the llvm.masked.expandload/llvm.masked.compressstore intrinsics in TTI. If the backend doesn't support custom lowering of these intrinsics, they will be processed by ScalarizeMaskedMemIntrin, so we estimate their cost via getCommonMaskedMemoryOpCost as a gather/scatter operation; for the RISC-V backend, this patch implements a custom hook to calculate the cost based on the current lowering scheme.
This can be useful in the future for implementing support for these intrinsics in VPlan (I've found that there were some intentions to do this in the past, for example: https://github.com/llvm/llvm-project/pull/83467)
---
Patch is 518.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/122882.diff
13 Files Affected:
- (modified) llvm/include/llvm/Analysis/TargetTransformInfo.h (+24)
- (modified) llvm/include/llvm/Analysis/TargetTransformInfoImpl.h (+7)
- (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+23)
- (modified) llvm/lib/Analysis/TargetTransformInfo.cpp (+9)
- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+37)
- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h (+5)
- (modified) llvm/test/Analysis/CostModel/RISCV/gep.ll (+4-4)
- (modified) llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll (+174)
- (modified) llvm/test/Analysis/CostModel/X86/masked-intrinsic-codesize.ll (+288-288)
- (modified) llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll (+288-288)
- (modified) llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll (+288-288)
- (modified) llvm/test/Analysis/CostModel/X86/masked-intrinsic-latency.ll (+288-288)
- (modified) llvm/test/Analysis/CostModel/X86/masked-intrinsic-sizelatency.ll (+288-288)
``````````diff
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 752313ab15858c..7254b6d18cdf27 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1492,6 +1492,19 @@ class TargetTransformInfo {
Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr) const;
+ /// \return The cost of Expand Load or Compress Store operation
+ /// \p Opcode - is a type of memory access Load or Store
+ /// \p Src - a vector type of the data to be loaded or stored
+ /// \p VariableMask - true when the memory access is predicated with a mask
+ /// that is not a compile-time constant
+ /// \p Alignment - alignment of single element
+ /// \p I - the optional original context instruction, if one exists, e.g. the
+ /// load/store to transform or the call to the gather/scatter intrinsic
+ InstructionCost getConsecutiveMemoryOpCost(
+ unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ const Instruction *I = nullptr) const;
+
/// \return The cost of strided memory operations.
/// \p Opcode - is a type of memory access Load or Store
/// \p DataTy - a vector type of the data to be loaded or stored
@@ -2178,6 +2191,10 @@ class TargetTransformInfo::Concept {
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) = 0;
virtual InstructionCost
+ getConsecutiveMemoryOpCost(unsigned Opcode, Type *DataTy, bool VariableMask,
+ Align Alignment, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr) = 0;
+ virtual InstructionCost
getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
bool VariableMask, Align Alignment,
TTI::TargetCostKind CostKind,
@@ -2898,6 +2915,13 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
Alignment, CostKind, I);
}
InstructionCost
+ getConsecutiveMemoryOpCost(unsigned Opcode, Type *DataTy, bool VariableMask,
+ Align Alignment, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr) override {
+ return Impl.getConsecutiveMemoryOpCost(Opcode, DataTy, VariableMask,
+ Alignment, CostKind, I);
+ }
+ InstructionCost
getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
bool VariableMask, Align Alignment,
TTI::TargetCostKind CostKind,
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 9c74b2a0c31df1..38c5a5d66c3d16 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -765,6 +765,13 @@ class TargetTransformInfoImplBase {
return 1;
}
+ InstructionCost
+ getConsecutiveMemoryOpCost(unsigned Opcode, Type *DataTy, bool VariableMask,
+ Align Alignment, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr) const {
+ return 1;
+ }
+
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
const Value *Ptr, bool VariableMask,
Align Alignment,
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index c9f142d64ae9e4..b6a70dacc11ebb 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1469,6 +1469,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
true, CostKind);
}
+ InstructionCost getConsecutiveMemoryOpCost(unsigned Opcode, Type *DataTy,
+ bool VariableMask, Align Alignment,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr) {
+ return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, VariableMask,
+ true, CostKind);
+ }
+
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
const Value *Ptr, bool VariableMask,
Align Alignment,
@@ -1777,6 +1785,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
VarMask, Alignment, CostKind, I);
}
+ case Intrinsic::masked_compressstore: {
+ const Value *Data = Args[0];
+ const Value *Mask = Args[2];
+ Align Alignment = I->getParamAlign(1).valueOrOne();
+ return thisT()->getConsecutiveMemoryOpCost(
+ Instruction::Store, Data->getType(), !isa<Constant>(Mask), Alignment,
+ CostKind, I);
+ }
+ case Intrinsic::masked_expandload: {
+ const Value *Mask = Args[1];
+ Align Alignment = I->getParamAlign(0).valueOrOne();
+ return thisT()->getConsecutiveMemoryOpCost(Instruction::Load, RetTy,
+ !isa<Constant>(Mask),
+ Alignment, CostKind, I);
+ }
case Intrinsic::experimental_vp_strided_store: {
const Value *Data = Args[0];
const Value *Ptr = Args[1];
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index b32dffa9f0fe86..dc298ce469d96e 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1135,6 +1135,15 @@ InstructionCost TargetTransformInfo::getGatherScatterOpCost(
return Cost;
}
+InstructionCost TargetTransformInfo::getConsecutiveMemoryOpCost(
+ unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
+ TTI::TargetCostKind CostKind, const Instruction *I) const {
+ InstructionCost Cost = TTIImpl->getConsecutiveMemoryOpCost(
+ Opcode, DataTy, VariableMask, Alignment, CostKind, I);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
InstructionCost TargetTransformInfo::getStridedMemoryOpCost(
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index add82dc80c4290..5e1893d632f8d5 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -838,6 +838,43 @@ InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
return NumLoads * MemOpCost;
}
+InstructionCost RISCVTTIImpl::getConsecutiveMemoryOpCost(
+ unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
+ TTI::TargetCostKind CostKind, const Instruction *I) {
+ bool IsLegal = (Opcode == Instruction::Store &&
+ isLegalMaskedCompressStore(DataTy, Alignment)) ||
+ (Opcode == Instruction::Load &&
+ isLegalMaskedExpandLoad(DataTy, Alignment));
+ if (!IsLegal || CostKind != TTI::TCK_RecipThroughput)
+ return BaseT::getConsecutiveMemoryOpCost(Opcode, DataTy, VariableMask,
+ Alignment, CostKind, I);
+ // Example compressstore sequence:
+ // vsetivli zero, 8, e32, m2, ta, ma (ignored)
+ // vcompress.vm v10, v8, v0
+ // vcpop.m a1, v0
+ // vsetvli zero, a1, e32, m2, ta, ma
+ // vse32.v v10, (a0)
+ // Example expandload sequence:
+ // vsetivli zero, 8, e8, mf2, ta, ma (ignored)
+ // vcpop.m a1, v0
+ // vsetvli zero, a1, e32, m2, ta, ma
+ // vle32.v v10, (a0)
+ // vsetivli zero, 8, e32, m2, ta, ma
+ // viota.m v12, v0
+ // vrgather.vv v8, v10, v12, v0.t
+ auto MemOpCost = getMemoryOpCost(Opcode, DataTy, Alignment, 0, CostKind);
+ auto LT = getTypeLegalizationCost(DataTy);
+ SmallVector<unsigned, 4> Opcodes{RISCV::VSETVLI};
+ if (VariableMask)
+ Opcodes.push_back(RISCV::VCPOP_M);
+ if (Opcode == Instruction::Store)
+ Opcodes.append({RISCV::VCOMPRESS_VM});
+ else
+ Opcodes.append({RISCV::VSETIVLI, RISCV::VIOTA_M, RISCV::VRGATHER_VV});
+ return MemOpCost +
+ LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
+}
+
InstructionCost RISCVTTIImpl::getStridedMemoryOpCost(
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 9b364391f0fa47..4d110521424e4d 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -166,6 +166,11 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
TTI::TargetCostKind CostKind,
const Instruction *I);
+ InstructionCost getConsecutiveMemoryOpCost(unsigned Opcode, Type *Src,
+ bool VariableMask, Align Alignment,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
+
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
const Value *Ptr, bool VariableMask,
Align Alignment,
diff --git a/llvm/test/Analysis/CostModel/RISCV/gep.ll b/llvm/test/Analysis/CostModel/RISCV/gep.ll
index 44c0b5bb71070e..99c74c4635fc70 100644
--- a/llvm/test/Analysis/CostModel/RISCV/gep.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/gep.ll
@@ -268,7 +268,7 @@ define void @non_foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = getelementptr i8, <2 x ptr> %base.vec, <2 x i32> <i32 42, i32 43>
; RVI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %x3 = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %3, i32 1, <2 x i1> undef, <2 x i8> undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = getelementptr i8, ptr %base, i32 42
-; RVI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x4 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %4, <2 x i1> undef, <2 x i8> undef)
+; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x4 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %4, <2 x i1> undef, <2 x i8> undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = getelementptr i8, ptr %base, i32 42
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %x5 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %5, <2 x i1> undef, i32 undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = getelementptr i8, ptr %base, i32 42
@@ -280,7 +280,7 @@ define void @non_foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = getelementptr i8, <2 x ptr> %base.vec, <2 x i32> <i32 42, i32 43>
; RVI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> undef, <2 x ptr> %9, i32 1, <2 x i1> undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = getelementptr i8, ptr %base, i32 42
-; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> undef, ptr %10, <2 x i1> undef)
+; RVI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> undef, ptr %10, <2 x i1> undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = getelementptr i8, ptr %base, i32 42
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr %11, <2 x i1> undef, i32 undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = getelementptr i8, ptr %base, i32 42
@@ -338,7 +338,7 @@ define void @foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %3 = getelementptr i8, <2 x ptr> %base.vec, <2 x i32> zeroinitializer
; RVI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %x3 = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %3, i32 1, <2 x i1> undef, <2 x i8> undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %4 = getelementptr i8, ptr %base, i32 0
-; RVI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x4 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %4, <2 x i1> undef, <2 x i8> undef)
+; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x4 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %4, <2 x i1> undef, <2 x i8> undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %5 = getelementptr i8, ptr %base, i32 0
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %x5 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %5, <2 x i1> undef, i32 undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %6 = getelementptr i8, ptr %base, i32 0
@@ -350,7 +350,7 @@ define void @foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %9 = getelementptr i8, <2 x ptr> %base.vec, <2 x i32> zeroinitializer
; RVI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> undef, <2 x ptr> %9, i32 1, <2 x i1> undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %10 = getelementptr i8, ptr %base, i32 0
-; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> undef, ptr %10, <2 x i1> undef)
+; RVI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> undef, ptr %10, <2 x i1> undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %11 = getelementptr i8, ptr %base, i32 0
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr %11, <2 x i1> undef, i32 undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %12 = getelementptr i8, ptr %base, i32 0
diff --git a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
index 5126a6a0a3cbcd..b18d6a067fca0f 100644
--- a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
@@ -1453,6 +1453,146 @@ define void @store() {
ret void
}
+define void @expand_load() {
+; CHECK-LABEL: 'expand_load'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %t1 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr undef, <2 x i1> undef, <2 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %t2 = call <4 x i8> @llvm.masked.expandload.v4i8(ptr undef, <4 x i1> undef, <4 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %t3 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %t4 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %t5 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr align 8 undef, <2 x i1> undef, <2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %t6 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr align 8 undef, <4 x i1> undef, <4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %t7 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr align 8 undef, <8 x i1> undef, <8 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %t8 = call <16 x i64> @llvm.masked.expandload.v16i64(ptr align 8 undef, <16 x i1> undef, <16 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %t9 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %t10 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %t11 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %t12 = call <16 x i64> @llvm.masked.expandload.v16i64(ptr undef, <16 x i1> undef, <16 x i64> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %t13 = call <vscale x 2 x i8> @llvm.masked.expandload.nxv2i8(ptr undef, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %t14 = call <vscale x 4 x i8> @llvm.masked.expandload.nxv4i8(ptr undef, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %t15 = call <vscale x 8 x i8> @llvm.masked.expandload.nxv8i8(ptr undef, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %t16 = call <vscale x 16 x i8> @llvm.masked.expandload.nxv16i8(ptr undef, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %t17 = call <vscale x 2 x i64> @llvm.masked.expandload.nxv2i64(ptr align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %t18 = call <vscale x 4 x i64> @llvm.masked.expandload.nxv4i64(ptr align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %t19 = call <vscale x 8 x i64> @llvm.masked.expandload.nxv8i64(ptr align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %t20 = call <vscale x 16 x i64> @llvm.masked.expandload.nxv16i64(ptr align 8 undef, <vscale x 16 x i1> undef, <vscale x 16 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPEBASED-LABEL: 'expand_load'
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %t1 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr undef, <2 x i1> undef, <2 x i8> undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %t2 = call <4 x i8> @llvm.masked.expandload.v4i8(ptr undef, <4 x i1> undef, <4 x i8> undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %t3 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %t4 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %t5 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr align 8 undef, <2 x i1> undef, <2 x i64> undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %t6 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr align 8 undef, <4 x i1> undef, <4 x i64> undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %t7 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr align 8 undef, <8 x i1> undef, <8 x i64> undef)
+; TYPEBASED-NEXT: Cost Model...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/122882
More information about the llvm-commits
mailing list