[llvm] [TTI][RISCV] Add cost modelling for intrinsic vp.load.ff (PR #160470)
Shih-Po Hung via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 25 00:47:23 PDT 2025
https://github.com/arcbbb updated https://github.com/llvm/llvm-project/pull/160470
>From 0a3596d5ed617ebafadc8ff019f95ac14626ee66 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Wed, 24 Sep 2025 00:20:52 -0700
Subject: [PATCH 1/7] [TTI] Add cost modelling for intrinsic vp.load.ff
Split out from #151300 to isolate TargetTransformInfo cost modelling for
fault-only-first loads from VPlan implementation details.
This change adds costing support for vp.load.ff independently of the
VPlan work.
---
.../llvm/Analysis/TargetTransformInfo.h | 5 +
.../llvm/Analysis/TargetTransformInfoImpl.h | 6 +
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 9 ++
llvm/lib/Analysis/TargetTransformInfo.cpp | 8 +
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 16 ++
llvm/lib/Target/RISCV/RISCVISelLowering.h | 4 +
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 11 ++
.../Target/RISCV/RISCVTargetTransformInfo.h | 4 +
.../Analysis/CostModel/RISCV/vp-intrinsics.ll | 152 +++++++++++-------
9 files changed, 155 insertions(+), 60 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 41ff54f0781a2..cb5674917b830 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1616,6 +1616,11 @@ class TargetTransformInfo {
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
+ /// \return The cost of the fault-only-first load intrinsic vp.load.ff.
+ LLVM_ABI InstructionCost getFaultOnlyFirstLoadCost(
+ Type *DataTy, Align Alignment,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
+
/// A helper function to determine the type of reduction algorithm used
/// for a given \p Opcode and set of FastMathFlags \p FMF.
static bool requiresOrderedReduction(std::optional<FastMathFlags> FMF) {
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 566e1cf51631a..c762ea1dafa78 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -885,6 +885,12 @@ class TargetTransformInfoImplBase {
return 1;
}
+ virtual InstructionCost
+ getFaultOnlyFirstLoadCost(Type *DataTy, Align Alignment,
+ TTI::TargetCostKind CostKind) const {
+ return InstructionCost::getInvalid();
+ }
+
virtual InstructionCost
getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) const {
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index dce423fc1b18b..fd19ae0b88333 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1795,6 +1795,15 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
}
}
+ if (ICA.getID() == Intrinsic::vp_load_ff) {
+ Type *RetTy = ICA.getReturnType();
+ assert(RetTy->isStructTy() && "expected struct return");
+ Type *DataTy = cast<StructType>(RetTy)->getElementType(0);
+ Align Alignment;
+ if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
+ Alignment = VPI->getPointerAlignment().valueOrOne();
+ return thisT()->getFaultOnlyFirstLoadCost(DataTy, Alignment, CostKind);
+ }
if (ICA.getID() == Intrinsic::vp_scatter) {
if (ICA.isTypeBasedOnly()) {
IntrinsicCostAttributes MaskedScatter(
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 09b50c5270e57..5a0a861a04a63 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1217,6 +1217,14 @@ InstructionCost TargetTransformInfo::getInterleavedMemoryOpCost(
return Cost;
}
+InstructionCost TargetTransformInfo::getFaultOnlyFirstLoadCost(
+ Type *DataTy, Align Alignment, TTI::TargetCostKind CostKind) const {
+ InstructionCost Cost =
+ TTIImpl->getFaultOnlyFirstLoadCost(DataTy, Alignment, CostKind);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
InstructionCost
TargetTransformInfo::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) const {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8070a512ab078..6c6340e65306b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -24788,6 +24788,22 @@ bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
return true;
}
+bool RISCVTargetLowering::isLegalFaultOnlyFirstLoad(EVT DataType,
+ Align Alignment) const {
+ if (!Subtarget.hasVInstructions())
+ return false;
+
+ EVT ScalarType = DataType.getScalarType();
+ if (!isLegalElementTypeForRVV(ScalarType))
+ return false;
+
+ if (!Subtarget.enableUnalignedVectorMem() &&
+ Alignment < ScalarType.getStoreSize())
+ return false;
+
+ return true;
+}
+
MachineInstr *
RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
MachineBasicBlock::instr_iterator &MBBI,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 3f81ed74c12ed..1ddc06602a25e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -425,6 +425,10 @@ class RISCVTargetLowering : public TargetLowering {
/// alignment is legal.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const;
+ /// Return true if a fault-only-first load of the given result type and
+ /// alignment is legal.
+ bool isLegalFaultOnlyFirstLoad(EVT DataType, Align Alignment) const;
+
unsigned getMaxSupportedInterleaveFactor() const override { return 8; }
bool fallBackToDAGISel(const Instruction &Inst) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index a06faa414a2ef..f452040fd9563 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1069,6 +1069,17 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost(
return MemCost + ShuffleCost;
}
+InstructionCost
+RISCVTTIImpl::getFaultOnlyFirstLoadCost(Type *DataTy, Align Alignment,
+ TTI::TargetCostKind CostKind) const {
+ EVT DataTypeVT = TLI->getValueType(DL, DataTy);
+ if (!TLI->isLegalFaultOnlyFirstLoad(DataTypeVT, Alignment))
+ return BaseT::getFaultOnlyFirstLoadCost(DataTy, Alignment, CostKind);
+
+ return getMemoryOpCost(Instruction::Load, DataTy, Alignment, 0, CostKind,
+ {TTI::OK_AnyValue, TTI::OP_None}, nullptr);
+}
+
InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 47e0a250d285a..4059461aaf585 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -188,6 +188,10 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForGaps = false) const override;
+ InstructionCost
+ getFaultOnlyFirstLoadCost(Type *DataTy, Align Alignment,
+ TTI::TargetCostKind CostKind) const override;
+
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
const Value *Ptr, bool VariableMask,
Align Alignment,
diff --git a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
index 71746caf35f2e..a361b9363f8f1 100644
--- a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
@@ -840,70 +840,102 @@ define void @load() {
; CHECK-LABEL: 'load'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = load <2 x i8>, ptr undef, align 2
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = load <4 x i8>, ptr undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.load.v8i8.p0(ptr undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t5 = load <8 x i8>, ptr undef, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.load.v16i8.p0(ptr undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t7 = load <16 x i8>, ptr undef, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t9 = load <2 x i64>, ptr undef, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t10 = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t12 = load <4 x i64>, ptr undef, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t13 = call <8 x i64> @llvm.vp.load.v8i64.p0(ptr undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t14 = load <8 x i64>, ptr undef, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t15 = call <16 x i64> @llvm.vp.load.v16i64.p0(ptr undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t16 = load <16 x i64>, ptr undef, align 128
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t17 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t18 = load <vscale x 2 x i8>, ptr undef, align 2
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t19 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t20 = load <vscale x 4 x i8>, ptr undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t21 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t22 = load <vscale x 8 x i8>, ptr undef, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t23 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t24 = load <vscale x 16 x i8>, ptr undef, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t25 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t26 = load <vscale x 2 x i64>, ptr undef, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t27 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t28 = load <vscale x 4 x i64>, ptr undef, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t29 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t30 = load <vscale x 8 x i64>, ptr undef, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t31 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64.p0(ptr undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t32 = load <vscale x 16 x i64>, ptr undef, align 128
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr align 1 undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = load <4 x i8>, ptr undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t5 = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr align 1 undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <8 x i8> @llvm.vp.load.v8i8.p0(ptr undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t7 = load <8 x i8>, ptr undef, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t8 = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr align 1 undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t9 = call <16 x i8> @llvm.vp.load.v16i8.p0(ptr undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t10 = load <16 x i8>, ptr undef, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t11 = call { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr align 1 undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t12 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t13 = load <2 x i64>, ptr undef, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t14 = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr align 8 undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t15 = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t16 = load <4 x i64>, ptr undef, align 32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t17 = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr align 8 undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t18 = call <8 x i64> @llvm.vp.load.v8i64.p0(ptr undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t19 = load <8 x i64>, ptr undef, align 64
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t20 = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr align 8 undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t21 = call <16 x i64> @llvm.vp.load.v16i64.p0(ptr undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t22 = load <16 x i64>, ptr undef, align 128
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t23 = call { <16 x i64>, i32 } @llvm.vp.load.ff.v16i64.p0(ptr align 8 undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t24 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t25 = load <vscale x 2 x i8>, ptr undef, align 2
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t26 = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr align 1 undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t27 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t28 = load <vscale x 4 x i8>, ptr undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t29 = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr align 1 undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t30 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t31 = load <vscale x 8 x i8>, ptr undef, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t32 = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr align 1 undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t33 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t34 = load <vscale x 16 x i8>, ptr undef, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t35 = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr align 1 undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t36 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t37 = load <vscale x 2 x i64>, ptr undef, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t38 = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr align 8 undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t39 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t40 = load <vscale x 4 x i64>, ptr undef, align 32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t41 = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr align 8 undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t42 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t43 = load <vscale x 8 x i64>, ptr undef, align 64
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t44 = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr align 8 undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t45 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64.p0(ptr undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t46 = load <vscale x 16 x i64>, ptr undef, align 128
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t47 = call { <vscale x 16 x i64>, i32 } @llvm.vp.load.ff.nxv16i64.p0(ptr align 8 undef, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%t0 = call <2 x i8> @llvm.vp.load.v2i8(ptr undef, <2 x i1> undef, i32 undef)
%t1 = load <2 x i8>, ptr undef
- %t2 = call <4 x i8> @llvm.vp.load.v4i8(ptr undef, <4 x i1> undef, i32 undef)
- %t3 = load <4 x i8>, ptr undef
- %t4 = call <8 x i8> @llvm.vp.load.v8i8(ptr undef, <8 x i1> undef, i32 undef)
- %t5 = load <8 x i8>, ptr undef
- %t6 = call <16 x i8> @llvm.vp.load.v16i8(ptr undef, <16 x i1> undef, i32 undef)
- %t7 = load <16 x i8>, ptr undef
- %t8 = call <2 x i64> @llvm.vp.load.v2i64(ptr undef, <2 x i1> undef, i32 undef)
- %t9 = load <2 x i64>, ptr undef
- %t10 = call <4 x i64> @llvm.vp.load.v4i64(ptr undef, <4 x i1> undef, i32 undef)
- %t12 = load <4 x i64>, ptr undef
- %t13 = call <8 x i64> @llvm.vp.load.v8i64(ptr undef, <8 x i1> undef, i32 undef)
- %t14 = load <8 x i64>, ptr undef
- %t15 = call <16 x i64> @llvm.vp.load.v16i64(ptr undef, <16 x i1> undef, i32 undef)
- %t16 = load <16 x i64>, ptr undef
- %t17 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8(ptr undef, <vscale x 2 x i1> undef, i32 undef)
- %t18 = load <vscale x 2 x i8>, ptr undef
- %t19 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8(ptr undef, <vscale x 4 x i1> undef, i32 undef)
- %t20 = load <vscale x 4 x i8>, ptr undef
- %t21 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8(ptr undef, <vscale x 8 x i1> undef, i32 undef)
- %t22 = load <vscale x 8 x i8>, ptr undef
- %t23 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8(ptr undef, <vscale x 16 x i1> undef, i32 undef)
- %t24 = load <vscale x 16 x i8>, ptr undef
- %t25 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64(ptr undef, <vscale x 2 x i1> undef, i32 undef)
- %t26 = load <vscale x 2 x i64>, ptr undef
- %t27 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64(ptr undef, <vscale x 4 x i1> undef, i32 undef)
- %t28 = load <vscale x 4 x i64>, ptr undef
- %t29 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64(ptr undef, <vscale x 8 x i1> undef, i32 undef)
- %t30 = load <vscale x 8 x i64>, ptr undef
- %t31 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64(ptr undef, <vscale x 16 x i1> undef, i32 undef)
- %t32 = load <vscale x 16 x i64>, ptr undef
+ %t2 = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr align 1 undef, <2 x i1> undef, i32 undef)
+ %t3 = call <4 x i8> @llvm.vp.load.v4i8(ptr undef, <4 x i1> undef, i32 undef)
+ %t4 = load <4 x i8>, ptr undef
+ %t5 = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr align 1 undef, <4 x i1> undef, i32 undef)
+ %t6 = call <8 x i8> @llvm.vp.load.v8i8(ptr undef, <8 x i1> undef, i32 undef)
+ %t7 = load <8 x i8>, ptr undef
+ %t8 = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr align 1 undef, <8 x i1> undef, i32 undef)
+ %t9 = call <16 x i8> @llvm.vp.load.v16i8(ptr undef, <16 x i1> undef, i32 undef)
+ %t10 = load <16 x i8>, ptr undef
+ %t11 = call { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr align 1 undef, <16 x i1> undef, i32 undef)
+ %t12 = call <2 x i64> @llvm.vp.load.v2i64(ptr undef, <2 x i1> undef, i32 undef)
+ %t13 = load <2 x i64>, ptr undef
+ %t14 = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr align 8 undef, <2 x i1> undef, i32 undef)
+ %t15 = call <4 x i64> @llvm.vp.load.v4i64(ptr undef, <4 x i1> undef, i32 undef)
+ %t16 = load <4 x i64>, ptr undef
+ %t17 = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr align 8 undef, <4 x i1> undef, i32 undef)
+ %t18 = call <8 x i64> @llvm.vp.load.v8i64(ptr undef, <8 x i1> undef, i32 undef)
+ %t19 = load <8 x i64>, ptr undef
+ %t20 = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr align 8 undef, <8 x i1> undef, i32 undef)
+ %t21 = call <16 x i64> @llvm.vp.load.v16i64(ptr undef, <16 x i1> undef, i32 undef)
+ %t22 = load <16 x i64>, ptr undef
+ %t23 = call { <16 x i64>, i32 } @llvm.vp.load.ff.v16i64.p0(ptr align 8 undef, <16 x i1> undef, i32 undef)
+ %t24 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8(ptr undef, <vscale x 2 x i1> undef, i32 undef)
+ %t25 = load <vscale x 2 x i8>, ptr undef
+ %t26 = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr align 1 undef, <vscale x 2 x i1> undef, i32 undef)
+ %t27 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8(ptr undef, <vscale x 4 x i1> undef, i32 undef)
+ %t28 = load <vscale x 4 x i8>, ptr undef
+ %t29 = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr align 1 undef, <vscale x 4 x i1> undef, i32 undef)
+ %t30 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8(ptr undef, <vscale x 8 x i1> undef, i32 undef)
+ %t31 = load <vscale x 8 x i8>, ptr undef
+ %t32 = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr align 1 undef, <vscale x 8 x i1> undef, i32 undef)
+ %t33 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8(ptr undef, <vscale x 16 x i1> undef, i32 undef)
+ %t34 = load <vscale x 16 x i8>, ptr undef
+ %t35 = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr align 1 undef, <vscale x 16 x i1> undef, i32 undef)
+ %t36 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64(ptr undef, <vscale x 2 x i1> undef, i32 undef)
+ %t37 = load <vscale x 2 x i64>, ptr undef
+ %t38 = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr align 8 undef, <vscale x 2 x i1> undef, i32 undef)
+ %t39 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64(ptr undef, <vscale x 4 x i1> undef, i32 undef)
+ %t40 = load <vscale x 4 x i64>, ptr undef
+ %t41 = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr align 8 undef, <vscale x 4 x i1> undef, i32 undef)
+ %t42 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64(ptr undef, <vscale x 8 x i1> undef, i32 undef)
+ %t43 = load <vscale x 8 x i64>, ptr undef
+ %t44 = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr align 8 undef, <vscale x 8 x i1> undef, i32 undef)
+ %t45 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64(ptr undef, <vscale x 16 x i1> undef, i32 undef)
+ %t46 = load <vscale x 16 x i64>, ptr undef
+ %t47 = call { <vscale x 16 x i64>, i32 } @llvm.vp.load.ff.nxv16i64.p0(ptr align 8 undef, <vscale x 16 x i1> undef, i32 undef)
ret void
}
>From 7ec608460293d4ab89665a7218fada9dd2ab840c Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Wed, 24 Sep 2025 02:33:35 -0700
Subject: [PATCH 2/7] Replace undef with live-in ptr
---
.../Analysis/CostModel/RISCV/vp-intrinsics.ll | 194 +++++++++---------
1 file changed, 97 insertions(+), 97 deletions(-)
diff --git a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
index a361b9363f8f1..bc3533e9c51c5 100644
--- a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
@@ -836,106 +836,106 @@ define void @abs() {
ret void
}
-define void @load() {
+define void @load(ptr %src) {
; CHECK-LABEL: 'load'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = load <2 x i8>, ptr undef, align 2
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr align 1 undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = load <4 x i8>, ptr undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t5 = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr align 1 undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <8 x i8> @llvm.vp.load.v8i8.p0(ptr undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t7 = load <8 x i8>, ptr undef, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t8 = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr align 1 undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t9 = call <16 x i8> @llvm.vp.load.v16i8.p0(ptr undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t10 = load <16 x i8>, ptr undef, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t11 = call { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr align 1 undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t12 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t13 = load <2 x i64>, ptr undef, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t14 = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr align 8 undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t15 = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t16 = load <4 x i64>, ptr undef, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t17 = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr align 8 undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t18 = call <8 x i64> @llvm.vp.load.v8i64.p0(ptr undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t19 = load <8 x i64>, ptr undef, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t20 = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr align 8 undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t21 = call <16 x i64> @llvm.vp.load.v16i64.p0(ptr undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t22 = load <16 x i64>, ptr undef, align 128
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t23 = call { <16 x i64>, i32 } @llvm.vp.load.ff.v16i64.p0(ptr align 8 undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t24 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t25 = load <vscale x 2 x i8>, ptr undef, align 2
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t26 = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr align 1 undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t27 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t28 = load <vscale x 4 x i8>, ptr undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t29 = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr align 1 undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t30 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t31 = load <vscale x 8 x i8>, ptr undef, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t32 = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr align 1 undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t33 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t34 = load <vscale x 16 x i8>, ptr undef, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t35 = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr align 1 undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t36 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t37 = load <vscale x 2 x i64>, ptr undef, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t38 = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr align 8 undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t39 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t40 = load <vscale x 4 x i64>, ptr undef, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t41 = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr align 8 undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t42 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t43 = load <vscale x 8 x i64>, ptr undef, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t44 = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr align 8 undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t45 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64.p0(ptr undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t46 = load <vscale x 16 x i64>, ptr undef, align 128
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t47 = call { <vscale x 16 x i64>, i32 } @llvm.vp.load.ff.nxv16i64.p0(ptr align 8 undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = load <2 x i8>, ptr %src, align 2
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr align 1 %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = load <4 x i8>, ptr %src, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t5 = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr align 1 %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <8 x i8> @llvm.vp.load.v8i8.p0(ptr %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t7 = load <8 x i8>, ptr %src, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t8 = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr align 1 %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t9 = call <16 x i8> @llvm.vp.load.v16i8.p0(ptr %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t10 = load <16 x i8>, ptr %src, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t11 = call { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr align 1 %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t12 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t13 = load <2 x i64>, ptr %src, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t14 = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr align 8 %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t15 = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t16 = load <4 x i64>, ptr %src, align 32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t17 = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr align 8 %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t18 = call <8 x i64> @llvm.vp.load.v8i64.p0(ptr %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t19 = load <8 x i64>, ptr %src, align 64
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t20 = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr align 8 %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t21 = call <16 x i64> @llvm.vp.load.v16i64.p0(ptr %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t22 = load <16 x i64>, ptr %src, align 128
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t23 = call { <16 x i64>, i32 } @llvm.vp.load.ff.v16i64.p0(ptr align 8 %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t24 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t25 = load <vscale x 2 x i8>, ptr %src, align 2
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t26 = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr align 1 %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t27 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t28 = load <vscale x 4 x i8>, ptr %src, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t29 = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr align 1 %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t30 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t31 = load <vscale x 8 x i8>, ptr %src, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t32 = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr align 1 %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t33 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr %src, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t34 = load <vscale x 16 x i8>, ptr %src, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t35 = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr align 1 %src, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t36 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t37 = load <vscale x 2 x i64>, ptr %src, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t38 = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr align 8 %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t39 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t40 = load <vscale x 4 x i64>, ptr %src, align 32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t41 = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr align 8 %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t42 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t43 = load <vscale x 8 x i64>, ptr %src, align 64
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t44 = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr align 8 %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t45 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64.p0(ptr %src, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t46 = load <vscale x 16 x i64>, ptr %src, align 128
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t47 = call { <vscale x 16 x i64>, i32 } @llvm.vp.load.ff.nxv16i64.p0(ptr align 8 %src, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %t0 = call <2 x i8> @llvm.vp.load.v2i8(ptr undef, <2 x i1> undef, i32 undef)
- %t1 = load <2 x i8>, ptr undef
- %t2 = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr align 1 undef, <2 x i1> undef, i32 undef)
- %t3 = call <4 x i8> @llvm.vp.load.v4i8(ptr undef, <4 x i1> undef, i32 undef)
- %t4 = load <4 x i8>, ptr undef
- %t5 = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr align 1 undef, <4 x i1> undef, i32 undef)
- %t6 = call <8 x i8> @llvm.vp.load.v8i8(ptr undef, <8 x i1> undef, i32 undef)
- %t7 = load <8 x i8>, ptr undef
- %t8 = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr align 1 undef, <8 x i1> undef, i32 undef)
- %t9 = call <16 x i8> @llvm.vp.load.v16i8(ptr undef, <16 x i1> undef, i32 undef)
- %t10 = load <16 x i8>, ptr undef
- %t11 = call { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr align 1 undef, <16 x i1> undef, i32 undef)
- %t12 = call <2 x i64> @llvm.vp.load.v2i64(ptr undef, <2 x i1> undef, i32 undef)
- %t13 = load <2 x i64>, ptr undef
- %t14 = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr align 8 undef, <2 x i1> undef, i32 undef)
- %t15 = call <4 x i64> @llvm.vp.load.v4i64(ptr undef, <4 x i1> undef, i32 undef)
- %t16 = load <4 x i64>, ptr undef
- %t17 = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr align 8 undef, <4 x i1> undef, i32 undef)
- %t18 = call <8 x i64> @llvm.vp.load.v8i64(ptr undef, <8 x i1> undef, i32 undef)
- %t19 = load <8 x i64>, ptr undef
- %t20 = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr align 8 undef, <8 x i1> undef, i32 undef)
- %t21 = call <16 x i64> @llvm.vp.load.v16i64(ptr undef, <16 x i1> undef, i32 undef)
- %t22 = load <16 x i64>, ptr undef
- %t23 = call { <16 x i64>, i32 } @llvm.vp.load.ff.v16i64.p0(ptr align 8 undef, <16 x i1> undef, i32 undef)
- %t24 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8(ptr undef, <vscale x 2 x i1> undef, i32 undef)
- %t25 = load <vscale x 2 x i8>, ptr undef
- %t26 = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr align 1 undef, <vscale x 2 x i1> undef, i32 undef)
- %t27 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8(ptr undef, <vscale x 4 x i1> undef, i32 undef)
- %t28 = load <vscale x 4 x i8>, ptr undef
- %t29 = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr align 1 undef, <vscale x 4 x i1> undef, i32 undef)
- %t30 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8(ptr undef, <vscale x 8 x i1> undef, i32 undef)
- %t31 = load <vscale x 8 x i8>, ptr undef
- %t32 = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr align 1 undef, <vscale x 8 x i1> undef, i32 undef)
- %t33 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8(ptr undef, <vscale x 16 x i1> undef, i32 undef)
- %t34 = load <vscale x 16 x i8>, ptr undef
- %t35 = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr align 1 undef, <vscale x 16 x i1> undef, i32 undef)
- %t36 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64(ptr undef, <vscale x 2 x i1> undef, i32 undef)
- %t37 = load <vscale x 2 x i64>, ptr undef
- %t38 = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr align 8 undef, <vscale x 2 x i1> undef, i32 undef)
- %t39 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64(ptr undef, <vscale x 4 x i1> undef, i32 undef)
- %t40 = load <vscale x 4 x i64>, ptr undef
- %t41 = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr align 8 undef, <vscale x 4 x i1> undef, i32 undef)
- %t42 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64(ptr undef, <vscale x 8 x i1> undef, i32 undef)
- %t43 = load <vscale x 8 x i64>, ptr undef
- %t44 = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr align 8 undef, <vscale x 8 x i1> undef, i32 undef)
- %t45 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64(ptr undef, <vscale x 16 x i1> undef, i32 undef)
- %t46 = load <vscale x 16 x i64>, ptr undef
- %t47 = call { <vscale x 16 x i64>, i32 } @llvm.vp.load.ff.nxv16i64.p0(ptr align 8 undef, <vscale x 16 x i1> undef, i32 undef)
+ %t0 = call <2 x i8> @llvm.vp.load.v2i8(ptr %src, <2 x i1> undef, i32 undef)
+ %t1 = load <2 x i8>, ptr %src
+ %t2 = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr align 1 %src, <2 x i1> undef, i32 undef)
+ %t3 = call <4 x i8> @llvm.vp.load.v4i8(ptr %src, <4 x i1> undef, i32 undef)
+ %t4 = load <4 x i8>, ptr %src
+ %t5 = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr align 1 %src, <4 x i1> undef, i32 undef)
+ %t6 = call <8 x i8> @llvm.vp.load.v8i8(ptr %src, <8 x i1> undef, i32 undef)
+ %t7 = load <8 x i8>, ptr %src
+ %t8 = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr align 1 %src, <8 x i1> undef, i32 undef)
+ %t9 = call <16 x i8> @llvm.vp.load.v16i8(ptr %src, <16 x i1> undef, i32 undef)
+ %t10 = load <16 x i8>, ptr %src
+ %t11 = call { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr align 1 %src, <16 x i1> undef, i32 undef)
+ %t12 = call <2 x i64> @llvm.vp.load.v2i64(ptr %src, <2 x i1> undef, i32 undef)
+ %t13 = load <2 x i64>, ptr %src
+ %t14 = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr align 8 %src, <2 x i1> undef, i32 undef)
+ %t15 = call <4 x i64> @llvm.vp.load.v4i64(ptr %src, <4 x i1> undef, i32 undef)
+ %t16 = load <4 x i64>, ptr %src
+ %t17 = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr align 8 %src, <4 x i1> undef, i32 undef)
+ %t18 = call <8 x i64> @llvm.vp.load.v8i64(ptr %src, <8 x i1> undef, i32 undef)
+ %t19 = load <8 x i64>, ptr %src
+ %t20 = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr align 8 %src, <8 x i1> undef, i32 undef)
+ %t21 = call <16 x i64> @llvm.vp.load.v16i64(ptr %src, <16 x i1> undef, i32 undef)
+ %t22 = load <16 x i64>, ptr %src
+ %t23 = call { <16 x i64>, i32 } @llvm.vp.load.ff.v16i64.p0(ptr align 8 %src, <16 x i1> undef, i32 undef)
+ %t24 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8(ptr %src, <vscale x 2 x i1> undef, i32 undef)
+ %t25 = load <vscale x 2 x i8>, ptr %src
+ %t26 = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr align 1 %src, <vscale x 2 x i1> undef, i32 undef)
+ %t27 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8(ptr %src, <vscale x 4 x i1> undef, i32 undef)
+ %t28 = load <vscale x 4 x i8>, ptr %src
+ %t29 = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr align 1 %src, <vscale x 4 x i1> undef, i32 undef)
+ %t30 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8(ptr %src, <vscale x 8 x i1> undef, i32 undef)
+ %t31 = load <vscale x 8 x i8>, ptr %src
+ %t32 = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr align 1 %src, <vscale x 8 x i1> undef, i32 undef)
+ %t33 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8(ptr %src, <vscale x 16 x i1> undef, i32 undef)
+ %t34 = load <vscale x 16 x i8>, ptr %src
+ %t35 = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr align 1 %src, <vscale x 16 x i1> undef, i32 undef)
+ %t36 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64(ptr %src, <vscale x 2 x i1> undef, i32 undef)
+ %t37 = load <vscale x 2 x i64>, ptr %src
+ %t38 = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr align 8 %src, <vscale x 2 x i1> undef, i32 undef)
+ %t39 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64(ptr %src, <vscale x 4 x i1> undef, i32 undef)
+ %t40 = load <vscale x 4 x i64>, ptr %src
+ %t41 = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr align 8 %src, <vscale x 4 x i1> undef, i32 undef)
+ %t42 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64(ptr %src, <vscale x 8 x i1> undef, i32 undef)
+ %t43 = load <vscale x 8 x i64>, ptr %src
+ %t44 = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr align 8 %src, <vscale x 8 x i1> undef, i32 undef)
+ %t45 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64(ptr %src, <vscale x 16 x i1> undef, i32 undef)
+ %t46 = load <vscale x 16 x i64>, ptr %src
+ %t47 = call { <vscale x 16 x i64>, i32 } @llvm.vp.load.ff.nxv16i64.p0(ptr align 8 %src, <vscale x 16 x i1> undef, i32 undef)
ret void
}
>From 3cdef8fcc129ddd5d083ca35d539a17b31a74485 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Wed, 24 Sep 2025 17:52:22 -0700
Subject: [PATCH 3/7] Remove unneeded assertion
---
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index fd19ae0b88333..17a7d91cf10e6 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1797,7 +1797,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
if (ICA.getID() == Intrinsic::vp_load_ff) {
Type *RetTy = ICA.getReturnType();
- assert(RetTy->isStructTy() && "expected struct return");
Type *DataTy = cast<StructType>(RetTy)->getElementType(0);
Align Alignment;
if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
>From b1cc6fbb098ba1c8e856c1384e48184a3f6f8a87 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Wed, 24 Sep 2025 17:55:20 -0700
Subject: [PATCH 4/7] Rename getFaultOnlyFirstLoadCost to getFaultFirstLoadCost
---
llvm/include/llvm/Analysis/TargetTransformInfo.h | 2 +-
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h | 4 ++--
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 2 +-
llvm/lib/Analysis/TargetTransformInfo.cpp | 7 ++++---
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 6 +++---
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h | 4 ++--
6 files changed, 13 insertions(+), 12 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index cb5674917b830..d8a7bb14348c6 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1617,7 +1617,7 @@ class TargetTransformInfo {
bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
/// \return The cost of vp intrinsic vp.load.ff.
- LLVM_ABI InstructionCost getFaultOnlyFirstLoadCost(
+ LLVM_ABI InstructionCost getFaultFirstLoadCost(
Type *DataTy, Align Alignment,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index c762ea1dafa78..b88470e44fbc6 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -886,8 +886,8 @@ class TargetTransformInfoImplBase {
}
virtual InstructionCost
- getFaultOnlyFirstLoadCost(Type *DataTy, Align Alignment,
- TTI::TargetCostKind CostKind) const {
+ getFaultFirstLoadCost(Type *DataTy, Align Alignment,
+ TTI::TargetCostKind CostKind) const {
return InstructionCost::getInvalid();
}
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 17a7d91cf10e6..5e13206b5a79d 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1801,7 +1801,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
Align Alignment;
if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
Alignment = VPI->getPointerAlignment().valueOrOne();
- return thisT()->getFaultOnlyFirstLoadCost(DataTy, Alignment, CostKind);
+ return thisT()->getFaultFirstLoadCost(DataTy, Alignment, CostKind);
}
if (ICA.getID() == Intrinsic::vp_scatter) {
if (ICA.isTypeBasedOnly()) {
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 5a0a861a04a63..4fbacc61db0b2 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1217,10 +1217,11 @@ InstructionCost TargetTransformInfo::getInterleavedMemoryOpCost(
return Cost;
}
-InstructionCost TargetTransformInfo::getFaultOnlyFirstLoadCost(
- Type *DataTy, Align Alignment, TTI::TargetCostKind CostKind) const {
+InstructionCost
+TargetTransformInfo::getFaultFirstLoadCost(Type *DataTy, Align Alignment,
+ TTI::TargetCostKind CostKind) const {
InstructionCost Cost =
- TTIImpl->getFaultOnlyFirstLoadCost(DataTy, Alignment, CostKind);
+ TTIImpl->getFaultFirstLoadCost(DataTy, Alignment, CostKind);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index f452040fd9563..34fb82cb1bda9 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1070,11 +1070,11 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost(
}
InstructionCost
-RISCVTTIImpl::getFaultOnlyFirstLoadCost(Type *DataTy, Align Alignment,
- TTI::TargetCostKind CostKind) const {
+RISCVTTIImpl::getFaultFirstLoadCost(Type *DataTy, Align Alignment,
+ TTI::TargetCostKind CostKind) const {
EVT DataTypeVT = TLI->getValueType(DL, DataTy);
if (!TLI->isLegalFaultOnlyFirstLoad(DataTypeVT, Alignment))
- return BaseT::getFaultOnlyFirstLoadCost(DataTy, Alignment, CostKind);
+ return BaseT::getFaultFirstLoadCost(DataTy, Alignment, CostKind);
return getMemoryOpCost(Instruction::Load, DataTy, Alignment, 0, CostKind,
{TTI::OK_AnyValue, TTI::OP_None}, nullptr);
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 4059461aaf585..31b93dff8efb6 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -189,8 +189,8 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
bool UseMaskForCond = false, bool UseMaskForGaps = false) const override;
InstructionCost
- getFaultOnlyFirstLoadCost(Type *DataTy, Align Alignment,
- TTI::TargetCostKind CostKind) const override;
+ getFaultFirstLoadCost(Type *DataTy, Align Alignment,
+ TTI::TargetCostKind CostKind) const override;
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
const Value *Ptr, bool VariableMask,
>From 4d0a568eac6a7fdf51a0a33cde8eeeeb722270d7 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Wed, 24 Sep 2025 18:10:09 -0700
Subject: [PATCH 5/7] Remove tests for regular loads
---
.../Analysis/CostModel/RISCV/vp-intrinsics.ll | 156 +++++++-----------
1 file changed, 62 insertions(+), 94 deletions(-)
diff --git a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
index bc3533e9c51c5..ba792d8f0955b 100644
--- a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
@@ -839,103 +839,71 @@ define void @abs() {
define void @load(ptr %src) {
; CHECK-LABEL: 'load'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %src, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = load <2 x i8>, ptr %src, align 2
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr align 1 %src, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr %src, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = load <4 x i8>, ptr %src, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t5 = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr align 1 %src, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <8 x i8> @llvm.vp.load.v8i8.p0(ptr %src, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t7 = load <8 x i8>, ptr %src, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t8 = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr align 1 %src, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t9 = call <16 x i8> @llvm.vp.load.v16i8.p0(ptr %src, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t10 = load <16 x i8>, ptr %src, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t11 = call { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr align 1 %src, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t12 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %src, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t13 = load <2 x i64>, ptr %src, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t14 = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr align 8 %src, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t15 = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr %src, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t16 = load <4 x i64>, ptr %src, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t17 = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr align 8 %src, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t18 = call <8 x i64> @llvm.vp.load.v8i64.p0(ptr %src, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t19 = load <8 x i64>, ptr %src, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t20 = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr align 8 %src, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t21 = call <16 x i64> @llvm.vp.load.v16i64.p0(ptr %src, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t22 = load <16 x i64>, ptr %src, align 128
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t23 = call { <16 x i64>, i32 } @llvm.vp.load.ff.v16i64.p0(ptr align 8 %src, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t24 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr %src, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t25 = load <vscale x 2 x i8>, ptr %src, align 2
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t26 = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr align 1 %src, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t27 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr %src, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t28 = load <vscale x 4 x i8>, ptr %src, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t29 = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr align 1 %src, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t30 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr %src, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t31 = load <vscale x 8 x i8>, ptr %src, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t32 = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr align 1 %src, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t33 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr %src, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t34 = load <vscale x 16 x i8>, ptr %src, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t35 = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr align 1 %src, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t36 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr %src, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t37 = load <vscale x 2 x i64>, ptr %src, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t38 = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr align 8 %src, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t39 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr %src, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t40 = load <vscale x 4 x i64>, ptr %src, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t41 = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr align 8 %src, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t42 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr %src, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t43 = load <vscale x 8 x i64>, ptr %src, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t44 = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr align 8 %src, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t45 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64.p0(ptr %src, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t46 = load <vscale x 16 x i64>, ptr %src, align 128
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t47 = call { <vscale x 16 x i64>, i32 } @llvm.vp.load.ff.nxv16i64.p0(ptr align 8 %src, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr align 1 %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr align 1 %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.load.v8i8.p0(ptr %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t5 = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr align 1 %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.load.v16i8.p0(ptr %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t7 = call { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr align 1 %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t9 = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr align 8 %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t10 = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t11 = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr align 8 %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t12 = call <8 x i64> @llvm.vp.load.v8i64.p0(ptr %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t13 = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr align 8 %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t14 = call <16 x i64> @llvm.vp.load.v16i64.p0(ptr %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t15 = call { <16 x i64>, i32 } @llvm.vp.load.ff.v16i64.p0(ptr align 8 %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t16 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t17 = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr align 1 %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t18 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t19 = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr align 1 %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t20 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t21 = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr align 1 %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t22 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr %src, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t23 = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr align 1 %src, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t24 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t25 = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr align 8 %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t26 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t27 = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr align 8 %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t28 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t29 = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr align 8 %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t30 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64.p0(ptr %src, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t31 = call { <vscale x 16 x i64>, i32 } @llvm.vp.load.ff.nxv16i64.p0(ptr align 8 %src, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%t0 = call <2 x i8> @llvm.vp.load.v2i8(ptr %src, <2 x i1> undef, i32 undef)
- %t1 = load <2 x i8>, ptr %src
- %t2 = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr align 1 %src, <2 x i1> undef, i32 undef)
- %t3 = call <4 x i8> @llvm.vp.load.v4i8(ptr %src, <4 x i1> undef, i32 undef)
- %t4 = load <4 x i8>, ptr %src
- %t5 = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr align 1 %src, <4 x i1> undef, i32 undef)
- %t6 = call <8 x i8> @llvm.vp.load.v8i8(ptr %src, <8 x i1> undef, i32 undef)
- %t7 = load <8 x i8>, ptr %src
- %t8 = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr align 1 %src, <8 x i1> undef, i32 undef)
- %t9 = call <16 x i8> @llvm.vp.load.v16i8(ptr %src, <16 x i1> undef, i32 undef)
- %t10 = load <16 x i8>, ptr %src
- %t11 = call { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr align 1 %src, <16 x i1> undef, i32 undef)
- %t12 = call <2 x i64> @llvm.vp.load.v2i64(ptr %src, <2 x i1> undef, i32 undef)
- %t13 = load <2 x i64>, ptr %src
- %t14 = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr align 8 %src, <2 x i1> undef, i32 undef)
- %t15 = call <4 x i64> @llvm.vp.load.v4i64(ptr %src, <4 x i1> undef, i32 undef)
- %t16 = load <4 x i64>, ptr %src
- %t17 = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr align 8 %src, <4 x i1> undef, i32 undef)
- %t18 = call <8 x i64> @llvm.vp.load.v8i64(ptr %src, <8 x i1> undef, i32 undef)
- %t19 = load <8 x i64>, ptr %src
- %t20 = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr align 8 %src, <8 x i1> undef, i32 undef)
- %t21 = call <16 x i64> @llvm.vp.load.v16i64(ptr %src, <16 x i1> undef, i32 undef)
- %t22 = load <16 x i64>, ptr %src
- %t23 = call { <16 x i64>, i32 } @llvm.vp.load.ff.v16i64.p0(ptr align 8 %src, <16 x i1> undef, i32 undef)
- %t24 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8(ptr %src, <vscale x 2 x i1> undef, i32 undef)
- %t25 = load <vscale x 2 x i8>, ptr %src
- %t26 = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr align 1 %src, <vscale x 2 x i1> undef, i32 undef)
- %t27 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8(ptr %src, <vscale x 4 x i1> undef, i32 undef)
- %t28 = load <vscale x 4 x i8>, ptr %src
- %t29 = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr align 1 %src, <vscale x 4 x i1> undef, i32 undef)
- %t30 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8(ptr %src, <vscale x 8 x i1> undef, i32 undef)
- %t31 = load <vscale x 8 x i8>, ptr %src
- %t32 = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr align 1 %src, <vscale x 8 x i1> undef, i32 undef)
- %t33 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8(ptr %src, <vscale x 16 x i1> undef, i32 undef)
- %t34 = load <vscale x 16 x i8>, ptr %src
- %t35 = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr align 1 %src, <vscale x 16 x i1> undef, i32 undef)
- %t36 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64(ptr %src, <vscale x 2 x i1> undef, i32 undef)
- %t37 = load <vscale x 2 x i64>, ptr %src
- %t38 = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr align 8 %src, <vscale x 2 x i1> undef, i32 undef)
- %t39 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64(ptr %src, <vscale x 4 x i1> undef, i32 undef)
- %t40 = load <vscale x 4 x i64>, ptr %src
- %t41 = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr align 8 %src, <vscale x 4 x i1> undef, i32 undef)
- %t42 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64(ptr %src, <vscale x 8 x i1> undef, i32 undef)
- %t43 = load <vscale x 8 x i64>, ptr %src
- %t44 = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr align 8 %src, <vscale x 8 x i1> undef, i32 undef)
- %t45 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64(ptr %src, <vscale x 16 x i1> undef, i32 undef)
- %t46 = load <vscale x 16 x i64>, ptr %src
- %t47 = call { <vscale x 16 x i64>, i32 } @llvm.vp.load.ff.nxv16i64.p0(ptr align 8 %src, <vscale x 16 x i1> undef, i32 undef)
+ %t1 = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr align 1 %src, <2 x i1> undef, i32 undef)
+ %t2 = call <4 x i8> @llvm.vp.load.v4i8(ptr %src, <4 x i1> undef, i32 undef)
+ %t3 = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr align 1 %src, <4 x i1> undef, i32 undef)
+ %t4 = call <8 x i8> @llvm.vp.load.v8i8(ptr %src, <8 x i1> undef, i32 undef)
+ %t5 = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr align 1 %src, <8 x i1> undef, i32 undef)
+ %t6 = call <16 x i8> @llvm.vp.load.v16i8(ptr %src, <16 x i1> undef, i32 undef)
+ %t7 = call { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr align 1 %src, <16 x i1> undef, i32 undef)
+ %t8 = call <2 x i64> @llvm.vp.load.v2i64(ptr %src, <2 x i1> undef, i32 undef)
+ %t9 = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr align 8 %src, <2 x i1> undef, i32 undef)
+ %t10 = call <4 x i64> @llvm.vp.load.v4i64(ptr %src, <4 x i1> undef, i32 undef)
+ %t11 = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr align 8 %src, <4 x i1> undef, i32 undef)
+ %t12 = call <8 x i64> @llvm.vp.load.v8i64(ptr %src, <8 x i1> undef, i32 undef)
+ %t13 = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr align 8 %src, <8 x i1> undef, i32 undef)
+ %t14 = call <16 x i64> @llvm.vp.load.v16i64(ptr %src, <16 x i1> undef, i32 undef)
+ %t15 = call { <16 x i64>, i32 } @llvm.vp.load.ff.v16i64.p0(ptr align 8 %src, <16 x i1> undef, i32 undef)
+ %t16 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8(ptr %src, <vscale x 2 x i1> undef, i32 undef)
+ %t17 = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr align 1 %src, <vscale x 2 x i1> undef, i32 undef)
+ %t18 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8(ptr %src, <vscale x 4 x i1> undef, i32 undef)
+ %t19 = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr align 1 %src, <vscale x 4 x i1> undef, i32 undef)
+ %t20 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8(ptr %src, <vscale x 8 x i1> undef, i32 undef)
+ %t21 = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr align 1 %src, <vscale x 8 x i1> undef, i32 undef)
+ %t22 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8(ptr %src, <vscale x 16 x i1> undef, i32 undef)
+ %t23 = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr align 1 %src, <vscale x 16 x i1> undef, i32 undef)
+ %t24 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64(ptr %src, <vscale x 2 x i1> undef, i32 undef)
+ %t25 = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr align 8 %src, <vscale x 2 x i1> undef, i32 undef)
+ %t26 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64(ptr %src, <vscale x 4 x i1> undef, i32 undef)
+ %t27 = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr align 8 %src, <vscale x 4 x i1> undef, i32 undef)
+ %t28 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64(ptr %src, <vscale x 8 x i1> undef, i32 undef)
+ %t29 = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr align 8 %src, <vscale x 8 x i1> undef, i32 undef)
+ %t30 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64(ptr %src, <vscale x 16 x i1> undef, i32 undef)
+ %t31 = call { <vscale x 16 x i64>, i32 } @llvm.vp.load.ff.nxv16i64.p0(ptr align 8 %src, <vscale x 16 x i1> undef, i32 undef)
ret void
}
>From 79e0c3d1d93eff7dea1cf02895fd0d16757964ed Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Wed, 24 Sep 2025 20:16:35 -0700
Subject: [PATCH 6/7] Rename isLegalFaultOnlyFirstLoad to isLegalFaultFirstLoad
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 ++--
llvm/lib/Target/RISCV/RISCVISelLowering.h | 2 +-
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 2 +-
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6c6340e65306b..0828e47a0a9c5 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -24788,8 +24788,8 @@ bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
return true;
}
-bool RISCVTargetLowering::isLegalFaultOnlyFirstLoad(EVT DataType,
- Align Alignment) const {
+bool RISCVTargetLowering::isLegalFaultFirstLoad(EVT DataType,
+ Align Alignment) const {
if (!Subtarget.hasVInstructions())
return false;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 1ddc06602a25e..34f63df6795c6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -427,7 +427,7 @@ class RISCVTargetLowering : public TargetLowering {
/// Return true if a fault-only-first load of the given result type and
/// alignment is legal.
- bool isLegalFaultOnlyFirstLoad(EVT DataType, Align Alignment) const;
+ bool isLegalFaultFirstLoad(EVT DataType, Align Alignment) const;
unsigned getMaxSupportedInterleaveFactor() const override { return 8; }
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 34fb82cb1bda9..325d7e64eab8d 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1073,7 +1073,7 @@ InstructionCost
RISCVTTIImpl::getFaultFirstLoadCost(Type *DataTy, Align Alignment,
TTI::TargetCostKind CostKind) const {
EVT DataTypeVT = TLI->getValueType(DL, DataTy);
- if (!TLI->isLegalFaultOnlyFirstLoad(DataTypeVT, Alignment))
+ if (!TLI->isLegalFaultFirstLoad(DataTypeVT, Alignment))
return BaseT::getFaultFirstLoadCost(DataTy, Alignment, CostKind);
return getMemoryOpCost(Instruction::Load, DataTy, Alignment, 0, CostKind,
>From 75b4f568154458eb8d2b1ef4fccaefa62f61d5ae Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Thu, 25 Sep 2025 00:43:00 -0700
Subject: [PATCH 7/7] Rename FaultFirstLoad to FirstFaultLoad
---
llvm/include/llvm/Analysis/TargetTransformInfo.h | 2 +-
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h | 2 +-
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 2 +-
llvm/lib/Analysis/TargetTransformInfo.cpp | 4 ++--
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +-
llvm/lib/Target/RISCV/RISCVISelLowering.h | 2 +-
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 6 +++---
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h | 2 +-
8 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index d8a7bb14348c6..5affb9310d7f6 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1617,7 +1617,7 @@ class TargetTransformInfo {
bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
/// \return The cost of vp intrinsic vp.load.ff.
- LLVM_ABI InstructionCost getFaultFirstLoadCost(
+ LLVM_ABI InstructionCost getFirstFaultLoadCost(
Type *DataTy, Align Alignment,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index b88470e44fbc6..bb299becfdcba 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -886,7 +886,7 @@ class TargetTransformInfoImplBase {
}
virtual InstructionCost
- getFaultFirstLoadCost(Type *DataTy, Align Alignment,
+ getFirstFaultLoadCost(Type *DataTy, Align Alignment,
TTI::TargetCostKind CostKind) const {
return InstructionCost::getInvalid();
}
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 5e13206b5a79d..34a61b03c6e38 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1801,7 +1801,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
Align Alignment;
if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
Alignment = VPI->getPointerAlignment().valueOrOne();
- return thisT()->getFaultFirstLoadCost(DataTy, Alignment, CostKind);
+ return thisT()->getFirstFaultLoadCost(DataTy, Alignment, CostKind);
}
if (ICA.getID() == Intrinsic::vp_scatter) {
if (ICA.isTypeBasedOnly()) {
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 4fbacc61db0b2..c356075a08642 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1218,10 +1218,10 @@ InstructionCost TargetTransformInfo::getInterleavedMemoryOpCost(
}
InstructionCost
-TargetTransformInfo::getFaultFirstLoadCost(Type *DataTy, Align Alignment,
+TargetTransformInfo::getFirstFaultLoadCost(Type *DataTy, Align Alignment,
TTI::TargetCostKind CostKind) const {
InstructionCost Cost =
- TTIImpl->getFaultFirstLoadCost(DataTy, Alignment, CostKind);
+ TTIImpl->getFirstFaultLoadCost(DataTy, Alignment, CostKind);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 0828e47a0a9c5..457b6009428f6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -24788,7 +24788,7 @@ bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
return true;
}
-bool RISCVTargetLowering::isLegalFaultFirstLoad(EVT DataType,
+bool RISCVTargetLowering::isLegalFirstFaultLoad(EVT DataType,
Align Alignment) const {
if (!Subtarget.hasVInstructions())
return false;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 34f63df6795c6..f9ed914ee84ff 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -427,7 +427,7 @@ class RISCVTargetLowering : public TargetLowering {
/// Return true if a fault-only-first load of the given result type and
/// alignment is legal.
- bool isLegalFaultFirstLoad(EVT DataType, Align Alignment) const;
+ bool isLegalFirstFaultLoad(EVT DataType, Align Alignment) const;
unsigned getMaxSupportedInterleaveFactor() const override { return 8; }
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 325d7e64eab8d..c1068ae133a15 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1070,11 +1070,11 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost(
}
InstructionCost
-RISCVTTIImpl::getFaultFirstLoadCost(Type *DataTy, Align Alignment,
+RISCVTTIImpl::getFirstFaultLoadCost(Type *DataTy, Align Alignment,
TTI::TargetCostKind CostKind) const {
EVT DataTypeVT = TLI->getValueType(DL, DataTy);
- if (!TLI->isLegalFaultFirstLoad(DataTypeVT, Alignment))
- return BaseT::getFaultFirstLoadCost(DataTy, Alignment, CostKind);
+ if (!TLI->isLegalFirstFaultLoad(DataTypeVT, Alignment))
+ return BaseT::getFirstFaultLoadCost(DataTy, Alignment, CostKind);
return getMemoryOpCost(Instruction::Load, DataTy, Alignment, 0, CostKind,
{TTI::OK_AnyValue, TTI::OP_None}, nullptr);
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 31b93dff8efb6..377249dfa2954 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -189,7 +189,7 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
bool UseMaskForCond = false, bool UseMaskForGaps = false) const override;
InstructionCost
- getFaultFirstLoadCost(Type *DataTy, Align Alignment,
+ getFirstFaultLoadCost(Type *DataTy, Align Alignment,
TTI::TargetCostKind CostKind) const override;
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
More information about the llvm-commits mailing list