[llvm] [TTI][RISCV] Add cost modelling for intrinsic vp.load.ff (PR #169890)
Shih-Po Hung via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 28 01:56:57 PST 2025
https://github.com/arcbbb created https://github.com/llvm/llvm-project/pull/169890
This patch is a rework of #160470 (which was reverted).
With getMemIntrinsicInstrCost() now available, we can re-land the change and reduce vp_load_ff boilerplate.
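For reference, llvm.vp.load.ff is the fault-only-first variant of llvm.vp.load: it returns the loaded vector together with an i32 count of the lanes actually read. A minimal sketch of the kind of call being costed here (the types, value names, and opt invocation below are illustrative, not taken from this patch):

  ; costs can be queried with something like:
  ;   opt -passes='print<cost-model>' -mtriple=riscv64 -mattr=+v -disable-output
  %r = call { <vscale x 4 x i32>, i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr align 4 %p, <vscale x 4 x i1> %m, i32 %evl)
  %vec = extractvalue { <vscale x 4 x i32>, i32 } %r, 0
  %n = extractvalue { <vscale x 4 x i32>, i32 } %r, 1

On RISC-V this lowers to a fault-only-first load (vle32ff.v for this element width), so when the element type and alignment are legal for RVV the cost is modelled as an ordinary vector load of the struct's vector element; otherwise the query falls back to the base implementation, which returns an invalid cost.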
From eef4161ce6d8f398b9d1b6a6b801b82766ecc693 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Fri, 28 Nov 2025 01:40:26 -0800
Subject: [PATCH] [TTI][RISCV] Add cost modelling for intrinsic vp.load.ff
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This patch is a rework of #160470 (which was reverted).
With getMemIntrinsicInstrCost() now available, we can re-land the change
and reduce vp_load_ff boilerplate.
---
.../llvm/Analysis/TargetTransformInfo.h | 3 +-
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 13 +-
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 16 +++
llvm/lib/Target/RISCV/RISCVISelLowering.h | 4 +
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 18 +++
.../Target/RISCV/RISCVTargetTransformInfo.h | 4 +
.../Analysis/CostModel/RISCV/vp-intrinsics.ll | 130 +++++++++---------
7 files changed, 121 insertions(+), 67 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index e24e22da5681b..99525607f744a 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -157,7 +157,8 @@ class MemIntrinsicCostAttributes {
         Alignment(Alignment) {}
 
   LLVM_ABI MemIntrinsicCostAttributes(Intrinsic::ID Id, Type *DataTy,
-                                      Align Alignment, unsigned AddressSpace)
+                                      Align Alignment,
+                                      unsigned AddressSpace = 0)
       : DataTy(DataTy), IID(Id), AddressSpace(AddressSpace),
         Alignment(Alignment) {}
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index b1beb68feca46..4d9cfea5d1bab 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1817,7 +1817,16 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
         }
       }
     }
-
+    if (ICA.getID() == Intrinsic::vp_load_ff) {
+      Type *RetTy = ICA.getReturnType();
+      Type *DataTy = cast<StructType>(RetTy)->getElementType(0);
+      Align Alignment;
+      if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
+        Alignment = VPI->getPointerAlignment().valueOrOne();
+      return thisT()->getMemIntrinsicInstrCost(
+          MemIntrinsicCostAttributes(ICA.getID(), DataTy, Alignment),
+          CostKind);
+    }
     if (ICA.getID() == Intrinsic::vp_scatter) {
       if (ICA.isTypeBasedOnly()) {
         IntrinsicCostAttributes MaskedScatter(
@@ -3076,6 +3085,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     case Intrinsic::masked_compressstore:
     case Intrinsic::masked_expandload:
       return thisT()->getExpandCompressMemoryOpCost(MICA, CostKind);
+    case Intrinsic::vp_load_ff:
+      return InstructionCost::getInvalid();
     default:
       llvm_unreachable("unexpected intrinsic");
     }
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index be53f51afe79f..4550e40166525 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -25364,6 +25364,22 @@ bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
   return true;
 }
 
+bool RISCVTargetLowering::isLegalFirstFaultLoad(EVT DataType,
+                                                Align Alignment) const {
+  if (!Subtarget.hasVInstructions())
+    return false;
+
+  EVT ScalarType = DataType.getScalarType();
+  if (!isLegalElementTypeForRVV(ScalarType))
+    return false;
+
+  if (!Subtarget.enableUnalignedVectorMem() &&
+      Alignment < ScalarType.getStoreSize())
+    return false;
+
+  return true;
+}
+
 MachineInstr *
 RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
                                    MachineBasicBlock::instr_iterator &MBBI,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 9b46936f195e6..69fcada6494a2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -429,6 +429,10 @@ class RISCVTargetLowering : public TargetLowering {
   /// alignment is legal.
   bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const;
 
+  /// Return true if a fault-only-first load of the given result type and
+  /// alignment is legal.
+  bool isLegalFirstFaultLoad(EVT DataType, Align Alignment) const;
+
   unsigned getMaxSupportedInterleaveFactor() const override { return 8; }
 
   bool fallBackToDAGISel(const Instruction &Inst) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 4788a428d7e64..1cc7efb0ba135 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1007,6 +1007,24 @@ InstructionCost RISCVTTIImpl::getScalarizationOverhead(
   return Cost;
 }
 
+InstructionCost
+RISCVTTIImpl::getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
+                                       TTI::TargetCostKind CostKind) const {
+  Type *DataTy = MICA.getDataType();
+  Align Alignment = MICA.getAlignment();
+  switch (MICA.getID()) {
+  case Intrinsic::vp_load_ff: {
+    EVT DataTypeVT = TLI->getValueType(DL, DataTy);
+    if (!TLI->isLegalFirstFaultLoad(DataTypeVT, Alignment))
+      return BaseT::getMemIntrinsicInstrCost(MICA, CostKind);
+
+    return getMemoryOpCost(Instruction::Load, DataTy, Alignment, 0, CostKind,
+                           {TTI::OK_AnyValue, TTI::OP_None}, nullptr);
+  }
+  }
+  return BaseT::getMemIntrinsicInstrCost(MICA, CostKind);
+}
+
 InstructionCost
 RISCVTTIImpl::getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
                                     TTI::TargetCostKind CostKind) const {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 5efa330b3ad71..2a73fe6255382 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -143,6 +143,10 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
   bool shouldConsiderVectorizationRegPressure() const override { return true; }
 
+  InstructionCost
+  getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
+                           TTI::TargetCostKind CostKind) const override;
+
   InstructionCost
   getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
                         TTI::TargetCostKind CostKind) const override;
 
diff --git a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
index 71746caf35f2e..ba792d8f0955b 100644
--- a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
@@ -836,74 +836,74 @@ define void @abs() {
ret void
}
-define void @load() {
+define void @load(ptr %src) {
; CHECK-LABEL: 'load'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = load <2 x i8>, ptr undef, align 2
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = load <4 x i8>, ptr undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.load.v8i8.p0(ptr undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t5 = load <8 x i8>, ptr undef, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.load.v16i8.p0(ptr undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t7 = load <16 x i8>, ptr undef, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t9 = load <2 x i64>, ptr undef, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t10 = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t12 = load <4 x i64>, ptr undef, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t13 = call <8 x i64> @llvm.vp.load.v8i64.p0(ptr undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t14 = load <8 x i64>, ptr undef, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t15 = call <16 x i64> @llvm.vp.load.v16i64.p0(ptr undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t16 = load <16 x i64>, ptr undef, align 128
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t17 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t18 = load <vscale x 2 x i8>, ptr undef, align 2
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t19 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t20 = load <vscale x 4 x i8>, ptr undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t21 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t22 = load <vscale x 8 x i8>, ptr undef, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t23 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t24 = load <vscale x 16 x i8>, ptr undef, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t25 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t26 = load <vscale x 2 x i64>, ptr undef, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t27 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t28 = load <vscale x 4 x i64>, ptr undef, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t29 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t30 = load <vscale x 8 x i64>, ptr undef, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t31 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64.p0(ptr undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t32 = load <vscale x 16 x i64>, ptr undef, align 128
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr align 1 %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr align 1 %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.load.v8i8.p0(ptr %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t5 = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr align 1 %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.load.v16i8.p0(ptr %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t7 = call { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr align 1 %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t9 = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr align 8 %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t10 = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t11 = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr align 8 %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t12 = call <8 x i64> @llvm.vp.load.v8i64.p0(ptr %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t13 = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr align 8 %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t14 = call <16 x i64> @llvm.vp.load.v16i64.p0(ptr %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t15 = call { <16 x i64>, i32 } @llvm.vp.load.ff.v16i64.p0(ptr align 8 %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t16 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t17 = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr align 1 %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t18 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t19 = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr align 1 %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t20 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t21 = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr align 1 %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t22 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr %src, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t23 = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr align 1 %src, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t24 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t25 = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr align 8 %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t26 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t27 = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr align 8 %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t28 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t29 = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr align 8 %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t30 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64.p0(ptr %src, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t31 = call { <vscale x 16 x i64>, i32 } @llvm.vp.load.ff.nxv16i64.p0(ptr align 8 %src, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %t0 = call <2 x i8> @llvm.vp.load.v2i8(ptr undef, <2 x i1> undef, i32 undef)
- %t1 = load <2 x i8>, ptr undef
- %t2 = call <4 x i8> @llvm.vp.load.v4i8(ptr undef, <4 x i1> undef, i32 undef)
- %t3 = load <4 x i8>, ptr undef
- %t4 = call <8 x i8> @llvm.vp.load.v8i8(ptr undef, <8 x i1> undef, i32 undef)
- %t5 = load <8 x i8>, ptr undef
- %t6 = call <16 x i8> @llvm.vp.load.v16i8(ptr undef, <16 x i1> undef, i32 undef)
- %t7 = load <16 x i8>, ptr undef
- %t8 = call <2 x i64> @llvm.vp.load.v2i64(ptr undef, <2 x i1> undef, i32 undef)
- %t9 = load <2 x i64>, ptr undef
- %t10 = call <4 x i64> @llvm.vp.load.v4i64(ptr undef, <4 x i1> undef, i32 undef)
- %t12 = load <4 x i64>, ptr undef
- %t13 = call <8 x i64> @llvm.vp.load.v8i64(ptr undef, <8 x i1> undef, i32 undef)
- %t14 = load <8 x i64>, ptr undef
- %t15 = call <16 x i64> @llvm.vp.load.v16i64(ptr undef, <16 x i1> undef, i32 undef)
- %t16 = load <16 x i64>, ptr undef
- %t17 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8(ptr undef, <vscale x 2 x i1> undef, i32 undef)
- %t18 = load <vscale x 2 x i8>, ptr undef
- %t19 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8(ptr undef, <vscale x 4 x i1> undef, i32 undef)
- %t20 = load <vscale x 4 x i8>, ptr undef
- %t21 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8(ptr undef, <vscale x 8 x i1> undef, i32 undef)
- %t22 = load <vscale x 8 x i8>, ptr undef
- %t23 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8(ptr undef, <vscale x 16 x i1> undef, i32 undef)
- %t24 = load <vscale x 16 x i8>, ptr undef
- %t25 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64(ptr undef, <vscale x 2 x i1> undef, i32 undef)
- %t26 = load <vscale x 2 x i64>, ptr undef
- %t27 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64(ptr undef, <vscale x 4 x i1> undef, i32 undef)
- %t28 = load <vscale x 4 x i64>, ptr undef
- %t29 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64(ptr undef, <vscale x 8 x i1> undef, i32 undef)
- %t30 = load <vscale x 8 x i64>, ptr undef
- %t31 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64(ptr undef, <vscale x 16 x i1> undef, i32 undef)
- %t32 = load <vscale x 16 x i64>, ptr undef
+ %t0 = call <2 x i8> @llvm.vp.load.v2i8(ptr %src, <2 x i1> undef, i32 undef)
+ %t1 = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr align 1 %src, <2 x i1> undef, i32 undef)
+ %t2 = call <4 x i8> @llvm.vp.load.v4i8(ptr %src, <4 x i1> undef, i32 undef)
+ %t3 = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr align 1 %src, <4 x i1> undef, i32 undef)
+ %t4 = call <8 x i8> @llvm.vp.load.v8i8(ptr %src, <8 x i1> undef, i32 undef)
+ %t5 = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr align 1 %src, <8 x i1> undef, i32 undef)
+ %t6 = call <16 x i8> @llvm.vp.load.v16i8(ptr %src, <16 x i1> undef, i32 undef)
+ %t7 = call { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr align 1 %src, <16 x i1> undef, i32 undef)
+ %t8 = call <2 x i64> @llvm.vp.load.v2i64(ptr %src, <2 x i1> undef, i32 undef)
+ %t9 = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr align 8 %src, <2 x i1> undef, i32 undef)
+ %t10 = call <4 x i64> @llvm.vp.load.v4i64(ptr %src, <4 x i1> undef, i32 undef)
+ %t11 = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr align 8 %src, <4 x i1> undef, i32 undef)
+ %t12 = call <8 x i64> @llvm.vp.load.v8i64(ptr %src, <8 x i1> undef, i32 undef)
+ %t13 = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr align 8 %src, <8 x i1> undef, i32 undef)
+ %t14 = call <16 x i64> @llvm.vp.load.v16i64(ptr %src, <16 x i1> undef, i32 undef)
+ %t15 = call { <16 x i64>, i32 } @llvm.vp.load.ff.v16i64.p0(ptr align 8 %src, <16 x i1> undef, i32 undef)
+ %t16 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8(ptr %src, <vscale x 2 x i1> undef, i32 undef)
+ %t17 = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr align 1 %src, <vscale x 2 x i1> undef, i32 undef)
+ %t18 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8(ptr %src, <vscale x 4 x i1> undef, i32 undef)
+ %t19 = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr align 1 %src, <vscale x 4 x i1> undef, i32 undef)
+ %t20 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8(ptr %src, <vscale x 8 x i1> undef, i32 undef)
+ %t21 = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr align 1 %src, <vscale x 8 x i1> undef, i32 undef)
+ %t22 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8(ptr %src, <vscale x 16 x i1> undef, i32 undef)
+ %t23 = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr align 1 %src, <vscale x 16 x i1> undef, i32 undef)
+ %t24 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64(ptr %src, <vscale x 2 x i1> undef, i32 undef)
+ %t25 = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr align 8 %src, <vscale x 2 x i1> undef, i32 undef)
+ %t26 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64(ptr %src, <vscale x 4 x i1> undef, i32 undef)
+ %t27 = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr align 8 %src, <vscale x 4 x i1> undef, i32 undef)
+ %t28 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64(ptr %src, <vscale x 8 x i1> undef, i32 undef)
+ %t29 = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr align 8 %src, <vscale x 8 x i1> undef, i32 undef)
+ %t30 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64(ptr %src, <vscale x 16 x i1> undef, i32 undef)
+ %t31 = call { <vscale x 16 x i64>, i32 } @llvm.vp.load.ff.nxv16i64.p0(ptr align 8 %src, <vscale x 16 x i1> undef, i32 undef)
ret void
}